import requests
import json
import pandas as pd
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import ipycytoscape
import networkx as nx
import numpy as np
import yaml
# plt.switch_backend('module://ipykernel.pylab.backend_inline')
from IPython.display import display
def TCT_help(func):
    '''Print the docstring of a TCT function.'''
    print(func.__doc__)
# list all functions in TCT
def list_functions():
    '''Return the names of all functions defined in this module.'''
    import inspect
    import sys
    functions = []
    for name, obj in inspect.getmembers(sys.modules[__name__]):
        if inspect.isfunction(obj):
            functions.append(name)
    return functions
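# A minimal usage sketch for the helpers above (assumes this module is importable
# as TCT, as in the docstring examples elsewhere in this file):
#   >>> import TCT
#   >>> TCT.TCT_help(TCT.get_Translator_APIs)   # print a function's docstring
#   >>> TCT.list_functions()                    # names of all functions in this module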
# used. Jan 5, 2024
def get_Translator_APIs():
    '''
    Get the list of Translator API titles registered in smart-api.info.
    Returns the list of API names.
    Examples
    --------
    >>> Translator_APIs = TCT.get_Translator_APIs()
    '''
Translator_APIs = []
#Translator_apps_url = "https://smart-api.info/api/query?q=tags.name:translator&fields=info,_meta,tags&meta=1&size=500"
Translator_apps_url = "https://dev.smart-api.info/api/query?q=tags.name:translator&fields=info,_meta,tags&meta=1&size=500"
Translator_apps = requests.get(Translator_apps_url).json()['hits']
for app in Translator_apps:
Translator_APIs.append(app['info']['title'])
return Translator_APIs
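# Usage sketch for get_Translator_APIs (requires network access to smart-api.info;
# the returned titles change as the registry is updated):
#   >>> titles = get_Translator_APIs()
#   >>> print(len(titles), titles[:3])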
# used May 30, 2025
# Root URL for the Translator API registry on smart-api.info.
URL = 'https://smart-api.info/api/query?q=tags.name:translator'
def get_SmartAPI_Translator_KP_info():
"""
Get the SmartAPI Translator KP info from the smart-api.info API.
Returns a DataFrame with the SmartAPI Translator KP info.
Examples
--------
>>> Translator_KP_info,APInames = get_SmartAPI_Translator_KP_info('AML')
"""
# several APIs should be excluded:
#https://smart-api.info/ui/ac9c2ad11c5c442a1a1271223468ced1
# Get x-bte smartapi specs
url = "https://smart-api.info/api/query?q=tags.name:translator AND tags.name:trapi&size=1000&sort=_seq_no&raw=1&fields=paths,servers,tags,components.x-bte*,info,_meta"
response = requests.get(url)
    try:
        response.raise_for_status()
    except Exception as err:
        raise RuntimeError(f"error downloading smartapi specs: {response.status_code}") from err
content = json.loads(response.content)
smartapis = content["hits"]
id_list = []
title_list = []
prod_url_list = []
ci_url_list = []
test_url_list = []
for api in smartapis:
ci_found = False
test_found = False
prod_found = False
        for server in api['servers']:
            if 'x-maturity' not in server:
                print(f"Skipping server without x-maturity: {server}")
                continue
            if server['x-maturity'] == 'production':
                # the ARS production endpoint uses /ars/api/submit/ instead of /query/
                if server['url'] == 'https://ars-prod.transltr.io':
                    prod_url = server['url'] + '/ars/api/submit/'
                elif server['url'].endswith('/'):
                    prod_url = server['url'] + 'query/'
                else:
                    prod_url = server['url'] + '/query/'
                prod_found = True
            if server['x-maturity'] in ('staging', 'development'):
                # the ARS CI endpoint uses /ars/api/submit/ instead of /query/
                if server['url'] == 'https://ars.ci.transltr.io':
                    ci_url = server['url'] + '/ars/api/submit/'
                elif server['url'].endswith('/'):
                    ci_url = server['url'] + 'query/'
                else:
                    ci_url = server['url'] + '/query/'
                ci_found = True
            if server['x-maturity'] == 'testing':
                # the ARS test endpoint uses /ars/api/submit/ instead of /query/
                if server['url'] == 'https://ars.test.transltr.io':
                    test_url = server['url'] + '/ars/api/submit/'
                elif server['url'].endswith('/'):
                    test_url = server['url'] + 'query/'
                else:
                    test_url = server['url'] + '/query/'
                test_found = True
        if not (prod_found or ci_found or test_found):
            print(api['info']['title'])
            print("Skipping API without production, staging or testing servers")
        else:
            id_list.append('https://smart-api.info/ui/' + api['_id'])
            title_list.append(api['info']['title'])
            if prod_found:
                prod_url_list.append(prod_url)
            else:
                prod_url_list.append(None)
            if ci_found:
                ci_url_list.append(ci_url)
            else:
                ci_url_list.append(None)
            if test_found:
                test_url_list.append(test_url)
            else:
                test_url_list.append(None)
# write all the smartapis to a dataframe
smartapi_df = pd.DataFrame({
'id': id_list,
'title': title_list,
'prod_url': prod_url_list,
'ci_url': ci_url_list,
'test_url': test_url_list,
})
#smartapi_df = smartapi_df.set_index('id')
# remove the excluded APIs from the dataframe
#excluded_APIs = ['https://smart-api.info/ui/ac9c2ad11c5c442a1a1271223468ced1',#RaMP]
#smartapi_df = smartapi_df[~smartapi_df['id'].isin(excluded_APIs)]
API_names = {}
for i in range(len(smartapi_df)):
if prod_url_list[i] is not None:
#API_names[smartapi_df['title'][i]] = smartapi_df['prod_url'][i] + 'query/'
API_names[smartapi_df['title'].values[i]] = prod_url_list[i]
else:
API_names[smartapi_df['title'].values[i]] = ci_url_list[i]
return smartapi_df, API_names
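# Usage sketch for get_SmartAPI_Translator_KP_info. smartapi_df has columns
# id/title/prod_url/ci_url/test_url; API_names maps each title to its preferred
# query URL (production when available, otherwise CI/staging):
#   >>> smartapi_df, API_names = get_SmartAPI_Translator_KP_info()
#   >>> smartapi_df.head()
#   >>> API_names.get('MolePro')   # illustrative key; availability may vary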
# used Dec 5, 2023 (Example_query_one_hop_with_category.ipynb)
def list_Translator_APIs():
    '''Return a hard-coded dictionary mapping Translator API names to their query URLs.'''
    APInames = {
'Sri-name-resolver':'https://name-lookup.ci.transltr.io/query/', #https://smart-api.info/ui/9995fed757acd034ef099dbb483c4c82
#'Monarch API':'https://api-v3.monarchinitiative.org/query/' #https://smart-api.info/ui/d22b657426375a5295e7da8a303b9893
#Complex Portal Web Service : #https://smart-api.info/ui/326eb1e437303bee27d3cef29227125d
'Sri-answer-appraiser(Trapi v1.5.0)':'https://answerappraiser.renci.org/get_appraisal/', #https://smart-api.info/ui/6dcc5454fe4e0095090d8a956781c438
#LitVar API : dca415f2d792976af9d642b7e73f7a41
#CTD API : 0212611d1c670f9107baf00b77f0889a
#EBI Proteins API : 43af91b3d7cae43591083bff9d75c6dd
#Ontology Lookup Service API : 1c056ffc7ed0dd1229e71c4752239465
'Cqs(Trapi v1.5.0)':'https://cqs-dev.apps.renci.org/query/', #https://smart-api.info/ui/c359a127dc8824d90cef436d3dce71d4
'Workflow-runner(Trapi v1.5.0)':'https://translator-workflow-runner.renci.org/query/', #https://smart-api.info/ui/6a3507ad6f709844d1b2b89691898a93
'Automat-monarchinitiative(Trapi v1.5.0)':'https://automat.ci.transltr.io/monarch-kg/query/',#https://smart-api.info/ui/6b88f83127513bd350e6962218ea84f4
#QuickGO API : 1f277e1563fcfd124bfae2cc3c4bcdec
#RaMP API v1.0.1 : ac9c2ad11c5c442a1a1271223468ced1 # need to check carefully.
'Connections Hypothesis Provider API':'https://chp-api.transltr.io/query/', #https://smart-api.info/ui/412af63e15b73e5a30778aac84ce313f
'Automat-genome-alliance(Trapi v1.5.0)' :'https://automat.ci.transltr.io/genome-alliance/query/', #https://smart-api.info/ui/b4c868db33b95b4890faeeefd5800552
'mediKanren' : 'https://medikanren-trapi.transltr.io/query/', #https://smart-api.info/ui/c563a58be4aacb68d10ba0ceb6b52255
'Automat-hgnc(Trapi v1.5.0)':'https://automat.transltr.io/hgnc/query/', #'https://smart-api.info/ui/8671309d2b94e413a4c1f9a9f82e4660'
'Automat-hmdb(Trapi v1.5.0)':'https://automat.transltr.io/hmdb/query/' ,# 0a1c0f46f4950b82b1aa7dad27aad10a
'Automat-gwas-catalog(Trapi v1.5.0)' :'https://automat.transltr.io/gwas-catalog/query/', #349fed5531c094c33f10c071efe9d0de
'Automat-gtopdb(Trapi v1.5.0)': 'https://automat.transltr.io/gtopdb/query/',# 759df287a21c30cd514df323be02a84b
'Autonomous Relay System (ARS) TRAPI' : 'https://ars-prod.transltr.io/ars/api/submit/', #4c12efd48ced755ac4b72b1922202ec2
'Automat-robokop(Trapi v1.5.0)' : 'https://automat.transltr.io/robokopkg/query/',# 4f9c8853b721ef1f14ecee6d92fc19b5
'Automat-binding-db(Trapi v1.5.0)': 'https://automat.transltr.io/binding-db/query/', #a9d6ee341d8ea4c7d3ae9ed0941cb274
'Automat-ehr-may-treat-kp(Trapi v1.5.0)' : 'https://automat.renci.org/ehr-may-treat-kp/query/',#eb4e66886fe5c178ae41977cea2c6307
#Automat-gtex(Trapi v1.5.0) : eef72049e4e01c020b7799f711e0e65b,
#Automat-pharos(Trapi v1.5.0) : 1f057c53d42694686369f0e542f965c6
#Automat-reactome(Trapi v1.5.0) : 61b41c5d9b90eb8ad16e037f9a87d593
#Sri-node-normalizer(Trapi v1.5.0) : 1c2eb8d02b4796c6a657c3363c0657dc
#Automat-human-goa(Trapi v1.5.0) : cb7a43d444cb3dcbe8e3c78d314334cf
#Automat-cam-kp(Trapi v1.5.0) : 7ab0209ea8590341d8e5d0166cac3d2f
#Automat-viral-proteome(Trapi v1.5.0) : 2aca41fc6c3dc426ec6583d42603be02
#Aragorn(Trapi v1.5.0) : 1dad992a6ce8f680e59a5ea09d90670d
#Automat-drug-central(Trapi v1.5.0) : 673b9fc76973dfa5fe3ed151fdbfc807
#Automat-ubergraph(Trapi v1.5.0) : dde0552a37fc136526216148ff7594a0
#Automat-string-db(Trapi v1.5.0) : 7984a621a28c109c5c09f65fed0e7ea7
#Automat-hetionet(Trapi v1.5.0) : a5fe24f987331b58191e67598118f369
#Automat-ctd(Trapi v1.5.0) : f82c01b15c46e024212c1a3271aaef0b
#Automat-intact(Trapi v1.5.0) : b4023595664163e0aec5e825da150e16
#Automat-ehr-clinical-connections-kp(Trapi v1.5.0) : 6f4dd91bc56fce4f597bc44153cf418e
#Automat-icees-kg(Trapi v1.5.0) : c64d583402f21cc85810d33befe49c86
#Automat-panther(Trapi v1.5.0) : 3f78d3fb8a7a577fbc7cc0a913ac3fc5
#Biolink Lookup : 02f84c50043e94970316568439b7b384
'COHD TRAPI' : 'https://cohd-api.transltr.io/api/query/', ##d4290b6b5741e6da6cc6a6f42e0cfdb5
#'Text Mined Cooccurrence API' : "https://cooccurence.ci.transltr.io/query/", #aa9c668df9d217409891cc7afb7ac039
        'Text Mined Cooccurrence API' : "https://cooccurrence.transltr.io/query", #71fa2e0f0f1fe1ec67f4ddb719db5ef3
#BioThings Rhea API : 03283cc2b21c077be6794e1704b1d230
#SmartAPI API : 27a5b60716c3a401f2c021a5b718c5b1
#MyDisease.info API : 671b45c0301c8624abbd26ae78449ca2
#MyVariant.info API : 09c8782d9f4027712e65b95424adba79
#BioThings UBERON API : ec6d76016ef40f284359d17fbf78df20
#OpenPredict API : 025600054bd8d6fb14ee66ee9d4a9830
#MyGene.info API : 59dce17363dce279d389100834e43648
#Answer-coalesce(Trapi v1.5.0) : fe8bb783ff710ab4e176f38c5f7777af
#BioThings HPO API : a5b0ec6bfde5008984d4b6cde402d61f
#Drug Approvals KP - TRAPI 1.5.0 : edc04feaf16c12424737988ce2e90d60
#Gene-List Network Enrichment Analysis : 5c8740542b4444d4f85c2e23c670b952
#MolePro : 1901bab8d33bb70b124f400ec1cfdba3
#Multiomics KP - TRAPI 1.5.0 : 1b6de23ed3c4e0713b20794477ba1e39
#Microbiome KP - TRAPI 1.5.0 : a8be4ea3fe8fa80a952ead0b3c5e4bc1
#BioThings GO Biological Process API : cc857d5b7c8b7609b5bbb38ff990bfff
#imProving Agent for TRAPI 1.5 : 415c3b1a85ead4ceb58caf00dee9b24e
#Clinical Trials KP - TRAPI 1.5.0 : e51073371d7049b9643e1edbdd61bcbd
#BioThings EBIgene2phenotype API : 1f47552dabd67351d4c625adb0a10d00
#BioThings RARe-SOURCE API : b772ebfbfa536bba37764d7fddb11d6f
#PharmGKB REST API : bde72db681ec0b8f9eeb67bb6b8dd72c
#BioThings DDInter API : 00fb85fc776279163199e6c50f6ddfc6
#MyChem.info API : 8f08d1446e0bb9c2b323713ce83e2bd3
#BioThings BindingDB API : 38e9e5169a72aee3659c9ddba956790d
#BioThings PFOCR API : edeb26858bd27d0322af93e7a9e08761
#BioThings MGIgene2phenotype API : 77ed27f111262d0289ed4f4071faa619
#BioThings FooDB API : f1b8f64c316a01d1722f0fb842499fe5
#Genetics Data Provider for NCATS Biomedical Translator Reasoners : db981dff8d93dcb0cfab5dbee8afbb40
#BioThings GO Molecular Function API : 34bad236d77bea0a0ee6c6cba5be54a6
#BioThings BioPlanet Pathway-Disease API : 55a223c6c6e0291dbd05f2faf27d16f4
#BioThings DISEASES API : a7f784626a426d054885a5f33f17d3f8
#BioThings BioPlanet Pathway-Gene API : b99c6dd64abcefe87dcd0a51c249ee6d
#BioThings GO Cellular Component API : f339b28426e7bf72028f60feefcd7465
#SPOKE KP for TRAPI 1.5 : 7f70cdfaeb801501da08dacc294e8b9f
#BioThings IDISK API : 32f36164fabed5d3abe6c2fd899c9418
#BioThings FoodData Central API : 895ec14a3650ec7ad85959a2d1554e2f
#BioThings AGR API : 68f12100e74342ae0dd5013d5f453194
#Translator Annotation Service : 5a4c41bf2076b469a0e9cfcf2f2b8f29
#BioThings InnateDB API : e9eb40ff7ad712e4e6f4f04b964b5966
#BioThings repoDB API : 1138c3297e8e403b6ac10cff5609b319
#BioThings GTRx API : 316eab811fd9ef1097df98bcaa9f7361
#BioThings Explorer (BTE) TRAPI : dc91716f44207d2e1287c727f281d339
#RTX KG2 - TRAPI 1.5.0 : a6b575139cfd429b0a87f825a625d036
#BioThings SuppKG API : b48c34df08d16311e3bca06b135b828d
#Knowledge Collaboratory API : 8601da411b8681dbbc32239ceb0f1a55
##Service Provider TRAPI : 36f82f05705c317bac17ddae3a0ea2f0
#Multiomics EHR Risk KP API : d86a24f6027ffe778f84ba10a7a1861a
#Multiomics Wellness KP API : 02af7d098ab304e80d6f4806c3527027
#BioThings DGIdb API : e3edd325c76f2992a111b43a907a4870
#BioThings SEMMEDDB API : 1d288b3a3caf75d541ffaae3aab386c8
'Multiomics BigGIM-DrugResponse KP API' : 'https://biothings.ci.transltr.io/biggim_drugresponse_kg/query/', #adf20dd6ff23dfe18e8e012bde686e31
#Biothings Therapeutic Target Database API : e481efd21f8e8c1deac05662439c2294
#Text Mining Targeted Association API : 978fe380a147a8641caf72320862697b
'ARAX Translator Reasoner - TRAPI 1.5.0' : 'https://arax.transltr.io/api/arax/v1.4/query/', # 03e63fbd5ed251bce08cb5801b6b169b
'Automat-ctd(Trapi v1.4.0)':"https://automat.transltr.io/ctd/1.4/query",
#'Automat-sri-reference-kg(Trapi v1.4.0)':"",
#'Autonomous Relay System (ARS) TRAPI':"",
#'BioLink API':"",
#'BioThings AGR API':"",
#'BioThings BioPlanet Pathway-Gene API':"",
#'BioThings DDInter API':"",
'BioThings Explorer (BTE) TRAPI':"https://bte.transltr.io/v1/query",
#'BioThings FooDB API':"",
#'BioThings FoodData Central API':"",
#'BioThings GO Biological Process API':"",
#'BioThings InnateDB API':"", # not in TRAPI standard
#'BioThings RARe-SOURCE API':"",
#'BioThings repoDB API':"",
#'Biolink Lookup':"",
'Biothings Therapeutic Target Database API':"https://biothings.ncats.io/ttd/query",
#'COHD TRAPI':"https://cohd-api.transltr.io/api/query",
#'Complex Portal Web Service':"",
#'Curated Query Service':"",
#'EBI Proteins API':"",
#'Gene-List Network Enrichment Analysis':"",
#'Knowledge Collaboratory API':"",
#'LitVar API':"",
#'RaMP API v1.0.1':"",
#'SmartAPI API':"",
#'Sri-answer-appraiser(Trapi v1.4.0)':"",
#'Sri-name-resolver':"",
#'Sri-node-normalizer(Trapi v1.3.0)':"",
#'Sri-node-normalizer(Trapi v1.4.0)':"",
#'Translator Annotation Service':"",
#'Workflow-runner(Trapi v1.4.0)':"https://translator-workflow-runner.transltr.io/query",
#'imProving Agent for TRAPI 1.4':"",
#'mediKanren':'https://medikanren-trapi.transltr.io/query', #ARA
#"BigGIM_BMG":"http://127.0.0.1:8000/find_path_by_predicate",
"Aragorn(Trapi v1.4.0)":"https://aragorn.transltr.io/aragorn/query",
#"ARAX Translator Reasoner - TRAPI 1.4.0":"https://arax.transltr.io/api/arax/v1.4/asyncquery",
"ARAX Translator Reasoner - TRAPI 1.4.0":"https://arax.transltr.io/api/arax/v1.4/query",
"RTX KG2 - TRAPI 1.4.0":"https://arax.ncats.io/api/rtxkg2/v1.4/query",
"SPOKE KP for TRAPI 1.4":"https://spokekp.transltr.io/api/v1.4/query",
"Multiomics BigGIM-DrugResponse KP API":"https://bte.transltr.io/v1/smartapi/adf20dd6ff23dfe18e8e012bde686e31/query",
#"Multiomics BigGIM-DrugResponse KP API":"https://bte.test.transltr.io/v1/smartapi/adf20dd6ff23dfe18e8e012bde686e31/query",
"Multiomics ClinicalTrials KP":"https://api.bte.ncats.io/v1/smartapi/d86a24f6027ffe778f84ba10a7a1861a/query",
"Multiomics Wellness KP API":"https://api.bte.ncats.io/v1/smartapi/02af7d098ab304e80d6f4806c3527027/query",
"Multiomics EHR Risk KP API":"https://api.bte.ncats.io/v1/smartapi/d86a24f6027ffe778f84ba10a7a1861a/query",
"Biothings Explorer (BTE)":"https://bte.transltr.io/v1/query",
"Service Provider TRAPI":"https://api.bte.ncats.io/v1/smartapi/978fe380a147a8641caf72320862697b/query",
"Explanatory-agent":"https://explanatory-agent-creative.azurewebsites.net/ARA/v1.3/asyncquery", #403 error
"MolePro":"https://translator.broadinstitute.org/molepro/trapi/v1.4/query",
"Genetics KP":"https://genetics-kp.transltr.io/genetics_provider/trapi/v1.4/query",
"medikanren-unsecret":"https://medikanren-trapi.transltr.io/query",
"Text Mined Cooccurrence API":"https://api.bte.ncats.io/v1/smartapi/978fe380a147a8641caf72320862697b/query",
"OpenPredict API":"https://openpredict.transltr.io/query",
"Agrkb(Trapi v1.4.0)":"https://automat.transltr.io/genome-alliance/1.4/query",
"Automat-biolink(Trapi v1.4.0)": "https://automat.renci.org/biolink/1.4/query",
"Automat-cam-kp(Trapi v1.4.0)": "https://automat.ci.transltr.io/cam-kp/1.4/query?limit=100",
#"Automat-ctd(Trapi v1.4.0)": "https://automat.renci.org/drugcentral/1.4/query",
"Automat-drug-central(Trapi v1.4.0)": "https://automat.ci.renci.org/drugcentral/1.4/query",
"Automat-gtex(Trapi v1.4.0)":"https://automat.renci.org/gtex/1.4/query",
"Automat-gtopdb(Trapi v1.4.0)": "https://automat.renci.org/gtopdb/1.4/query",
"Automat-gwas-catalog(Trapi v1.4.0)": "https://automat.renci.org/gwas-catalog/1.4/query",
"Automat-hetio(Trapi v1.4.0)": "https://automat.ci.transltr.io/hetio/1.4/query",
"Automat-hgnc(Trapi v1.4.0)": "https://automat.renci.org/hgnc/1.4/query",
"Automat-hmdb(Trapi v1.4.0)": "https://automat.renci.org/hmdb/1.4/query",
"Automat-human-goa(Trapi v1.4.0)": "https://automat.renci.org/human-goa/1.4/query",
"Automat-icees-kg(Trapi v1.4.0)": "https://automat.renci.org/icees-kg/1.4/query",
"Automat-intact(Trapi v1.4.0)": "https://automat.renci.org/intact/1.4/query",
"Automat-panther(Trapi v1.4.0)": "https://automat.renci.org/panther/1.4/query",
"Automat-pharos(Trapi v1.4.0)": "https://automat.renci.org/pharos/1.4/query",
"Automat-robokop(Trapi v1.4.0)": "https://ars-prod.transltr.io/ara-robokop/api/runquery", #doesn't work
"Automat-sri-reference-kp(Trapi v1.4.0)": "https://automat.ci.transltr.io/sri-reference-kp/1.4/query", #doesn't work
"Automat-string-db(Trapi v1.4.0)": "https://automat.ci.transltr.io/string-db/1.4/query",
"Automat-ubergraph(Trapi v1.4.0)": "https://automat.ci.transltr.io/ubergraph/1.4/query",
"Automat-ubergraph-nonredundant(Trapi v1.4.0)": "https://automat.ci.transltr.io/ubergraph-nonredundant/1.4/query",
"Automat-viral-proteome(Trapi v1.4.0)": "https://automat.ci.transltr.io/viral-proteome/1.4/query",
"CTD API":"https://automat.ci.transltr.io/ctd/1.4/query",
"Connections Hypothesis Provider API":"https://chp-api.transltr.io/query", #no knowledge_graph is defined in the response
"MyGene.info API":"https://api.bte.ncats.io/v1/smartapi/59dce17363dce279d389100834e43648/query", #check with chunlei
"MyDisease.info API":"https://api.bte.ncats.io/v1/smartapi/671b45c0301c8624abbd26ae78449ca2/query", #check with chunlei
"MyChem.info API":"https://api.bte.ncats.io/v1/8f08d1446e0bb9c2b323713ce83e2bd3/query", #check with chunlei
"MyVariant.info API":"https://api.bte.ncats.io/v1/59dce17363dce279d389100834e43648/query", #check with chunlei
"Ontology Lookup Service API":"https://api.bte.ncats.io/v1/1c056ffc7ed0dd1229e71c4752239465/query", #check with chunlei
"PharmGKB REST API":"https://api.bte.ncats.io/v1/bde72db681ec0b8f9eeb67bb6b8dd72c/query", #need to check with chunlei/Andrew
"QuickGO API":"https://api.bte.ncats.io/v1/1f277e1563fcfd124bfae2cc3c4bcdec/query",#pathways
#"RaMP API v1.0.1":"",
"Text Mining Targeted Association API":"https://api.bte.ncats.io/v1/smartapi/978fe380a147a8641caf72320862697b/query",
"BioThings BindingDB API":"https://api.bte.ncats.io/v1/smartapi/38e9e5169a72aee3659c9ddba956790d/query",
"BioThings BioPlanet Pathway-Disease API":"https://api.bte.ncats.io/v1/smartapi/55a223c6c6e0291dbd05f2faf27d16f4/query",
"BioThings DDinter API":"https://api.bte.ncats.io/v1/smartapi/00fb85fc776279163199e6c50f6ddfc6/query",
"BioThings DGIdb API":"https://api.bte.ncats.io/v1/smartapi/e3edd325c76f2992a111b43a907a4870/query",
"BioThings DISEASES API":"https://api.bte.ncats.io/v1/smartapi/a7f784626a426d054885a5f33f17d3f8/query",
"BioThings EBIgene2phenotype API":"https://api.bte.ncats.io/v1/smartapi/1f47552dabd67351d4c625adb0a10d00/query",
"BioThings Biological Process API":"https://api.bte.ncats.io/v1/smartapi/cc857d5b7c8b7609b5bbb38ff990bfff/query",
"BioThings GO Cellular Component API":"https://api.bte.ncats.io/v1/smartapi/f339b28426e7bf72028f60feefcd7465/query",
"BioThings GO Molecular Function API":"https://api.bte.ncats.io/v1/smartapi/34bad236d77bea0a0ee6c6cba5be54a6/query",
"BioThings GTRx API":"https://api.bte.ncats.io/v1/smartapi/316eab811fd9ef1097df98bcaa9f7361/query",
"BioThings HPO API": "https://api.bte.ncats.io/v1/smartapi/d7d1cc9bbe04ad9936076ca5aea904fe/query",
"BioThings IDISK API":"https://api.bte.ncats.io/v1/smartapi/32f36164fabed5d3abe6c2fd899c9418/query",
"BioThings MGIgene2phenotype API":"https://api.bte.ncats.io/v1/smartapi/77ed27f111262d0289ed4f4071faa619/query",
"BioThings PFOCR API":"https://api.bte.ncats.io/v1/smartapi/edeb26858bd27d0322af93e7a9e08761/query",
"Biothings RARe-SOURCE API":"https://api.bte.ncats.io/v1/smartapi/b772ebfbfa536bba37764d7fddb11d6f/query",
"BioThings Rhea API":"https://api.bte.ncats.io/v1/smartapi/03283cc2b21c077be6794e1704b1d230/query",
"BioThings SEMMEDDB API":"https://api.bte.ncats.io/v1/smartapi/1d288b3a3caf75d541ffaae3aab386c8/query",
"BioThings SuppKG API":"https://api.bte.ncats.io/v1/smartapi/b48c34df08d16311e3bca06b135b828d/query",
"BioThings UBERON API":"https://api.bte.ncats.io/v1/smartapi/ec6d76016ef40f284359d17fbf78df20/query",
}
return(APInames)
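# Usage sketch for list_Translator_APIs; the hard-coded URLs above are snapshots
# and may drift from the live smart-api.info registry:
#   >>> APInames = list_Translator_APIs()
#   >>> APInames['MolePro']
#   'https://translator.broadinstitute.org/molepro/trapi/v1.4/query'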
# used. Dec 5, 2023 (Example_query_one_hop_with_category.ipynb)
def select_API(sub_list,obj_list, metaKG):
    '''
    Selects the APIs that can connect the given subject and object categories in the meta knowledge graph.
    ---------
    Example:
    >>> sub_list = ["biolink:Gene", "biolink:Protein"]
    >>> obj_list = ["biolink:Gene", "biolink:Disease"]
    >>>
    >>> Translator_KP_info, APInames = translator_kpinfo.get_translator_kp_info()
    >>> print(len(Translator_KP_info))
    >>> metaKG = translator_metakg.get_KP_metadata(APInames)
    >>> print(metaKG.shape)
    >>> APInames, metaKG = translator_metakg.add_plover_API(APInames, metaKG)
    >>> selected_apis = select_API(sub_list, obj_list, metaKG)
    >>> print(selected_apis)
    '''
new_sub_list = sub_list
new_obj_list = obj_list
#for item in sub_list:
# new_sub_list.append(item.split(":")[1])
#for item in obj_list:
# new_obj_list.append(item.split(":")[1])
#metaKG = pd.read_csv("KP_metadata.csv")
df1 = metaKG.loc[(metaKG['Subject'].isin(new_sub_list)) & (metaKG['Object'].isin(new_obj_list))]
df2 = metaKG.loc[(metaKG['Subject'].isin(new_obj_list)) & (metaKG['Object'].isin(new_sub_list))]
df = pd.concat([df1,df2])
return(list(set(df['API'].values)))
# used. Dec 5, 2023 (Example_query_one_hop_with_category.ipynb)
def select_concept(sub_list,obj_list,metaKG):
'''
Selects the predicates connecting the given subject and object categories in the meta knowledge graph.
'''
#result_df = pd.read_csv("KP_metadata.csv")
df1 = metaKG.loc[(metaKG['Subject'].isin(sub_list)) & (metaKG['Object'].isin(obj_list))]
df2 = metaKG.loc[(metaKG['Subject'].isin(obj_list)) & (metaKG['Object'].isin(sub_list))]
df = pd.concat([df1,df2])
return(set(list(df['Predicate'])))
def sele_predicates_API(input_node1_category,input_node2_category,metaKG, APInames):
'''
Selects predicates, APIs, and API URLs for the given input node categories.
-----------
Example:
>>> sele_predicates, sele_APIs, API_URLs = sele_predicates_API(input_node1_category,input_node2_category,metaKG, APInames)
'''
sele_predicates = list(set(select_concept(sub_list=input_node1_category,
obj_list=input_node2_category,
metaKG=metaKG)))
sele_APIs = select_API(sub_list=input_node1_category,
obj_list=input_node2_category,
metaKG=metaKG)
API_URLs = get_Translator_API_URL(sele_APIs, APInames)
if len(sele_predicates) == 0:
print("No predicates found for the given categories.")
if len(sele_APIs) == 0:
print("No APIs found for the given categories.")
if len(API_URLs) == 0:
print("No API URLs found for the given categories.")
return sele_predicates, sele_APIs, API_URLs
# used. Dec 5, 2023 (Example_query_one_hop_with_category.ipynb)
def get_Translator_API_URL(API_sele, APInames):
API_URL = []
#API_URL = {}
for name in API_sele:
if name in APInames.keys():
API_URL.append(APInames[name])
#API_URL[name] = APInames[name]
else:
print(name + " : API name not found")
return API_URL
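# Usage sketch for get_Translator_API_URL (names not present in APInames are
# reported and skipped; output is illustrative):
#   >>> get_Translator_API_URL(['MolePro', 'NoSuchAPI'], APInames)
#   NoSuchAPI : API name not found
#   ['https://translator.broadinstitute.org/molepro/trapi/v1.4/query']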
# select APIs based on the predicates. Dec 10, 2023
def filter_APIs(sele_predicates, metaKG):
    if sele_predicates == []:
        # no predicates specified: return all API URLs
        sele_API_URL = list(metaKG['URL'].unique())
    else:
        sele_API_URL = list(metaKG.loc[metaKG['KG_category'].isin(sele_predicates)]['URL'].unique())
    return sele_API_URL
def select_predicates_inKP(sub_list,obj_list,KPname,metaKG):
    '''
    Selects the predicates connecting the given subject and object categories in a specific KP.
    sub_list = ["biolink:Gene", "biolink:Protein"]
    obj_list = ["biolink:Gene", "biolink:Disease"]
    KPname = "" # it should be one of the names in APInames
    '''
new_sub_list = []
new_obj_list = []
for item in sub_list:
new_sub_list.append(item.split(":")[1])
for item in obj_list:
new_obj_list.append(item.split(":")[1])
#result_df = pd.read_csv("KP_metadata.csv")
df1 = metaKG.loc[(metaKG['Subject'].isin(new_sub_list)) & (metaKG['Object'].isin(new_obj_list)) & (metaKG['API']==KPname)]
df2 = metaKG.loc[(metaKG['Subject'].isin(new_obj_list)) & (metaKG['Object'].isin(new_sub_list)) & (metaKG['API']==KPname)]
df = pd.concat([df1,df2])
temp_set = (set(list(df['KG_category'])))
final_set = []
for concept in temp_set:
#final_set.append("biolink:"+concept.split("-")[1])
final_set.append(concept)
return(final_set)
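# Usage sketch for select_predicates_inKP (KPname must match a key in APInames;
# metaKG as returned by translator_metakg.get_KP_metadata; the "biolink:" prefixes
# are stripped before matching):
#   >>> select_predicates_inKP(["biolink:Gene"], ["biolink:Disease"], "MolePro", metaKG)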
#def Generate_Gene_id_map():
# id_file = open("../metaData/Homo_sapiens.gene_info", "r")
# Gene_id_map = {}
# for line in id_file:
# line = line.strip()
# Gene_id_map["NCBIGene:"+line.split("\t")[1]] = line.split("\t")[2]
# id_file.close()
# return(Gene_id_map)
# Used. Jan 5, 2024
def ID_convert_to_preferred_name_nodeNormalizer(id_list):
dic_id_map = {}
    unrecognized_ids = []
    recognized_ids = []
# To convert a CURIE to a preferred name, you don't need NameLookup at all -- NodeNorm can
# do this by itself!
NODENORM_BASE_URL = "https://nodenorm.transltr.io" # Adjust this if you need NodeNorm TEST, CI or DEV.
NODENORM_BATCH_LIMIT = 900 # Adjust this if you start getting errors from NodeNorm.
NODENORM_GENE_PROTEIN_CONFLATION = True # Change to False if you don't want gene/protein conflation.
NODENORM_DRUG_CHEMICAL_CONFLATION = False # Change to True if you want drug/chemical conflation.
# split id_list into batches of at most NODENORM_BATCH_LIMIT entries
for index in range(0, len(id_list), NODENORM_BATCH_LIMIT):
id_sublist = id_list[index:index + NODENORM_BATCH_LIMIT]
# print(f"id_sublist: {id_sublist}")
# Query NodeNorm with https://nodenorm.transltr.io/docs#/default/get_normalized_node_handler_get_normalized_nodes_get
response = requests.post(NODENORM_BASE_URL + '/get_normalized_nodes', json={
"curies": id_sublist,
"description": False, # Change to True if you want descriptions from any identifiers we know about.
"conflate": NODENORM_GENE_PROTEIN_CONFLATION,
"drug_chemical_conflate": NODENORM_DRUG_CHEMICAL_CONFLATION,
})
if not response.ok:
raise RuntimeError("Error: NodeNorm request failed with status code " + str(response.status_code))
results = response.json()
for curie in id_sublist:
if curie in results and results[curie]:
identifier = results[curie].get('id', {})
if 'identifier' in identifier and identifier['identifier'] != curie:
                    recognized_ids.append(curie)
#print(f"NodeNorm normalized {curie} to {identifier['identifier']} " +
# f"with gene-protein conflation {NODENORM_GENE_PROTEIN_CONFLATION} and " +
# f"with drug-chemical conflation {NODENORM_DRUG_CHEMICAL_CONFLATION}.")
label = identifier.get('label')
dic_id_map[curie] = label
if not label:
print(curie + ": no preferred name")
dic_id_map[curie] = curie
else:
                unrecognized_ids.append(curie)
dic_id_map[curie] = curie
    if len(unrecognized_ids) > 0:
        print("NodeNorm does not know about these identifiers: " + ",".join(unrecognized_ids))
return dic_id_map
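# Usage sketch for ID_convert_to_preferred_name_nodeNormalizer (requires network
# access to nodenorm.transltr.io; output is illustrative):
#   >>> ID_convert_to_preferred_name_nodeNormalizer(["NCBIGene:3845"])
#   {'NCBIGene:3845': 'KRAS'}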
def visulization_one_hop_ranking_input_as_list(result_ranked_by_primary_infores, result_parsed,
                                               num_of_nodes=20,
                                               input_query="NCBIGene:3845",
                                               fontsize=6,
                                               title_fontsize=12,
                                               output_png1="NE_heatmap1.png",
                                               output_png2="NE_heatmap2.png"
                                               ):
    '''Plot heatmaps of primary knowledge sources and predicates for ranked one-hop results.'''
    # edited Dec 5, 2023
predicates_list = []
primary_infore_list = []
aggregator_infore_list = []
    for i in range(0, result_ranked_by_primary_infores.shape[0]):
        output_node = result_ranked_by_primary_infores['output_node'].values[i]
        type_of_node = result_ranked_by_primary_infores['type_of_nodes'].values[i]
        if type_of_node == 'object':
            subject = input_query
            object = output_node
        else:
            subject = output_node
            object = input_query
        predicates_list = predicates_list + result_parsed[subject + "_" + object]['predicate']
        primary_infore_list = primary_infore_list + result_parsed[subject + "_" + object]['primary_knowledge_source']
        if 'aggregator_knowledge_source' in result_parsed[subject + "_" + object]:
            aggregator_infore_list = aggregator_infore_list + result_parsed[subject + "_" + object]['aggregator_knowledge_source']
aggregator_infore_list = list(set(aggregator_infore_list))
predicates_list = list(set(predicates_list))
primary_infore_list = list(set(primary_infore_list))
predicates_by_nodes = {}
for predict in predicates_list:
predicates_by_nodes[predict] = []
primary_infore_by_nodes = {}
for predict in primary_infore_list:
primary_infore_by_nodes[predict] = []
aggregator_infore_by_nodes = {}
for predict in aggregator_infore_list:
aggregator_infore_by_nodes[predict] = []
names = []
    for i in range(0, result_ranked_by_primary_infores.shape[0]):
        input_nodes = result_ranked_by_primary_infores['input_node'].values[i]
        output_node = result_ranked_by_primary_infores['output_node'].values[i]
        names.append(output_node)
        type_of_node = result_ranked_by_primary_infores['type_of_nodes'].values[i]
        if type_of_node == 'object':
            subject = input_query
            object = output_node
        else:
            subject = output_node
            object = input_query
        new_id = subject + "_" + object
cur_primary_infore = result_parsed[new_id]['primary_knowledge_source']
for predict in primary_infore_list:
if predict in cur_primary_infore:
primary_infore_by_nodes[predict].append(1)
else:
primary_infore_by_nodes[predict].append(0)
cur_predicates = result_parsed[new_id]['predicate']
for predict in predicates_list:
if predict in cur_predicates:
predicates_by_nodes[predict].append(1)
else:
predicates_by_nodes[predict].append(0)
#convert = False
#for item in colnames:
# if 'NCBIGene' in item:
# convert = True
#if convert:
#Gene_id_map = Gene_id_converter(colnames, "http://127.0.0.1:8000/query_name_by_id") # option 1
#Gene_id_map = Generate_Gene_id_map() # option 2
dic_id_map = ID_convert_to_preferred_name_nodeNormalizer(names)
new_colnames = []
for item in names:
if item in dic_id_map:
new_colnames.append(dic_id_map[item])
else:
new_colnames.append(item)
#else:
# new_colnames = colnames
primary_infore_by_nodes_df = pd.DataFrame(primary_infore_by_nodes)
primary_infore_by_nodes_df.index = new_colnames
primary_infore_by_nodes_df = primary_infore_by_nodes_df.T
predicates_by_nodes_df = pd.DataFrame(predicates_by_nodes)
predicates_by_nodes_df.index = new_colnames
predicates_by_nodes_df = predicates_by_nodes_df.T
plot_heatmap(primary_infore_by_nodes_df, num_of_nodes, fontsize, title_fontsize,output_png1)
plot_heatmap(predicates_by_nodes_df, num_of_nodes, fontsize, title_fontsize,output_png2)
return(predicates_by_nodes_df)
# Used. Jan 5, 2024
def visulization_one_hop_ranking(result_ranked_by_primary_infores, result_parsed,
                                 num_of_nodes=20,
                                 input_query="NCBIGene:3845",
                                 fontsize=6,
                                 title_fontsize=12,
                                 output_png1="NE_heatmap1.png",
                                 output_png2="NE_heatmap2.png"
                                 ):
    '''Plot heatmaps of primary knowledge sources and predicates for ranked one-hop results.'''
    # edited Dec 5, 2023
predicates_list = []
primary_infore_list = []
aggregator_infore_list = []
    for i in range(0, result_ranked_by_primary_infores.shape[0]):
        output_node = result_ranked_by_primary_infores['output_node'].values[i]
        type_of_node = result_ranked_by_primary_infores['type_of_nodes'].values[i]
        if type_of_node == 'object':
            subject = input_query
            object = output_node
        else:
            subject = output_node
            object = input_query
        predicates_list = predicates_list + result_parsed[subject + "_" + object]['predicate']
        primary_infore_list = primary_infore_list + result_parsed[subject + "_" + object]['primary_knowledge_source']
        if 'aggregator_knowledge_source' in result_parsed[subject + "_" + object]:
            aggregator_infore_list = aggregator_infore_list + result_parsed[subject + "_" + object]['aggregator_knowledge_source']
aggregator_infore_list = list(set(aggregator_infore_list))
predicates_list = list(set(predicates_list))
primary_infore_list = list(set(primary_infore_list))
predicates_by_nodes = {}
for predict in predicates_list:
predicates_by_nodes[predict] = []
primary_infore_by_nodes = {}
for predict in primary_infore_list:
primary_infore_by_nodes[predict] = []
aggregator_infore_by_nodes = {}
for predict in aggregator_infore_list:
aggregator_infore_by_nodes[predict] = []
names = []
    for i in range(0, result_ranked_by_primary_infores.shape[0]):
        output_node = result_ranked_by_primary_infores['output_node'].values[i]
        names.append(output_node)
        type_of_node = result_ranked_by_primary_infores['type_of_nodes'].values[i]
        if type_of_node == 'object':
            subject = input_query
            object = output_node
        else:
            subject = output_node
            object = input_query
        new_id = subject + "_" + object
cur_primary_infore = result_parsed[new_id]['primary_knowledge_source']
for predict in primary_infore_list:
if predict in cur_primary_infore:
primary_infore_by_nodes[predict].append(1)
else:
primary_infore_by_nodes[predict].append(0)
cur_predicates = result_parsed[new_id]['predicate']
for predict in predicates_list:
if predict in cur_predicates:
predicates_by_nodes[predict].append(1)
else:
predicates_by_nodes[predict].append(0)
#convert = False
#for item in colnames:
# if 'NCBIGene' in item:
# convert = True
#if convert:
#Gene_id_map = Gene_id_converter(colnames, "http://127.0.0.1:8000/query_name_by_id") # option 1
#Gene_id_map = Generate_Gene_id_map() # option 2
dic_id_map = ID_convert_to_preferred_name_nodeNormalizer(names)
new_colnames = []
for item in names:
if item in dic_id_map:
new_colnames.append(dic_id_map[item])
else:
new_colnames.append(item)
#else:
# new_colnames = colnames
primary_infore_by_nodes_df = pd.DataFrame(primary_infore_by_nodes)
primary_infore_by_nodes_df.index = new_colnames
primary_infore_by_nodes_df = primary_infore_by_nodes_df.T
predicates_by_nodes_df = pd.DataFrame(predicates_by_nodes)
predicates_by_nodes_df.index = new_colnames
predicates_by_nodes_df = predicates_by_nodes_df.T
plot_heatmap(primary_infore_by_nodes_df, num_of_nodes, fontsize, title_fontsize,output_png1)
plot_heatmap(predicates_by_nodes_df, num_of_nodes, fontsize, title_fontsize,output_png2)
return(predicates_by_nodes_df)
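# Usage sketch for the two visualization functions above. result_ranked_by_primary_infores
# and result_parsed come from rank_by_primary_infores and parse_KG, defined below:
#   >>> df = visulization_one_hop_ranking(result_ranked_by_primary_infores,
#   ...                                   result_parsed,
#   ...                                   num_of_nodes=20,
#   ...                                   input_query="NCBIGene:3845")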
def plot_heatmap(predicates_by_nodes_df,num_of_nodes = 20,
fontsize = 6,
title_fontsize = 10,
output_png="NE_heatmap.png"):
#matplotlib.use('Agg')
#title = "Ranking of one-hop nodes by primary infores"
#ylab = "infores"
df = predicates_by_nodes_df.iloc[:,0:num_of_nodes]
colnames = list(df.columns)
    # create the figure and subplot
fig = plt.figure( figsize=(0.8+df.shape[1]*0.11,3.5),dpi = 300)
ax = fig.add_subplot(111)
# create the heatmap
# heatmap with border
p1 = sns.heatmap(df, cmap="Blues", cbar=False, ax=ax, linecolor='grey', linewidth=0.2)
# Adjust font size for x and y tick labels
p1.set_xticklabels(p1.get_xticklabels(), rotation=90, fontsize=fontsize)
p1.set_yticklabels(p1.get_yticklabels(), fontsize=fontsize)
#p1.set_title(title)
#p1.set_ylabel(ylab)
    # set xticklabels with colnames
    plt.xticks(ticks=range(len(df.columns)), labels=df.columns)
# set title font size
p1.title.set_size(title_fontsize)
plt.show()
# save the figure
#plt.savefig(output_png, bbox_inches='tight', dpi=300)
def plot_heatmap_ui(predicates_by_nodes_df,num_of_nodes = 20,
fontsize = 6,
title_fontsize = 10,
output_png="NE_heatmap.png"):
title = "Ranking of one-hop nodes by primary infores"
ylab = "infores"
df = predicates_by_nodes_df.iloc[:,0:num_of_nodes]
colnames = list(df.columns)
    # create the figure and subplot
fig = plt.figure( figsize=(0.8+df.shape[1]*0.1,3.5),dpi = 100)
ax = fig.add_subplot(111)
# create the heatmap
# heatmap with border
p1 = sns.heatmap(df, cmap="Blues", cbar=False, ax=ax, linecolor='grey', linewidth=0.2)
p1.set_title(title)
p1.set_ylabel(ylab)
    # set xticklabels with colnames
    plt.xticks(ticks=range(len(df.columns)), labels=df.columns)
# set title font size
p1.title.set_size(title_fontsize)
# plt.show()
# save the figure
plt.savefig(output_png, bbox_inches='tight', dpi=300)
# used. Dec 5, 2023 (Example_query_one_hop_with_category.ipynb)
def Gene_id_converter(id_list, API_url):
id_list_new = []
for id in id_list:
if id.startswith("NCBIGene:"):
id = id.replace("NCBIGene:", "NCBIGene")
id_list_new.append(id)
query_json = {
"message": {
"query_graph": {
"nodes": {
"n0": {
"categories": ["Gene"],
"ids": id_list_new
},
"n1": {
"categories": [
"string"
],
"ids": [
"string"
]
}
},
"edges": {
"e1": {
"predicates": [
"string"
]
}
}
}
}
}
response = requests.post(API_url, json=query_json)
result = {}
if response.status_code == 200:
result = response.json()
return(result)
# used. Dec 5, 2023 (Example_query_one_hop_with_category.ipynb)
def Neiborhood_finder(input_node, node2_categories, APInames, metaKG, API_predicates, input_node_category = []):
"""
This function is used to find the neighborhood of a given input node with intermediate categories.
--------------
Parameters:
input_node (str): The input node, can be a gene name, protein name, or any other identifier.
node2_categories (list): A list of intermediate categories to be used in the neighborhood finding process.
APInames (dict): A dictionary containing the names of the APIs to be used.
metaKG (DataFrame): The metadata knowledge graph containing information about the APIs and their predicates.
API_predicates (dict): A dictionary containing the predicates for each API.
input_node_category (list): Optional. A list of categories for the input node. If empty, it will be derived from the input node's types.
--------------
Returns:
input_node_id (str): The curie id of the input node.
result (dict): The result of the query for the input node.
result_parsed (DataFrame): The parsed results for the input node.
result_ranked_by_primary_infores (DataFrame): The ranked results based on primary infores.
--------------
Example:
>>> input_node_id, result, result_parsed, result_ranked_by_primary_infores1 = Neiborhood_finder('Ovarian cancer',
node2_categories = ['biolink:SmallMolecule', 'biolink:Drug', 'biolink:ChemicalEntity'],
APInames = APInames,
metaKG = metaKG,
API_predicates = API_predicates)
--------------
"""
from TCT import name_resolver
from TCT import translator_metakg
from TCT import translator_kpinfo
from TCT import translator_query
# Step 1: Resolve the input node to get its curie id and categories
input_node_info = name_resolver.lookup(input_node)
input_node_id = input_node_info.curie
print(input_node_id)
if len(input_node_category) == 0:
input_node_category = input_node_info.types
else:
input_node_category = list(set(input_node_category).intersection(set(input_node_info.types)))
if len(input_node_category) == 0:
input_node_category = input_node_info.types
# Step 2: Select predicates and APIs based on the intermediate categories
sele_predicates, sele_APIs, API_URLs = sele_predicates_API(input_node_category,
node2_categories,
metaKG, APInames)
# Step 3: Format the query JSON for the input node
    query_json = format_query_json([input_node_id], [],
                                   input_node_category,
                                   node2_categories,
                                   sele_predicates)
# Step 4: Query the APIs in parallel
result = translator_query.parallel_api_query(query_json=query_json,
select_APIs= sele_APIs,
APInames=APInames,
API_predicates=API_predicates,
max_workers=len(sele_APIs))
    # Step 5: Parse the knowledge graphs returned by the APIs
    result_parsed = parse_KG(result)
    # Step 6: Rank the results. This ranking method is based on the number of unique
    # primary infores. It can only be used to rank results with one defined node.
    result_ranked_by_primary_infores1 = rank_by_primary_infores(result_parsed, input_node_id)  # input_node_id is the CURIE of the input node
return input_node_id, result, result_parsed, result_ranked_by_primary_infores1
def Path_finder(input_node1, input_node2, intermediate_categories, APInames, metaKG, API_predicates, input_node1_category = [], input_node2_category = []):
"""
This function is used to find paths between two input nodes with intermediate categories.
--------------
Parameters:
input_node1 (str): The first input node, can be a gene name, protein name, or any other identifier.
input_node2 (str): The second input node, can be a gene name, protein name, or any other identifier.
intermediate_categories (list): A list of intermediate categories to be used in the path finding process.
--------------
Returns:
paths (DataFrame): A DataFrame containing the paths found between the two input nodes.
input_node1_id (str): The curie id of the first input node.
input_node2_id (str): The curie id of the second input node.
result1 (dict): The result of the query for the first input node.
result2 (dict): The result of the query for the second input node.
result_parsed1 (DataFrame): The parsed results for the first input node.
result_parsed2 (DataFrame): The parsed results for the second input node.
result_ranked_by_primary_infores1 (DataFrame): The ranked results for the first input node based on primary infores.
    result_ranked_by_primary_infores2 (DataFrame): The ranked results for the second input node based on primary infores.
--------------
Example:
>>> paths, input_node1_id, input_node2_id, result1, result2, result_parsed1, result_parsed2, result_ranked_by_primary_infores1, result_ranked_by_primary_infores2 = Path_finder('WNT7B', 'NPM1', ['biolink:Gene', 'biolink:Protein'])
--------------
"""
from TCT import name_resolver
from TCT import translator_metakg
from TCT import translator_kpinfo
from TCT import translator_query
input_node1_info = name_resolver.lookup(input_node1)
input_node1_id = input_node1_info.curie
print(input_node1_id)
input_node1_list = [input_node1_id]
if len(input_node1_category) == 0:
input_node1_category = input_node1_info.types
else:
input_node1_category = list(set(input_node1_category).intersection(set(input_node1_info.types)))
if len(input_node1_category) == 0:
input_node1_category = input_node1_info.types
input_node2_info = name_resolver.lookup(input_node2)
input_node2_id = input_node2_info.curie
print(input_node2_id)
input_node2_list = [input_node2_id]
if len(input_node2_category) == 0:
input_node2_category = input_node2_info.types
else:
input_node2_category = list(set(input_node2_category).intersection(set(input_node2_info.types)))
if len(input_node2_category) == 0:
input_node2_category = input_node2_info.types
    # Select predicates and APIs based on the intermediate categories
sele_predicates1, sele_APIs1, API_URLs1 = sele_predicates_API(input_node1_category,
intermediate_categories,
metaKG, APInames)
sele_predicates2, sele_APIs2, API_URLs2 = sele_predicates_API(input_node2_category,
intermediate_categories,
metaKG, APInames)
query_json1 = format_query_json(input_node1_list, # a list of identifiers for input node1
[], # id list for the intermediate node, it can be empty list if only want to query node1
input_node1_category, # a list of categories of input node1
intermediate_categories, # a list of categories of the intermediate node
sele_predicates1) # a list of predicates
query_json2 = format_query_json(input_node2_list, # a list of identifiers for input node2
[], # id list for the intermediate node, it can be empty list if only want to query node2
input_node2_category, # a list of categories of input node2
intermediate_categories, # a list of categories of the intermediate node
sele_predicates2) # a list of predicates
result1 = translator_query.parallel_api_query(query_json=query_json1,
select_APIs = sele_APIs1,
APInames=APInames,
API_predicates=API_predicates,
max_workers=len(sele_APIs1))
result2 = translator_query.parallel_api_query(query_json=query_json2,
select_APIs = sele_APIs2,
APInames=APInames,
API_predicates=API_predicates,
max_workers=len(sele_APIs2))
    result_parsed1 = parse_KG(result1)
    # Rank the results. This ranking method is based on the number of unique
    # primary infores. It can only be used to rank results with one defined node.
    result_ranked_by_primary_infores1 = rank_by_primary_infores(result_parsed1, input_node1_id)  # input_node1_id is the CURIE of input node 1
    result_parsed2 = parse_KG(result2)
    result_ranked_by_primary_infores2 = rank_by_primary_infores(result_parsed2, input_node2_id)  # input_node2_id is the CURIE of input node 2
possible_paths = len(set(result_ranked_by_primary_infores1['output_node']).intersection(set(result_ranked_by_primary_infores2['output_node'])))
print("Number of possible paths: ", possible_paths)
paths = merge_ranking_by_number_of_infores(result_ranked_by_primary_infores1, result_ranked_by_primary_infores2,
top_n = 30,
fontsize=10,
title_fontsize=12,)
return paths, input_node1_id, input_node2_id, result1, result2, result_parsed1, result_parsed2, result_ranked_by_primary_infores1, result_ranked_by_primary_infores2
# used. Dec 5, 2023 (Example_query_one_hop_with_category.ipynb)
def parse_KG(result):
    '''
    Parse query results into a dictionary keyed by "subject_object". Each entry collects:
        subject
        object
        predicate
        primary_knowledge_source
        aggregator_knowledge_source
        evidence: subject_predicate_object_primary_knowledge_source[_aggregator_knowledge_source]
    '''
# edited Dec 5, 2023
result_parsed = {}
for i in result:
subject_object = result[i]['subject'] + "_" + result[i]['object']
object_subject = result[i]['object'] + "_" + result[i]['subject']
#result_parsed["predicate"].append(result[i]['predicate'])
#result_parsed["sources"].append(result[i]['sources'])
#result_parsed["subject"].append(result[i]['subject'])
#result_parsed["object"].append(result[i]['object'])
if subject_object not in result_parsed:
result_parsed[subject_object] = {}
result_parsed[subject_object]['predicate'] = [result[i]['predicate']]
result_parsed[subject_object]['subject'] = result[i]['subject']
result_parsed[subject_object]['object'] = result[i]['object']
            evidence = ''  # initialize so an aggregator source listed before the primary source cannot trigger a NameError
            for j in result[i]['sources']:
                if j['resource_role'] == 'primary_knowledge_source':
                    result_parsed[subject_object]['primary_knowledge_source'] = [j['resource_id']]
                    evidence = result[i]['subject'] + "_" + result[i]['predicate'] + "_" + result[i]['object'] + "_" + j['resource_id']
                if j['resource_role'] == 'aggregator_knowledge_source':
                    result_parsed[subject_object]['aggregator_knowledge_source'] = [j['resource_id']]
                    evidence = evidence + "_" + j['resource_id']
            result_parsed[subject_object]['evidence'] = [evidence]
else: # subject_object in result_parsed:
result_parsed[subject_object]['predicate'].append(result[i]['predicate'])
            evidence = ''  # initialize so an aggregator source listed before the primary source cannot trigger a NameError
            for j in result[i]['sources']:
                if j['resource_role'] == 'primary_knowledge_source':
                    result_parsed[subject_object]['primary_knowledge_source'].append(j['resource_id'])
                    evidence = result[i]['subject'] + "_" + result[i]['predicate'] + "_" + result[i]['object'] + "_" + j['resource_id']
                if j['resource_role'] == 'aggregator_knowledge_source':
                    if 'aggregator_knowledge_source' not in result_parsed[subject_object]:
                        result_parsed[subject_object]['aggregator_knowledge_source'] = [j['resource_id']]
                    else:
                        result_parsed[subject_object]['aggregator_knowledge_source'].append(j['resource_id'])
                    evidence = evidence + "_" + j['resource_id']
            result_parsed[subject_object]['evidence'].append(evidence)
return(result_parsed)
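# Illustrative shape of the dictionary returned by parse_KG (keys and values
# depend entirely on the queried APIs; this example is hypothetical):
#   {'NCBIGene:3845_MONDO:0005148': {
#        'subject': 'NCBIGene:3845',
#        'object': 'MONDO:0005148',
#        'predicate': ['biolink:gene_associated_with_condition'],
#        'primary_knowledge_source': ['infores:disgenet'],
#        'aggregator_knowledge_source': ['infores:biothings-explorer'],
#        'evidence': ['NCBIGene:3845_biolink:gene_associated_with_condition_MONDO:0005148_infores:disgenet']},
#    ...}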
# parse network results. Dec 10, 2023
def parse_network_result(result, input_node1_list):
dic_nodes = {}
for i in result:
subject = result[i]['subject']
object = result[i]['object']
predicate = result[i]['predicate']
sources = result[i]['sources']
if subject == object:
continue
if subject in dic_nodes:
dic_nodes[subject].append(object)
else:
dic_nodes[subject] = [object]
if object in dic_nodes:
dic_nodes[object].append(subject)
else:
dic_nodes[object] = [subject]
dic_remain_nodes = {}
dic_with_input_nodes = {}
for i in dic_nodes:
if i in input_node1_list:
dic_remain_nodes[i] = dic_nodes[i]
else:
continue
for i in dic_remain_nodes:
for j in dic_nodes[i]:
if j in dic_with_input_nodes:
dic_with_input_nodes[j].append(i)
else:
dic_with_input_nodes[j] = [i]
for i in dic_with_input_nodes:
dic_with_input_nodes[i] = list(set(dic_with_input_nodes[i]))
for i in dic_with_input_nodes:
if len(set(dic_with_input_nodes[i])) > 1: #
#print(i, set(dic_with_input_nodes[i]))
if i not in dic_remain_nodes:
dic_remain_nodes[i] = dic_with_input_nodes[i]
else:
continue
dic_remain_nodes_final = {}
for i in dic_remain_nodes:
dic_remain_nodes_final[i] = set(dic_remain_nodes[i]).intersection(set(dic_remain_nodes.keys()))
subject_nodes = []
object_nodes = []
for i in dic_remain_nodes_final:
for j in dic_remain_nodes_final[i]:
subject_nodes.append(i)
object_nodes.append(j)
result_df = pd.DataFrame({'Subject':subject_nodes, 'Object':object_nodes})
#result_df.to_csv('result_df.csv', index=False)
return result_df
# parse results to a dictionary. Dec 5, 2023
# used. Dec 5, 2023 (Example_query_one_hop_with_category.ipynb)
def rank_by_primary_infores(result_parsed, input_node):
    '''Rank one-hop output nodes by the number of unique primary knowledge sources. Edited Dec 5, 2023.'''
rank_df = pd.DataFrame()
output_nodes = []
Num_of_primary_infores = []
type_of_nodes = []
unique_predicates = []
for i in result_parsed:
curr_predict = result_parsed[i]['predicate']
subject = result_parsed[i]['subject']
object = result_parsed[i]['object']
if subject == input_node:
output_nodes.append(object)
type_of_nodes.append('object')
Num_of_primary_infores.append(len(set(result_parsed[i]['primary_knowledge_source'])))
unique_predicates.append(curr_predict)
elif object == input_node:
output_nodes.append(subject)
type_of_nodes.append('subject')
unique_predicates.append(curr_predict)
Num_of_primary_infores.append(len(set(result_parsed[i]['primary_knowledge_source'])))
colnames = output_nodes
names = colnames
dic_id_map = ID_convert_to_preferred_name_nodeNormalizer(names)
new_colnames = []
for item in names:
if item in dic_id_map:
new_colnames.append(dic_id_map[item])
else:
new_colnames.append(item)
rank_df['output_node'] = output_nodes
rank_df['Name'] = new_colnames
rank_df['Num_of_primary_infores'] = Num_of_primary_infores
rank_df['type_of_nodes'] = type_of_nodes
rank_df['unique_predicates'] = unique_predicates
rank_df_ranked = rank_df.sort_values(by=['Num_of_primary_infores'], ascending=False)
return(rank_df_ranked)
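# Usage sketch for rank_by_primary_infores; the returned DataFrame has columns
# output_node / Name / Num_of_primary_infores / type_of_nodes / unique_predicates,
# sorted by Num_of_primary_infores in descending order:
#   >>> ranked = rank_by_primary_infores(result_parsed, "NCBIGene:3845")
#   >>> ranked.head()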
# used. Dec 5, 2023 (Example_query_rank_the_path.ipynb)
def merge_by_ranking_index(result_ranked_by_primary_infores,
result_ranked_by_primary_infores2,
top_n = 20,
title_fontsize = 12,
fontsize = 12,
):
    dic_rank1 = {}
    for i in range(0, result_ranked_by_primary_infores.shape[0]):
        # use positional access so the score follows the sorted rank order
        dic_rank1[result_ranked_by_primary_infores['output_node'].values[i]] = 1 - i / result_ranked_by_primary_infores.shape[0]
    dic_rank2 = {}
    for i in range(0, result_ranked_by_primary_infores2.shape[0]):
        dic_rank2[result_ranked_by_primary_infores2['output_node'].values[i]] = 1 - i / result_ranked_by_primary_infores2.shape[0]
merged_nodes = set(dic_rank1.keys()).intersection(set(dic_rank2.keys()))
dic_merged_rank = {}
for node in merged_nodes:
dic_merged_rank[node] = dic_rank1[node] * dic_rank2[node]
result_ranked = pd.DataFrame.from_dict(dic_merged_rank, orient='index', columns=['score'])
result_ranked = result_ranked.sort_values(by=['score'], ascending=False)
result_ranked = result_ranked.reset_index()
result_ranked.columns = ['output_node', 'score']
result_xy_sorted = result_ranked
result_xy_sorted.index = result_ranked['output_node']
#convert = False
colnames = result_xy_sorted.index.to_list()
names = colnames
dic_id_map = ID_convert_to_preferred_name_nodeNormalizer(names)
new_colnames = []
for item in names:
if item in dic_id_map:
new_colnames.append(dic_id_map[item])
else:
new_colnames.append(item)
result_xy_sorted.index = new_colnames
result_xy_sorted = result_xy_sorted.sort_values(by=['score'], ascending=False)
sns.set(style="whitegrid")
plt.figure(figsize=(5,5), dpi = 300)
ax = sns.barplot(x=result_xy_sorted.iloc[0:top_n].index, y=result_xy_sorted.iloc[0:top_n]['score'], color='grey')
ax.set_xticklabels(ax.get_xticklabels(), rotation=90, ha="center", fontsize=fontsize)
ax.set_ylabel("Ranking score")
ax.title.set_size(title_fontsize)
plt.tight_layout()
#plt.show()
return result_xy_sorted
def merge_ranking_by_number_of_infores(result_ranked_by_primary_infores,
result_ranked_by_primary_infores1,
top_n = 30,
fontsize = 12,
title_fontsize = 12,
output_png = "NE_heatmap.png"
):
overlapped = (set(result_ranked_by_primary_infores1['output_node']).intersection(set(result_ranked_by_primary_infores['output_node'])))
x = result_ranked_by_primary_infores.loc[result_ranked_by_primary_infores['output_node'].isin(overlapped)]
y = result_ranked_by_primary_infores1.loc[result_ranked_by_primary_infores1['output_node'].isin(overlapped)]
dic_x = {}
for i in range(x.shape[0]):
dic_x[x.iloc[i]['output_node']] = x.iloc[i]['Num_of_primary_infores']/np.max(x['Num_of_primary_infores'])
dic_y = {}
for i in range(y.shape[0]):
dic_y[y.iloc[i]['output_node']] = y.iloc[i]['Num_of_primary_infores']/np.max(y['Num_of_primary_infores'])
predicts_list1 = []
predicts_list2 = []
dic_xy = {}
for i in overlapped:
#print(result_ranked_by_primary_infores[result_ranked_by_primary_infores['output_node'] == i]['unique_predicates'])
dic_xy[i] = dic_x[i] * dic_y[i]
predicts_list1.append('; '.join(list(set(result_ranked_by_primary_infores[result_ranked_by_primary_infores['output_node'] == i]['unique_predicates'].values[0]))))
predicts_list2.append('; '.join(list(result_ranked_by_primary_infores1[result_ranked_by_primary_infores1['output_node'] == i]['unique_predicates'].values[0])))
result_xy = pd.DataFrame.from_dict(dic_xy, orient='index', columns=['score'])
result_xy['output_node'] = result_xy.index
# convert the output_node to preferred name
#result_xy["output_node_name"] = new_colnames
    result_xy['predicates1'] = predicts_list1
    result_xy['predicates2'] = predicts_list2
result_xy_sorted = result_xy.sort_values(by=['score'], ascending=False)
colnames = result_xy_sorted.index.to_list()
names = colnames
dic_id_map = ID_convert_to_preferred_name_nodeNormalizer(names)
new_colnames = []
for item in names:
if item in dic_id_map:
new_colnames.append(dic_id_map[item])
else:
new_colnames.append(item)
result_xy_sorted.index = new_colnames
result_xy_sorted['output_node_name'] = new_colnames
x = result_xy_sorted.iloc[0:top_n].index
y = result_xy_sorted.iloc[0:top_n]['score']
plot_path_bar(x,y,fontsize, title_fontsize, output_png=output_png)
return result_xy_sorted
def plot_path_bar(x,
y,
fontsize = 8,
title_fontsize = 10,
output_png="NE_heatmap.png"):
#matplotlib.use('Agg')
title = "Bridging nodes"
fig = plt.figure(figsize=(5,5), dpi = 300)
ax = fig.add_subplot(111)
ax = sns.barplot(x=x, y=y, color='grey')
ax.set_xticklabels(ax.get_xticklabels(), rotation=90, ha="center", fontsize=fontsize)
ax.set_ylabel("Ranking score")
ax.title.set_size(title_fontsize)
# save the figure
plt.savefig(output_png, bbox_inches='tight', dpi=300)
# Sri-name-resolver Used Dec 5, 2023 (Example_query_one_hop_with_category.ipynb)
def get_curie(name):
response = requests.get("https://name-lookup.transltr.io/lookup", params={
'string': name,
'autocomplete': False
})
if response.status_code == 200:
result = response.json()
if len(result) != 0:
return(result[0]['curie'])
else:
return(name)
else:
return(name)
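# Usage sketch for get_curie (requires network access to name-lookup.transltr.io;
# the exact CURIE returned may change as the name resolver is updated):
#   >>> get_curie("KRAS")   # e.g. 'NCBIGene:3845'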
# annotate gene pairs or a list of genes. Feb 25, 2024
def get_pair_annotation(result, input_node_list):
pairs_found = {}
for i in result.keys():
if result[i]['subject'] in input_node_list and result[i]['object'] in input_node_list and result[i]['subject'] != result[i]['object']:
pairs_found[i] = result[i]
return pairs_found
def parse_pair_annotation(pairs_found, input_node_list):
edge_list = []
names = ID_convert_to_preferred_name_nodeNormalizer(input_node_list)
dic_names = {}
for i in input_node_list:
dic_names[i] = names[i]
for i in pairs_found.keys():
primary_source = ''
for source in pairs_found[i]['sources']:
if source['resource_role'] == 'primary_knowledge_source':
primary_source = source['resource_id']
break
edge_list.append([pairs_found[i]['subject'],dic_names[pairs_found[i]['subject']], pairs_found[i]['predicate'], pairs_found[i]['object'], dic_names[pairs_found[i]['object']], primary_source ])
return edge_list
#used
def query_chatGPT4(customized_input):
    import openai  # requires the legacy openai<1.0 interface; set openai.api_key first
    message = [{"role": "user",
                "content": customized_input}]
response = openai.ChatCompletion.create(
#model="gpt-3.5-turbo",
model="gpt-4",
max_tokens=1000,
temperature=1.2,
messages = message)
#print(len(response.choices[0].message.content.split(" ")))
return(response.choices[0].message.content)
def query_chatGPT(customized_input):
    import openai  # requires the legacy openai<1.0 interface; set openai.api_key first
    message = [{"role": "user",
                "content": customized_input}]
response = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
#model="gpt-4",
max_tokens=1000,
temperature=1.2,
messages = message)
#print(len(response.choices[0].message.content.split(" ")))
return(response.choices[0].message.content)
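# Usage sketch for the two chat helpers above (hypothetical key; both rely on the
# legacy openai<1.0 ChatCompletion interface):
#   >>> import openai
#   >>> openai.api_key = "sk-..."
#   >>> query_chatGPT("Summarize the role of KRAS in cancer.")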
# to be removed
def query_KP_all(subject_ids, object_ids, subject_categories, object_categories, predicates, API_list, metaKG, APInames):
    """Query each candidate KP for edges between the given subjects/objects; return a dict of raw results and the predicates used per API."""
    # if no APIs are specified, select candidates from the meta knowledge graph
    if len(API_list) == 0:
        API_list = select_API(subject_categories, object_categories, metaKG)
    result_dict = {}
    result_concept = {}
    # Query individual KPs
    # Needs parallel query
    for API_sele in API_list:
        print(API_sele)
        if len(predicates) == 0:
            predicates_used = select_predicates_inKP(subject_categories, object_categories, API_sele, metaKG)
        else:
            predicates_used = predicates
        query_json = format_query_json(subject_ids, object_ids, subject_categories, object_categories, predicates_used)
        print(query_json)
        try:
            kg_output = query_KP(APInames[API_sele], query_json)
        except Exception:
            print("Connection Error")
            kg_output = None
        if kg_output is not None:
            if isinstance(kg_output, dict) and 'nodes' in kg_output.keys():
                if len(kg_output['nodes']) > 0:
                    print("Found: " + str(len(kg_output['edges'].keys())) + " edges in " + API_sele)
                    print(predicates_used)
                    result_concept[API_sele] = predicates_used
                    result_dict[API_sele] = kg_output
    return(result_dict, result_concept)
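# Hedged usage sketch: APInames comes from get_SmartAPI_Translator_KP_info(),
# and metaKG is assumed to be the meta knowledge graph object consumed by
# select_API; the identifiers and categories below are illustrative.
# >>> result_dict, result_concept = query_KP_all(
# ...     ['NCBIGene:672'], [], ['biolink:Gene'], ['biolink:Disease'],
# ...     ['biolink:gene_associated_with_condition'], [], metaKG, APInames)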
# to be removed
def parse_result_old( API_keys_sele, API_keys_Not_include, predicates_forAnalysis,result_dic):
    """Flatten per-API TRAPI edges into a DataFrame with columns API, Subject, Object, Predicate, and Infores; subclass_of edges are excluded."""
    Temp_APIkey = []
Temp_subject_key = []
Temp_object_key = []
Temp_predicate_key = []
Temp_infores_key = []
API_keys_forAnalysis = []
ALL_APIs_in_result = list(result_dic.keys())
print(ALL_APIs_in_result)
if len(API_keys_sele) == 0:
API_keys_forAnalysis = ALL_APIs_in_result
else:
API_keys_forAnalysis = list(set(ALL_APIs_in_result).intersection(set(API_keys_sele)))
if len(API_keys_Not_include) != 0:
API_keys_forAnalysis = list(set(API_keys_forAnalysis) - set(API_keys_Not_include))
print(API_keys_forAnalysis)
    for API_key in API_keys_forAnalysis:
        cur_API_outputKeys = list(result_dic[API_key]['edges'].keys())
        for curr_key in cur_API_outputKeys:
            curr_graph = result_dic[API_key]['edges'][curr_key]
            predicate = curr_graph['predicate']
            # exclude subclass_of edges
            if predicate != "biolink:subclass_of":
                infores = curr_graph['sources'][0]['resource_id']
                subject = curr_graph['subject']
                # drop the colon from CL CURIEs (e.g. "CL:0000001" -> "CL0000001")
                if subject.startswith("CL:"):
                    subject = "CL" + subject.split(":")[1]
                object = curr_graph['object']
                if object.startswith("CL:"):
                    object = "CL" + object.split(":")[1]
                Temp_APIkey.append(API_key)
                Temp_subject_key.append(subject)
                Temp_object_key.append(object)
                Temp_predicate_key.append(predicate)
                Temp_infores_key.append(infores)
Temp_result_df = pd.DataFrame({'API': Temp_APIkey,
'Subject': Temp_subject_key,
"Object":Temp_object_key,
"Predicate":Temp_predicate_key,
"Infores":Temp_infores_key})
Temp_result_df.drop_duplicates(inplace=True)
Temp_result_df = Temp_result_df.loc[Temp_result_df['API'].isin(API_keys_forAnalysis)]
if len(predicates_forAnalysis) != 0:
Temp_result_df = Temp_result_df.loc[Temp_result_df['Predicate'].isin(predicates_forAnalysis)]
return(Temp_result_df)
# to be removed
def ranking_result_by_predicates_object(Temp_result_df):
    """Count how often each node appears as an Object and return (node, count) tuples sorted by count, descending."""
    object_val_list = Temp_result_df['Object'].value_counts().index.tolist()
object_val_value = Temp_result_df['Object'].value_counts().values.tolist()
dic_rank = {}
for i in range(0,len(object_val_list)):
dic_rank[object_val_list[i]] = object_val_value[i]
sorted_dic = sorted(dic_rank.items(), key=lambda x: x[1], reverse=True)
return(sorted_dic)
# to be removed
def ranking_result_by_predicates_subject(Temp_result_df):
    """Count how often each node appears as a Subject and return (node, count) tuples sorted by count, descending."""
    subject_val_list = Temp_result_df['Subject'].value_counts().index.tolist()
    subject_val_value = Temp_result_df['Subject'].value_counts().values.tolist()
    dic_rank = {}
    for i in range(0,len(subject_val_list)):
        dic_rank[subject_val_list[i]] = subject_val_value[i]
sorted_dic = sorted(dic_rank.items(), key=lambda x: x[1], reverse=True)
return(sorted_dic)
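# Example (toy DataFrame; only the Subject and Object columns are used):
# >>> df = pd.DataFrame({'Subject': ['A', 'A', 'B'], 'Object': ['X', 'Y', 'X']})
# >>> ranking_result_by_predicates_object(df)
# [('X', 2), ('Y', 1)]
# >>> ranking_result_by_predicates_subject(df)
# [('A', 2), ('B', 1)]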
# to be removed
def get_ranking_by_predicates(sorted_dic, Temp_result_df, Top):
    """For each of the Top-ranked nodes, list the unique predicates of the edges it participates in."""
    dic_ranking = {}
    if Top > len(sorted_dic):
        Top = len(sorted_dic)
    for i in range(0, Top):
        sele_result = sorted_dic[i][0]
        edges = pd.concat([Temp_result_df.loc[Temp_result_df['Object'].isin([sele_result])],
                           Temp_result_df.loc[Temp_result_df['Subject'].isin([sele_result])]], axis=0)
        dic_ranking[sele_result] = list(set(edges['Predicate']))
    return(dic_ranking)
# to be removed
def get_ranking_by_infores(sorted_dic, Temp_result_df, Top):
    """For each of the Top-ranked nodes, list the unique knowledge sources (infores) of the edges it participates in."""
    dic_ranking = {}
    if Top > len(sorted_dic):
        Top = len(sorted_dic)
    for i in range(0, Top):
        sele_result = sorted_dic[i][0]
        edges = pd.concat([Temp_result_df.loc[Temp_result_df['Object'].isin([sele_result])],
                           Temp_result_df.loc[Temp_result_df['Subject'].isin([sele_result])]], axis=0)
        dic_ranking[sele_result] = list(set(edges['Infores']))
    return(dic_ranking)
# to be removed
def get_ranking_by_kp(sorted_dic, Temp_result_df, Top):
    """For each of the Top-ranked nodes, list the unique KP APIs that reported an edge involving it."""
    dic_ranking = {}
    if Top > len(sorted_dic):
        Top = len(sorted_dic)
    for i in range(0, Top):
        sele_result = sorted_dic[i][0]
        edges = pd.concat([Temp_result_df.loc[Temp_result_df['Object'].isin([sele_result])],
                           Temp_result_df.loc[Temp_result_df['Subject'].isin([sele_result])]], axis=0)
        dic_ranking[sele_result] = list(set(edges['API']))
    return(dic_ranking)
# to be revised
def connecting_two_dots_two_hops(sorted_dic1, sorted_dic):
    """Find intermediate nodes shared by two ranked lists and score each by the product of its normalized ranks (lower is better)."""
    intermediate = []
    normalized_rank = []
    rank1 = 0
    for i in sorted_dic1:
        gene1 = i[0]
        rank1 = rank1 + 1
        rank2 = 0
        for j in sorted_dic:
            gene2 = j[0]
            rank2 = rank2 + 1
            if gene1 == gene2:
                normalized_rank1 = rank1/(len(sorted_dic1) - 1)
                normalized_rank2 = rank2/(len(sorted_dic) - 1)
                new_order = normalized_rank1 * normalized_rank2
                intermediate.append(gene2)
                normalized_rank.append(new_order)
    res_df = pd.DataFrame({"node":intermediate, "normalized_rank":normalized_rank})
    res_df.sort_values(by=['normalized_rank'], inplace=True, ascending=True)
    res_df.reset_index(inplace=True, drop=True)
    return(res_df)
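# Worked example of the rank-product score: with
# sorted_dic1 = [('X', 9), ('Y', 5)] and sorted_dic = [('Y', 7), ('Z', 2)],
# the shared node 'Y' holds rank 2 of 2 in the first list and rank 1 of 2 in the
# second, so its score is (2/(2-1)) * (1/(2-1)) = 2.0; smaller products mean a
# node is ranked highly in both lists.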
# need revision
def find_path_by_two_ends(subject1_ids,
subject1_categories,
predicates1,
object_categories,
subject2_ids,
subject2_categories,
predicates2,
API_list1,
API_list2,
API1_keys_forAnalysis,
API1_keys_NotforAnalysis,
API2_keys_forAnalysis,
API2_keys_NotforAnalysis,
metaKG,
APInames
                         ):
    """Query KPs from both end nodes toward a shared object category and rank the intermediate nodes that connect them."""
    result_dic_node1, result_concept_node1 = query_KP_all(subject1_ids, [], subject1_categories, object_categories, predicates1, API_list1, metaKG, APInames)
result_dic_node2, result_concept_node2 = query_KP_all(subject2_ids, [], subject2_categories, object_categories, predicates2, API_list2, metaKG, APInames)
Temp_result_df1 = parse_result(API1_keys_forAnalysis,API1_keys_NotforAnalysis, result_concept_node1, result_dic_node1)
sorted_dic1 = ranking_result_by_predicates_object(Temp_result_df1)
dic_ranking1 = get_ranking_by_infores(sorted_dic1, Temp_result_df1, 20)
Temp_result_df2 = parse_result(API2_keys_forAnalysis,API2_keys_NotforAnalysis, result_concept_node2, result_dic_node2)
sorted_dic2 = ranking_result_by_predicates_object(Temp_result_df2)
dic_ranking2 = get_ranking_by_infores(sorted_dic2, Temp_result_df2, 20)
connection_nodes_df = connecting_two_dots_two_hops(sorted_dic1, sorted_dic2)
    # bundle all results into a dictionary
result = {"connection_nodes_df":connection_nodes_df,
"dic_ranking1":dic_ranking1,
"dic_ranking2":dic_ranking2,
"Temp_result_df1":Temp_result_df1,
"Temp_result_df2":Temp_result_df2,
"result_dic_node1":result_dic_node1,
"result_dic_node2":result_dic_node2,
"result_concept_node1":result_concept_node1,
"result_concept_node2":result_concept_node2}
return(result)
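# Hedged usage sketch (identifiers and categories are illustrative; metaKG and
# APInames are assumed to come from the KP metadata helpers in this module):
# >>> res = find_path_by_two_ends(
# ...     ['NCBIGene:672'], ['biolink:Gene'], [],
# ...     ['biolink:Gene'],
# ...     ['MONDO:0007254'], ['biolink:Disease'], [],
# ...     [], [], [], [], [], [], metaKG, APInames)
# >>> res['connection_nodes_df'].head()   # best-connected intermediate nodes first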
def select_result_to_analysis(sele_genes,Temp_result_df1, Temp_result_df2 ):
    """Subset both result DataFrames to edges whose Object is in sele_genes and stack them for plotting."""
    print("selected_path: "+ ';'.join(sele_genes))
for_plot = pd.concat([ Temp_result_df1.loc[Temp_result_df1['Object'].isin(sele_genes)],
Temp_result_df2.loc[Temp_result_df2['Object'].isin(sele_genes)]], axis=0)
return(for_plot)
def plot_graph_by_predicates(for_plot):
graph = nx.from_pandas_edgelist(for_plot,
source='Subject',
target='Object',
edge_attr=["Predicate"],
create_using=nx.MultiDiGraph)
graph_style = [{'selector': 'node[id]',
'style': {
'font-family': 'helvetica',
'font-size': '14px',
'text-valign': 'center',
'label': 'data(id)',
}},
{'selector': 'node',
'style': {
'background-color': 'lightblue',
'shape': 'round-rectangle',
'width': '5em',
}},
{'selector': 'edge[Predicate]',
'style': {
'label': 'data(Predicate)',
'font-size': '12px',
}},
{"selector": "edge.directed",
"style": {
"curve-style": "bezier",
"target-arrow-shape": "triangle",
}},
{"selector": "edge",
"style": {
"curve-style": "bezier",
}},
]
    widget = ipycytoscape.CytoscapeWidget()
    widget.graph.add_graph_from_networkx(graph)
    widget.set_layout(title='Path', nodeSpacing=80, edgeLengthVal=50)
    widget.set_style(graph_style)
    display(widget)
    return None
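# Example (hedged): a minimal for_plot DataFrame only needs Subject, Object, and
# the edge attribute used for labels.
# >>> demo = pd.DataFrame({'Subject': ['NCBIGene:672'], 'Object': ['MONDO:0007254'],
# ...                      'Predicate': ['biolink:gene_associated_with_condition']})
# >>> plot_graph_by_predicates(demo)   # renders an ipycytoscape widget in a notebook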
def plot_graph_by_infores(for_plot):
graph = nx.from_pandas_edgelist(for_plot,
source='Subject',
target='Object',
edge_attr=["Infores"],
create_using=nx.MultiDiGraph)
graph_style = [{'selector': 'node[id]',
'style': {
'font-family': 'helvetica',
'font-size': '14px',
'text-valign': 'center',
'label': 'data(id)',
}},
{'selector': 'node',
'style': {
'background-color': 'lightblue',
'shape': 'round-rectangle',
'width': '5em',
}},
{'selector': 'edge[Infores]',
'style': {
'label': 'data(Infores)',
'font-size': '12px',
}},
{"selector": "edge.directed",
"style": {
"curve-style": "bezier",
"target-arrow-shape": "triangle",
}},
{"selector": "edge",
"style": {
"curve-style": "bezier",
}},
]
    widget = ipycytoscape.CytoscapeWidget()
    widget.graph.add_graph_from_networkx(graph)
    widget.set_layout(title='Path', nodeSpacing=80, edgeLengthVal=50)
    widget.set_style(graph_style)
    display(widget)
    return None
def plot_graph_by_API(for_plot):
graph = nx.from_pandas_edgelist(for_plot,
source='Subject',
target='Object',
edge_attr=["API"],
create_using=nx.MultiDiGraph)
graph_style = [{'selector': 'node[id]',
'style': {
'font-family': 'helvetica',
'font-size': '14px',
'text-valign': 'center',
'label': 'data(id)',
}},
{'selector': 'node',
'style': {
'background-color': 'lightblue',
'shape': 'round-rectangle',
'width': '5em',
}},
{'selector': 'edge[API]',
'style': {
'label': 'data(API)',
'font-size': '12px',
}},
{"selector": "edge.directed",
"style": {
"curve-style": "bezier",
"target-arrow-shape": "triangle",
}},
{"selector": "edge",
"style": {
"curve-style": "bezier",
}},
]
    widget = ipycytoscape.CytoscapeWidget()
    widget.graph.add_graph_from_networkx(graph)
    widget.set_layout(title='Path', nodeSpacing=80, edgeLengthVal=50)
    widget.set_style(graph_style)
    display(widget)
    return None
def load_json_template():
    """Return a minimal one-hop TRAPI query_graph template with placeholder categories and predicates."""
    query_json_temp = {
"message": {
"query_graph": {
"nodes": {
"n0": {
"ids":[],
"categories":["biolink:category"]
},
"n1": {
"categories":["biolink:category"]
}
},
"edges": {
"e1": {
"subject": "n0",
"object": "n1",
"predicates": ["biolink:predicates"]
}
}
}
}
}
return(query_json_temp)
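# Example: fill in the template before sending it to a KP.
# >>> q = load_json_template()
# >>> q['message']['query_graph']['nodes']['n0']['ids'] = ['NCBIGene:672']
# >>> q['message']['query_graph']['nodes']['n0']['categories'] = ['biolink:Gene']
# >>> q['message']['query_graph']['nodes']['n1']['categories'] = ['biolink:Disease']
# >>> q['message']['query_graph']['edges']['e1']['predicates'] = ['biolink:gene_associated_with_condition']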
def extract_json(txt):
    """Extract and parse the first valid JSON object embedded in a text string; return None if no parseable object is found."""
    import json
    lft = txt.find('{')
    while lft != -1:
        rgt = txt.find('}', lft+1)
        while rgt != -1:
            substr = txt[lft:rgt+1]
            try:
                return json.loads(substr)
            except json.JSONDecodeError:
                rgt = txt.find('}', rgt+1)
        lft = txt.find('{', lft+1)
    return None
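# Example: pulling the JSON object out of surrounding prose (e.g. an LLM reply).
# >>> extract_json('Here is the query: {"message": {}} as requested.')
# {'message': {}}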
def TRAPI_json_validation(query_json_cur_clean, ALL_predicates, ALL_categories):
    """Validate the structure of a one-hop TRAPI query and check that its predicates and categories exist in the KG's meta knowledge graph."""
    if 'message' not in query_json_cur_clean.keys():
        print('message is missing')
else:
if 'query_graph' not in query_json_cur_clean['message'].keys():
print('query_graph is missing')
else:
if 'edges' not in query_json_cur_clean['message']['query_graph'].keys():
print('edges is missing')
else:
if 'e1' not in query_json_cur_clean['message']['query_graph']['edges'].keys():
print('e1 is missing')
else:
if 'predicates' not in query_json_cur_clean['message']['query_graph']['edges']['e1'].keys():
print('predicates is missing')
else:
if len(set(query_json_cur_clean['message']['query_graph']['edges']['e1']['predicates']).intersection(set(ALL_predicates))) == 0:
                            print('predicates are not in the KG')
else:
print("Predicates ok!")
if 'nodes' not in query_json_cur_clean['message']['query_graph'].keys():
print('nodes is missing')
else:
if 'n0' not in query_json_cur_clean['message']['query_graph']['nodes'].keys():
print('n0 is missing')
else:
if 'categories' not in query_json_cur_clean['message']['query_graph']['nodes']['n0'].keys():
print('categories is missing')
else:
if len(set(query_json_cur_clean['message']['query_graph']['nodes']['n0']['categories']).intersection(set(ALL_categories))) == 0:
                            print('categories are not in the KG')
else:
print("node0 category OK!")
if 'n1' not in query_json_cur_clean['message']['query_graph']['nodes'].keys():
print('n1 is missing')
else:
if 'categories' not in query_json_cur_clean['message']['query_graph']['nodes']['n1'].keys():
print('categories is missing')
else:
if len(set(query_json_cur_clean['message']['query_graph']['nodes']['n1']['categories']).intersection(set(ALL_categories))) == 0:
                            print('categories are not in the KG')
else:
print("node1 category OK!")
return()
def format_id(query_json_cur_clean):
    """Resolve any free-text ids on nodes n0 and n1 of a TRAPI query_graph to CURIEs via get_curie."""
    if 'ids' in query_json_cur_clean['message']['query_graph']['nodes']['n0'].keys():
input_nodes = query_json_cur_clean['message']['query_graph']['nodes']['n0']['ids']
input_node1_id = []
if len(input_nodes) > 0:
for i in input_nodes:
input_node1_id.append(get_curie(i))
print(input_node1_id)
query_json_cur_clean['message']['query_graph']['nodes']['n0']['ids'] = input_node1_id
if 'ids' in query_json_cur_clean['message']['query_graph']['nodes']['n1'].keys():
input_nodes2 = query_json_cur_clean['message']['query_graph']['nodes']['n1']['ids']
input_node2_id = []
if len(input_nodes2) > 0:
for i in input_nodes2:
input_node2_id.append(get_curie(i))
print(input_node2_id)
query_json_cur_clean['message']['query_graph']['nodes']['n1']['ids'] = input_node2_id
return(query_json_cur_clean)
def query_chatGPT(customized_input, model="gpt-3.5-turbo"):
    """Send a single-turn prompt to the OpenAI chat completions endpoint and return the response text. Requires the openai package and the OPENAI_API_KEY environment variable."""
    import openai
    message = [{"role": "user", "content": customized_input}]
    response = openai.chat.completions.create(
        model=model,
        max_tokens=1000,
        temperature=0.3,
        messages=message,
    )
    return response.choices[0].message.content
def query_chatGPT4(customized_input):
return query_chatGPT(customized_input, "gpt-4")
def ask_chatGPT(prompt_text):
response = query_chatGPT(prompt_text)
return response
def ask_chatGPT4(prompt_text):
response = query_chatGPT4(prompt_text)
return response
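# Example (hedged; requires the openai package, network access, and the
# OPENAI_API_KEY environment variable):
# >>> ask_chatGPT('In one sentence, what does the biolink predicate "treats" mean?')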
def find_similar_predicates(query_json_cur_clean, ALL_predicates):
    """Ask the LLM which predicates in the KG are similar to the predicates in the current query."""
    current_predicates = query_json_cur_clean['message']['query_graph']['edges']['e1']['predicates']
output = ask_chatGPT4("The predicates in the KG are: " + ','.join(ALL_predicates) + ". The predicates in the current query are: " + ','.join(current_predicates) + ". What predicates are similar to the predicates in the current query?")
return(output)
def find_similar_category(query_json_cur_clean, ALL_categories):
    """Ask the LLM which categories in the KG are similar to the categories in the current query."""
    current_categories1 = query_json_cur_clean['message']['query_graph']['nodes']['n0']['categories']
    current_categories2 = query_json_cur_clean['message']['query_graph']['nodes']['n1']['categories']
    output = ask_chatGPT4("The categories in the KG are: " + ','.join(ALL_categories) + ". The categories in the current query are: " + ','.join(current_categories1 + current_categories2) + ". What categories are similar to the categories in the current query?")
    return(output)
def visulize_path(input_node1_id, intermediate_node, input_node3_id, result, result2):
    """Plot the two-hop path input_node1 -- intermediate_node -- input_node3 with predicate::source edge labels, and return the underlying edge DataFrame."""
    forplot_subject = []
forplot_object = []
forplot_predicate = []
forplot_Infores = []
for k in result.keys():
if (result[k]['object'] == intermediate_node and result[k]['subject'] == input_node1_id) or (result[k]['subject'] == intermediate_node and result[k]['object'] == input_node1_id) :
forplot_subject.append(result[k]['subject'])
forplot_object.append(result[k]['object'])
cur_sources_list = []
sources = result[k]['sources']
for s in sources:
cur_source = s['resource_id']
cur_sources_list.append(cur_source)
forplot_Infores.append(cur_sources_list)
forplot_predicate.append(result[k]['predicate'].split(':')[1] + "::" + cur_sources_list[0])
for k in result2.keys():
if (result2[k]['object'] == intermediate_node and result2[k]['subject'] ==input_node3_id ) or (result2[k]['subject'] == intermediate_node and result2[k]['object'] ==input_node3_id) :
forplot_subject.append(result2[k]['subject'])
forplot_object.append(result2[k]['object'])
cur_sources_list = []
sources = result2[k]['sources']
for s in sources:
cur_source = s['resource_id']
cur_sources_list.append(cur_source)
forplot_Infores.append(cur_sources_list)
forplot_predicate.append(result2[k]['predicate'].split(':')[1] + "::" + cur_sources_list[0])
forplot = pd.DataFrame({"Subject":forplot_subject, "Object":forplot_object, "Predicates":forplot_predicate})
# get preferred name
subject_name = list(forplot["Subject"] )
object_name = list(forplot["Object"])
dic_id_map = ID_convert_to_preferred_name_nodeNormalizer(subject_name+ object_name)
new_subject_name = []
for item in subject_name:
if item in dic_id_map:
new_subject_name.append(dic_id_map[item])
else:
new_subject_name.append(item)
new_object_name = []
for item in object_name:
if item in dic_id_map:
new_object_name.append(dic_id_map[item])
else:
new_object_name.append(item)
forplot['Subject_name'] = new_subject_name
forplot['Object_name'] = new_object_name
    # reset the index so the positional checks below line up with the index labels used by drop()
    forplot = forplot.drop_duplicates().reset_index(drop=True)
    # build helper columns check1 = Subject::Predicate::Object and check2 = Object::Predicate::Subject
    forplot['check1'] = forplot['Subject_name'] + '::' + forplot['Predicates'] + '::' + forplot['Object_name']
    forplot['check2'] = forplot['Object_name'] + '::' + forplot['Predicates'] + '::' + forplot['Subject_name']
    # drop one edge from each symmetric duplicate pair (same nodes and predicate, opposite direction)
    to_be_dropped = []
    check1_list = list(forplot['check1'].values)
    check2_list = list(forplot['check2'].values)
    for i in range(0,len(check1_list)-1):
        for j in range(i, len(check1_list)):
            if check1_list[i] == check2_list[j] and check2_list[i] == check1_list[j]:
                to_be_dropped.append(i)
                break
    forplot = forplot.drop(to_be_dropped, axis=0)
# remove the check1 and check2 columns
forplot = forplot.drop(['check1', 'check2'], axis=1)
forplot = forplot.reset_index(drop=True)
graph = nx.from_pandas_edgelist(forplot, source='Subject_name', target='Object_name', edge_attr=[ 'Predicates'], create_using=nx.MultiGraph)
graph_style = [{'selector': 'node[id]',
'style': {
'font-family': 'Arial',
'font-size': '12px',
'text-valign': 'center',
'label': 'data(id)',
}},
{'selector': 'node',
'style': {
'background-color': 'lightblue',
'shape': 'round-rectangle',
'width': '3em',
}},
{'selector': 'edge[Predicates]',
'style': {
'label': 'data(Predicates)',
'font-size': '8px',
}},
{"selector": "edge.directed",
"style": {
"curve-style": "bezier",
"target-arrow-shape": "triangle",
}},
{"selector": "edge",
"style": {
"curve-style": "bezier",
}},
]
pathgraph = ipycytoscape.CytoscapeWidget()
pathgraph.graph.add_graph_from_networkx(graph)
pathgraph.set_layout(title='Path', nodeSpacing=80, edgeLengthVal=50, )
pathgraph.set_style(graph_style)
display(pathgraph)
return(forplot)
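# Hedged usage sketch: result and result2 are edge dictionaries from two one-hop
# queries that share the intermediate node (the identifiers are illustrative).
# >>> forplot = visulize_path('NCBIGene:672', 'NCBIGene:7157', 'MONDO:0007254', result, result2)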
def get_similar_category(query_json_cur_clean, KG_category):
    """Ask the LLM for KG categories similar to those in the query, then combine them with the query's own valid categories and the full KG category list."""
    similar_category_text = find_similar_category(query_json_cur_clean, KG_category)
words = similar_category_text.split(' ')
similar_category = []
for word in words:
if word.startswith('biolink:') :
potential_similar_category = word.strip(',').strip(')')
if potential_similar_category in KG_category:
similar_category.append(potential_similar_category)
for category in query_json_cur_clean['message']['query_graph']['nodes']['n0']['categories']:
if category in KG_category:
similar_category.append(category)
for category in query_json_cur_clean['message']['query_graph']['nodes']['n1']['categories']:
if category in KG_category:
similar_category.append(category)
similar_category = similar_category + KG_category
return similar_category
def get_similar_predicate(query_json_cur_clean, All_predicates):
    """Ask the LLM for KG predicates similar to those in the query and return them together with the query's own predicates, deduplicated."""
    similar_predicate_text = find_similar_predicates(query_json_cur_clean, All_predicates)
    lines = similar_predicate_text.split('\n')
    words = []
    for line in lines:
        cur_words = line.split(' ')
        words = words + cur_words
    similar_predicate = []
    for word in words:
        if word.startswith('biolink:'):
            similar_predicate.append(word)
    for predicate in query_json_cur_clean['message']['query_graph']['edges']['e1']['predicates']:
        similar_predicate.append(predicate)
    similar_predicate = list(set(similar_predicate))
    return similar_predicate
# Self-test for ID_convert_to_preferred_name_nodeNormalizer() and get_curie().
# It runs only when this module is executed directly, so importing the module is unaffected.
if __name__ == "__main__":
    # Check the function on some identifier lists.
    result1 = ID_convert_to_preferred_name_nodeNormalizer([])
    if result1 != {}:
        raise RuntimeError("ID_convert_to_preferred_name_nodeNormalizer([]) should equal {}")
result2 = ID_convert_to_preferred_name_nodeNormalizer(['UBERON:0000201'])
if result2 != {'UBERON:0000201': 'endothelial blood brain barrier'}:
raise RuntimeError(f"Incorrect result: {result2}")
result3 = ID_convert_to_preferred_name_nodeNormalizer(['MESH:D005183', 'UBERON:0000201'])
if result3 != {
'UBERON:0000201': 'endothelial blood brain barrier',
'MESH:D005183': 'Failure to Thrive'
}:
raise RuntimeError(f"Incorrect result: {result3}")
result4 = ID_convert_to_preferred_name_nodeNormalizer(['CHEBI:45863', 'PUBCHEM.COMPOUND:31703'])
if result4 != {
'CHEBI:45863': 'Paclitaxel',
'PUBCHEM.COMPOUND:31703': 'Doxorubicin'
}:
raise RuntimeError(f"Incorrect result: {result4}")
result5 = ID_convert_to_preferred_name_nodeNormalizer([
'RXCUI:258355',
'PUBCHEM.COMPOUND:4261',
'PUBCHEM.COMPOUND:49850262',
'PUBCHEM.COMPOUND:50992434',
'PUBCHEM.COMPOUND:135539077',
'RXCUI:1430268',
'CHEBI:90942',
'CHEBI:110200',
'CHEBI:63996',
'CHEBI:6716',
'CHEBI:14222',
'PUBCHEM.COMPOUND:25141092',
'PUBCHEM.COMPOUND:36314',
'PUBCHEM.COMPOUND:31703',
'PUBCHEM.COMPOUND:3385',
'PUBCHEM.COMPOUND:126941',
'PUBCHEM.COMPOUND:135410875',
'PUBCHEM.COMPOUND:148124',
'PUBCHEM.COMPOUND:5311497',
'PUBCHEM.COMPOUND:107935',
'PUBCHEM.COMPOUND:387447',
'PUBCHEM.COMPOUND:518605',
'PUBCHEM.COMPOUND:5282165',
'PUBCHEM.COMPOUND:5591',
'PUBCHEM.COMPOUND:1322',
'PUBCHEM.COMPOUND:95170',
'PUBCHEM.COMPOUND:2286',
'PUBCHEM.COMPOUND:1242',
'PUBCHEM.COMPOUND:8461',
'PUBCHEM.COMPOUND:11813',
'PUBCHEM.COMPOUND:1530',
'PUBCHEM.COMPOUND:11831',
'PUBCHEM.COMPOUND:7577',
'PUBCHEM.COMPOUND:73864',
'PUBCHEM.COMPOUND:4521392',
'PUBCHEM.COMPOUND:47289',
'PUBCHEM.COMPOUND:1983',
'PUBCHEM.COMPOUND:12035',
'PUBCHEM.COMPOUND:449459',
'PUBCHEM.COMPOUND:186907',
'PUBCHEM.COMPOUND:449171',
'PUBCHEM.COMPOUND:7290',
'PUBCHEM.COMPOUND:2236',
'PUBCHEM.COMPOUND:5359596',
'PUBCHEM.COMPOUND:6918483',
'PUBCHEM.COMPOUND:23667548',
'PUBCHEM.COMPOUND:2256',
'PUBCHEM.COMPOUND:62306',
'PUBCHEM.COMPOUND:2336',
'PUBCHEM.COMPOUND:6623',
'PUBCHEM.COMPOUND:66166',
'PUBCHEM.COMPOUND:12111',
'PUBCHEM.COMPOUND:6626',
'PUBCHEM.COMPOUND:16682746',
'PUBCHEM.COMPOUND:5360373',
'PUBCHEM.COMPOUND:7961',
'PUBCHEM.COMPOUND:2478',
'PUBCHEM.COMPOUND:264',
'PUBCHEM.COMPOUND:23973',
'PUBCHEM.COMPOUND:2519',
'PUBCHEM.COMPOUND:5280453',
'PUBCHEM.COMPOUND:5943',
'PUBCHEM.COMPOUND:1203',
'PUBCHEM.COMPOUND:135411',
'PUBCHEM.COMPOUND:154413',
'PUBCHEM.COMPOUND:40470',
'PUBCHEM.COMPOUND:2730',
'PUBCHEM.COMPOUND:29131',
'PUBCHEM.COMPOUND:9171',
'PUBCHEM.COMPOUND:5702198',
'PUBCHEM.COMPOUND:2797',
'PUBCHEM.COMPOUND:24463',
'PUBCHEM.COMPOUND:323',
'PUBCHEM.COMPOUND:40585',
'PUBCHEM.COMPOUND:451668',
'PUBCHEM.COMPOUND:40024',
'PUBCHEM.COMPOUND:3017',
'PUBCHEM.COMPOUND:3026',
'PUBCHEM.COMPOUND:5371560',
'PUBCHEM.COMPOUND:969491',
'PUBCHEM.COMPOUND:8343',
'PUBCHEM.COMPOUND:5921',
'PUBCHEM.COMPOUND:448537',
'PUBCHEM.COMPOUND:6124',
'PUBCHEM.COMPOUND:8346',
'PUBCHEM.COMPOUND:5757',
'PUBCHEM.COMPOUND:702',
'PUBCHEM.COMPOUND:5991',
'PUBCHEM.COMPOUND:11',
'PUBCHEM.COMPOUND:3346',
'PUBCHEM.COMPOUND:3397',
'PUBCHEM.COMPOUND:135398658',
'PUBCHEM.COMPOUND:14101198',
'PUBCHEM.COMPOUND:104741',
'PUBCHEM.COMPOUND:8029',
'PUBCHEM.COMPOUND:60750',
'PUBCHEM.COMPOUND:5280961',
'PUBCHEM.COMPOUND:9898639',
'PUBCHEM.COMPOUND:637566',
'PUBCHEM.COMPOUND:3474',
'PUBCHEM.COMPOUND:23985',
'PUBCHEM.COMPOUND:3616',
'PUBCHEM.COMPOUND:42890',
'PUBCHEM.COMPOUND:3715',
'PUBCHEM.COMPOUND:6912226',
'PUBCHEM.COMPOUND:3779',
'PUBCHEM.COMPOUND:9812710',
'PUBCHEM.COMPOUND:46907787',
'PUBCHEM.COMPOUND:25195294',
'PUBCHEM.COMPOUND:896',
'PUBCHEM.COMPOUND:10836',
'PUBCHEM.COMPOUND:4098',
'PUBCHEM.COMPOUND:13709',
'PUBCHEM.COMPOUND:1349907',
'PUBCHEM.COMPOUND:1674',
'PUBCHEM.COMPOUND:4156',
'PUBCHEM.COMPOUND:7456',
'PUBCHEM.COMPOUND:6010',
'PUBCHEM.COMPOUND:8575',
'PUBCHEM.COMPOUND:4449',
'PUBCHEM.COMPOUND:4122',
'PUBCHEM.COMPOUND:442530',
'PUBCHEM.COMPOUND:9887053',
'PUBCHEM.COMPOUND:991',
'PUBCHEM.COMPOUND:854',
'PUBCHEM.COMPOUND:74483',
'PUBCHEM.COMPOUND:9554',
'PUBCHEM.COMPOUND:5794',
'PUBCHEM.COMPOUND:5694',
'PUBCHEM.COMPOUND:15032',
'PUBCHEM.COMPOUND:657298',
'PUBCHEM.COMPOUND:14942',
'PUBCHEM.COMPOUND:5280343',
'PUBCHEM.COMPOUND:5035',
'PUBCHEM.COMPOUND:6758',
'PUBCHEM.COMPOUND:5186',
'PUBCHEM.COMPOUND:1091',
'PUBCHEM.COMPOUND:133538',
'PUBCHEM.COMPOUND:7305',
'PUBCHEM.COMPOUND:5323',
'PUBCHEM.COMPOUND:5329102',
'PUBCHEM.COMPOUND:5284461',
'PUBCHEM.COMPOUND:24857286',
'PUBCHEM.COMPOUND:6410',
'PUBCHEM.COMPOUND:5995',
'PUBCHEM.COMPOUND:6618',
'PUBCHEM.COMPOUND:15625',
'PUBCHEM.COMPOUND:27924',
'PUBCHEM.COMPOUND:2723949',
'PUBCHEM.COMPOUND:26042',
'PUBCHEM.COMPOUND:60700',
'PUBCHEM.COMPOUND:444795',
'PUBCHEM.COMPOUND:6575',
'PUBCHEM.COMPOUND:5564',
'PUBCHEM.COMPOUND:11089',
'PUBCHEM.COMPOUND:65411',
'PUBCHEM.COMPOUND:23964',
'PUBCHEM.COMPOUND:3121',
'PUBCHEM.COMPOUND:14969',
'PUBCHEM.COMPOUND:39676',
'PUBCHEM.COMPOUND:2116',
'MESH:D014874',
'PUBCHEM.COMPOUND:23994',
'PUBCHEM.COMPOUND:11626560',
'PUBCHEM.COMPOUND:57379345',
'PUBCHEM.COMPOUND:5328940',
'PUBCHEM.COMPOUND:49846579',
'PUBCHEM.COMPOUND:25134326',
'PUBCHEM.COMPOUND:9829523',
'PUBCHEM.COMPOUND:25183872',
'PUBCHEM.COMPOUND:49806720',
'PUBCHEM.COMPOUND:71731823',
'PUBCHEM.COMPOUND:5311',
'PUBCHEM.COMPOUND:5281855',
'PUBCHEM.COMPOUND:5291',
'PUBCHEM.COMPOUND:135430309',
'PUBCHEM.COMPOUND:447077',
'PUBCHEM.COMPOUND:5284616',
'PUBCHEM.COMPOUND:11707110',
'PUBCHEM.COMPOUND:10184653',
'TTD.DRUG:D0Z1OR',
'UNII:334895S862',
'PUBCHEM.COMPOUND:176870',
'PUBCHEM.COMPOUND:6049',
'PUBCHEM.COMPOUND:440473',
'PUBCHEM.COMPOUND:175',
'PUBCHEM.COMPOUND:457193',
'PUBCHEM.COMPOUND:1548943',
'PUBCHEM.COMPOUND:36462',
'PUBCHEM.COMPOUND:5281672',
'PUBCHEM.COMPOUND:6057',
'PUBCHEM.COMPOUND:78165',
'PUBCHEM.COMPOUND:6518',
'PUBCHEM.COMPOUND:5360696',
'PUBCHEM.COMPOUND:6253',
'PUBCHEM.COMPOUND:30323',
'PUBCHEM.COMPOUND:2733526',
'PUBCHEM.COMPOUND:3108',
'PUBCHEM.COMPOUND:10366136',
'PUBCHEM.COMPOUND:3220',
'PUBCHEM.COMPOUND:7405',
'PUBCHEM.COMPOUND:656894',
'PUBCHEM.COMPOUND:10607',
'PUBCHEM.COMPOUND:135398748',
'PUBCHEM.COMPOUND:8771',
'PUBCHEM.COMPOUND:679',
'PUBCHEM.COMPOUND:6918638',
'PUBCHEM.COMPOUND:44259',
'PUBCHEM.COMPOUND:444732',
'PUBCHEM.COMPOUND:6029',
'PUBCHEM.COMPOUND:34755',
'PUBCHEM.COMPOUND:5284627',
'PUBCHEM.COMPOUND:5957',
'PUBCHEM.COMPOUND:2353',
'PUBCHEM.COMPOUND:72277',
'PUBCHEM.COMPOUND:46191454',
'PUBCHEM.COMPOUND:8988',
'PUBCHEM.COMPOUND:977',
'DRUGBANK:DB12182',
'PUBCHEM.COMPOUND:23725625',
'PUBCHEM.COMPOUND:4212',
'PUBCHEM.COMPOUND:91766',
'PUBCHEM.COMPOUND:65359',
'PUBCHEM.COMPOUND:441923',
'PUBCHEM.COMPOUND:445154',
'PUBCHEM.COMPOUND:65064',
'PUBCHEM.COMPOUND:11338033',
'PUBCHEM.COMPOUND:9444',
'PUBCHEM.COMPOUND:30751'
])
if result5 != {
'RXCUI:258355': 'Rapamune', 'PUBCHEM.COMPOUND:4261': 'Entinostat', 'PUBCHEM.COMPOUND:49850262': 'Tubastatin A',
'PUBCHEM.COMPOUND:50992434': 'Trametinib dimethyl sulfoxide', 'PUBCHEM.COMPOUND:135539077': 'Luminespib',
'RXCUI:1430268': 'Gilotrif', 'CHEBI:90942': 'Ixazomib', 'CHEBI:110200': 'CHEBI:110200',
'CHEBI:63996': 'SKF 83959 hydrobromide', 'CHEBI:6716': 'Medroxyprogesterone acetate',
'CHEBI:14222': 'CHEBI:14222', 'PUBCHEM.COMPOUND:25141092': 'Entrectinib',
'PUBCHEM.COMPOUND:36314': 'Paclitaxel', 'PUBCHEM.COMPOUND:31703': 'Doxorubicin',
'PUBCHEM.COMPOUND:3385': 'Fluorouracil', 'PUBCHEM.COMPOUND:126941': 'Methotrexate',
'PUBCHEM.COMPOUND:135410875': 'Pemetrexed', 'PUBCHEM.COMPOUND:148124': 'Docetaxel',
'PUBCHEM.COMPOUND:5311497': 'Vinorelbine', 'PUBCHEM.COMPOUND:107935': 'Deguelin',
'PUBCHEM.COMPOUND:387447': 'Bortezomib',
'PUBCHEM.COMPOUND:518605': '2,4,6,8,9,10-Hexaoxa-1,3,5,7-tetraarsatricyclo[3.3.1.13,7]decane',
'PUBCHEM.COMPOUND:5282165': 'Josamycin', 'PUBCHEM.COMPOUND:5591': 'Troglitazone',
'PUBCHEM.COMPOUND:1322': '1,2-Dimethylhydrazine',
'PUBCHEM.COMPOUND:95170': "2,2',4,4'-Tetrabromodiphenyl ether",
'PUBCHEM.COMPOUND:2286': 'Bisphenol A diglycidyl ether',
'PUBCHEM.COMPOUND:1242': '2,3,4,5-Tetrahydro-7,8-dihydroxy-1-phenyl-1H-3-benzazepine',
'PUBCHEM.COMPOUND:8461': '2,4-Dinitrotoluene', 'PUBCHEM.COMPOUND:11813': '2,6-Dinitrotoluene',
'PUBCHEM.COMPOUND:1530': '2-Amino-1-methyl-6-phenylimidazo(4,5-b)pyridine',
'PUBCHEM.COMPOUND:11831': '2-Nitrofluorene', 'PUBCHEM.COMPOUND:7577': "4,4'-Methylenedianiline",
'PUBCHEM.COMPOUND:73864': 'Bisphenol AF',
'PUBCHEM.COMPOUND:4521392': '4-(4-(benzo[d][1,3]dioxol-5-yl)-5-(pyridin-2-yl)-1H-imidazol-2-yl)benzamide',
'PUBCHEM.COMPOUND:47289': '4-(N-Nitrosomethylamino)-1-(3-pyridyl)-1-butanone',
'PUBCHEM.COMPOUND:1983': 'Acetaminophen', 'PUBCHEM.COMPOUND:12035': 'Acetylcysteine',
'PUBCHEM.COMPOUND:449459': 'Afimoxifene', 'PUBCHEM.COMPOUND:186907': 'Aflatoxin B1',
'PUBCHEM.COMPOUND:449171': 'Alitretinoin', 'PUBCHEM.COMPOUND:7290': '3-Chloro-1,2-propanediol',
'PUBCHEM.COMPOUND:2236': 'Aristolochic acid', 'PUBCHEM.COMPOUND:5359596': 'Arsenic',
'PUBCHEM.COMPOUND:6918483': 'Artenimol', 'PUBCHEM.COMPOUND:23667548': 'Sodium Ascorbate',
'PUBCHEM.COMPOUND:2256': 'Atrazine', 'PUBCHEM.COMPOUND:62306': 'Benoxacor',
'PUBCHEM.COMPOUND:2336': 'Benzo[a]pyrene', 'PUBCHEM.COMPOUND:6623': 'Bisphenol A',
'PUBCHEM.COMPOUND:66166': 'Bisphenol B', 'PUBCHEM.COMPOUND:12111': "4,4'-Methylenediphenol",
'PUBCHEM.COMPOUND:6626': "4,4'-Sulfonyldiphenol", 'PUBCHEM.COMPOUND:16682746': 'Tributyltin oxide',
'PUBCHEM.COMPOUND:5360373': 'Bleomycin', 'PUBCHEM.COMPOUND:7961': 'Bromobenzene',
'PUBCHEM.COMPOUND:2478': 'Busulfan', 'PUBCHEM.COMPOUND:264': 'Butyric Acid',
'PUBCHEM.COMPOUND:23973': 'Cadmium', 'PUBCHEM.COMPOUND:2519': 'Caffeine',
'PUBCHEM.COMPOUND:5280453': 'Calcitriol', 'PUBCHEM.COMPOUND:5943': 'Carbon Tetrachloride',
'PUBCHEM.COMPOUND:1203': '2-(3,4-Dihydroxyphenyl)chroman-3,5,7-triol',
'PUBCHEM.COMPOUND:135411': '6-[3-(1-Adamantyl)-4-hydroxyphenyl]-2-naphthalenecarboxylic Acid',
'PUBCHEM.COMPOUND:154413': 'Centchroman', 'PUBCHEM.COMPOUND:40470': "2,2',3,3',4-Pentachlorobiphenyl",
'PUBCHEM.COMPOUND:2730': 'Chlorpyrifos', 'PUBCHEM.COMPOUND:29131': 'Chromium(6+)',
'PUBCHEM.COMPOUND:9171': 'Chrysene', 'PUBCHEM.COMPOUND:5702198': 'azane;dichloroplatinum',
'PUBCHEM.COMPOUND:2797': 'Clofibric acid', 'PUBCHEM.COMPOUND:24463': 'Copper sulfate pentahydrate',
'PUBCHEM.COMPOUND:323': 'Coumarin', 'PUBCHEM.COMPOUND:40585': 'Deltamethrin',
'PUBCHEM.COMPOUND:451668': 'Decitabine', 'PUBCHEM.COMPOUND:40024': 'Deoxynivalenol',
'PUBCHEM.COMPOUND:3017': 'Diazinon', 'PUBCHEM.COMPOUND:3026': 'Dibutyl Phthalate',
'PUBCHEM.COMPOUND:5371560': 'Dicrotophos', 'PUBCHEM.COMPOUND:969491': 'Dieldrin',
'PUBCHEM.COMPOUND:8343': 'Bis(2-ethylhexyl) phthalate', 'PUBCHEM.COMPOUND:5921': 'N-Nitrosodiethylamine',
'PUBCHEM.COMPOUND:448537': 'Diethylstilbestrol', 'PUBCHEM.COMPOUND:6124': 'N-Nitrosodimethylamine',
'PUBCHEM.COMPOUND:8346': 'Dioctyl phthalate', 'PUBCHEM.COMPOUND:5757': 'Estradiol',
'PUBCHEM.COMPOUND:702': 'Ethanol', 'PUBCHEM.COMPOUND:5991': 'Ethinyl estradiol',
'PUBCHEM.COMPOUND:11': '1,2-Dichloroethane', 'PUBCHEM.COMPOUND:3346': 'Fenthion',
'PUBCHEM.COMPOUND:3397': 'Flutamide', 'PUBCHEM.COMPOUND:135398658': 'Folic Acid',
'PUBCHEM.COMPOUND:14101198': '[(1R)-1-[(3S,6S,9S,12S,18R,21S,22R)-21-acetamido-18-benzyl-3-[(1R)-1-methoxyethyl]-4,9,10,12,16-pentamethyl-15-methylidene-2,5,8,11,14,17,20-heptaoxo-22-propan-2-yl-1,19-dioxa-4,7,10,13,16-pentazacyclodocos-6-yl]-2-methylpropyl] (2S,3R)-3-hydroxy-4-methyl-2-(propanoylamino)pentanoate',
'PUBCHEM.COMPOUND:104741': 'Fulvestrant', 'PUBCHEM.COMPOUND:8029': 'Furan',
'PUBCHEM.COMPOUND:60750': 'Gemcitabine', 'PUBCHEM.COMPOUND:5280961': 'Genistein',
'PUBCHEM.COMPOUND:9898639': 'Gentamicinsulfate salt', 'PUBCHEM.COMPOUND:637566': 'Geraniol',
'PUBCHEM.COMPOUND:3474': 'Glafenine', 'PUBCHEM.COMPOUND:23985': 'Gold',
'PUBCHEM.COMPOUND:3616': 'Hexamethylene bisacetamide', 'PUBCHEM.COMPOUND:42890': 'Idarubicin',
'PUBCHEM.COMPOUND:3715': 'Indomethacin', 'PUBCHEM.COMPOUND:6912226': 'Ionomycin',
'PUBCHEM.COMPOUND:3779': 'Isoproterenol',
'PUBCHEM.COMPOUND:9812710': 'Ivermectine 100 microg/mL in Acetonitrile',
'PUBCHEM.COMPOUND:46907787': '(S)-(+)-tert-Butyl 2-(4-(4-chlorophenyl)-2,3,9-trimethyl-6H-thieno(3,2-f)(1,2,4)triazolo(4,3-a)(1,4)diazepin-6-yl)acetate',
'PUBCHEM.COMPOUND:25195294': '4-(6-(4-(Piperazin-1-yl)phenyl)pyrazolo[1,5-a]pyrimidin-3-yl)quinoline',
'PUBCHEM.COMPOUND:896': 'Melatonin', 'PUBCHEM.COMPOUND:10836': 'Methamphetamine',
'PUBCHEM.COMPOUND:4098': 'Methapyrilene', 'PUBCHEM.COMPOUND:13709': 'Methidathion',
'PUBCHEM.COMPOUND:1349907': 'Methimazole', 'PUBCHEM.COMPOUND:1674': '3-Methylcholanthrene',
'PUBCHEM.COMPOUND:4156': 'Methyl methanesulfonate', 'PUBCHEM.COMPOUND:7456': 'Methylparaben',
'PUBCHEM.COMPOUND:6010': 'Methyltestosterone', 'PUBCHEM.COMPOUND:8575': 'Monobutyl phthalate',
'PUBCHEM.COMPOUND:4449': 'Nefazodone', 'PUBCHEM.COMPOUND:4122': 'Nocodazole',
'PUBCHEM.COMPOUND:442530': 'Ochratoxin A', 'PUBCHEM.COMPOUND:9887053': 'Oxaliplatin',
'PUBCHEM.COMPOUND:991': 'Parathion', 'PUBCHEM.COMPOUND:854': 'DL-Arabinose',
'PUBCHEM.COMPOUND:74483': 'Perfluorooctanesulfonic acid', 'PUBCHEM.COMPOUND:9554': 'Perfluorooctanoic acid',
'PUBCHEM.COMPOUND:5794': 'Piperonyl butoxide', 'PUBCHEM.COMPOUND:5694': 'Pirinixic acid',
'PUBCHEM.COMPOUND:15032': 'Pregnenolone carbonitrile', 'PUBCHEM.COMPOUND:657298': 'Propylthiouracil',
'PUBCHEM.COMPOUND:14942': 'Orthosilicic acid', 'PUBCHEM.COMPOUND:5280343': 'Quercetin',
'PUBCHEM.COMPOUND:5035': 'Raloxifene', 'PUBCHEM.COMPOUND:6758': 'Rotenone',
'PUBCHEM.COMPOUND:5186': 'Scriptaid', 'PUBCHEM.COMPOUND:1091': 'Selenious acid',
'PUBCHEM.COMPOUND:133538': '3-Methyl-6-chloro-2,3,4,5-tetrahydro-7,8-dihydroxy-1-(3-methylphenyl)-1H-3-benzazepine',
'PUBCHEM.COMPOUND:7305': 'Soman', 'PUBCHEM.COMPOUND:5323': 'Sulfadimethoxine',
'PUBCHEM.COMPOUND:5329102': 'Sunitinib', 'PUBCHEM.COMPOUND:5284461': 'T-2 Toxin',
'PUBCHEM.COMPOUND:24857286': 'Fasiglifam', 'PUBCHEM.COMPOUND:6410': 'tert-Butyl Hydroperoxide',
'PUBCHEM.COMPOUND:5995': 'Testosterone propionate', 'PUBCHEM.COMPOUND:6618': 'Tetrabromobisphenol A',
'PUBCHEM.COMPOUND:15625': '2,3,7,8-Tetrachlorodibenzo-P-dioxin',
'PUBCHEM.COMPOUND:27924': 'Phorbol 12-myristate 13-acetate', 'PUBCHEM.COMPOUND:2723949': 'Thioacetamide',
'PUBCHEM.COMPOUND:26042': 'Titanium Dioxide', 'PUBCHEM.COMPOUND:60700': 'Topotecan',
'PUBCHEM.COMPOUND:444795': 'Tretinoin', 'PUBCHEM.COMPOUND:6575': 'Trichloroethylene',
'PUBCHEM.COMPOUND:5564': 'Triclosan', 'PUBCHEM.COMPOUND:11089': 'Trimellitic anhydride',
'PUBCHEM.COMPOUND:65411': 'Triptonide', 'PUBCHEM.COMPOUND:23964': 'Tungsten',
'PUBCHEM.COMPOUND:3121': 'Valproic Acid', 'PUBCHEM.COMPOUND:14969': 'Vancomycin',
'PUBCHEM.COMPOUND:39676': 'Vinclozolin', 'PUBCHEM.COMPOUND:2116': 'DL-alpha-Tocopherol',
'MESH:D014874': 'MESH:D014874', 'PUBCHEM.COMPOUND:23994': 'Zinc', 'PUBCHEM.COMPOUND:11626560': 'Crizotinib',
'PUBCHEM.COMPOUND:57379345': 'Ceritinib', 'PUBCHEM.COMPOUND:5328940': 'Bosutinib',
'PUBCHEM.COMPOUND:49846579': 'Venetoclax',
'PUBCHEM.COMPOUND:25134326': 'N2-[2-Methoxy-4-[4-(4-methyl-1-piperazinyl)-1-piperidinyl]phenyl]-N4-[2-[(1-methylethyl)sulfonyl]phenyl]-1,3,5-triazine-2,4-diamine',
'PUBCHEM.COMPOUND:9829523': 'Midostaurin', 'PUBCHEM.COMPOUND:25183872': 'Ixazomib',
'PUBCHEM.COMPOUND:49806720': 'Alectinib', 'PUBCHEM.COMPOUND:71731823': 'Lorlatinib',
'PUBCHEM.COMPOUND:5311': 'Vorinostat', 'PUBCHEM.COMPOUND:5281855': 'Ellagic Acid',
'PUBCHEM.COMPOUND:5291': 'Imatinib', 'PUBCHEM.COMPOUND:135430309': '2-(4-methoxyphenyl)-1H-quinazolin-4-one',
'PUBCHEM.COMPOUND:447077': '6-(2,6-dichlorophenyl)-8-methyl-2-((3-(methylthio)phenyl)amino)pyrido[2,3-d]pyrimidin-7(8H)-one',
'PUBCHEM.COMPOUND:5284616': 'Sirolimus', 'PUBCHEM.COMPOUND:11707110': 'Trametinib',
'PUBCHEM.COMPOUND:10184653': 'Afatinib', 'TTD.DRUG:D0Z1OR': 'TTD.DRUG:D0Z1OR',
'UNII:334895S862': 'DOXYCYCLINE ANHYDROUS', 'PUBCHEM.COMPOUND:176870': 'Erlotinib',
'PUBCHEM.COMPOUND:6049': 'Edetic Acid', 'PUBCHEM.COMPOUND:440473': 'L-mimosine',
'PUBCHEM.COMPOUND:175': 'Acetate', 'PUBCHEM.COMPOUND:457193': 'Dactinomycin',
'PUBCHEM.COMPOUND:1548943': 'Capsaicin', 'PUBCHEM.COMPOUND:36462': 'Etoposide',
'PUBCHEM.COMPOUND:5281672': 'Myricetin', 'PUBCHEM.COMPOUND:6057': 'Tyrosine',
'PUBCHEM.COMPOUND:78165': '2-(N-Morpholino)-ethanesulfonic acid',
'PUBCHEM.COMPOUND:6518': 'Pentaerythritol tetranitrate', 'PUBCHEM.COMPOUND:5360696': 'Dextromethorphan',
'PUBCHEM.COMPOUND:6253': 'Cytarabine', 'PUBCHEM.COMPOUND:30323': 'Daunorubicin',
'PUBCHEM.COMPOUND:2733526': 'Tamoxifen', 'PUBCHEM.COMPOUND:3108': 'Dipyridamole',
'PUBCHEM.COMPOUND:10366136': 'Crenolanib', 'PUBCHEM.COMPOUND:3220': 'Emodin',
'PUBCHEM.COMPOUND:7405': 'L-Pyroglutamic acid',
'PUBCHEM.COMPOUND:656894': 'Isopropyl-beta-D-thiogalactopyranoside', 'PUBCHEM.COMPOUND:10607': 'Podofilox',
'PUBCHEM.COMPOUND:135398748': 'Penciclovir', 'PUBCHEM.COMPOUND:8771': 'Tetradecyl hydrogen sulfate (ester)',
'PUBCHEM.COMPOUND:679': 'Dimethyl Sulfoxide', 'PUBCHEM.COMPOUND:6918638': 'Belinostat',
'PUBCHEM.COMPOUND:44259': 'Staurosporine', 'PUBCHEM.COMPOUND:444732': 'trichostatin A',
'PUBCHEM.COMPOUND:6029': 'Uridine', 'PUBCHEM.COMPOUND:34755': 'S-adenosylmethionine',
'PUBCHEM.COMPOUND:5284627': 'Topiramate', 'PUBCHEM.COMPOUND:5957': "Adenosine-5'-triphosphate",
'PUBCHEM.COMPOUND:2353': 'Berberine', 'PUBCHEM.COMPOUND:72277': 'Epigallocatechin',
'PUBCHEM.COMPOUND:46191454': 'N-(6,6-dimethyl-5-(1-methylpiperidine-4-carbonyl)-1,4,5,6-tetrahydropyrrolo[3,4-c]pyrazol-3-yl)-3-methylbutanamide',
'PUBCHEM.COMPOUND:8988': 'D-proline', 'PUBCHEM.COMPOUND:977': 'Oxygen', 'DRUGBANK:DB12182': 'DRUGBANK:DB12182',
'PUBCHEM.COMPOUND:23725625': 'Olaparib', 'PUBCHEM.COMPOUND:4212': 'Mitoxantrone',
'PUBCHEM.COMPOUND:91766': 'Flufenoxuron', 'PUBCHEM.COMPOUND:65359': 'Oxiglutatione',
'PUBCHEM.COMPOUND:441923': 'Ginsenoside Rg1', 'PUBCHEM.COMPOUND:445154': 'Resveratrol',
'PUBCHEM.COMPOUND:65064': 'Epigallocatechin Gallate',
'PUBCHEM.COMPOUND:11338033': '4-(2,6-dichlorobenzamido)-N-(piperidin-4-yl)-1H-pyrazole-3-carboxamide',
'PUBCHEM.COMPOUND:9444': 'Azacitidine', 'PUBCHEM.COMPOUND:30751': 'Fludarabine phosphate'
}:
raise RuntimeError(f"Incorrect result: {result5}")
# Test get_curie() too while we're at it.
result6 = get_curie('BRCA1')
if result6 != 'NCBIGene:672':
raise RuntimeError(f"Searching 'BRCA1' on NameLookup does not return NCBIGene:672 as expected but {result6}.")