Entering edit mode
2.3 years ago
anasjamshed
▴
140
I have 16000 genes in text file and i want to make ppi graph through python by utilizing a string database. It works fine with a few hundred genes but when I try to make a graph of 2000 genes,it giving error.
My code:
### The required libraries and packages ###
import networkx as nx
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
# List of genes to search for
list1= open("genes.txt").read()
# split line by "," into list of strings
geneList = list1.rstrip().split("\n")
#Convert genes into proteins
proteins = '%0d'.join(geneList)
#Define the URL
url = 'https://string-db.org/api/tsv/network?identifiers=' + proteins + '&species=9606'
r = requests.get(url)
lines = r.text.split('\n') # pull the text from the response object and split based on new lines
data = [l.split('\t') for l in lines] # split each line into its components based on tabs
# convert to dataframe using the first row as the column names; drop empty, final row
df = pd.DataFrame(data[1:-1], columns = data[0])
#print(df)
# dataframe with the preferred names of the two proteins and the score of the interaction
interactions = df[['preferredName_A', 'preferredName_B', 'score']]
print(interactions)
G=nx.Graph(name='Gene Interaction Graph')
interactions = np.array(interactions)
for i in range(len(interactions)):
interaction = interactions[i]
a = interaction[0] # protein a node
b = interaction[1] # protein b node
w = float(interaction[2]) # score as weighted edge where high scores = low weight
G.add_weighted_edges_from([(a,b,w)]) # add weighted edge to graph
pos = nx.spring_layout(G) # position the nodes using the spring layout
plt.figure(figsize=(18,18),facecolor=[0.9,0.7,0.7,0.5])
nx.draw_networkx(G)
plt.axis('off')
plt.show()
Error:
KeyError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_14700/807706410.py in <module>
23
24 # dataframe with the preferred names of the two proteins and the score of the interaction
---> 25 interactions = df[['preferredName_A', 'preferredName_B', 'score']]
26
27 print(interactions)
~\anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
3509 if is_iterator(key):
3510 key = list(key)
-> 3511 indexer = self.columns._get_indexer_strict(key, "columns")[1]
3512
3513 # take() does not accept boolean indexers
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in _get_indexer_strict(self, key, axis_name)
5780 keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr)
5781
-> 5782 self._raise_if_missing(keyarr, indexer, axis_name)
5783
5784 keyarr = self.take(indexer)
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in _raise_if_missing(self, key, indexer, axis_name)
5840 if use_interval_msg:
5841 key = list(key)
-> 5842 raise KeyError(f"None of [{key}] are in the [{axis_name}]")
5843
5844 not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
KeyError: "None of [Index(['preferredName_A', 'preferredName_B', 'score'], dtype='object')] are in the [columns]"
Can anyone help me to solve this?