Problem in generating interactions graphs through python script
0
0
Entering edit mode
2.2 years ago
anasjamshed ▴ 140

I have 16000 genes in text file and i want to make ppi graph through python by utilizing a string database. It works fine with a few hundred genes but when I try to make a graph of 2000 genes,it giving error.

My code:

### The required libraries and packages ###
import networkx as nx
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm

# List of genes to search for
list1= open("genes.txt").read()
# split line by "," into list of strings
geneList = list1.rstrip().split("\n")
#Convert genes into proteins
proteins = '%0d'.join(geneList)
#Define the URL
url = 'https://string-db.org/api/tsv/network?identifiers=' + proteins + '&species=9606'
r = requests.get(url)
lines = r.text.split('\n') # pull the text from the response object and split based on new lines
data = [l.split('\t') for l in lines] # split each line into its components based on tabs
# convert to dataframe using the first row as the column names; drop empty, final row
df = pd.DataFrame(data[1:-1], columns = data[0]) 
#print(df)

# dataframe with the preferred names of the two proteins and the score of the interaction
interactions = df[['preferredName_A', 'preferredName_B', 'score']] 

print(interactions)

G=nx.Graph(name='Gene Interaction Graph')
interactions = np.array(interactions)
for i in range(len(interactions)):
    interaction = interactions[i]
    a = interaction[0] # protein a node
    b = interaction[1] # protein b node
    w = float(interaction[2]) # score as weighted edge where high scores = low weight
    G.add_weighted_edges_from([(a,b,w)]) # add weighted edge to graph

pos = nx.spring_layout(G) # position the nodes using the spring layout
plt.figure(figsize=(18,18),facecolor=[0.9,0.7,0.7,0.5])
nx.draw_networkx(G)
plt.axis('off')
plt.show()

Error:

KeyError                                  Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_14700/807706410.py in <module>
     23 
     24 # dataframe with the preferred names of the two proteins and the score of the interaction
---> 25 interactions = df[['preferredName_A', 'preferredName_B', 'score']]
     26 
     27 print(interactions)

~\anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
   3509             if is_iterator(key):
   3510                 key = list(key)
-> 3511             indexer = self.columns._get_indexer_strict(key, "columns")[1]
   3512 
   3513         # take() does not accept boolean indexers

~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in _get_indexer_strict(self, key, axis_name)
   5780             keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr)
   5781 
-> 5782         self._raise_if_missing(keyarr, indexer, axis_name)
   5783 
   5784         keyarr = self.take(indexer)

~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in _raise_if_missing(self, key, indexer, axis_name)
   5840                 if use_interval_msg:
   5841                     key = list(key)
-> 5842                 raise KeyError(f"None of [{key}] are in the [{axis_name}]")
   5843 
   5844             not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())

KeyError: "None of [Index(['preferredName_A', 'preferredName_B', 'score'], dtype='object')] are in the [columns]"

Can anyone help me to solve this?

String Python • 584 views
ADD COMMENT

Login before adding your answer.

Traffic: 1966 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6