Question

Parsing Dimplot Result Files And Annotation

2

Entering edit mode

13.9 years ago

Samimirza ▴ 60

I have results from Dimplot in two files named filename1 and filename2 in my program. The results look like:

1a2y     HIS    A    30      PO4    C    130     S_S    1
1a2y     ASN    C    19      THR    A    53      S_S    1
1a2y     GLN    C    121     PHE    A    91      S_M    1
1a2y     SER    A    93      GLN    C    121     M_S    1

I have separate folders with results for each PDB file (X).

I am interested in creating an output file containing the complete set of residues from a PDB file and matching that with the results from the file which I have divided as two dictionaries, A and B, i.e. A contains (HIS A 30 ): { SS: 1} B contains (PO4 C 130 ):{ SS: 1}, also the same for C and D.

I want to match the residues from file X and if it matches with A or B or C or D then to update the value of MS, SS, MM, SM defined as interactions , and write 1 if present and 0 if not there.

So final output is something like:

Res C ResN SS SM MS MM
MET A 1 0 1 0 0
GLN A 2 0 0 0 0
LYS A 3 0 0 0 0
TYR A 4 0 0 0 0
GLU A 5 0 0 1 0
LYS A 6 0 0 0 0
LEU A 7 1 0 0 1
GLU A 8 0 0 0 0

My script is below: #!/usr/bin/python # read the list of PDB chains and create folder # change dir and create filenames for each combinatoion of .hhb and .nnb #parse lines and rearrange data

import re
import string
import os

pdblist = "/home/sami/Desktop/new_pdb4.txt"
diroutput = "/home/sami/Desktop/dimplot/output/"
pdbs = {}
pdbrec = '/data/bio/db/pdb/'

def read_pdb_list(pdblist, pdbs):  
    #read the list of PDBs and input PDB_ID and chains into dictionary
    fh = open(pdblist, 'r')
    all_line=fh.read()
    fh.close()

    lines=all_line.split("\n")
   # print lines
    for line in lines:
      #  print line
        if string.find(string.letters, line):
            columns =line.split()
            chains= columns[1:3]
            chains.sort()

            if len(columns) > 0 :

                pdbs.setdefault(columns[0],{})
                pdbs[columns[0]].setdefault(chains[0],{})
                pdbs[columns[0]][chains[0]].setdefault(chains[1],line)


def for_pbs(pdbid, chain1, chain2):

         name = pdbid + chain1 + chain2 + ".hhb"


read_pdb_list(pdblist,pdbs)



for p in pdbs.keys():
    each_pdb= diroutput + p + '/'
   # print p
    if not os.path.isdir(each_pdb):
        os.mkdir(each_pdb)

    for c1 in pdbs[p].keys():
        for c2 in pdbs[p][c1].keys():
            filename1 = p + c1 + c2 +".hhb" + "parsedhhb.txt"
            filename2 = p + c1 + c2 +".nnb" + "parse.txt"
            pdb_name = pdbrec+'pdb'+p.lower()+'.ent'

            work_dir = each_pdb

            os.chdir(work_dir)

    for line in open(pdb_name):
        D = {}
        Dl = {}
        if re.match('ATOM', line[0:4]):

            mylist = line.split()
            a = mylist[3]
            b= mylist[4]
            c = mylist[5]


            d = []
            d.append(c)
            d.append(a)
            d.append(b)
            d = tuple(d)
            D.setdefault(c, {})
            D[c].setdefault(a, {})

            D[c][a].setdefault(b, {})
            Dl.setdefault(d, {})
            for line in open(filename1):

                mylist2 = line.split()

                res1 = mylist2[1]
                chain1 = mylist2[2]
                resn1 = mylist2[3]
                res2 = mylist2[4]
                chain2 = mylist2[5]
                resn2 = mylist2[6]
                prop = mylist2[7]
                propr = mylist2[8]
                A = {}
                a = []
                b = []
                a.append(resn1)
                a.append(res1)
                a.append(chain1)
                a = tuple(a)
                b.append(resn2)
                b.append(res2)
                b.append(chain2)
                b = tuple(b)

                B = {}
                A.setdefault(a, {})
                A[a].setdefault(prop)
                A[a][prop] = propr
                B.setdefault(b, {})
                B[b].setdefault(prop)
                B[b][prop] = propr


            for line in open(filename2):

                    mylist3 = line.split()

                    res11 = mylist3[1]
                    chain11 = mylist3[2]
                    resn11 = mylist3[3]
                    res22 = mylist3[4]
                    chain22 = mylist3[5]
                    resn22 = mylist3[6]
                    prop1 = mylist3[7]
                    propr1 = mylist3[8]
                    E = {}
                    e = []
                    f = []
                    e.append(resn11)
                    e.append(res11)
                    e.append(chain11)
                    e = tuple(e)
                    f.append(resn22)
                    f.append(res22)
                    f.append(chain22)
                    f = tuple(f)

                    F = {}
                    E.setdefault(e, {})
                    E[e].setdefault(prop1)
                    E[e][prop1] = propr1
                    F.setdefault(f, {})
                    F[f].setdefault(prop1)
                    F[f][prop1] = propr1


            outfile_f = open('result2.txt', 'w')

            g = Dl.keys()
            h = A.keys()
            i = B.keys()
            j = E.keys()
            k = F.keys()
            for l in g:
                if (l in h) or (l in i) or (l in j) or (l in k):
                   try:
                      bb = A[l].keys()
                      cc = A[l].values()
                   except KeyError:
                      bb = '_'
                      cc = 0
                   try:
                      bb = B[l].keys()
                      cc = B[l].values()
                   except KeyError:
                      bb = '_'
                      cc = 0
                   try:
                      bb = E[l].keys()
                      cc = E[l].values()
                  except KeyError:
                      bb = '_'
                      cc = 0
                  try:     
                      bb = F[l].keys()
                      cc = F[l].values()
                  except KeyError:
                      bb = '_'
                      cc = 0
                  if bb == ['S_S']:

                    outfile_f.write("%-3s\t%-5s\t%-4s\t%-3s\t%-3s\t%-3s\t%-3s\n" %(l[1],l[2],l[0],cc,0, 0,0))

                elif bb == ['S_M']:
                    outfile_f.write("%-3s\t%-5s\t%-4s\t%-3s\t%-3s\t%-3s\t%-3s\n" %(l[1],l[2],l[0],0,cc, 0,0))

                elif bb == ['M_S']:
                    outfile_f.write("%-3s\t%-5s\t%-4s\t%-3s\t%-3s\t%-3s\t%-3s\n" %(l[1],l[2],l[0],0,0, cc,0))
                elif bb == ['M_M'] :     

                    outfile_f.write("%-3s\t%-5s\t%-4s\t%-3s\t%-3s\t%-3s\t%-3s\n" %(l[1],l[2],l[0],cc,0, 0,0))
                else:
                    outfile_f.write("%-3s\t%-5s\t%-4s\t%-3s\t%-3s\t%-3s\t%-3s\n" %(l[1],l[2],l[0],0,0, 0,0))


        outfile_f.close()

The problem is my program shows a blank result file as output.

parsing pdb python • 4.3k views

ADD COMMENT • link 13.9 years ago by Samimirza ▴ 60

1

Entering edit mode

Just a reminder to everyone: indent your code with at least 4 spaces, or it won't be correctly formatted and displayed.

ADD REPLY • link 13.9 years ago by Neilfws 49k

0

Entering edit mode

The problem is my program shows a blank result file as output.

ADD REPLY • link 13.9 years ago by Samimirza ▴ 60

0

Entering edit mode

I've changed the title, remember to explain which software you are referring to.

ADD REPLY • link 13.9 years ago by Giovanni M Dall'Olio 28k

0

Entering edit mode

Thanks, i will keep that in mind for the next question.

ADD REPLY • link 13.9 years ago by Samimirza ▴ 60

Ram · Answer 1 · 2011-01-28

2

Entering edit mode

13.9 years ago

Brad Chapman 9.7k

I would like to help with your problem, but am having a hard time following the code so thought I would respond with some practical suggestions to help with re-organizing your script. Working to improve the readability might help with finding the logic that is giving you trouble. My main suggestions would be:

Use small functions to prevent repeating yourself. For instance, this code:

mylist2 = line.split()
res1 = mylist2[1]
chain1 = mylist2[2]
resn1 = mylist2[3]
res2 = mylist2[4]
chain2 = mylist2[5]
resn2 = mylist2[6]
prop = mylist2[7]
propr = mylist2[8]
A = {}
a = []
b = []
a.append(resn1)
a.append(res1)
a.append(chain1)
a = tuple(a)
b.append(resn2)
b.append(res2)
b.append(chain2)
b = tuple(b)

B = {}
A.setdefault(a, {})
A[a].setdefault(prop)
A[a][prop] = propr
B.setdefault(b, {})
B[b].setdefault(prop)
B[b][prop] = propr

could be replaced with a reusable function:

def _reorganize_residues(line):
(res1, chain1, resn1, res2, chain2, resn2, prop, propr) = \
       line.split()
a = (resn1, res1, chain1)
b = (resn2, res2, chain2)
A = {a : {prop: propr}}
B = {b : {prop: propr}}
return A, B

Similarly this could help you replace a number of lines of code:

def _safe_get_vals(A, l):
try:
    bb = A[l].keys()
    cc = A[l].values()
except KeyError:
    bb = '_'
    cc = 0⇥
return bb, cc

By abstracting out these functions, you can help improve the flow.

Limit your functions to a reasonable number of lines that can be viewed together. Long deeply nested code is harder to follow and can lead to small errors that are difficult to pinpoint. With small functions, each can be tested independently as you debug.
Use helpful variable names. Short single letter variable names are only okay for short lived variables. It is difficult for a reader to tell what your intentions are with a/b/A/B.

Thanks for sharing your code and hope this is helpful for your debugging.

ADD COMMENT • link updated 5.3 years ago by Ram 44k • written 13.9 years ago by Brad Chapman 9.7k

0

Entering edit mode

Thanks, I was trying to create a tuple with a,b,c and d so that the dictionary created for each group was simpler to handle.

ADD REPLY • link 13.9 years ago by Samimirza ▴ 60

0

Entering edit mode

That definitely makes sense, and for a temporary value might be an okay usage. The major issue with the single letter variable names comes in during your final logic: g is a list of keys and l is an item in those; well, it is just too hard for me to read and follow. A re-write along the lines of my suggestions would help with both improving the readability for others and debugging.

ADD REPLY • link 13.9 years ago by Brad Chapman 9.7k

0

Entering edit mode

I tried to use def for function, but it seems I cant call a function within a nested code

ADD REPLY • link 13.9 years ago by Samimirza ▴ 60

0

Entering edit mode

It sounds like you might benefit from having a local programmer sit down with your code. Do you have anyone where you are at you could discuss with? If that's not a possibility, you could post the smallest example code that shows the problem and error message as a Gist: https://gist.github.com/ for us to look at. There are no limitations in Python on nesting and function usage so it is likely a syntax problem.

ADD REPLY • link 13.9 years ago by Brad Chapman 9.7k

Ram · Answer 2 · 2011-01-31

fpdb=open(pdb_name, 'r')

for i in fpdb.readlines():
    if i[:4]=='ATOM' and i[13:15]=='CA':
        val1=i.split()
        tmp_int=[]
        hhb_lst=open(filename1, 'r')
        for j in hhb_lst.readlines():
            val2=j.split()
            if val1[4]==c1 or val1[4]==c2:
                if val2[1]+val2[2]+val2[3] == val1[3]+val1[4]+val1[5]:
                    tmp_int.append(val2[-2])
                elif val2[4]+val2[5]+val2[6] == val1[3]+val1[4]+val1[5]:
                    val3=val2[-2].split('_')
                    if val3[0]==val3[1]:
                        tmp_int.append(val2[-2])
                    else:
                        tmp_int.append(val3[1]+'_'+val3[0])
        hhb_lst.close()
        nnb_lst=open(filename2, 'r')    
        for j in nnb_lst.readlines():
            val2=j.split()
            if val1[4]==c1 or val1[4]==c2:
                if val2[1]+val2[2]+val2[3] == val1[3]+val1[4]+val1[5]:
                    tmp_int.append(val2[-2])
                elif val2[4]+val2[5]+val2[6] == val1[3]+val1[4]+val1[5]:
                    val3=val2[-2].split('_')
                    if val3[0]==val3[1]:
                        tmp_int.append(val2[-2])
                    else:
                        tmp_int.append(val3[1]+'_'+val3[0])
        nnb_lst.close()

        outfile_f = open('resultfinal.txt', 'w')
        outfile_f.write("%-3s\t%-5s\t%-4s\t%-3s\t%-3s\t%-3s\t%-3s\n" %("RES","CHAIN","ATOM","S_S","S_M", "M_S","M_M"))

        if val1[4]==c1 or val1[4]==c2:
            if tmp_int!=[]:
                for value in tmp_int:
                    if value == 'S_S':
                        a1 = 1
                    elif value == 'S_M':
                        a2 = 1
                    elif value == 'M_S':
                        a3 = 1
                    elif value == 'M_M':
                        a4 = 1
                    else:
                        a1 = 0
                        a2 = 0
                        a3 = 0
                        a4 = 0
                        outfile_f.write("%-3s\t%-5s\t%-4s\t%-3s\t%-3s\t%-3s\t%-3s\n" %(val1[3],val1[4],val1[5],a1,a2, a3,a4))
                else:
                        outfile_f.write("%-3s\t%-5s\t%-4s\t%-3s\t%-3s\t%-3s\t%-3s\n" %(val1[3],val1[4],val1[5],0,0, 0,0))
fpdb.close()