Entering edit mode
3.0 years ago
anasjamshed
▴
140
Is it possible to annotate the bam/sam file by using pysam?
I am using htseq and my code is:
#Importing Libraries
import HTSeq
import itertools
import pysam
#Read Bam file
bamfile = pysam.AlignmentFile("sample_sort.bam")
#Print only importnat cols
for read in bamfile:
reads = "id",read.qname,"flag",read.flag,"chr no.",read.rname, "position",read.pos
print(reads)
#Total reads counts in bam file
total = pysam.AlignmentFile("sample_sort.bam").count()
print(total)
#View Features
print(feature.iv)
print(feature.source)
print(feature.type)
print(feature.score)
#View sorted
sorted(feature.attr.items())
#Declare Exons
exons = HTSeq.GenomicArrayOfSets("auto", stranded=False)
#Fetch Exons from gtf
for feature in gtf_file:
if feature.type == "exon":
exons[feature.iv] += feature.name
iv = HTSeq.GenomicInterval("III", 366085,366700, ".")
[(st[0], sorted(st[1])) for st in exons[iv].steps()]
iset = None
for iv2, step_set in exons[iv].steps():
if iset is None:
iset = step_set.copy()
else:
iset.intersection_update(step_set)
print(iset)
#Store exons in counts dictionary
counts = {}
for feature in gtf_file:
if feature.type == "exon":
counts[feature.name] = 0
#Read Samfile
sam_file = HTSeq.SAM_Reader("samplesort.sam")
#do alignment
for alnmt in sam_file:
if alnmt.aligned:
iset = None
for iv2, step_set in exons[alnmt.iv].steps():
if iset is None:
iset = step_set.copy()
else:
iset.intersection_update(step_set)
if len(iset) == 1:
counts[list(iset)[0]] += 1
#Print counts with gene names
for name in sorted(counts.keys()):
print(name, counts[name])
The output is showing 0 in all counts:
I want my result like this:
Can anyone help me, please?
This post is a duplicate of your previous post: Parsing bam file and compare it from gtf file through pysam
I am marking it off-topic - it might be deleted soon.