$ curl -s "http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/knownGene.txt.gz" |
    gunzip -c |
    head |
    awk '
      # Fields used: $1 name, $2 chromosome, $3 strand, $8 exon count,
      # $9 and $10 comma-separated exon start and end coordinates.
      {
        exonCount = int($8)
        split($9, starts, /,/)
        split($10, ends, /,/)
        for (k = 1; k <= exonCount; ++k) {
          printf("%s,%s,%s,%s,%s,EXON\n", $1, $2, $3, starts[k], ends[k])
          # An intron runs from the end of this exon to the start of the next one,
          # so the last exon of a transcript has no intron after it.
          if (k < exonCount)
            printf("%s,%s,%s,%s,%s,INTRON\n", $1, $2, $3, int(ends[k]), int(starts[k + 1]))
        }
      }'
uc001aaa.3,chr1,+,11873,12227,EXON
uc001aaa.3,chr1,+,12227,12612,INTRON
uc001aaa.3,chr1,+,12612,12721,EXON
uc001aaa.3,chr1,+,12721,13220,INTRON
uc001aaa.3,chr1,+,13220,14409,EXON
uc010nxr.1,chr1,+,11873,12227,EXON
uc010nxr.1,chr1,+,12227,12645,INTRON
uc010nxr.1,chr1,+,12645,12697,EXON
uc010nxr.1,chr1,+,12697,13220,INTRON
uc010nxr.1,chr1,+,13220,14409,EXON
uc010nxq.1,chr1,+,11873,12227,EXON
uc010nxq.1,chr1,+,12227,12594,INTRON
uc010nxq.1,chr1,+,12594,12721,EXON
uc010nxq.1,chr1,+,12721,13402,INTRON
uc010nxq.1,chr1,+,13402,14409,EXON
uc009vis.3,chr1,-,14361,14829,EXON
uc009vis.3,chr1,-,14829,14969,INTRON
uc009vis.3,chr1,-,14969,15038,EXON
uc009vis.3,chr1,-,15038,15795,INTRON
uc009vis.3,chr1,-,15795,15942,EXON
uc009vis.3,chr1,-,15942,16606,INTRON
uc009vis.3,chr1,-,16606,16765,EXON
uc009vit.3,chr1,-,14361,14829,EXON
uc009vit.3,chr1,-,14829,14969,INTRON
(...)
If you have an annotation file, see:
A: How to get intronic and intergenic sequences based on a GFF file?