Using the UCSC/gencode and the following awk script:
BEGIN {
FS="\t";
}
{
strand = $4;
txStart = int($5);
txEnd = int($6);
cdsStart = int($7);
cdsEnd = int($8);
exonCount = int($9);
split($10,exonStarts,/[,]/);
split($11,exonEnds,/[,]/);
for(i=1;i+1<=exonCount;++i)
{
intronStart = int(exonEnds[i]);
intronEnd = int(exonStarts[i+1]);
if( (strand == "+" && intronEnd <= cdsStart) || (strand == "-" && intronStart >= cdsEnd) )
{
print;
next;
}
}
}
.
$ curl -s "http://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/wgEncodeGencodeCompV24.txt.gz" | gunzip -c |awk -f biostar.awk
585 ENST00000417324.1 chr1 - 34553 36081 34553 34553 3 34553,35276,35720, 35174,35481,36081, 0 FAM138A none none -1,-1,-1,
585 ENST00000461467.1 chr1 - 35244 36073 35244 35244 2 35244,35720, 35481,36073, 0 FAM138A none none -1,-1,
585 ENST00000466430.5 chr1 - 89294 120932 89294 89294 4 89294,92090,112699,120774, 91629,92240,112804,120932, 0 RP11-34P13.7 none none -1,-1,-1,-1,
585 ENST00000495576.1 chr1 - 89550 91105 89550 89550 2 89550,90286, 90050,91105, 0 RP11-34P13.8 none none -1,-1,
585 ENST00000477740.5 chr1 - 92229 129217 92229 92229 4 92229,112699,120720,129054, 92240,112804,120932,129217, 0 RP11-34P13.7 none none -1,-1,-1,-1,
585 ENST00000471248.1 chr1 - 110952 129173 110952 110952 3 110952,112699,129054, 111357,112804,129173, 0 RP11-34P13.7 none none -1,-1,-1,
73 ENST00000610542.1 chr1 - 120724 133723 120724 120724 4 120724,120873,129054,133373, 120869,120932,129223,133723, 0 RP11-34P13.7 none none -1,-1,-1,-1,
73 ENST00000453576.2 chr1 - 129080 133566 129080 129080 2 129080,133373, 129223,133566, 0 RP11-34P13.7 none none -1,-1,
586 ENST00000493797.1 chr1 - 139789 140339 139789 139789 2 139789,140074, 139847,140339, 0 RP11-34P13.14 none none -1,-1,
586 ENST00000484859.1 chr1 - 141473 149707 141473 141473 2 141473,146385, 143011,149707, 0 RP11-34P13.13 none none -1,-1,
586 ENST00000490997.5 chr1 - 142807 146831 142807 142807 3 142807,146385,146641, 143011,146509,146831, 0 RP11-34P13.13 none none -1,-1,-1,
586 ENST00000466557.6 chr1 - 146385 173862 146385 146385 8 146385,155766,164262,165883,168099,169048,172556,173752, 146509,155831,164791,165942,168165,169264,172688,173862, 0 RP11-34P13.13 none none -1,-1,-1,-1,-1,-1,-1,-1,
586 ENST00000491962.1 chr1 - 165888 168767 165888 165888 3 165888,168099,168609, 165942,168165,168767, 0 RP11-34P13.13 none none -1,-1,-1,
73 ENST00000442116.1 chr1 - 257863 264733 257863 257863 2 257863,264603, 259025,264733, 0 AP006222.2 none none -1,-1,
73 ENST00000448958.1 chr1 - 258143 268807 258143 258143 3 258143,267302,268666, 259025,268204,268807, 0 AP006222.2 none none -1,-1,-1,
73 ENST00000634344.1 chr1 - 258500 267911 258500 258500 3 258500,261549,267302, 259025,261634,267911, 0 AP006222.2 none none -1,-1,-1,
587 ENST00000424587.6 chr1 - 266360 297502 266360 266360 4 266360,268666,289265,297344, 268204,268816,289370,297502, 0 AP006222.2 none none -1,-1,-1,-1,
587 ENST00000335577.4 chr1 - 287516 289370 287516 287516 2 287516,289265, 287921,289370, 0 AP006222.2 none none -1,-1,
587 ENST00000441866.1 chr1 - 357382 359681 357382 357382 3 357382,358048,359344, 357586,358183,359681, 0 RP5-857K21.15 none none -1,-1,-1,
587 ENST00000431321.1 chr1 - 365388 366120 365388 365388 2 365388,366018, 365692,366120, 0 RP4-669L17.10 none none -1,-1,
Please use
ADD COMMENT
to reply to earlier answers, as such this thread remains logically structured and easy to follow. In addition, don't forget to upvote when an answer was helpful and accept the answer if your question is resolved.Sorry it's been a long day. Next time I'll do it right.
Hahaha if this leads to a paper in any way I'll be surprised! But noted!!