Count # of homs and hets per sample
1
0
Entering edit mode
8.0 years ago
rmande • 0

I have a multi-sample VCF and I would like to count the # of heterozygotes and # of homozygotes for each variant. Is there an easy way to do this?

I was thinking the easiest way would be to grep for 1/0 and 1/1, but I'm not sure if that's the correct way to do this.

Any suggestions and discussions could help me. Thanks!

next-gen snp sequencing • 2.7k views
ADD COMMENT
1
Entering edit mode
8.0 years ago

Using bioalcidae : https://github.com/lindenb/jvarkit/wiki/BioAlcidae

the script.js

var sample2count={};

var samples = header.getSampleNamesInOrder() ;
for(var i=0;i< samples.size();++i)
    {
    sample2count[samples.get(i)]={"het":0,"hom":0};
    }


while( iter.hasNext() )
    {
    var ctx = iter.next();
    for(var sample in sample2count)
        {
        var g = ctx.getGenotype(sample);
        if( g.isHom()  )
            {
            sample2count[sample].hom++;
            }
        else if(g.isHet())
            {
            sample2count[sample].het++;
            }
        }
    }
out.println("#sample,hom,het");
for(var sample in sample2count)
    {
    out.println(sample+","+sample2count[sample].hom+","+sample2count[sample].het);
    }

example:

 curl -L "https://raw.githubusercontent.com/vcftools/vcftools/master/examples/valid-4.1.vcf" |\
  java -jar  jvarkit-git/dist/bioalcidae.jar -F VCF -f script.js

#sample,hom,het
NA00001,4,8
NA00002,2,10
NA00003,11,0

ADD COMMENT
0
Entering edit mode

Hi Pierre,

That would give me the # of hom and het variants per sample, no? I'm interested in seeing how many samples are homozygous or heterozygous for a given variant.

ADD REPLY
0
Entering edit mode

ah, let me change this !

while( iter.hasNext() )
    {
    var ctx = iter.next();
    var het=0,hom=0;
    for(var i=0;i< ctx.getNSamples();++i)
        {
        var g = ctx.getGenotype(i);
        if( g.isHom()  )
            {
            hom++;
            }
        else if(g.isHet())
            {
            het++;
            }
        }
    out.println(ctx.getContig()+","+ctx.getStart()+","+ctx.getReference().getDisplayString()+","+hom+","+het);
    }

now the output is:

19,14370,G,2,1
20,17330,T,2,1
20,1110696,A,1,2
20,1230237,T,3,0
20,1234567,GTC,1,2
20,2234567,C,1,2
20,2234568,C,1,2
20,2234569,C,1,2
20,3234569,C,1,2
20,4234569,N,0,2
20,5234569,N,1,2
Y,17330,T,3,0
ADD REPLY
0
Entering edit mode

Ok I will try that. Thanks!

A few questions.

When I try and run: make bioalcidae I get the following error Makefile:553: recipe for target 'api.ncbi.blast' failed make: * [api.ncbi.blast] Error 127

And do I need any specific Javascript libraries to run script.js?

ADD REPLY
0
Entering edit mode

why does it fail ? are you working behind a proxy ?

ADD REPLY
0
Entering edit mode

I don't believe I am.

ADD REPLY
0
Entering edit mode

show me what happends when you run make api.ncbi.blast

$ make api.ncbi.blast
mkdir -p /home/lindenb/src/jvarkit-git/src/main/generated-sources/java
xjc -d /home/lindenb/src/jvarkit-git/src/main/generated-sources/java  -p gov.nih.nlm.ncbi.blast -dtd  "https://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd"
parsing a schema...
compiling a schema...
gov/nih/nlm/ncbi/blast/BlastOutput.java
gov/nih/nlm/ncbi/blast/BlastOutputIterations.java
gov/nih/nlm/ncbi/blast/BlastOutputMbstat.java
gov/nih/nlm/ncbi/blast/BlastOutputParam.java
gov/nih/nlm/ncbi/blast/Hit.java
gov/nih/nlm/ncbi/blast/HitHsps.java
gov/nih/nlm/ncbi/blast/Hsp.java
gov/nih/nlm/ncbi/blast/Iteration.java
gov/nih/nlm/ncbi/blast/IterationHits.java
gov/nih/nlm/ncbi/blast/IterationStat.java
gov/nih/nlm/ncbi/blast/ObjectFactory.java
gov/nih/nlm/ncbi/blast/Parameters.java
gov/nih/nlm/ncbi/blast/Statistics.java
ADD REPLY
0
Entering edit mode

$ make api.ncbi.blast

mkdir -p /home/rmande/jvarkit/src/main/generated-sources/java xjc -d /home/rmande/jvarkit/src/main/generated-sources/java -p gov.nih.nlm.ncbi.blast -dtd "https://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd" /bin/bash: xjc: command not found Makefile:553: recipe for target 'api.ncbi.blast' failed make: * [api.ncbi.blast] Error 127

ADD REPLY
0
Entering edit mode

https://github.com/lindenb/jvarkit/wiki/BioAlcidae#requirements--dependencies :

java compiler SDK 1.8 http://www.oracle.com/technetwork/java/index.html (NOT the old java 1.7 or 1.6) . Please check that this java is in the ${PATH}.

ADD REPLY

Login before adding your answer.

Traffic: 2305 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6