|
/** |
|
|
|
Java JDK 8 is required. |
|
|
|
The jjs command-line tool is used to invoke the java/javascript Nashorn engine. |
|
You can use it to interpret one or several script files, or to run an interactive shell. |
|
|
|
HTSJDK is a Java API for high-throughput sequencing data (HTS) formats. |
|
|
|
You can download the pre-compiled libraries from maven central. |
|
|
|
``` |
|
mkdir -p lib/com/github/samtools/htsjdk/2.5.1/ && wget -O "lib/com/github/samtools/htsjdk/2.5.1/htsjdk-2.5.1.jar" "http://central.maven.org/maven2/com/github/samtools/htsjdk/2.5.1/htsjdk-2.5.1.jar" |
|
mkdir -p lib/commons-logging/commons-logging/1.1.1/ && wget -O "lib/commons-logging/commons-logging/1.1.1/commons-logging-1.1.1.jar" "http://central.maven.org/maven2/commons-logging/commons-logging/1.1.1/commons-logging-1.1.1.jar" |
|
mkdir -p lib/gov/nih/nlm/ncbi/ngs-java/1.2.4/ && wget -O "lib/gov/nih/nlm/ncbi/ngs-java/1.2.4/ngs-java-1.2.4.jar" "http://central.maven.org/maven2/gov/nih/nlm/ncbi/ngs-java/1.2.4/ngs-java-1.2.4.jar" |
|
mkdir -p lib/org/apache/commons/commons-compress/1.4.1/ && wget -O "lib/org/apache/commons/commons-compress/1.4.1/commons-compress-1.4.1.jar" "http://central.maven.org/maven2/org/apache/commons/commons-compress/1.4.1/commons-compress-1.4.1.jar" |
|
mkdir -p lib/org/apache/commons/commons-jexl/2.1.1/ && wget -O "lib/org/apache/commons/commons-jexl/2.1.1/commons-jexl-2.1.1.jar" "http://central.maven.org/maven2/org/apache/commons/commons-jexl/2.1.1/commons-jexl-2.1.1.jar" |
|
mkdir -p lib/org/tukaani/xz/1.5/ && wget -O "lib/org/tukaani/xz/1.5/xz-1.5.jar" "http://central.maven.org/maven2/org/tukaani/xz/1.5/xz-1.5.jar" |
|
mkdir -p lib/org/xerial/snappy/snappy-java/1.0.3-rc3/ && wget -O "lib/org/xerial/snappy/snappy-java/1.0.3-rc3/snappy-java-1.0.3-rc3.jar" "http://central.maven.org/maven2/org/xerial/snappy/snappy-java/1.0.3-rc3/snappy-java-1.0.3-rc3.jar" |
|
``` |
|
|
|
How to invoke this script: |
|
|
|
``` |
|
jjs \ |
|
-cp "/path/to/lib/com/github/samtools/htsjdk/2.5.1/htsjdk-2.5.1.jar:/path/to/lib/commons-logging/commons-logging/1.1.1/commons-logging-1.1.1.jar:/path/to/lib/gov/nih/nlm/ncbi/ngs-java/1.2.4/ngs-java-1.2.4.jar:/path/to/lib/org/apache/commons/commons-compress/1.4.1/commons-compress-1.4.1.jar:/path/to/lib/org/apache/commons/commons-jexl/2.1.1/commons-jexl-2.1.1.jar:/path/to/lib/org/tukaani/xz/1.5/xz-1.5.jar:/path/to/lib/org/xerial/snappy/snappy-java/1.0.3-rc3/snappy-java-1.0.3-rc3.jar" \ |
|
biostar236160.js -- input.vcf |
|
``` |
|
|
|
|
|
*/ |
|
|
|
/** check the number of arguments */ |
|
if( arguments.length!=1) { |
|
java.lang.System.err.println("Usage: <vcf file>"); |
|
java.lang.System.exit(-1); |
|
} |
|
/** get ArrayList and HashSet for handling genotypes and alleles */ |
|
var ArrayList= Java.type("java.util.ArrayList"); |
|
var HashSet= Java.type("java.util.HashSet"); |
|
|
|
/** random number */ |
|
var Random = Java.type("java.util.Random"); |
|
var rand = new Random(); |
|
|
|
/** get the java class htsjdk.variant.vcf.VCFFileReader https://samtools.github.io/htsjdk/javadoc/htsjdk/htsjdk/variant/vcf/VCFFileReader.html */ |
|
var VCFFileReader = Java.type("htsjdk.variant.vcf.VCFFileReader"); |
|
|
|
/** get a class for defining a VCF writer */ |
|
var VariantContextWriterBuilder = Java.type("htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder"); |
|
var Options = Java.type("htsjdk.variant.variantcontext.writer.Options"); |
|
|
|
/** get the java class for building variants and genotypes */ |
|
var VariantContextBuilder = Java.type("htsjdk.variant.variantcontext.VariantContextBuilder"); |
|
var GenotypeBuilder = Java.type("htsjdk.variant.variantcontext.GenotypeBuilder"); |
|
var Genotype = Java.type("htsjdk.variant.variantcontext.Genotype"); |
|
|
|
/** create a File object */ |
|
var vcfFile =new java.io.File(arguments[0]); |
|
|
|
/** create a new VCFFileReader for this file https://samtools.github.io/htsjdk/javadoc/htsjdk/htsjdk/variant/vcf/VCFFileReader.html#VCFFileReader-java.io.File-boolean- */ |
|
var requireIndex = false; |
|
var vcfReader = new VCFFileReader( vcfFile, requireIndex ); |
|
|
|
/** get an iterator over the variants in this vcf file https://samtools.github.io/htsjdk/javadoc/htsjdk/htsjdk/variant/vcf/VCFFileReader.html#iterator-- */ |
|
var iter = vcfReader.iterator(); |
|
|
|
/** create a VCF writer to stdout */ |
|
var w = new VariantContextWriterBuilder(). |
|
unsetOption(Options.INDEX_ON_THE_FLY). |
|
setOutputVCFStream(java.lang.System.out). |
|
build(); |
|
|
|
|
|
/** write header */ |
|
w.writeHeader(vcfReader.getFileHeader()); |
|
|
|
/** while there is something to read */ |
|
while(iter.hasNext()) |
|
{ |
|
/** get the next variant */ |
|
var variant = iter.next(); |
|
var newgenotypes = new ArrayList(); |
|
var ctxalleles = new HashSet(); |
|
ctxalleles.add(variant.getReference()); |
|
for(var i=0;i< variant.getNSamples();++i) |
|
{ |
|
|
|
var genotype= variant.getGenotype(i); |
|
var alleles= genotype.getAlleles(); |
|
if(alleles.isEmpty() || genotype.isNoCall()) { |
|
genotype = Genotype.createMissing(genotype.getSampleName(),1); |
|
} |
|
else if(alleles.size()>1) |
|
{ |
|
var idx=rand.nextInt(alleles.size()); |
|
var a = alleles.get(idx); |
|
ctxalleles.add(a); |
|
var alist = new ArrayList(); |
|
alist.add(a); |
|
genotype = new GenotypeBuilder(genotype.getSampleName(),alist).make(); |
|
} |
|
else |
|
{ |
|
ctxalleles.add(alleles.get(0)); |
|
} |
|
newgenotypes.add(genotype); |
|
} |
|
|
|
w.add(new VariantContextBuilder(arguments[0], |
|
variant.getContig(), |
|
variant.getStart(), |
|
variant.getEnd(), |
|
ctxalleles |
|
).genotypes(newgenotypes).make()); |
|
} |
|
|
|
/** close the iterator */ |
|
iter.close(); |
|
|
|
/** close the vcfReader */ |
|
vcfReader.close(); |
|
|
|
/** close output */ |
|
w.close(); |
|
|
if you were to work with the same problem but on the plink ped files, here's my approach after recoding alleles as 0/1/2: