Entering edit mode
9 months ago
Đặng Hải Đăng
•
0
Hello everybody.
Here is my new code:
params.OUTDIR = "/data/data_dang/cfDNA_lowdepth"
params.deduplicated = "${params.OUTDIR}/*.deduplicated.bam"
params.sortedDir = "${params.OUTDIR}/sorted"
params.binDir = "${params.OUTDIR}/6bin"
params.txtDir = "${params.OUTDIR}/6txt"
params.pythonPath = "/conda/env_dangdang/anaconda3/envs/python/bin"
params.samtoolsPath = "/conda/env_dangdang/anaconda3/envs/samtools/bin"
deduplicated_ch = Channel.fromPath(params.deduplicated)
process sorted_bam {
executor 'local'
cache "deep"
tag "sorted_bam"
storeDir params.sortedDir
errorStrategy 'retry'
maxRetries 1
cpus 4
input:
path deduplicated_file from deduplicated_ch
output:
set file("${deduplicated_file.baseName.replaceAll('.deduplicated', '')}.sorted.bam"),
file("${deduplicated_file.baseName.replaceAll('.deduplicated', '')}.sorted.bam.bai"),
val deduplicated_file.baseName.replaceAll('.deduplicated', '') into sorted_ch #this line is total wrong!
script:
"""
export PATH=/conda/env_dangdang/anaconda3/envs/samtools/bin:\$PATH
samtools sort -@ 4 -o ${deduplicated_file.baseName.replaceAll('.deduplicated', '')}.sorted.bam ${deduplicated_file}
samtools index ${deduplicated_file.baseName.replaceAll('.deduplicated', '')}.sorted.bam
"""
}
def generateBins() {
def num_bins = [249, 243, 198, 191, 180, 171, 159, 146, 141, 135, 135, 133, 115, 107, 102, 90, 81, 78, 59, 63, 48, 51]
def bins = [:]
(1..22).each { chr ->
def num_chr_bins = num_bins[chr - 1]
(0..<num_chr_bins).each { bin_index ->
def start = 1 + bin_index * 1000000
def end = start + 999999
def bin_id = "chr${chr}_${start}_${end}"
bins.put(bin_id, [chr: chr, start: start, end: end])
}
}
return bins
}
def binChannel = generateBins()
// binChannel.each { binId, binInfo ->
// println("Bin ID: $binId, Chromosome: ${binInfo.chr}, Start: ${binInfo.start}, End: ${binInfo.end}")
// }
process processBam {
executor 'local'
cache "deep"
storeDir params.binDir
errorStrategy 'retry'
maxRetries 1
cpus 4
input:
set bin_id, chr, start, end from binChannel.collect { binId, binInfo ->
tuple(binId, binInfo.chr, binInfo.start, binInfo.end)
}
set sorted_bam, bai_file, sample_id from sorted_ch
output:
set file("${sorted_bam.baseName.replaceAll('.sorted', '')}_${chr}_${start}_${end}.txt") into bin_ch
script:
"""
export PATH=${params.samtoolsPath}:$PATH
for (bin_id, sample_id) in bin_id.collectMany { id -> sample_id.collect { bam -> [id, bam] } } {
samtools view -h ${sorted_bam} ${chr}:${start}-${end} | awk -v OFS='\t' '
BEGIN {
prev = ""
prevline = ""
}
{
if (\$0 ~ "^@") {
print \$0
next
}
if (\$1 == prev) {
print prevline
print \$0
} else {
prev = \$1
prevline = \$0
}
}' > "${sorted_bam.baseName.replaceAll('.sorted', '')}_${chr}_${start}_${end}.final.sam"
samtools view "${sorted_bam.baseName.replaceAll('.sorted', '')}_${chr}_${start}_${end}.final.sam" | awk -v OFS='\t' '{
for (i = 12; i <= NF; i++) {
if (\$i ~ /^XM:Z:/) {
tag = substr(\$i, 6);
if (index(tag, "z") > 0) {
two_hot = 2;
} else if (index(tag, "Z") > 0) {
two_hot = 0;
} else {
two_hot = 1;
}
print \$1, tag, two_hot;
}
}
}' > "${sorted_bam.baseName.replaceAll('.sorted', '')}_${chr}_${start}_${end}.txt"}
"""
}
process processTXT {
executor 'local'
cache "deep"
tag "processTXT"
storeDir params.txtDir
errorStrategy 'retry'
maxRetries 1
cpus 4
input:
set txt_path from bin_ch
output:
set file("${txt_path.baseName}.txt") into txt_ch
script:
"""
export PATH=${params.pythonPath}:$PATH
python /data/data_dang/cfDNA_lowdepth/src/0000.py ${txt_path} ${txt_path.baseName}.txt
"""
}
How to use bai_file
/sample_id
(cardinality 10) and bin_id
(cardinality 2875) wisely? I need your help. Thanks a real lot.
(Updated).
params.OUTDIR = "/data/data_dang/cfDNA_lowdepth"
params.deduplicated = "${params.OUTDIR}/*.deduplicated.bam"
params.sortedDir = "${params.OUTDIR}/sorted"
params.binDir = "${params.OUTDIR}/6bin"
params.txtDir = "${params.OUTDIR}/6txt"
params.pythonPath = "/conda/env_dangdang/anaconda3/envs/python/bin"
params.samtoolsPath = "/conda/env_dangdang/anaconda3/envs/samtools/bin"
deduplicated_ch = Channel.fromPath(params.deduplicated)
process sorted_bam {
executor 'local'
cache "deep"
tag "sorted_bam"
storeDir params.sortedDir
errorStrategy 'retry'
maxRetries 1
cpus 4
input:
path deduplicated_file from deduplicated_ch
output:
set file("${deduplicated_file.baseName.replaceAll('.deduplicated', '')}.sorted.bam"),
file("${deduplicated_file.baseName.replaceAll('.deduplicated', '')}.sorted.bam.bai") into sorted_ch
script:
"""
export PATH=/conda/env_dangdang/anaconda3/envs/samtools/bin:\$PATH
samtools sort -@ 4 -o ${deduplicated_file.baseName.replaceAll('.deduplicated', '')}.sorted.bam ${deduplicated_file}
samtools index ${deduplicated_file.baseName.replaceAll('.deduplicated', '')}.sorted.bam
"""
}
def generateBins() {
def num_bins = [249, 243, 198, 191, 180, 171, 159, 146, 141, 135, 135, 133, 115, 107, 102, 90, 81, 78, 59, 63, 48, 51]
def bins = [:]
(1..22).each { chr ->
def num_chr_bins = num_bins[chr - 1]
(0..<num_chr_bins).each { bin_index ->
def start = 1 + bin_index * 1000000
def end = start + 999999
def bin_id = "chr${chr}_${start}_${end}"
bins.put(bin_id, [chr: chr, start: start, end: end])
}
}
return bins
}
def binChannel = generateBins()
// binChannel.each { binId, binInfo ->
// println("Bin ID: $binId, Chromosome: ${binInfo.chr}, Start: ${binInfo.start}, End: ${binInfo.end}")
// }
process processBam {
executor 'local'
cache "deep"
storeDir params.binDir
errorStrategy 'retry'
maxRetries 1
cpus 4
input:
set bin_id, chr, start, end from binChannel.collect { binId, binInfo ->
tuple(binId, binInfo.chr, binInfo.start, binInfo.end)
}
set sorted_bam, bai_file from sorted_ch
output:
set file("${sorted_bam.baseName.replaceAll('.sorted', '')}_${chr}_${start}_${end}.txt") into bin_ch
script:
"""
export PATH=${params.samtoolsPath}:$PATH
for i in {1..2875}; do
samtools view -h ${sorted_bam} ${chr}:${start}-${end} | awk -v OFS='\t' '
BEGIN {
prev = ""
prevline = ""
}
{
if (\$0 ~ "^@") {
print \$0
next
}
if (\$1 == prev) {
print prevline
print \$0
} else {
prev = \$1
prevline = \$0
}
}' > "${sorted_bam.baseName.replaceAll('.sorted', '')}_${chr}_${start}_${end}.final.sam"
done
samtools view "${sorted_bam.baseName.replaceAll('.sorted', '')}_${chr}_${start}_${end}.final.sam" | awk -v OFS='\t' '{
for (i = 12; i <= NF; i++) {
if (\$i ~ /^XM:Z:/) {
tag = substr(\$i, 6);
if (index(tag, "z") > 0) {
two_hot = 2;
} else if (index(tag, "Z") > 0) {
two_hot = 0;
} else {
two_hot = 1;
}
print \$1, tag, two_hot;
}
}
}' > "${sorted_bam.baseName.replaceAll('.sorted', '')}_${chr}_${start}_${end}.txt"
"""
}
process processTXT {
executor 'local'
cache "deep"
tag "processTXT"
storeDir params.txtDir
errorStrategy 'retry'
maxRetries 1
cpus 4
input:
set txt_path from bin_ch
output:
set file("${txt_path.baseName}.txt") into txt_ch
script:
"""
export PATH=${params.pythonPath}:$PATH
python /data/data_dang/cfDNA_lowdepth/src/0000.py ${txt_path} ${txt_path.baseName}.txt
"""
}
Running 2875 times is failed.