Hi,
I have these two dummy scripts that I am using to get some hands-on practice with creating Nextflow pipelines. \
main.nf
#!/usr/bin/env nextflow
nextflow.enable.dsl=2
// Include in workflow
include {
ECHO;
WRITE_STDOUT
} from "./modules.nf"
// Parameter initialization
// `--samples` takes a comma-separated list of sample IDs, e.g. `--samples s1,s2,s3`.
// A value given on the command line takes precedence over the default assigned here,
// so the sample list can be derived in a single unconditional statement instead of
// assigning `params.samplesList` twice and reading `params.samples` while undefined.
params.samples = "s1,s2"
// toString() guards against a value that Nextflow did not deliver as a String
// (presumably a lone numeric ID such as `--samples 1` is parsed as a number -- TODO confirm);
// tokenize() returns a List and drops empty entries, trim() tolerates "s1, s2".
params.samplesList = params.samples.toString().tokenize(',')*.trim()
params.fastqdir = "./data/fastqs"
params.index = ["1", "2"]
workflow {
    // Source channels: the FASTQ directory, the sample IDs and the read indices.
    fastqdir_ch   = Channel.fromPath(params.fastqdir)
    sampleList_ch = Channel.fromList(params.samplesList)
    readIndex_ch  = Channel.fromList(params.index)

    // Build every (sample, read index, fastq dir) combination and print each
    // tuple with view() before it is handed to ECHO.
    echo_inputs_ch = sampleList_ch
        .combine(readIndex_ch)
        .combine(fastqdir_ch)
        .view()

    full_fq_paths_ch = ECHO(echo_inputs_ch)

    // Group the ECHO outputs on tuple element 0 (the sample ID) so that
    // WRITE_STDOUT receives all text files of one sample together.
    per_sample_ch = full_fq_paths_ch.groupTuple(by: 0)

    merge_txt_ch = WRITE_STDOUT(per_sample_ch)
}
and modules.nf
#!/usr/bin/env nextflow
nextflow.enable.dsl=2
// Writes the expected FASTQ path for one (sample, read index) pair into a
// per-pair text file and emits that file keyed by the sample ID.
process ECHO {
    input:
    tuple val(sample_id), val(read_id), path(fastq_prefix)

    output:
    tuple val("${sample_id}"), path("${sample_id}_out.${read_id}.txt")

    script:
    // Name of the text file produced by this task; same pattern as the output declaration.
    def out_txt = "${sample_id}_out.${read_id}.txt"
    """
    echo "${fastq_prefix}/${sample_id}_${read_id}.fastq.gz" >> "${out_txt}"
    """
}
// Concatenates all text files received for one sample into
// <sample_id>/<sample_id>.output.txt and publishes a copy under ./data/fastqc.
process WRITE_STDOUT {
    publishDir './data/fastqc', mode: 'copy'

    input:
    tuple(
        val(sample_id),
        path(fastq_paths)
    )

    output:
    path("${sample_id}/${sample_id}.output.txt")

    script:
    // Space-separated list of the staged input files, as passed to `cat`.
    def inputs = fastq_paths.join(" ")
    // Destination file inside the per-sample sub-directory created below.
    def merged = "${sample_id}/${sample_id}.output.txt"
    """
    mkdir ${sample_id}
    cat ${inputs} >> "${merged}"
    """
}
The only goal is to test the pipeline. For now it is working by running for example: \
nextflow run ./scripts/main.nf --samples s1,s2,s3
I would like to go one step further and dynamically disable processes (without using specific flags) if all of their outputs are already present in one particular directory (not the Nextflow working directory). In the workflow I give above, this would mean not launching the ECHO
process if all the text files have already been created for all samples (s1_out.1.txt
, s2_out.1.txt
, s3_out.1.txt
, etc.).
I am aware of the -resume
option, but this only works if the pipeline has been launched before.
Is it possible to do this and how?
Thank you very much
From what I understand, one of the strengths of Nextflow is its ability to work remotely with symlinks. I know that what I am asking is outside the normal use cases of Nextflow.
Thank you very much for your insight