Here is the C code in samtools:
/*
 * Per-category read counters used by flagstat.
 * Every counter has two slots: the report prints slot [0] as the
 * "QC-passed" figure and slot [1] as the "QC-failed" figure.
 */
typedef struct {
    long long n_reads[2];      /* every record seen */
    long long n_mapped[2];     /* reported as "mapped"; increment site not shown here */
    long long n_pair_all[2];   /* BAM_FPAIRED set */
    long long n_pair_map[2];   /* paired; neither BAM_FUNMAP nor BAM_FMUNMAP set */
    long long n_pair_good[2];  /* paired; BAM_FPROPER_PAIR set */
    long long n_sgltn[2];      /* paired; BAM_FMUNMAP set while BAM_FUNMAP is clear */
    long long n_read1[2];      /* paired; BAM_FREAD1 set */
    long long n_read2[2];      /* paired; BAM_FREAD2 set */
    long long n_dup[2];        /* reported as "duplicates"; increment site not shown here */
    long long n_diffchr[2];    /* counted in n_pair_map and mtid != tid */
    long long n_diffhigh[2];   /* counted in n_diffchr and qual >= 5 */
} bam_flagstat_t;
(...)
printf("%lld + %lld in total (QC-passed reads + QC-failed reads)\n", s->n_reads[0], s->n_reads[1]);
printf("%lld + %lld duplicates\n", s->n_dup[0], s->n_dup[1]);
printf("%lld + %lld mapped (%.2f%%:%.2f%%)\n", s->n_mapped[0], s->n_mapped[1], (float)s->n_mapped[0] / s->n_reads[0] * 100.0, (float)s->n_mapped[1] / s->n_reads[1] * 100.0);
printf("%lld + %lld paired in sequencing\n", s->n_pair_all[0], s->n_pair_all[1]);
printf("%lld + %lld read1\n", s->n_read1[0], s->n_read1[1]);
printf("%lld + %lld read2\n", s->n_read2[0], s->n_read2[1]);
printf("%lld + %lld properly paired (%.2f%%:%.2f%%)\n", s->n_pair_good[0], s->n_pair_good[1], (float)s->n_pair_good[0] / s->n_pair_all[0] * 100.0, (float)s->n_pair_good[1] / s->n_pair_all[1] * 100.0);
printf("%lld + %lld with itself and mate mapped\n", s->n_pair_map[0], s->n_pair_map[1]);
printf("%lld + %lld singletons (%.2f%%:%.2f%%)\n", s->n_sgltn[0], s->n_sgltn[1], (float)s->n_sgltn[0] / s->n_pair_all[0] * 100.0, (float)s->n_sgltn[1] / s->n_pair_all[1] * 100.0);
printf("%lld + %lld with mate mapped to a different chr\n", s->n_diffchr[0], s->n_diffchr[1]);
printf("%lld + %lld with mate mapped to a different chr (mapQ>=5)\n", s->n_diffhigh[0], s->n_diffhigh[1]);
(...)
++(s)->n_reads[w]; \
if ((c)->flag & BAM_FPAIRED) { \
++(s)->n_pair_all[w]; \
if ((c)->flag & BAM_FPROPER_PAIR) ++(s)->n_pair_good[w]; \
if ((c)->flag & BAM_FREAD1) ++(s)->n_read1[w]; \
if ((c)->flag & BAM_FREAD2) ++(s)->n_read2[w]; \
if (((c)->flag & BAM_FMUNMAP) && !((c)->flag & BAM_FUNMAP)) ++(s)->n_sgltn[w]; \
if (!((c)->flag & BAM_FUNMAP) && !((c)->flag & BAM_FMUNMAP)) { \
++(s)->n_pair_map[w]; \
if ((c)->mtid != (c)->tid) { \
++(s)->n_diffchr[w]; \
if ((c)->qual >= 5) ++(s)->n_diffhigh[w]; \
}
[2]
is an array of two elements: index 0 stores the count for QC-passed reads and index 1 stores the count for QC-failed reads.
- 'NAN' means 'Not a Number': the percentage could not be computed because of a division by zero (e.g. when there are 0 QC-failed reads)
n_reads
is the total number of reads
n_pair_all
: the read is paired in sequencing, no matter whether it is mapped in a pair
n_pair_good
: the read is mapped in a proper pair
n_read1
: count read1
n_read2
: count read2
n_sgltn
: the read is paired, the read itself is mapped, and its mate is unmapped
n_pair_map
: the read is paired, and both the read itself and its mate are mapped
n_diffchr
: number of reads with a mate mapped on a different chromosome
n_diffhigh
: number of reads with a mate mapped on a different chromosome and a mapping quality of at least 5 (mapQ >= 5)
for more information, see the spec
Some of your stats look questionable to me: the numbers of forward and reverse reads are very disparate, and only a tiny fraction of your reads map properly in pairs.