Entering edit mode
5 months ago
Aaron
▴
10
when I use htslib to write a bam. Error of "truncated file" shows by samtools following is the code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <htslib/sam.h>
int main() {
// Open BAM file for writing
htsFile *fp = hts_open("output.bam", "wb");
if (fp == NULL) {
fprintf(stderr, "Error opening file for writing.\n");
return 1;
}
// Create and populate BAM header
bam_hdr_t *hdr = bam_hdr_init();
hdr->n_targets = 1;
hdr->target_len = (uint32_t*)malloc(hdr->n_targets * sizeof(uint32_t));
hdr->target_name = (char**)malloc(hdr->n_targets * sizeof(char*));
hdr->target_name[0] = strdup("chr1");
hdr->target_len[0] = 248956422;
hdr->text = strdup("@HD\tVN:1.0\tSO:coordinate\n@SQ\tSN:chr1\tLN:248956422\n");
hdr->l_text = strlen(hdr->text);
// Write BAM header
if (sam_hdr_write(fp, hdr) < 0) {
fprintf(stderr, "Error writing header.\n");
return 1;
}
// Create and populate BAM alignment record
bam1_t *b = bam_init1();
b->core.tid = 0; // Reference sequence ID
b->core.pos = 100; // 0-based leftmost coordinate
b->core.qual = 60; // Mapping quality
b->core.l_qname = 7; // Length of the query name
b->core.flag = 0; // Bitwise flag
b->core.n_cigar = 1; // Number of CIGAR operations
b->core.l_qseq = 10; // Length of the query sequence
b->core.mtid = -1; // Mate reference sequence ID (-1 if no mate)
b->core.mpos = -1; // Mate position (-1 if no mate)
b->core.isize = 0; // Insert size
// Calculate required data length correctly
int data_len = b->core.l_qname + b->core.n_cigar * 4 + (b->core.l_qseq + 1) / 2 + b->core.l_qseq;
b->data = (uint8_t*)malloc(data_len);
b->m_data = data_len;
// Set query name
strcpy((char*)b->data, "query1");
// Set CIGAR (1 match operation)
uint32_t *cigar = (uint32_t*)(b->data + b->core.l_qname);
cigar[0] = bam_cigar_gen(b->core.l_qseq, BAM_CMATCH);
// Set query sequence (e.g., "AGCTTAGCTA")
uint8_t *seq = bam_get_seq(b);
bam_seq_seti(seq, 0, 1); // A
bam_seq_seti(seq, 1, 2); // G
bam_seq_seti(seq, 2, 1); // C
bam_seq_seti(seq, 3, 4); // T
bam_seq_seti(seq, 4, 4); // T
bam_seq_seti(seq, 5, 1); // A
bam_seq_seti(seq, 6, 2); // G
bam_seq_seti(seq, 7, 1); // C
bam_seq_seti(seq, 8, 4); // T
bam_seq_seti(seq, 9, 1); // A
// Set quality scores (dummy values here)
uint8_t *qual = bam_get_qual(b);
for (int i = 0; i < b->core.l_qseq; ++i) qual[i] = 30;
// Write BAM alignment record
if (sam_write1(fp, hdr, b) < 0) {
fprintf(stderr, "Error writing alignment.\n");
return 1;
}
// Write the EOF marker
if (bgzf_write(fp->fp.bgzf, BAM_EOF, sizeof(BAM_EOF)) != sizeof(BAM_EOF)) {
fprintf(stderr, "Error writing EOF marker.\n");
return 1;
}
// Clean up
bam_destroy1(b);
bam_hdr_destroy(hdr);
hts_close(fp);
return 0;
}
Thanks very much.
Aaron : Please accept this answer (green check mark) to provide closure to this thread.