X-Git-Url: https://git.donarmstrong.com/?p=rsem.git;a=blobdiff_plain;f=wiggle.cpp;h=00dcce8d431edc2c38c0891d17b8085dbcbac25b;hp=19f52b480a00dcd991b30b583fce2ce0177a628b;hb=HEAD;hpb=946f9a6adb2a82048c8453d44693cd3838d32939 diff --git a/wiggle.cpp b/wiggle.cpp index 19f52b4..00dcce8 100644 --- a/wiggle.cpp +++ b/wiggle.cpp @@ -1,16 +1,27 @@ #include #include #include +#include #include #include "sam/bam.h" #include "sam/sam.h" +#include "utils.h" #include "wiggle.h" +bool no_fractional_weight = false; + void add_bam_record_to_wiggle(const bam1_t *b, Wiggle& wiggle) { - uint8_t *p_tag = bam_aux_get(b, "ZW"); - float w = (p_tag != NULL ? bam_aux2f(p_tag) : 1.0); + double w; + + if (no_fractional_weight) w = 1.0; + else { + uint8_t *p_tag = bam_aux_get(b, "ZW"); + if (p_tag == NULL) return; + w = bam_aux2f(p_tag); + } + int pos = b->core.pos; uint32_t *p = bam1_cigar(b); @@ -36,29 +47,42 @@ void build_wiggles(const std::string& bam_filename, WiggleProcessor& processor) { samfile_t *bam_in = samopen(bam_filename.c_str(), "rb", NULL); if (bam_in == 0) { fprintf(stderr, "Cannot open %s!\n", bam_filename.c_str()); exit(-1); } - //assert(bam_in != 0); - int cur_tid = -1; //current tid; - int cnt = 0; - bam1_t *b = bam_init1(); - Wiggle wiggle; + bam_header_t *header = bam_in->header; + bool *used = new bool[header->n_targets]; + memset(used, 0, sizeof(bool) * header->n_targets); + + int cur_tid = -1; //current tid; + HIT_INT_TYPE cnt = 0; + bam1_t *b = bam_init1(); + Wiggle wiggle; while (samread(bam_in, b) >= 0) { if (b->core.flag & 0x0004) continue; if (b->core.tid != cur_tid) { - if (cur_tid >= 0) processor.process(wiggle); + if (cur_tid >= 0) { used[cur_tid] = true; processor.process(wiggle); } cur_tid = b->core.tid; - wiggle.name = bam_in->header->target_name[cur_tid]; - wiggle.read_depth.assign(bam_in->header->target_len[cur_tid], 0.0); + wiggle.name = header->target_name[cur_tid]; + wiggle.length = header->target_len[cur_tid]; + wiggle.read_depth.assign(wiggle.length, 0.0); } - add_bam_record_to_wiggle(b, wiggle); + add_bam_record_to_wiggle(b, wiggle); ++cnt; - if (cnt % 1000000 == 0) fprintf(stderr, "%d FIN\n", cnt); + if (cnt % 1000000 == 0) std::cout<< cnt<< std::endl; } - if (cur_tid >= 0) processor.process(wiggle); + if (cur_tid >= 0) { used[cur_tid] = true; processor.process(wiggle); } + + for (int32_t i = 0; i < header->n_targets; i++) + if (!used[i]) { + wiggle.name = header->target_name[i]; + wiggle.length = header->target_len[i]; + wiggle.read_depth.clear(); + processor.process(wiggle); + } samclose(bam_in); bam_destroy1(b); + delete[] used; } UCSCWiggleTrackWriter::UCSCWiggleTrackWriter(const std::string& output_filename, @@ -75,17 +99,19 @@ UCSCWiggleTrackWriter::~UCSCWiggleTrackWriter() { void UCSCWiggleTrackWriter::process(const Wiggle& wiggle) { int sp, ep; + + if (wiggle.read_depth.empty()) return; sp = ep = -1; - for (size_t i = 0; i < wiggle.read_depth.size(); i++) { - if (wiggle.read_depth[i] > 0) { + for (size_t i = 0; i < wiggle.length; i++) { + if (wiggle.read_depth[i] >= 0.0095) { ep = i; } else { if (sp < ep) { ++sp; fprintf(fo, "fixedStep chrom=%s start=%d step=1\n", wiggle.name.c_str(), sp + 1); - for (int j = sp; j <= ep; j++) fprintf(fo, "%.7g\n", wiggle.read_depth[j]); + for (int j = sp; j <= ep; j++) fprintf(fo, "%.2f\n", wiggle.read_depth[j]); } sp = i; } @@ -93,7 +119,7 @@ void UCSCWiggleTrackWriter::process(const Wiggle& wiggle) { if (sp < ep) { ++sp; fprintf(fo, "fixedStep chrom=%s start=%d step=1\n", wiggle.name.c_str(), sp + 1); - for (int j = sp; j <= ep; j++) fprintf(fo, "%.7g\n", wiggle.read_depth[j]); + for (int j = sp; j <= ep; j++) fprintf(fo, "%.2f\n", wiggle.read_depth[j]); } } @@ -102,9 +128,13 @@ ReadDepthWriter::ReadDepthWriter(std::ostream& stream) } void ReadDepthWriter::process(const Wiggle& wiggle) { + stream_ << wiggle.name << '\t' - << wiggle.read_depth.size() << '\t'; - for (size_t i = 0; i < wiggle.read_depth.size(); ++i) { + << wiggle.length << '\t'; + + if (wiggle.read_depth.empty()) { stream_ << "NA\n"; return; } + + for (size_t i = 0; i < wiggle.length; ++i) { if (i > 0) stream_ << ' '; stream_ << wiggle.read_depth[i]; }