]> git.donarmstrong.com Git - rsem.git/commitdiff
Refactored wiggle code and added rsem-bam2readdepth program
authorColin Dewey <cdewey@biostat.wisc.edu>
Wed, 16 Nov 2011 21:34:30 +0000 (15:34 -0600)
committerColin Dewey <cdewey@biostat.wisc.edu>
Wed, 16 Nov 2011 21:34:30 +0000 (15:34 -0600)
bam2readdepth.cpp [new file with mode: 0644]
bam2wig.cpp
makefile
wiggle.cpp [new file with mode: 0644]
wiggle.h [new file with mode: 0644]

diff --git a/bam2readdepth.cpp b/bam2readdepth.cpp
new file mode 100644 (file)
index 0000000..c7f0adb
--- /dev/null
@@ -0,0 +1,13 @@
+#include <iostream>
+#include "wiggle.h"
+
+int main(int argc, char* argv[]) {
+       if (argc != 2) {
+               printf("Usage: rsem-bam2readdepth sorted_bam_input\n");
+        std::exit(1);
+       }
+    ReadDepthWriter depth_writer(std::cout);
+    build_wiggles(argv[1], depth_writer);
+
+       return 0;
+}
index fcb86b3c562373a0a0492a4de875e6565c551b94..cb02bc2a9a6f575235dfe13bf9b581aca0852a12 100644 (file)
-#include<cstdio>
-#include<cstring>
-#include<cstdlib>
-#include<cassert>
-#include<string>
-
-#include "sam/bam.h"
-#include "sam/sam.h"
+#include <cstdio>
+#include "wiggle.h"
 
 using namespace std;
 
-samfile_t *bam_in;
-bam1_t *b;
-
-int cur_tid; //current tid;
-float *wig_arr; // wiggle array
-FILE *fo;
-
-void generateWiggle(int tid) {
-       int chr_len = bam_in->header->target_len[tid];
-       char *chr_name = bam_in->header->target_name[tid];
-       int sp, ep;
-
-       sp = ep = -1;
-       for (int i = 0; i < chr_len; i++) {
-               if (wig_arr[i] > 0) {
-                       ep = i;
-               }
-               else {
-                       if (sp < ep) {
-                               ++sp;
-                               fprintf(fo, "fixedStep chrom=%s start=%d step=1\n", chr_name, sp + 1);
-                               for (int j = sp; j <= ep; j++) fprintf(fo, "%.7g\n", wig_arr[j]);
-                       }
-                       sp = i;
-               }
-       }
-       if (sp < ep) {
-               ++sp;
-               fprintf(fo, "fixedStep chrom=%s start=%d step=1\n", chr_name, sp + 1);
-               for (int j = sp; j <= ep; j++) fprintf(fo, "%.7g\n", wig_arr[j]);
-       }
-}
-
 int main(int argc, char* argv[]) {
-       int cnt = 0;
-
        if (argc != 4) {
-               printf("Usage : rsem-bam2wig sorted_bam_input wig_output wiggle_name\n");
+               printf("Usage: rsem-bam2wig sorted_bam_input wig_output wiggle_name\n");
                exit(-1);
        }
-
-       bam_in = samopen(argv[1], "rb", NULL);
-       if (bam_in == 0) { fprintf(stderr, "Cannot open %s!\n", argv[1]); exit(-1); }
-       //assert(bam_in != 0);
-       b = bam_init1();
-
-       fo = fopen(argv[2], "w");
-       fprintf(fo, "track type=wiggle_0 name=\"%s\" description=\"%s\" visibility=full\n", argv[3], argv[3]);
-
-       cur_tid = -1;
-       wig_arr = NULL;
-       while (samread(bam_in, b) >= 0) {
-               if (b->core.tid != cur_tid) {
-                       if (cur_tid >= 0) generateWiggle(cur_tid);
-                       cur_tid = b->core.tid;
-                       size_t len = sizeof(float) * bam_in->header->target_len[cur_tid];
-                       wig_arr = (float*)realloc(wig_arr, len);
-                       memset(wig_arr, 0, len);
-               }
-
-               float w = bam_aux2f(bam_aux_get(b, "ZW"));
-               int pos = b->core.pos;
-               uint32_t *p = bam1_cigar(b);
-
-               for (int i = 0; i < (int)b->core.n_cigar; i++, ++p) {
-                       int op = *p & BAM_CIGAR_MASK;
-                       int op_len = *p >> BAM_CIGAR_SHIFT;
-
-                       switch (op) {
-                         //case BAM_CSOFT_CLIP : pos += op_len; break;
-                       case BAM_CINS : pos += op_len; break;
-                       case BAM_CMATCH :
-                               for (int j = 0; j < op_len; j++, ++pos) wig_arr[pos] += w;
-                               break;
-                       case BAM_CREF_SKIP : pos += op_len; break;
-                       default : assert(false);
-                       }
-               }
-
-               ++cnt;
-               if (cnt % 1000000 == 0) printf("%d FIN\n", cnt);
-       }
-       if (cur_tid >= 0) generateWiggle(cur_tid);
-       free(wig_arr);
-
-       samclose(bam_in);
-       bam_destroy1(b);
-
-       fclose(fo);
+    UCSCWiggleTrackWriter track_writer(argv[2], argv[3]);
+    build_wiggles(argv[1], track_writer);
 
        return 0;
 }
index 97cba95d1e8aad570e1f40d871f2c80b8a054768..c5c52711205b5de3a194ac9d461be83a789fc227 100644 (file)
--- a/makefile
+++ b/makefile
@@ -2,7 +2,7 @@ CC = g++
 #LFLAGS = -Wall -O3 -ffast-math
 CFLAGS = -Wall -c -I.
 COFLAGS = -Wall -O3 -ffast-math -c -I.
-PROGRAMS = rsem-bam2wig rsem-build-read-index rsem-run-em rsem-extract-reference-transcripts rsem-synthesis-reference-transcripts rsem-parse-alignments rsem-preref rsem-simulate-reads rsem-run-gibbs rsem-calculate-credibility-intervals
+PROGRAMS = rsem-bam2wig rsem-bam2readdepth rsem-build-read-index rsem-run-em rsem-extract-reference-transcripts rsem-synthesis-reference-transcripts rsem-parse-alignments rsem-preref rsem-simulate-reads rsem-run-gibbs rsem-calculate-credibility-intervals
 
 
 all : build-sam $(PROGRAMS)
@@ -82,8 +82,14 @@ rsem-run-em : EM.o sam/libbam.a
 EM.o : utils.h Read.h SingleRead.h SingleReadQ.h PairedEndRead.h PairedEndReadQ.h SingleHit.h PairedEndHit.h Model.h SingleModel.h SingleQModel.h PairedEndModel.h PairedEndQModel.h Refs.h GroupInfo.h HitContainer.h ReadIndex.h ReadReader.h Orientation.h LenDist.h RSPD.h QualDist.h QProfile.h NoiseQProfile.h ModelParams.h RefSeq.h RefSeqPolicy.h PolyARules.h Profile.h NoiseProfile.h Transcript.h Transcripts.h HitWrapper.h BamWriter.h sam/bam.h sam/sam.h EM.cpp simul.h
        $(CC) $(COFLAGS) EM.cpp
 
-rsem-bam2wig : sam/bam.h sam/sam.h sam/libbam.a bam2wig.cpp
-       $(CC) -O3 -Wall bam2wig.cpp sam/libbam.a -lz -o rsem-bam2wig
+rsem-bam2wig : wiggle.h wiggle.o sam/libbam.a bam2wig.cpp
+       $(CC) -O3 -Wall bam2wig.cpp wiggle.o sam/libbam.a -lz -o $@
+
+rsem-bam2readdepth : wiggle.h wiggle.o sam/libbam.a bam2readdepth.cpp
+       $(CC) -O3 -Wall bam2readdepth.cpp wiggle.o sam/libbam.a -lz -o $@
+
+wiggle.o: sam/bam.h sam/sam.h wiggle.cpp wiggle.h
+       $(CC) $(COFLAGS) wiggle.cpp
 
 rsem-simulate-reads : simulation.o
        $(CC) -o rsem-simulate-reads simulation.o
diff --git a/wiggle.cpp b/wiggle.cpp
new file mode 100644 (file)
index 0000000..90b6f8b
--- /dev/null
@@ -0,0 +1,108 @@
+#include <cstring>
+#include <cstdlib>
+#include <cassert>
+
+#include "wiggle.h"
+
+#include "sam/bam.h"
+#include "sam/sam.h"
+
+void add_bam_record_to_wiggle(const bam1_t *b, Wiggle& wiggle) {
+    float w = bam_aux2f(bam_aux_get(b, "ZW"));
+    int pos = b->core.pos;
+    uint32_t *p = bam1_cigar(b);
+    
+    for (int i = 0; i < (int)b->core.n_cigar; i++, ++p) {
+        int op = *p & BAM_CIGAR_MASK;
+        int op_len = *p >> BAM_CIGAR_SHIFT;
+        
+        switch (op) {
+            //case BAM_CSOFT_CLIP : pos += op_len; break;
+        case BAM_CINS : pos += op_len; break;
+        case BAM_CMATCH :
+            for (int j = 0; j < op_len; j++, ++pos) {
+                wiggle.read_depth[pos] += w;
+            }
+            break;
+        case BAM_CREF_SKIP : pos += op_len; break;
+        default : assert(false);
+        }
+    }
+}
+
+void build_wiggles(const std::string& bam_filename,
+                   WiggleProcessor& processor) {
+    samfile_t *bam_in = samopen(bam_filename.c_str(), "rb", NULL);
+       if (bam_in == 0) { fprintf(stderr, "Cannot open %s!\n", bam_filename.c_str()); exit(-1); }
+       //assert(bam_in != 0);
+
+    int cur_tid = -1; //current tid;
+       int cnt = 0;
+    bam1_t *b = bam_init1();
+    Wiggle wiggle;
+       while (samread(bam_in, b) >= 0) {
+               if (b->core.tid != cur_tid) {
+                       if (cur_tid >= 0) processor.process(wiggle);
+                       cur_tid = b->core.tid;
+            wiggle.name = bam_in->header->target_name[cur_tid];
+            wiggle.read_depth.assign(bam_in->header->target_len[cur_tid], 0.0);
+               }
+        add_bam_record_to_wiggle(b, wiggle);
+               ++cnt;
+               if (cnt % 1000000 == 0) fprintf(stderr, "%d FIN\n", cnt);
+       }
+       if (cur_tid >= 0) processor.process(wiggle);
+
+       samclose(bam_in);
+       bam_destroy1(b);
+}
+
+UCSCWiggleTrackWriter::UCSCWiggleTrackWriter(const std::string& output_filename,
+                                             const std::string& track_name) {
+    fo = fopen(output_filename.c_str(), "w");
+    fprintf(fo, "track type=wiggle_0 name=\"%s\" description=\"%s\" visibility=full\n",
+            track_name.c_str(),
+            track_name.c_str());
+}
+
+UCSCWiggleTrackWriter::~UCSCWiggleTrackWriter() {
+    fclose(fo);
+}
+
+void UCSCWiggleTrackWriter::process(const Wiggle& wiggle) {
+    int sp, ep;
+    
+    sp = ep = -1;
+    for (size_t i = 0; i < wiggle.read_depth.size(); i++) {
+        if (wiggle.read_depth[i] > 0) {
+            ep = i;
+        }
+        else {
+            if (sp < ep) {
+                ++sp;
+                fprintf(fo, "fixedStep chrom=%s start=%d step=1\n", wiggle.name.c_str(), sp + 1);
+                for (int j = sp; j <= ep; j++) fprintf(fo, "%.7g\n", wiggle.read_depth[j]);
+            }
+            sp = i;
+        }
+    }
+    if (sp < ep) {
+        ++sp;
+        fprintf(fo, "fixedStep chrom=%s start=%d step=1\n", wiggle.name.c_str(), sp + 1);
+        for (int j = sp; j <= ep; j++) fprintf(fo, "%.7g\n", wiggle.read_depth[j]);
+    }
+}
+
+ReadDepthWriter::ReadDepthWriter(std::ostream& stream) 
+    : stream_(stream) {
+}
+
+void ReadDepthWriter::process(const Wiggle& wiggle) {
+    stream_ << wiggle.name << '\t'
+            << wiggle.read_depth.size() << '\t';
+    for (size_t i = 0; i < wiggle.read_depth.size(); ++i) {
+        if (i > 0) stream_ << ' ';
+        stream_ << wiggle.read_depth[i];
+    }
+    stream_ << '\n';
+}
diff --git a/wiggle.h b/wiggle.h
new file mode 100644 (file)
index 0000000..09cc4f9
--- /dev/null
+++ b/wiggle.h
@@ -0,0 +1,41 @@
+#include <cstdio>
+#include <string>
+#include <vector>
+#include <ostream>
+
+struct Wiggle {
+    std::string name;
+    std::vector<float> read_depth;
+};
+
+class WiggleProcessor {
+public:
+    virtual ~WiggleProcessor() {}
+    virtual void process(const Wiggle& wiggle) = 0;
+};
+
+class UCSCWiggleTrackWriter : public WiggleProcessor {
+public:
+    UCSCWiggleTrackWriter(const std::string& output_filename,
+                          const std::string& track_name);
+        
+    ~UCSCWiggleTrackWriter();
+
+    void process(const Wiggle& wiggle);
+
+private:
+    FILE *fo;
+};
+
+class ReadDepthWriter : public WiggleProcessor {
+public:
+    ReadDepthWriter(std::ostream& stream);
+
+    void process(const Wiggle& wiggle);
+
+private:
+    std::ostream& stream_;
+};
+
+void build_wiggles(const std::string& bam_filename,
+                   WiggleProcessor& processor);