1 // ***************************************************************************
\r
2 // BamMultiReader.h (c) 2010 Erik Garrison
\r
3 // Marth Lab, Department of Biology, Boston College
\r
4 // All rights reserved.
\r
5 // ---------------------------------------------------------------------------
\r
6 // Last modified: 22 February 2010 (EG)
\r
7 // ---------------------------------------------------------------------------
\r
8 // Functionality for simultaneously reading multiple BAM files
\r
9 // ***************************************************************************
\r
11 #ifndef BAMMULTIREADER_H
\r
12 #define BAMMULTIREADER_H
\r
17 using namespace std;
\r
19 // BamTools includes
\r
21 #include "BamReader.h"
\r
23 namespace BamTools {
\r
// Per-reader state tag; BamMultiReader stores one of these for each reader
// in its readerStates vector.
// NOTE(review): the meaning of each state is inferred from its name only --
// confirm against the implementation file.
enum BamReaderState {
    START,   // implicit value 0
    END,     // implicit value 1
    CLOSED   // implicit value 2
};
\r
27 class BamMultiReader {
\r
29 // constructor / destructor
\r
31 BamMultiReader(void);
\r
32 ~BamMultiReader(void);
\r
41 // ----------------------
\r
42 // BAM file operations
\r
43 // ----------------------
\r
47 // performs random-access jump to reference, position
\r
48 bool Jump(int refID, int position = 0);
\r
49 // opens BAM files (and optional BAM index files, if provided)
\r
50 //void Open(const vector<std::string&> filenames, const vector<std::string&> indexFilenames);
\r
51 void Open(const vector<string> filenames, bool openIndexes = true);
\r
52 // returns file pointers to beginning of alignments
\r
55 // ----------------------
\r
56 // access alignment data
\r
57 // ----------------------
\r
58 // updates the reference id marker to match the lower limit of our readers
\r
59 void UpdateReferenceID(void);
\r
61 // retrieves next available alignment (returns success/fail) from all files
\r
62 bool GetNextAlignment(BamAlignment&);
\r
63 // ... should this be private?
\r
64 bool HasOpenReaders(void);
\r
66 // ----------------------
\r
67 // access auxiliary data
\r
68 // ----------------------
\r
70 // returns unified SAM header text for all files
\r
71 const string GetHeaderText(void) const;
\r
72 // returns number of reference sequences
\r
73 const int GetReferenceCount(void) const;
\r
74 // returns vector of reference objects
\r
75 const BamTools::RefVector GetReferenceData(void) const;
\r
76 // returns reference id (used for BamMultiReader::Jump()) for the given reference name
\r
77 const int GetReferenceID(const std::string& refName) const;
\r
78 // validates that we have a congruent set of BAM files that are aligned against the same reference sequences
\r
79 void ValidateReaders() const;
\r
81 // ----------------------
\r
82 // BAM index operations
\r
83 // ----------------------
\r
85 // creates index for BAM files which lack them, saves to files (default = bamFilename + ".bai")
\r
86 bool CreateIndexes(void);
\r
88 //const int GetReferenceID(const string& refName) const;
\r
91 void PrintFilenames(void);
\r
92 void UpdateAlignments(void);
\r
95 // private implementation
\r
97 // TODO perhaps, for legibility, I should use a struct to wrap them all up
\r
98 // But this may actually make things more confusing, as I'm only
\r
99 // operating on them all simultaneously during GetNextAlignment
\r
101 // all these vectors are ordered the same
\r
102 // readers.at(N) refers to the same reader as alignments.at(N) and readerStates.at(N)
\r
103 vector<BamReader*> readers; // the set of readers which we operate on
\r
104 vector<BamAlignment*> alignments; // the equivalent set of alignments we use to step through the files
\r
105 vector<BamReaderState> readerStates; // states of the various readers
\r
106 // alignment position?
\r
107 vector<string> fileNames;
\r
110 } // namespace BamTools
\r
112 #endif // BAMMULTIREADER_H
\r