1 // ***************************************************************************
\r
2 // BamMultiReader.h (c) 2010 Erik Garrison
\r
3 // Marth Lab, Department of Biology, Boston College
\r
4 // All rights reserved.
\r
5 // ---------------------------------------------------------------------------
\r
6 // Last modified: 2 September 2010 (DB)
\r
7 // ---------------------------------------------------------------------------
\r
8 // Functionality for simultaneously reading multiple BAM files
\r
9 // ***************************************************************************
\r
11 #ifndef BAMMULTIREADER_H
\r
12 #define BAMMULTIREADER_H
\r
17 #include <utility> // for pair
\r
20 using namespace std;
\r
22 // BamTools includes
\r
24 #include "BamReader.h"
\r
26 namespace BamTools {
\r
28 // index mapping reference/position pairings to bamreaders and their alignments
\r
29 typedef multimap<pair<int, int>, pair<BamReader*, BamAlignment*> > AlignmentIndex;
\r
32 class BamMultiReader {
\r
34 // constructor / destructor
\r
36 BamMultiReader(void);
\r
37 ~BamMultiReader(void);
\r
46 // region under analysis, specified using SetRegion
\r
49 // ----------------------
\r
50 // BAM file operations
\r
51 // ----------------------
\r
56 // opens BAM files (and optional BAM index files, if provided)
\r
57 // @openIndexes - triggers index opening, useful for suppressing
\r
58 // error messages during merging of files in which we may not have
\r
60 // @coreMode - setup our first alignments using GetNextAlignmentCore();
\r
61 // also useful for merging
\r
62 // @useDefaultIndex - look for default BAM index ".bai" first. If false,
\r
63 // or if ".bai" does not exist, will look for BamTools index ".bti". If
\r
64 // neither exist, will open without an index
\r
65 bool Open(const vector<string> filenames, bool openIndexes = true, bool coreMode = false, bool useDefaultIndex = true);
\r
67 // returns whether underlying BAM readers ALL have an index loaded
\r
68 // this is useful to indicate whether Jump() or SetRegion() are possible
\r
69 bool IsIndexLoaded(void) const;
\r
71 // performs random-access jump to reference, position
\r
72 bool Jump(int refID, int position = 0);
\r
74 // sets the target region
\r
75 bool SetRegion(const BamRegion& region);
\r
76 bool SetRegion(const int&, const int&, const int&, const int&); // convenience function to above
\r
78 // returns file pointers to beginning of alignments
\r
81 // ----------------------
\r
82 // access alignment data
\r
83 // ----------------------
\r
84 // updates the reference id marker to match the lower limit of our readers
\r
85 void UpdateReferenceID(void);
\r
87 // retrieves next available alignment (returns success/fail) from all files
\r
88 bool GetNextAlignment(BamAlignment&);
\r
89 // retrieves next available alignment (returns success/fail) from all files
\r
90 // and populates the support data with information about the alignment
\r
91 // *** BUT DOES NOT PARSE CHARACTER DATA FROM THE ALIGNMENT
\r
92 bool GetNextAlignmentCore(BamAlignment&);
\r
93 // ... should this be private?
\r
94 bool HasOpenReaders(void);
\r
96 // ----------------------
\r
97 // access auxiliary data
\r
98 // ----------------------
\r
100 // returns unified SAM header text for all files
\r
101 const string GetHeaderText(void) const;
\r
102 // returns number of reference sequences
\r
103 const int GetReferenceCount(void) const;
\r
104 // returns vector of reference objects
\r
105 const BamTools::RefVector GetReferenceData(void) const;
\r
106 // returns reference id (used for BamMultiReader::Jump()) for the given reference name
\r
107 const int GetReferenceID(const std::string& refName) const;
\r
108 // validates that we have a congruent set of BAM files that are aligned against the same reference sequences
\r
109 void ValidateReaders() const;
\r
111 // ----------------------
\r
112 // BAM index operations
\r
113 // ----------------------
\r
115 // creates index for BAM files which lack them, saves to files (default = bamFilename + ".bai")
\r
116 bool CreateIndexes(bool useDefaultIndex = true);
\r
118 //const int GetReferenceID(const string& refName) const;
\r
121 void PrintFilenames(void);
\r
122 void DumpAlignmentIndex(void);
\r
123 void UpdateAlignments(void); // updates our alignment cache
\r
125 // private implementation
\r
128 // the set of readers and alignments which we operate on, maintained throughout the life of this class
\r
129 vector<pair<BamReader*, BamAlignment*> > readers;
\r
131 // readers and alignments sorted by reference id and position, to keep track of the lowest (next) alignment
\r
132 // when a reader reaches EOF, its entry is removed from this index
\r
133 AlignmentIndex alignments;
\r
135 vector<string> fileNames;
\r
138 } // namespace BamTools
\r
140 #endif // BAMMULTIREADER_H
\r