1 // ***************************************************************************
2 // BamMultiReader.cpp (c) 2010 Erik Garrison, Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 1 October 2011 (DB)
6 // ---------------------------------------------------------------------------
7 // Convenience class for reading multiple BAM files.
9 // This functionality allows applications to work on very large sets of files
10 // without requiring intermediate merge, sort, and index steps for each file
11 // subset. It also improves the performance of our merge system as it
12 // precludes the need to sort merged files.
13 // ***************************************************************************
15 #include <api/BamMultiReader.h>
16 #include <api/internal/BamMultiReader_p.h>
17 using namespace BamTools;
23 /*! \class BamTools::BamMultiReader
24 \brief Convenience class for reading multiple BAM files.
27 /*! \fn BamMultiReader::BamMultiReader(void)
// Constructs the multi-reader by allocating its private implementation
// object (Internal::BamMultiReaderPrivate) and storing it in d; the public
// methods of this class forward their work to that object.
30 BamMultiReader::BamMultiReader(void)
31 : d(new Internal::BamMultiReaderPrivate)
34 /*! \fn BamMultiReader::~BamMultiReader(void)
// Destructor.
// NOTE(review): d is allocated with new in the constructor, so this body is
// expected to release it - confirm.
37 BamMultiReader::~BamMultiReader(void) {
42 /*! \fn void BamMultiReader::Close(void)
43 \brief Closes all open BAM files.
45 Also clears out all header and reference data.
47 \sa CloseFile(), IsOpen(), Open(), BamReader::Close()
// Closes all open BAM files.
// NOTE(review): presumably forwards to the private implementation object (d)
// in the same way as the sibling wrappers - confirm.
49 void BamMultiReader::Close(void) {
53 /*! \fn void BamMultiReader::CloseFile(const std::string& filename)
54 \brief Closes requested BAM file.
56 Leaves any other file(s) open, along with header and reference data.
58 \sa Close(), IsOpen(), Open(), BamReader::Close()
// Closes the single BAM file named by filename.
60 void BamMultiReader::CloseFile(const std::string& filename) {
// Forward the request unchanged to the private implementation object (d).
61 d->CloseFile(filename);
64 /*! \fn bool BamMultiReader::CreateIndexes(const BamIndex::IndexType& type)
65 \brief Creates index files for the current BAM files.
67 \param type file format to create, see BamIndex::IndexType for available formats
68 \return \c true if index files created OK
69 \sa LocateIndexes(), OpenIndexes(), BamReader::CreateIndex()
// Creates index files of the requested type for the current BAM files.
71 bool BamMultiReader::CreateIndexes(const BamIndex::IndexType& type) {
// The implementation's boolean result is handed back to the caller as-is.
72 return d->CreateIndexes(type);
75 /*! \fn const std::vector<std::string> BamMultiReader::Filenames(void) const
76 \brief Returns list of filenames for all open BAM files.
78 Retrieved filenames will contain whatever was passed via Open().
79 If you need full directory paths here, be sure to include them
80 when you open the BAM files.
82 \returns names of open BAM files. If no files are open, returns an empty vector.
83 \sa IsOpen(), BamReader::GetFilename()
// Returns the filenames of the open BAM files.
// The vector is returned by value, so the caller receives its own copy.
85 const std::vector<std::string> BamMultiReader::Filenames(void) const {
86 return d->Filenames();
89 /*! \fn SamHeader BamMultiReader::GetHeader(void) const
90 \brief Returns unified SAM-format header for all files
92 N.B. - Modifying the retrieved text does NOT affect the current
93 BAM files. These files have been opened in read-only mode. However,
94 your modified header text can be used in conjunction with BamWriter
95 to generate a new BAM file with the appropriate header information.
97 \returns header data wrapped in SamHeader object
98 \sa GetHeaderText(), BamReader::GetHeader()
// Returns the unified header as a SamHeader object.
// The object is returned by value, so edits made by the caller stay local
// to the caller's copy.
100 SamHeader BamMultiReader::GetHeader(void) const {
101 return d->GetHeader();
104 /*! \fn std::string BamMultiReader::GetHeaderText(void) const
105 \brief Returns unified SAM-format header text for all files
107 N.B. - Modifying the retrieved text does NOT affect the current
108 BAM files. These files have been opened in read-only mode. However,
109 your modified header text can be used in conjunction with BamWriter
110 to generate a new BAM file with the appropriate header information.
112 \returns SAM-formatted header text
113 \sa GetHeader(), BamReader::GetHeaderText()
// Returns the unified header as SAM-formatted text.
// The string is returned by value, so edits made by the caller stay local
// to the caller's copy.
115 std::string BamMultiReader::GetHeaderText(void) const {
116 return d->GetHeaderText();
119 /*! \fn bool BamMultiReader::GetNextAlignment(BamAlignment& alignment)
120 \brief Retrieves next available alignment.
122 Equivalent to BamReader::GetNextAlignment() with respect to what is a valid
123 overlapping alignment and what data gets populated.
125 This method takes care of determining which alignment actually is 'next'
126 across multiple files, depending on current SortOrder.
128 \param alignment destination for alignment record data
129 \returns \c true if a valid alignment was found
130 \sa GetNextAlignmentCore(), SetRegion(), SetSortOrder(), BamReader::GetNextAlignment()
// Retrieves the next available alignment into nextAlignment and returns
// the implementation's success flag.
132 bool BamMultiReader::GetNextAlignment(BamAlignment& nextAlignment) {
// This wrapper only forwards; choosing which file supplies the 'next'
// record happens inside the private implementation object (d).
133 return d->GetNextAlignment(nextAlignment);
136 /*! \fn bool BamMultiReader::GetNextAlignmentCore(BamAlignment& alignment)
137 \brief Retrieves next available alignment.
139 Equivalent to BamReader::GetNextAlignmentCore() with respect to what is a valid
140 overlapping alignment and what data gets populated.
142 This method takes care of determining which alignment actually is 'next'
143 across multiple files, depending on current SortOrder.
145 \param alignment destination for alignment record data
146 \returns \c true if a valid alignment was found
147 \sa GetNextAlignment(), SetRegion(), SetSortOrder(), BamReader::GetNextAlignmentCore()
// 'Core' counterpart of GetNextAlignment(): retrieves the next available
// alignment into nextAlignment and returns the implementation's success flag.
149 bool BamMultiReader::GetNextAlignmentCore(BamAlignment& nextAlignment) {
// Forwarded unchanged; which fields get populated is decided by the
// private implementation object (d), not here.
150 return d->GetNextAlignmentCore(nextAlignment);
153 /*! \fn int BamMultiReader::GetReferenceCount(void) const
154 \brief Returns number of reference sequences.
155 \sa BamReader::GetReferenceCount()
// Returns the number of reference sequences, as reported by the private
// implementation object (d).
157 int BamMultiReader::GetReferenceCount(void) const {
158 return d->GetReferenceCount();
161 /*! \fn const RefVector BamMultiReader::GetReferenceData(void) const
162 \brief Returns all reference sequence entries.
163 \sa RefData, BamReader::GetReferenceData()
// Returns all reference sequence entries.
// Note the return type: a const RefVector by value, not a reference, so
// the caller receives its own copy.
165 const BamTools::RefVector BamMultiReader::GetReferenceData(void) const {
166 return d->GetReferenceData();
169 /*! \fn int BamMultiReader::GetReferenceID(const std::string& refName) const
170 \brief Returns the ID of the reference with this name.
172 If \a refName is not found, returns -1.
174 \sa BamReader::GetReferenceID()
// Looks up the ID of the reference named refName.
// The API documentation for this method specifies -1 when the name is not found.
176 int BamMultiReader::GetReferenceID(const std::string& refName) const {
// The lookup itself is performed by the private implementation object (d).
177 return d->GetReferenceID(refName);
180 /*! \fn bool BamMultiReader::HasIndexes(void) const
181 \brief Returns \c true if all BAM files have index data available.
182 \sa BamReader::HasIndex()
// Reports whether all BAM files have index data available.
184 bool BamMultiReader::HasIndexes(void) const {
// Forwarded unchanged to the private implementation object (d).
185 return d->HasIndexes();
188 /*! \fn bool BamMultiReader::HasOpenReaders(void) const
189 \brief Returns \c true if there are any open BAM files.
// Reports whether any BAM file is currently open.
191 bool BamMultiReader::HasOpenReaders(void) const {
// Forwarded unchanged to the private implementation object (d).
192 return d->HasOpenReaders();
195 /*! \fn bool BamMultiReader::Jump(int refID, int position)
196 \brief Performs a random-access jump within current BAM files.
198 This is a convenience method, equivalent to calling SetRegion()
199 with only a left boundary specified.
201 \returns \c true if jump was successful
202 \sa HasIndexes(), BamReader::Jump()
// Performs a random-access jump to (refID, position) within the current
// BAM files and returns whether the jump succeeded.
205 bool BamMultiReader::Jump(int refID, int position) {
// Both coordinates are passed through untouched to the private
// implementation object (d).
206 return d->Jump(refID, position);
209 /*! \fn bool BamMultiReader::LocateIndexes(const BamIndex::IndexType& preferredType)
210 \brief Looks for index files that match current BAM files.
212 Use this function when you need index files, and perhaps have a
213 preferred index format, but do not depend heavily on which indexes
214 actually get loaded at runtime.
216 For each BAM file, this function will defer to your \a preferredType
217 whenever possible. However, if an index file of \a preferredType can
218 not be found, then it will look for any other index file that matches
221 An example case would look like this:
224 BamMultiReader reader;
227 // ensure that all files have an index
228 if ( !reader.LocateIndexes() ) // opens any existing index files that match our BAM files
229 reader.CreateIndexes(); // creates index files for BAM files that still lack one
231 // do interesting stuff
236 If you want precise control over which index files are loaded, use OpenIndexes()
237 with the desired index filenames. If that function returns false, you can use
238 CreateIndexes() to then build index files of the exact requested format.
240 \param preferredType desired index file format, see BamIndex::IndexType for available formats
241 \returns \c true if index files could be found for \b ALL open BAM files
242 \sa BamReader::LocateIndex()
// Looks for existing index files that match the current BAM files,
// favouring preferredType.
244 bool BamMultiReader::LocateIndexes(const BamIndex::IndexType& preferredType) {
// The search, including any fallback to other index formats, is carried
// out by the private implementation object (d); this wrapper only forwards.
245 return d->LocateIndexes(preferredType);
248 /*! \fn bool BamMultiReader::Open(const std::vector<std::string>& filenames)
249 \brief Opens BAM files.
251 N.B. - Opening BAM files will invalidate any current region set on the multireader.
252 All file pointers will be returned to the beginning of the alignment data.
253 Follow this with Jump() or SetRegion() to establish a region of interest.
255 \param filenames list of BAM filenames to open
256 \returns \c true if BAM files were opened successfully
257 \sa Close(), HasOpenReaders(), OpenFile(), OpenIndexes(), BamReader::Open()
// Opens the BAM files listed in filenames and returns whether that succeeded.
259 bool BamMultiReader::Open(const std::vector<std::string>& filenames) {
// Forward the list unchanged to the private implementation object (d).
260 return d->Open(filenames);
263 /*! \fn bool BamMultiReader::OpenFile(const std::string& filename)
264 \brief Opens a single BAM file.
266 Adds another BAM file to multireader "on-the-fly".
268 N.B. - Opening a BAM file invalidates any current region set on the multireader.
269 All file pointers will be returned to the beginning of the alignment data.
270 Follow this with Jump() or SetRegion() to establish a region of interest.
272 \param filename BAM filename to open
273 \returns \c true if BAM file was opened successfully
274 \sa Close(), HasOpenReaders(), Open(), OpenIndexes(), BamReader::Open()
// Opens one additional BAM file, identified by filename, and returns
// whether that succeeded.
276 bool BamMultiReader::OpenFile(const std::string& filename) {
// Forward the name unchanged to the private implementation object (d).
277 return d->OpenFile(filename);
280 /*! \fn bool BamMultiReader::OpenIndexes(const std::vector<std::string>& indexFilenames)
281 \brief Opens index files for current BAM files.
283 N.B. - Currently assumes that index filenames match the order (and number) of
284 BAM files passed to Open().
286 \param indexFilenames list of BAM index file names
287 \returns \c true if BAM index file was opened & data loaded successfully
288 \sa LocateIndexes(), Open(), SetIndex(), BamReader::OpenIndex()
// Opens the index files named in indexFilenames for the current BAM files.
290 bool BamMultiReader::OpenIndexes(const std::vector<std::string>& indexFilenames) {
// No validation or reordering happens in this wrapper; the list goes to
// the private implementation object (d) exactly as received.
291 return d->OpenIndexes(indexFilenames);
294 /*! \fn bool BamMultiReader::Rewind(void)
295 \brief Returns the internal file pointers to the beginning of alignment records.
297 Useful for performing multiple sequential passes through BAM files.
298 Calling this function clears any prior region that may have been set.
300 \returns \c true if rewind operation was successful
301 \sa Jump(), SetRegion(), BamReader::Rewind()
// Returns the file pointers to the beginning of the alignment records.
// NOTE(review): presumably forwards to the private implementation object (d)
// and returns its result, like the sibling wrappers - confirm.
303 bool BamMultiReader::Rewind(void) {
307 /*! \fn void BamMultiReader::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode)
308 \brief Changes the caching behavior of the index data.
310 Default mode is BamIndex::LimitedIndexCaching.
312 \param mode desired cache mode for index, see BamIndex::IndexCacheMode for
313 description of the available cache modes
314 \sa HasIndexes(), BamReader::SetIndexCacheMode()
// Changes the caching behavior of the index data to the given mode.
316 void BamMultiReader::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode) {
// Forward the mode unchanged to the private implementation object (d).
317 d->SetIndexCacheMode(mode);
320 /*! \fn bool BamMultiReader::SetRegion(const BamRegion& region)
321 \brief Sets a target region of interest
323 Equivalent to calling BamReader::SetRegion() on all open BAM files.
325 \param region desired region-of-interest to activate
326 \returns \c true if ALL readers set the region successfully
327 \sa HasIndexes(), Jump(), BamReader::SetRegion()
// Sets the target region of interest from an already-built BamRegion and
// returns the implementation's success flag.
329 bool BamMultiReader::SetRegion(const BamRegion& region) {
// Forward the region unchanged to the private implementation object (d).
330 return d->SetRegion(region);
333 /*! \fn bool BamMultiReader::SetRegion(const int& leftRefID,
334 const int& leftPosition,
335 const int& rightRefID,
336 const int& rightPosition)
337 \brief Sets a target region of interest
339 This is an overloaded function.
341 Equivalent to calling BamReader::SetRegion() on all open BAM files.
343 \param leftRefID referenceID of region's left boundary
344 \param leftPosition position of region's left boundary
345 \param rightRefID reference ID of region's right boundary
346 \param rightPosition position of region's right boundary
348 \returns \c true if ALL readers set the region successfully
349 \sa HasIndexes(), Jump(), BamReader::SetRegion()
// Overload that takes the four region boundaries as separate values.
351 bool BamMultiReader::SetRegion(const int& leftRefID,
352 const int& leftPosition,
353 const int& rightRefID,
354 const int& rightPosition)
// Pack the coordinates into a BamRegion, then make the same d->SetRegion()
// call as the BamRegion overload.
356 BamRegion region(leftRefID, leftPosition, rightRefID, rightPosition);
357 return d->SetRegion(region);