1 // ***************************************************************************
2 // BamMultiReader.cpp (c) 2010 Erik Garrison, Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 15 March 2011 (DB)
6 // ---------------------------------------------------------------------------
7 // Convenience class for reading multiple BAM files.
9 // This functionality allows applications to work on very large sets of files
10 // without requiring intermediate merge, sort, and index steps for each file
11 // subset. It also improves the performance of our merge system as it
12 // precludes the need to sort merged files.
13 // ***************************************************************************
15 #include <api/BamMultiReader.h>
16 #include <api/internal/BamMultiReader_p.h>
17 using namespace BamTools;
/*! \class BamTools::BamMultiReader
    \brief Convenience class for reading multiple BAM files.
*/
27 /*! \fn BamMultiReader::BamMultiReader(void)
30 BamMultiReader::BamMultiReader(void)
31 : d(new Internal::BamMultiReaderPrivate)
34 /*! \fn BamMultiReader::~BamMultiReader(void)
37 BamMultiReader::~BamMultiReader(void) {
42 /*! \fn void BamMultiReader::Close(void)
43 \brief Closes all open BAM files.
45 Also clears out all header and reference data.
    \sa CloseFile(), HasOpenReaders(), Open(), BamReader::Close()
49 void BamMultiReader::Close(void) {
53 /*! \fn void BamMultiReader::CloseFile(const std::string& filename)
54 \brief Closes requested BAM file.
56 Leaves any other file(s) open, along with header and reference data.
58 \sa Close(), IsOpen(), Open(), BamReader::Close()
60 void BamMultiReader::CloseFile(const std::string& filename) {
61 d->CloseFile(filename);
64 /*! \fn bool BamMultiReader::CreateIndexes(const BamIndex::IndexType& type)
65 \brief Creates index files for the current BAM files.
67 \param type file format to create, see BamIndex::IndexType for available formats
68 \return \c true if index files created OK
69 \sa LocateIndexes(), OpenIndexes(), BamReader::CreateIndex()
71 bool BamMultiReader::CreateIndexes(const BamIndex::IndexType& type) {
72 return d->CreateIndexes(type);
75 /*! \fn const std::vector<std::string> BamMultiReader::Filenames(void) const
76 \brief Returns list of filenames for all open BAM files.
78 Retrieved filenames will contain whatever was passed via Open().
79 If you need full directory paths here, be sure to include them
80 when you open the BAM files.
82 \returns names of open BAM files. If no files are open, returns an empty vector.
83 \sa IsOpen(), BamReader::GetFilename()
85 const std::vector<std::string> BamMultiReader::Filenames(void) const {
86 return d->Filenames();
89 /*! \fn SamHeader BamMultiReader::GetHeader(void) const
90 \brief Returns unified SAM-format header for all files
92 N.B. - Modifying the retrieved text does NOT affect the current
93 BAM files. Thesse file have been opened in a read-only mode. However,
94 your modified header text can be used in conjunction with BamWriter
95 to generate a new BAM file with the appropriate header information.
97 \returns header data wrapped in SamHeader object
98 \sa GetHeaderText(), BamReader::GetHeader()
100 SamHeader BamMultiReader::GetHeader(void) const {
101 return d->GetHeader();
104 /*! \fn std::string BamMultiReader::GetHeaderText(void) const
105 \brief Returns unified SAM-format header text for all files
107 N.B. - Modifying the retrieved text does NOT affect the current
108 BAM files. Thesse file have been opened in a read-only mode. However,
109 your modified header text can be used in conjunction with BamWriter
110 to generate a new BAM file with the appropriate header information.
112 \returns SAM-formatted header text
113 \sa GetHeader(), BamReader::GetHeaderText()
115 std::string BamMultiReader::GetHeaderText(void) const {
116 return d->GetHeaderText();
119 /*! \fn bool BamMultiReader::GetNextAlignment(BamAlignment& alignment)
120 \brief Retrieves next available alignment.
122 Equivalent to BamReader::GetNextAlignment() with respect to what is a valid
123 overlapping alignment and what data gets populated.
125 This method takes care of determining which alignment actually is 'next'
126 across multiple files, depending on current SortOrder.
128 \param alignment destination for alignment record data
129 \returns \c true if a valid alignment was found
130 \sa GetNextAlignmentCore(), SetRegion(), SetSortOrder(), BamReader::GetNextAlignment()
132 bool BamMultiReader::GetNextAlignment(BamAlignment& nextAlignment) {
133 return d->GetNextAlignment(nextAlignment);
136 /*! \fn bool BamMultiReader::GetNextAlignmentCore(BamAlignment& alignment)
137 \brief Retrieves next available alignment.
139 Equivalent to BamReader::GetNextAlignmentCore() with respect to what is a valid
140 overlapping alignment and what data gets populated.
142 This method takes care of determining which alignment actually is 'next'
143 across multiple files, depending on current SortOrder.
145 \param alignment destination for alignment record data
146 \returns \c true if a valid alignment was found
147 \sa GetNextAlignment(), SetRegion(), SetSortOrder(), BamReader::GetNextAlignmentCore()
149 bool BamMultiReader::GetNextAlignmentCore(BamAlignment& nextAlignment) {
150 return d->GetNextAlignmentCore(nextAlignment);
153 /*! \fn int BamMultiReader::GetReferenceCount(void) const
154 \brief Returns number of reference sequences.
155 \sa BamReader::GetReferenceCount()
157 int BamMultiReader::GetReferenceCount(void) const {
158 return d->GetReferenceCount();
161 /*! \fn const RefVector& BamMultiReader::GetReferenceData(void) const
162 \brief Returns all reference sequence entries.
163 \sa RefData, BamReader::GetReferenceData()
165 const BamTools::RefVector BamMultiReader::GetReferenceData(void) const {
166 return d->GetReferenceData();
169 /*! \fn int BamMultiReader::GetReferenceID(const std::string& refName) const
170 \brief Returns the ID of the reference with this name.
172 If \a refName is not found, returns -1.
174 \sa BamReader::GetReferenceID()
176 int BamMultiReader::GetReferenceID(const std::string& refName) const {
177 return d->GetReferenceID(refName);
180 /*! \fn bool BamMultiReader::HasIndexes(void) const
181 \brief Returns \c true if all BAM files have index data available.
182 \sa BamReader::HasIndex()
184 bool BamMultiReader::HasIndexes(void) const {
185 return d->HasIndexes();
188 /*! \fn bool BamMultiReader::HasOpenReaders(void) const
189 \brief Returns \c true if there are any open BAM files.
191 bool BamMultiReader::HasOpenReaders(void) const {
192 return d->HasOpenReaders();
195 /*! \fn bool BamMultiReader::IsIndexLoaded(void) const
196 \brief Returns \c true if all BAM files have index data available.
198 \deprecated Instead use HasIndexes()
200 See explanation in BamReader.cpp for more details on the deprecation decision.
204 bool BamMultiReader::IsIndexLoaded(void) const {
205 return d->HasIndexes();
208 /*! \fn bool BamMultiReader::Jump(int refID, int position)
209 \brief Performs a random-access jump within current BAM files.
211 This is a convenience method, equivalent to calling SetRegion()
212 with only a left boundary specified.
214 \returns \c true if jump was successful
215 \sa HasIndex(), BamReader::Jump()
218 bool BamMultiReader::Jump(int refID, int position) {
219 return d->Jump(refID, position);
222 /*! \fn bool BamMultiReader::LocateIndexes(const BamIndex::IndexType& preferredType)
223 \brief Looks for index files that match current BAM files.
225 Use this function when you need index files, and perhaps have a
226 preferred index format, but do not depend heavily on which indexes
227 actually get loaded at runtime.
229 For each BAM file, this function will defer to your \a preferredType
230 whenever possible. However, if an index file of \a preferredType can
231 not be found, then it will look for any other index file that matches
234 An example case would look this:
237 BamMultiReader reader;
240 // ensure that all files have an index
241 if ( !reader.LocateIndexes() ) // opens any existing index files that match our BAM files
242 reader.CreateIndexes(); // creates index files for BAM files that still lack one
244 // do interesting stuff
249 If you want precise control over which index files are loaded, use OpenIndexes()
250 with the desired index filenames. If that function returns false, you can use
251 CreateIndexes() to then build index files of the exact requested format.
253 \param preferredType desired index file format, see BamIndex::IndexType for available formats
254 \returns \c true if index files could be found for \b ALL open BAM files
255 \sa BamReader::LocateIndex()
257 bool BamMultiReader::LocateIndexes(const BamIndex::IndexType& preferredType) {
258 return d->LocateIndexes(preferredType);
261 /*! \fn bool BamMultiReader::Open(const std::vector<std::string>& filenames)
262 \brief Opens BAM files.
264 N.B. - Opening BAM files will invalidate any current region set on the multireader.
265 All file pointers will be returned to the beginning of the alignment data.
266 Follow this with Jump() or SetRegion() to establish a region of interest.
268 \param filenames list of BAM filenames to open
269 \returns \c true if BAM files were opened successfully
270 \sa Close(), HasOpenReaders(), OpenFile(), OpenIndexes(), BamReader::Open()
272 bool BamMultiReader::Open(const std::vector<std::string>& filenames) {
273 return d->Open(filenames);
276 /*! \fn bool BamMultiReader::OpenFile(const std::string& filename)
277 \brief Opens a single BAM file.
279 Adds another BAM file to multireader "on-the-fly".
281 N.B. - Opening a BAM file invalidates any current region set on the multireader.
282 All file pointers will be returned to the beginning of the alignment data.
283 Follow this with Jump() or SetRegion() to establish a region of interest.
285 \param filename BAM filename to open
286 \returns \c true if BAM file was opened successfully
287 \sa Close(), HasOpenReaders(), Open(), OpenIndexes(), BamReader::Open()
289 bool BamMultiReader::OpenFile(const std::string& filename) {
290 return d->OpenFile(filename);
293 /*! \fn bool BamMultiReader::OpenIndexes(const std::vector<std::string>& indexFilenames)
294 \brief Opens index files for current BAM files.
296 N.B. - Currently assumes that index filenames match the order (and number) of
297 BAM files passed to Open().
299 \param indexFilenames list of BAM index file names
300 \returns \c true if BAM index file was opened & data loaded successfully
301 \sa LocateIndex(), Open(), SetIndex(), BamReader::OpenIndex()
303 bool BamMultiReader::OpenIndexes(const std::vector<std::string>& indexFilenames) {
304 return d->OpenIndexes(indexFilenames);
307 /*! \fn void BamMultiReader::PrintFilenames(void) const
308 \brief Convenience method for printing filenames to stdout.
309 \deprecated Doesn't really belong as an API function. Clients should
310 determine how the data is reported.
311 \sa Filenames(), BamReader::GetFilename()
313 void BamMultiReader::PrintFilenames(void) const {
317 /*! \fn bool BamMultiReader::Rewind(void)
318 \brief Returns the internal file pointers to the beginning of alignment records.
320 Useful for performing multiple sequential passes through BAM files.
321 Calling this function clears any prior region that may have been set.
323 \returns \c true if rewind operation was successful
324 \sa Jump(), SetRegion(), BamReader::Rewind()
326 bool BamMultiReader::Rewind(void) {
330 /*! \fn void BamMultiReader::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode)
331 \brief Changes the caching behavior of the index data.
333 Default mode is BamIndex::LimitedIndexCaching.
335 \param mode desired cache mode for index, see BamIndex::IndexCacheMode for
336 description of the available cache modes
337 \sa HasIndex(), BamReader::SetIndexCacheMode()
339 void BamMultiReader::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode) {
340 d->SetIndexCacheMode(mode);
343 /*! \fn bool BamMultiReader::SetRegion(const BamRegion& region)
344 \brief Sets a target region of interest
346 Equivalent to calling BamReader::SetRegion() on all open BAM files.
348 \param region desired region-of-interest to activate
349 \returns \c true if ALL readers set the region successfully
350 \sa HasIndexes(), Jump(), BamReader::SetRegion()
352 bool BamMultiReader::SetRegion(const BamRegion& region) {
353 return d->SetRegion(region);
356 /*! \fn bool BamMultiReader::SetRegion(const int& leftRefID,
357 const int& leftPosition,
358 const int& rightRefID,
359 const int& rightPosition)
360 \brief Sets a target region of interest
362 This is an overloaded function.
364 Equivalent to calling BamReader::SetRegion() on all open BAM files.
366 \param leftRefID referenceID of region's left boundary
367 \param leftPosition position of region's left boundary
368 \param rightRefID reference ID of region's right boundary
369 \param rightPosition position of region's right boundary
371 \returns \c true if ALL readers set the region successfully
372 \sa HasIndexes(), Jump(), BamReader::SetRegion()
374 bool BamMultiReader::SetRegion(const int& leftRefID,
375 const int& leftPosition,
376 const int& rightRefID,
377 const int& rightPosition)
379 BamRegion region(leftRefID, leftPosition, rightRefID, rightPosition);
380 return d->SetRegion(region);
383 /*! \fn void BamMultiReader::SetSortOrder(const SortOrder& order)
384 \brief Sets the expected sorting order for reading across multiple BAM files.
386 Default is BamMultiReader::SortedByPosition.
388 The SortOrder determines how the reader determines which alignment is "next"
389 from among its open readers.
391 \param order expected sort order
393 void BamMultiReader::SetSortOrder(const SortOrder& order) {
394 d->SetSortOrder(order);