1 // ***************************************************************************
2 // BamMultiReader.cpp (c) 2010 Erik Garrison, Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 1 October 2011 (DB)
7 // ---------------------------------------------------------------------------
8 // Convenience class for reading multiple BAM files.
10 // This functionality allows applications to work on very large sets of files
11 // without requiring intermediate merge, sort, and index steps for each file
12 // subset. It also improves the performance of our merge system as it
13 // precludes the need to sort merged files.
14 // ***************************************************************************
16 #include <api/BamMultiReader.h>
17 #include <api/internal/BamMultiReader_p.h>
18 using namespace BamTools;
24 /*! \class BamTools::BamMultiReader
25 \brief Convenience class for reading multiple BAM files.
28 /*! \fn BamMultiReader::BamMultiReader(void)
31 BamMultiReader::BamMultiReader(void)
32 : d(new Internal::BamMultiReaderPrivate)
35 /*! \fn BamMultiReader::~BamMultiReader(void)
38 BamMultiReader::~BamMultiReader(void) {
43 /*! \fn void BamMultiReader::Close(void)
44 \brief Closes all open BAM files.
46 Also clears out all header and reference data.
48 \sa CloseFile(), IsOpen(), Open(), BamReader::Close()
50 void BamMultiReader::Close(void) {
54 /*! \fn void BamMultiReader::CloseFile(const std::string& filename)
55 \brief Closes requested BAM file.
57 Leaves any other file(s) open, along with header and reference data.
59 \sa Close(), IsOpen(), Open(), BamReader::Close()
61 void BamMultiReader::CloseFile(const std::string& filename) {
62 d->CloseFile(filename);
65 /*! \fn bool BamMultiReader::CreateIndexes(const BamIndex::IndexType& type)
66 \brief Creates index files for the current BAM files.
68 \param type file format to create, see BamIndex::IndexType for available formats
69 \return \c true if index files created OK
70 \sa LocateIndexes(), OpenIndexes(), BamReader::CreateIndex()
72 bool BamMultiReader::CreateIndexes(const BamIndex::IndexType& type) {
73 return d->CreateIndexes(type);
76 /*! \fn const std::vector<std::string> BamMultiReader::Filenames(void) const
77 \brief Returns list of filenames for all open BAM files.
79 Retrieved filenames will contain whatever was passed via Open().
80 If you need full directory paths here, be sure to include them
81 when you open the BAM files.
83 \returns names of open BAM files. If no files are open, returns an empty vector.
84 \sa IsOpen(), BamReader::GetFilename()
86 const std::vector<std::string> BamMultiReader::Filenames(void) const {
87 return d->Filenames();
90 /*! \fn SamHeader BamMultiReader::GetHeader(void) const
91 \brief Returns unified SAM-format header for all files
93 N.B. - Modifying the retrieved text does NOT affect the current
94 BAM files. Thesse file have been opened in a read-only mode. However,
95 your modified header text can be used in conjunction with BamWriter
96 to generate a new BAM file with the appropriate header information.
98 \returns header data wrapped in SamHeader object
99 \sa GetHeaderText(), BamReader::GetHeader()
101 SamHeader BamMultiReader::GetHeader(void) const {
102 return d->GetHeader();
105 /*! \fn std::string BamMultiReader::GetHeaderText(void) const
106 \brief Returns unified SAM-format header text for all files
108 N.B. - Modifying the retrieved text does NOT affect the current
109 BAM files. Thesse file have been opened in a read-only mode. However,
110 your modified header text can be used in conjunction with BamWriter
111 to generate a new BAM file with the appropriate header information.
113 \returns SAM-formatted header text
114 \sa GetHeader(), BamReader::GetHeaderText()
116 std::string BamMultiReader::GetHeaderText(void) const {
117 return d->GetHeaderText();
120 /*! \fn bool BamMultiReader::GetNextAlignment(BamAlignment& alignment)
121 \brief Retrieves next available alignment.
123 Equivalent to BamReader::GetNextAlignment() with respect to what is a valid
124 overlapping alignment and what data gets populated.
126 This method takes care of determining which alignment actually is 'next'
127 across multiple files, depending on current SortOrder.
129 \param alignment destination for alignment record data
130 \returns \c true if a valid alignment was found
131 \sa GetNextAlignmentCore(), SetRegion(), SetSortOrder(), BamReader::GetNextAlignment()
133 bool BamMultiReader::GetNextAlignment(BamAlignment& nextAlignment) {
134 return d->GetNextAlignment(nextAlignment);
137 /*! \fn bool BamMultiReader::GetNextAlignmentCore(BamAlignment& alignment)
138 \brief Retrieves next available alignment.
140 Equivalent to BamReader::GetNextAlignmentCore() with respect to what is a valid
141 overlapping alignment and what data gets populated.
143 This method takes care of determining which alignment actually is 'next'
144 across multiple files, depending on current SortOrder.
146 \param alignment destination for alignment record data
147 \returns \c true if a valid alignment was found
148 \sa GetNextAlignment(), SetRegion(), SetSortOrder(), BamReader::GetNextAlignmentCore()
150 bool BamMultiReader::GetNextAlignmentCore(BamAlignment& nextAlignment) {
151 return d->GetNextAlignmentCore(nextAlignment);
154 /*! \fn int BamMultiReader::GetReferenceCount(void) const
155 \brief Returns number of reference sequences.
156 \sa BamReader::GetReferenceCount()
158 int BamMultiReader::GetReferenceCount(void) const {
159 return d->GetReferenceCount();
162 /*! \fn const RefVector& BamMultiReader::GetReferenceData(void) const
163 \brief Returns all reference sequence entries.
164 \sa RefData, BamReader::GetReferenceData()
166 const BamTools::RefVector BamMultiReader::GetReferenceData(void) const {
167 return d->GetReferenceData();
170 /*! \fn int BamMultiReader::GetReferenceID(const std::string& refName) const
171 \brief Returns the ID of the reference with this name.
173 If \a refName is not found, returns -1.
175 \sa BamReader::GetReferenceID()
177 int BamMultiReader::GetReferenceID(const std::string& refName) const {
178 return d->GetReferenceID(refName);
181 /*! \fn bool BamMultiReader::HasIndexes(void) const
182 \brief Returns \c true if all BAM files have index data available.
183 \sa BamReader::HasIndex()
185 bool BamMultiReader::HasIndexes(void) const {
186 return d->HasIndexes();
189 /*! \fn bool BamMultiReader::HasOpenReaders(void) const
190 \brief Returns \c true if there are any open BAM files.
192 bool BamMultiReader::HasOpenReaders(void) const {
193 return d->HasOpenReaders();
196 /*! \fn bool BamMultiReader::Jump(int refID, int position)
197 \brief Performs a random-access jump within current BAM files.
199 This is a convenience method, equivalent to calling SetRegion()
200 with only a left boundary specified.
202 \returns \c true if jump was successful
203 \sa HasIndex(), BamReader::Jump()
206 bool BamMultiReader::Jump(int refID, int position) {
207 return d->Jump(refID, position);
210 /*! \fn bool BamMultiReader::LocateIndexes(const BamIndex::IndexType& preferredType)
211 \brief Looks for index files that match current BAM files.
213 Use this function when you need index files, and perhaps have a
214 preferred index format, but do not depend heavily on which indexes
215 actually get loaded at runtime.
217 For each BAM file, this function will defer to your \a preferredType
218 whenever possible. However, if an index file of \a preferredType can
219 not be found, then it will look for any other index file that matches
222 An example case would look this:
225 BamMultiReader reader;
228 // ensure that all files have an index
229 if ( !reader.LocateIndexes() ) // opens any existing index files that match our BAM files
230 reader.CreateIndexes(); // creates index files for BAM files that still lack one
232 // do interesting stuff
237 If you want precise control over which index files are loaded, use OpenIndexes()
238 with the desired index filenames. If that function returns false, you can use
239 CreateIndexes() to then build index files of the exact requested format.
241 \param preferredType desired index file format, see BamIndex::IndexType for available formats
242 \returns \c true if index files could be found for \b ALL open BAM files
243 \sa BamReader::LocateIndex()
245 bool BamMultiReader::LocateIndexes(const BamIndex::IndexType& preferredType) {
246 return d->LocateIndexes(preferredType);
249 /*! \fn bool BamMultiReader::Open(const std::vector<std::string>& filenames)
250 \brief Opens BAM files.
252 N.B. - Opening BAM files will invalidate any current region set on the multireader.
253 All file pointers will be returned to the beginning of the alignment data.
254 Follow this with Jump() or SetRegion() to establish a region of interest.
256 \param filenames list of BAM filenames to open
257 \returns \c true if BAM files were opened successfully
258 \sa Close(), HasOpenReaders(), OpenFile(), OpenIndexes(), BamReader::Open()
260 bool BamMultiReader::Open(const std::vector<std::string>& filenames) {
261 return d->Open(filenames);
264 /*! \fn bool BamMultiReader::OpenFile(const std::string& filename)
265 \brief Opens a single BAM file.
267 Adds another BAM file to multireader "on-the-fly".
269 N.B. - Opening a BAM file invalidates any current region set on the multireader.
270 All file pointers will be returned to the beginning of the alignment data.
271 Follow this with Jump() or SetRegion() to establish a region of interest.
273 \param filename BAM filename to open
274 \returns \c true if BAM file was opened successfully
275 \sa Close(), HasOpenReaders(), Open(), OpenIndexes(), BamReader::Open()
277 bool BamMultiReader::OpenFile(const std::string& filename) {
278 return d->OpenFile(filename);
281 /*! \fn bool BamMultiReader::OpenIndexes(const std::vector<std::string>& indexFilenames)
282 \brief Opens index files for current BAM files.
284 N.B. - Currently assumes that index filenames match the order (and number) of
285 BAM files passed to Open().
287 \param indexFilenames list of BAM index file names
288 \returns \c true if BAM index file was opened & data loaded successfully
289 \sa LocateIndex(), Open(), SetIndex(), BamReader::OpenIndex()
291 bool BamMultiReader::OpenIndexes(const std::vector<std::string>& indexFilenames) {
292 return d->OpenIndexes(indexFilenames);
295 /*! \fn bool BamMultiReader::Rewind(void)
296 \brief Returns the internal file pointers to the beginning of alignment records.
298 Useful for performing multiple sequential passes through BAM files.
299 Calling this function clears any prior region that may have been set.
301 \returns \c true if rewind operation was successful
302 \sa Jump(), SetRegion(), BamReader::Rewind()
304 bool BamMultiReader::Rewind(void) {
308 /*! \fn void BamMultiReader::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode)
309 \brief Changes the caching behavior of the index data.
311 Default mode is BamIndex::LimitedIndexCaching.
313 \param mode desired cache mode for index, see BamIndex::IndexCacheMode for
314 description of the available cache modes
315 \sa HasIndex(), BamReader::SetIndexCacheMode()
317 void BamMultiReader::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode) {
318 d->SetIndexCacheMode(mode);
321 /*! \fn bool BamMultiReader::SetRegion(const BamRegion& region)
322 \brief Sets a target region of interest
324 Equivalent to calling BamReader::SetRegion() on all open BAM files.
326 \param region desired region-of-interest to activate
327 \returns \c true if ALL readers set the region successfully
328 \sa HasIndexes(), Jump(), BamReader::SetRegion()
330 bool BamMultiReader::SetRegion(const BamRegion& region) {
331 return d->SetRegion(region);
334 /*! \fn bool BamMultiReader::SetRegion(const int& leftRefID,
335 const int& leftPosition,
336 const int& rightRefID,
337 const int& rightPosition)
338 \brief Sets a target region of interest
340 This is an overloaded function.
342 Equivalent to calling BamReader::SetRegion() on all open BAM files.
344 \param leftRefID referenceID of region's left boundary
345 \param leftPosition position of region's left boundary
346 \param rightRefID reference ID of region's right boundary
347 \param rightPosition position of region's right boundary
349 \returns \c true if ALL readers set the region successfully
350 \sa HasIndexes(), Jump(), BamReader::SetRegion()
352 bool BamMultiReader::SetRegion(const int& leftRefID,
353 const int& leftPosition,
354 const int& rightRefID,
355 const int& rightPosition)
357 BamRegion region(leftRefID, leftPosition, rightRefID, rightPosition);
358 return d->SetRegion(region);