1 // ***************************************************************************
2 // BamMultiReader.cpp (c) 2010 Erik Garrison, Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 25 October 2011 (DB)
6 // ---------------------------------------------------------------------------
7 // Convenience class for reading multiple BAM files.
9 // This functionality allows applications to work on very large sets of files
10 // without requiring intermediate merge, sort, and index steps for each file
11 // subset. It also improves the performance of our merge system as it
12 // precludes the need to sort merged files.
13 // ***************************************************************************
15 #include "api/BamMultiReader.h"
16 #include "api/internal/bam/BamMultiReader_p.h"
17 using namespace BamTools;
23 /*! \class BamTools::BamMultiReader
24 \brief Convenience class for reading multiple BAM files.
27 /*! \fn BamMultiReader::BamMultiReader(void)
30 BamMultiReader::BamMultiReader(void)
31 : d(new Internal::BamMultiReaderPrivate)
34 /*! \fn BamMultiReader::~BamMultiReader(void)
37 BamMultiReader::~BamMultiReader(void) {
42 /*! \fn bool BamMultiReader::Close(void)
43 \brief Closes all open BAM files.
45 Also clears out all header and reference data.
47 \sa CloseFile(), HasOpenReaders(), Open(), BamReader::Close()
49 bool BamMultiReader::Close(void) {
53 /*! \fn void BamMultiReader::CloseFile(const std::string& filename)
54 \brief Closes requested BAM file.
56 Leaves any other file(s) open, along with header and reference data.
58 \param[in] filename name of specific BAM file to close
60 \sa Close(), IsOpen(), Open(), BamReader::Close()
62 bool BamMultiReader::CloseFile(const std::string& filename) {
63 return d->CloseFile(filename);
66 /*! \fn bool BamMultiReader::CreateIndexes(const BamIndex::IndexType& type)
67 \brief Creates index files for the current BAM files.
69 \param[in] type file format to create, see BamIndex::IndexType for available formats
70 \return \c true if index files created OK
71 \sa LocateIndexes(), OpenIndexes(), BamReader::CreateIndex()
73 bool BamMultiReader::CreateIndexes(const BamIndex::IndexType& type) {
74 return d->CreateIndexes(type);
77 /*! \fn const std::vector<std::string> BamMultiReader::Filenames(void) const
78 \brief Returns list of filenames for all open BAM files.
80 Retrieved filenames will contain whatever was passed via Open().
81 If you need full directory paths here, be sure to include them
82 when you open the BAM files.
84 \returns names of open BAM files. If no files are open, returns an empty vector.
85 \sa IsOpen(), BamReader::GetFilename()
87 const std::vector<std::string> BamMultiReader::Filenames(void) const {
88 return d->Filenames();
91 /*! \fn std::string BamMultiReader::GetErrorString(void) const
92 \brief Returns a human-readable description of the last error that occurred
94 This method allows elimination of STDERR pollution. Developers of client code
95 may choose how the messages are displayed to the user, if at all.
97 \return error description
99 std::string BamMultiReader::GetErrorString(void) const {
100 return d->GetErrorString();
103 /*! \fn SamHeader BamMultiReader::GetHeader(void) const
104 \brief Returns unified SAM-format header for all files
106 \note Modifying the retrieved text does NOT affect the current
107 BAM files. These files have been opened in a read-only mode. However,
108 your modified header text can be used in conjunction with BamWriter
109 to generate a new BAM file with the appropriate header information.
111 \returns header data wrapped in SamHeader object
112 \sa GetHeaderText(), BamReader::GetHeader()
114 SamHeader BamMultiReader::GetHeader(void) const {
115 return d->GetHeader();
118 /*! \fn std::string BamMultiReader::GetHeaderText(void) const
119 \brief Returns unified SAM-format header text for all files
121 \note Modifying the retrieved text does NOT affect the current
122 BAM files. These files have been opened in a read-only mode. However,
123 your modified header text can be used in conjunction with BamWriter
124 to generate a new BAM file with the appropriate header information.
126 \returns SAM-formatted header text
127 \sa GetHeader(), BamReader::GetHeaderText()
129 std::string BamMultiReader::GetHeaderText(void) const {
130 return d->GetHeaderText();
133 /*! \fn bool BamMultiReader::GetNextAlignment(BamAlignment& alignment)
134 \brief Retrieves next available alignment.
136 Equivalent to BamReader::GetNextAlignment() with respect to what is a valid
137 overlapping alignment and what data gets populated.
139 This method takes care of determining which alignment actually is 'next'
140 across multiple files, depending on their sort order.
142 \param[out] alignment destination for alignment record data
143 \returns \c true if a valid alignment was found
144 \sa GetNextAlignmentCore(), SetRegion(), BamReader::GetNextAlignment()
146 bool BamMultiReader::GetNextAlignment(BamAlignment& nextAlignment) {
147 return d->GetNextAlignment(nextAlignment);
150 /*! \fn bool BamMultiReader::GetNextAlignmentCore(BamAlignment& alignment)
151 \brief Retrieves next available alignment.
153 Equivalent to BamReader::GetNextAlignmentCore() with respect to what is a valid
154 overlapping alignment and what data gets populated.
156 This method takes care of determining which alignment actually is 'next'
157 across multiple files, depending on their sort order.
159 \param[out] alignment destination for alignment record data
160 \returns \c true if a valid alignment was found
161 \sa GetNextAlignment(), SetRegion(), BamReader::GetNextAlignmentCore()
163 bool BamMultiReader::GetNextAlignmentCore(BamAlignment& nextAlignment) {
164 return d->GetNextAlignmentCore(nextAlignment);
167 /*! \fn int BamMultiReader::GetReferenceCount(void) const
168 \brief Returns number of reference sequences.
169 \sa BamReader::GetReferenceCount()
171 int BamMultiReader::GetReferenceCount(void) const {
172 return d->GetReferenceCount();
175 /*! \fn const RefVector& BamMultiReader::GetReferenceData(void) const
176 \brief Returns all reference sequence entries.
177 \sa RefData, BamReader::GetReferenceData()
179 const BamTools::RefVector BamMultiReader::GetReferenceData(void) const {
180 return d->GetReferenceData();
183 /*! \fn int BamMultiReader::GetReferenceID(const std::string& refName) const
184 \brief Returns the ID of the reference with this name.
186 If \a refName is not found, returns -1.
188 \param[in] refName name of reference to look up
189 \sa BamReader::GetReferenceID()
191 int BamMultiReader::GetReferenceID(const std::string& refName) const {
192 return d->GetReferenceID(refName);
195 /*! \fn bool BamMultiReader::HasIndexes(void) const
196 \brief Returns \c true if all BAM files have index data available.
197 \sa BamReader::HasIndex()
199 bool BamMultiReader::HasIndexes(void) const {
200 return d->HasIndexes();
203 /*! \fn bool BamMultiReader::HasOpenReaders(void) const
204 \brief Returns \c true if there are any open BAM files.
206 bool BamMultiReader::HasOpenReaders(void) const {
207 return d->HasOpenReaders();
210 /*! \fn bool BamMultiReader::Jump(int refID, int position)
211 \brief Performs a random-access jump within current BAM files.
213 This is a convenience method, equivalent to calling SetRegion()
214 with only a left boundary specified.
216 \param[in] refID ID of reference to jump to
217 \param[in] position (0-based) left boundary
219 \returns \c true if jump was successful
220 \sa HasIndex(), BamReader::Jump()
223 bool BamMultiReader::Jump(int refID, int position) {
224 return d->Jump(refID, position);
227 /*! \fn bool BamMultiReader::LocateIndexes(const BamIndex::IndexType& preferredType)
228 \brief Looks for index files that match current BAM files.
230 Use this function when you need index files, and perhaps have a
231 preferred index format, but do not depend heavily on which indexes
232 actually get loaded at runtime.
234 For each BAM file, this function will defer to your \a preferredType
235 whenever possible. However, if an index file of \a preferredType can
236 not be found, then it will look for any other index file that matches
239 An example case would look this:
241 BamMultiReader reader;
245 // ensure that all files have an index
246 if ( !reader.LocateIndexes() ) // opens any existing index files that match our BAM files
247 reader.CreateIndexes(); // creates index files for any BAM files that still lack one
249 // do interesting stuff using random-access...
253 If you want precise control over which index files are loaded, use OpenIndexes()
254 with the desired index filenames. If that function returns false, you can use
255 CreateIndexes() to then build index files of the exact requested format.
257 \param[in] preferredType desired index file format, see BamIndex::IndexType for available formats
258 \returns \c true if index files could be found for \b ALL open BAM files
259 \sa BamReader::LocateIndex()
261 bool BamMultiReader::LocateIndexes(const BamIndex::IndexType& preferredType) {
262 return d->LocateIndexes(preferredType);
265 /*! \fn bool BamMultiReader::Open(const std::vector<std::string>& filenames)
266 \brief Opens BAM files.
268 \note Opening BAM files will invalidate any current region set on the multireader.
269 All file pointers will be returned to the beginning of the alignment data. Follow
270 this with Jump() or SetRegion() to establish a region of interest.
272 \param[in] filenames list of BAM filenames to open
273 \returns \c true if BAM files were opened successfully
274 \sa Close(), HasOpenReaders(), OpenFile(), OpenIndexes(), BamReader::Open()
276 bool BamMultiReader::Open(const std::vector<std::string>& filenames) {
277 return d->Open(filenames);
280 /*! \fn bool BamMultiReader::OpenFile(const std::string& filename)
281 \brief Opens a single BAM file.
283 Adds another BAM file to multireader "on-the-fly".
285 \note Opening a BAM file will invalidate any current region set on the multireader.
286 All file pointers will be returned to the beginning of the alignment data. Follow
287 this with Jump() or SetRegion() to establish a region of interest.
289 \param[in] filename BAM filename to open
290 \returns \c true if BAM file was opened successfully
291 \sa Close(), HasOpenReaders(), Open(), OpenIndexes(), BamReader::Open()
293 bool BamMultiReader::OpenFile(const std::string& filename) {
294 return d->OpenFile(filename);
297 /*! \fn bool BamMultiReader::OpenIndexes(const std::vector<std::string>& indexFilenames)
298 \brief Opens index files for current BAM files.
300 \note Currently assumes that index filenames match the order (and number) of
301 BAM files passed to Open().
303 \param[in] indexFilenames list of BAM index file names
304 \returns \c true if BAM index file was opened & data loaded successfully
305 \sa LocateIndex(), Open(), SetIndex(), BamReader::OpenIndex()
307 bool BamMultiReader::OpenIndexes(const std::vector<std::string>& indexFilenames) {
308 return d->OpenIndexes(indexFilenames);
311 /*! \fn bool BamMultiReader::Rewind(void)
312 \brief Returns the internal file pointers to the beginning of alignment records.
314 Useful for performing multiple sequential passes through BAM files.
315 Calling this function clears any prior region that may have been set.
317 \returns \c true if rewind operation was successful
318 \sa Jump(), SetRegion(), BamReader::Rewind()
320 bool BamMultiReader::Rewind(void) {
324 /*! \fn bool BamMultiReader::SetRegion(const BamRegion& region)
325 \brief Sets a target region of interest
327 Equivalent to calling BamReader::SetRegion() on all open BAM files.
329 \warning BamRegion now represents a zero-based, HALF-OPEN interval.
330 In previous versions of BamTools (0.x & 1.x) all intervals were treated
331 as zero-based, CLOSED.
333 \param[in] region desired region-of-interest to activate
334 \returns \c true if ALL readers set the region successfully
335 \sa HasIndexes(), Jump(), BamReader::SetRegion()
337 bool BamMultiReader::SetRegion(const BamRegion& region) {
338 return d->SetRegion(region);
341 /*! \fn bool BamMultiReader::SetRegion(const int& leftRefID,
342 const int& leftPosition,
343 const int& rightRefID,
344 const int& rightPosition)
345 \brief Sets a target region of interest
347 This is an overloaded function. Equivalent to calling BamReader::SetRegion() on all open BAM files.
349 \warning This function now expects a zero-based, HALF-OPEN interval.
350 In previous versions of BamTools (0.x & 1.x) all intervals were treated
351 as zero-based, CLOSED.
353 \param[in] leftRefID referenceID of region's left boundary
354 \param[in] leftPosition position of region's left boundary
355 \param[in] rightRefID reference ID of region's right boundary
356 \param[in] rightPosition position of region's right boundary
358 \returns \c true if ALL readers set the region successfully
359 \sa HasIndexes(), Jump(), BamReader::SetRegion()
361 bool BamMultiReader::SetRegion(const int& leftRefID,
362 const int& leftPosition,
363 const int& rightRefID,
364 const int& rightPosition)
366 return d->SetRegion( BamRegion(leftRefID, leftPosition, rightRefID, rightPosition) );