1 // ***************************************************************************
2 // BamReader.cpp (c) 2009 Derek Barnett, Michael Strömberg
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 4 March 2011 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides read access to BAM files.
9 // ***************************************************************************
11 #include <api/BamReader.h>
12 #include <api/internal/BamReader_p.h>
13 using namespace BamTools;
14 using namespace BamTools::Internal;
23 /*! \class BamTools::BamReader
24 \brief Provides read access to BAM files.
27 /*! \fn BamReader::BamReader(void)
30 BamReader::BamReader(void)
31 : d(new BamReaderPrivate(this))
34 /*! \fn BamReader::~BamReader(void)
37 BamReader::~BamReader(void) {
42 /*! \fn void BamReader::Close(void)
43 \brief Closes the current BAM file.
45 Also clears out all header and reference data.
49 void BamReader::Close(void) {
53 /*! \fn bool BamReader::CreateIndex(const BamIndex::IndexType& type)
54 \brief Creates an index file for current BAM file.
56 \param type file format to create, see BamIndex::IndexType for available formats
57 \return \c true if index created OK
58 \sa LocateIndex(), OpenIndex()
60 bool BamReader::CreateIndex(const BamIndex::IndexType& type) {
61 return d->CreateIndex(type);
64 /*! \fn const std::string BamReader::GetFilename(void) const
65 \brief Returns name of current BAM file.
67 Retrieved filename will contain whatever was passed via Open().
68 If you need full directory paths here, be sure to include them
69 when you open the BAM file.
71 \returns name of open BAM file. If no file is open, returns an empty string.
74 const std::string BamReader::GetFilename(void) const {
78 /*! \fn SamHeader BamReader::GetHeader(void) const
79 \brief Returns SAM header data.
81 Header data is wrapped in a SamHeader object that can be conveniently queried & modified.
83 N.B. - Modifying the retrieved SamHeader object does NOT affect the
84 current BAM file. This file has been opened in a read-only mode.
85 However, your modified SamHeader object can be used in conjunction with
86 BamWriter to generate a new BAM file with the appropriate header information.
88 \returns header data object
91 SamHeader BamReader::GetHeader(void) const {
92 return d->GetSamHeader();
95 /*! \fn std::string BamReader::GetHeaderText(void) const
96 \brief Returns SAM header data, as SAM-formatted text.
98 N.B. - Modifying the retrieved text does NOT affect the current
99 BAM file. This file has been opened in a read-only mode. However,
100 your modified header text can be used in conjunction with BamWriter
101 to generate a new BAM file with the appropriate header information.
103 \returns SAM-formatted header text
106 std::string BamReader::GetHeaderText(void) const {
107 return d->GetHeaderText();
110 /*! \fn bool BamReader::GetNextAlignment(BamAlignment& alignment)
111 \brief Retrieves next available alignment.
113 Attempts to read the next alignment record from BAM file, and checks to see
114 if it overlaps the current region. If no region is currently set, then the
115 next alignment available is always considered valid.
117 If a region has been set, via Jump() or SetRegion(), an alignment is only
118 considered valid if it overlaps the region. If the actual 'next' alignment record
119 in the BAM file does not overlap this region, then this function will read sequentially
120 through the file until the next alignment that overlaps this region is found.
121 Once the region has been exhausted (i.e. the next alignment loaded is beyond the region),
122 the function aborts and returns \c false. In this case, there is no point to continue
123 reading, assuming properly sorted alignments.
125 This function fully populates all of the alignment's available data fields,
126 including the string data fields (read name, bases, qualities, tags, filename).
127 If only positional data (refID, position, CIGAR ops, alignment flags, etc.)
128 are required, consider using GetNextAlignmentCore() for a significant
131 \param alignment destination for alignment record data
132 \returns \c true if a valid alignment was found
134 bool BamReader::GetNextAlignment(BamAlignment& alignment) {
135 return d->GetNextAlignment(alignment);
138 /*! \fn bool BamReader::GetNextAlignmentCore(BamAlignment& alignment)
139 \brief Retrieves next available alignment, without populating the alignment's string data fields.
141 Equivalent to GetNextAlignment() with respect to what is a valid overlapping alignment.
143 However, this method does NOT populate the alignment's string data fields
144 (read name, bases, qualities, tags, filename). This provides a boost in speed
145 when these fields are not required for every alignment. These fields can be
146 populated 'lazily' (as needed) by calling BamAlignment::BuildCharData() later.
148 \param alignment destination for alignment record data
149 \returns \c true if a valid alignment was found
152 bool BamReader::GetNextAlignmentCore(BamAlignment& alignment) {
153 return d->GetNextAlignmentCore(alignment);
156 /*! \fn int BamReader::GetReferenceCount(void) const
157 \brief Returns number of reference sequences.
159 int BamReader::GetReferenceCount(void) const {
160 return d->GetReferenceCount();
163 /*! \fn const RefVector& BamReader::GetReferenceData(void) const
164 \brief Returns all reference sequence entries.
167 const RefVector& BamReader::GetReferenceData(void) const {
168 return d->GetReferenceData();
171 /*! \fn int BamReader::GetReferenceID(const std::string& refName) const
172 \brief Returns the ID of the reference with this name.
174 If \a refName is not found, returns -1.
176 int BamReader::GetReferenceID(const std::string& refName) const {
177 return d->GetReferenceID(refName);
180 /*! \fn bool BamReader::HasIndex(void) const
181 \brief Returns \c true if index data is available.
183 bool BamReader::HasIndex(void) const {
184 return d->HasIndex();
187 /*! \fn bool BamReader::IsIndexLoaded(void) const
188 \brief Returns \c true if index data is available.
190 \deprecated Instead use HasIndex()
192 Deprecated purely for API semantic clarity - HasIndex() should be clearer
193 than IsIndexLoaded() in light of the new caching modes that may clear the
194 index data from memory, but leave the index file open for later random access
197 For example, what would (IsIndexLoaded() == true) mean when cacheMode has been
198 explicitly set to NoIndexCaching? This is confusing at best, misleading about
199 current memory behavior at worst.
202 bool BamReader::IsIndexLoaded(void) const {
203 return d->HasIndex();
206 /*! \fn bool BamReader::IsOpen(void) const
207 \brief Returns \c true if a BAM file is open for reading.
209 bool BamReader::IsOpen(void) const {
213 /*! \fn bool BamReader::Jump(int refID, int position)
214 \brief Performs a random-access jump within BAM file.
216 This is a convenience method, equivalent to calling SetRegion()
217 with only a left boundary specified.
219 \returns \c true if jump was successful
222 bool BamReader::Jump(int refID, int position) {
223 return d->SetRegion( BamRegion(refID, position) );
226 /*! \fn bool BamReader::LocateIndex(const BamIndex::IndexType& preferredType)
227 \brief Looks in BAM file's directory for a matching index file.
229 Use this function when you need an index file, and perhaps have a
230 preferred index format, but do not depend heavily on which format
231 actually gets loaded at runtime.
233 This function will defer to your \a preferredType whenever possible.
234 However, if an index file of \a preferredType can not be found, then
235 it will look for any other index file that corresponds to this BAM file.
237 If you want precise control over which index file is loaded, use OpenIndex()
238 with the desired index filename. If that function returns false, you can use
239 CreateIndex() to then build an index of the exact requested format.
241 \param preferredType desired index file format, see BamIndex::IndexType for available formats
242 \returns \c true if (any) index file could be found
244 bool BamReader::LocateIndex(const BamIndex::IndexType& preferredType) {
245 return d->LocateIndex(preferredType);
248 /*! \fn bool BamReader::Open(const std::string& filename)
249 \brief Opens a BAM file.
251 If BamReader is already opened on another file, this function closes
252 that file, then attempts to open requested \a filename.
254 \param filename name of BAM file to open
255 \returns \c true if BAM file was opened successfully
256 \sa Close(), IsOpen(), OpenIndex()
258 bool BamReader::Open(const std::string& filename) {
259 return d->Open(filename);
262 /*! \fn bool BamReader::OpenIndex(const std::string& indexFilename)
263 \brief Opens a BAM index file.
265 \param indexFilename name of BAM index file
267 \returns \c true if BAM index file was opened & data loaded successfully
268 \sa LocateIndex(), Open(), SetIndex()
270 bool BamReader::OpenIndex(const std::string& indexFilename) {
271 return d->OpenIndex(indexFilename);
274 /*! \fn bool BamReader::Rewind(void)
275 \brief Returns the internal file pointer to the first alignment record.
277 Useful for performing multiple sequential passes through a BAM file.
278 Calling this function clears any prior region that may have been set.
280 N.B. - Note that this function sets the file pointer to first alignment record
281 in the BAM file, NOT the beginning of the file.
283 \returns \c true if rewind operation was successful
284 \sa Jump(), SetRegion()
286 bool BamReader::Rewind(void) {
290 /*! \fn void BamReader::SetIndex(BamIndex* index)
291 \brief Sets a custom BamIndex on this reader.
293 Only necessary for custom BamIndex subclasses. Most clients should
294 never have to use this function.
299 reader.SetIndex(new MyCustomBamIndex);
302 N.B. - BamReader takes ownership of \a index - i.e. BamReader will
303 take care of deleting the pointer when the reader is destructed,
304 when the current BAM file is closed, or when a new index is requested.
306 \param index custom BamIndex subclass created by client
307 \sa CreateIndex(), LocateIndex(), OpenIndex()
309 void BamReader::SetIndex(BamIndex* index) {
313 /*! \fn void BamReader::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode)
314 \brief Changes the caching behavior of the index data.
316 Default mode is BamIndex::LimitedIndexCaching.
318 \param mode desired cache mode for index, see BamIndex::IndexCacheMode for
319 description of the available cache modes
322 void BamReader::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode) {
323 d->SetIndexCacheMode(mode);
326 /*! \fn bool BamReader::SetRegion(const BamRegion& region)
327 \brief Sets a target region of interest
329 Requires that index data be available. Attempts a random-access
330 jump in the BAM file, near \a region left boundary position.
332 Subsequent calls to GetNextAlignment() or GetNextAlignmentCore()
333 will only return \c true when alignments can be found that overlap
336 A \a region with no right boundary is considered open-ended, meaning
337 that all alignments that lie downstream of the left boundary are
338 considered valid, continuing to the end of the BAM file.
340 \param region desired region-of-interest to activate
341 \returns \c true if reader was able to jump successfully to the region's left boundary
342 \sa HasIndex(), Jump()
344 bool BamReader::SetRegion(const BamRegion& region) {
345 return d->SetRegion(region);
348 /*! \fn bool BamReader::SetRegion(const int& leftRefID,
349 const int& leftPosition,
350 const int& rightRefID,
351 const int& rightPosition)
352 \brief Sets a target region of interest.
354 This is an overloaded function.
356 \param leftRefID referenceID of region's left boundary
357 \param leftPosition position of region's left boundary
358 \param rightRefID reference ID of region's right boundary
359 \param rightPosition position of region's right boundary
361 \returns \c true if reader was able to jump successfully to the region's left boundary
362 \sa HasIndex(), Jump()
364 bool BamReader::SetRegion(const int& leftRefID,
365 const int& leftBound,
366 const int& rightRefID,
367 const int& rightBound)
369 return d->SetRegion( BamRegion(leftRefID, leftBound, rightRefID, rightBound) );