1 // ***************************************************************************
2 // BamReader.cpp (c) 2009 Derek Barnett, Michael Strömberg
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 7 October 2011 (DB)
6 // ---------------------------------------------------------------------------
7 // Provides read access to BAM files.
8 // ***************************************************************************
10 #include <api/BamReader.h>
11 #include <api/internal/BamReader_p.h>
12 using namespace BamTools;
13 using namespace BamTools::Internal;
22 /*! \class BamTools::BamReader
23 \brief Provides read access to BAM files.
26 /*! \fn BamReader::BamReader(void)
29 BamReader::BamReader(void)
30 : d(new BamReaderPrivate(this))
33 /*! \fn BamReader::~BamReader(void)
36 BamReader::~BamReader(void) {
41 /*! \fn bool BamReader::Close(void)
42 \brief Closes the current BAM file.
44 Also clears out all header and reference data.
46 \return \c true if file closed OK
49 bool BamReader::Close(void) {
53 /*! \fn bool BamReader::CreateIndex(const BamIndex::IndexType& type)
54 \brief Creates an index file for current BAM file.
56 \param type file format to create, see BamIndex::IndexType for available formats
57 \return \c true if index created OK
58 \sa LocateIndex(), OpenIndex()
60 bool BamReader::CreateIndex(const BamIndex::IndexType& type) {
61 return d->CreateIndex(type);
64 string BamReader::GetErrorString(void) const {
65 return d->GetErrorString();
68 /*! \fn const std::string BamReader::GetFilename(void) const
69 \brief Returns name of current BAM file.
71 Retrieved filename will contain whatever was passed via Open().
72 If you need full directory paths here, be sure to include them
73 when you open the BAM file.
75 \returns name of open BAM file. If no file is open, returns an empty string.
78 const std::string BamReader::GetFilename(void) const {
82 /*! \fn SamHeader BamReader::GetHeader(void) const
83 \brief Returns SAM header data.
85 Header data is wrapped in a SamHeader object that can be conveniently queried & modified.
87 N.B. - Modifying the retrieved SamHeader object does NOT affect the
88 current BAM file. This file has been opened in a read-only mode.
89 However, your modified SamHeader object can be used in conjunction with
90 BamWriter to generate a new BAM file with the appropriate header information.
92 \returns header data object
95 SamHeader BamReader::GetHeader(void) const {
96 return d->GetSamHeader();
99 /*! \fn std::string BamReader::GetHeaderText(void) const
100 \brief Returns SAM header data, as SAM-formatted text.
102 N.B. - Modifying the retrieved text does NOT affect the current
103 BAM file. This file has been opened in a read-only mode. However,
104 your modified header text can be used in conjunction with BamWriter
105 to generate a new BAM file with the appropriate header information.
107 \returns SAM-formatted header text
110 std::string BamReader::GetHeaderText(void) const {
111 return d->GetHeaderText();
114 /*! \fn bool BamReader::GetNextAlignment(BamAlignment& alignment)
115 \brief Retrieves next available alignment.
117 Attempts to read the next alignment record from BAM file, and checks to see
118 if it overlaps the current region. If no region is currently set, then the
119 next alignment available is always considered valid.
121 If a region has been set, via Jump() or SetRegion(), an alignment is only
122 considered valid if it overlaps the region. If the actual 'next' alignment record
123 in the BAM file does not overlap this region, then this function will read sequentially
124 through the file until the next alignment that overlaps this region is found.
125 Once the region has been exhausted (i.e. the next alignment loaded is beyond the region),
126 the function aborts and returns \c false. In this case, there is no point to continue
127 reading, assuming properly sorted alignments.
129 This function fully populates all of the alignment's available data fields,
130 including the string data fields (read name, bases, qualities, tags, filename).
131 If only positional data (refID, position, CIGAR ops, alignment flags, etc.)
132 are required, consider using GetNextAlignmentCore() for a significant performance boost.
135 \param alignment destination for alignment record data
136 \returns \c true if a valid alignment was found
138 bool BamReader::GetNextAlignment(BamAlignment& alignment) {
139 return d->GetNextAlignment(alignment);
142 /*! \fn bool BamReader::GetNextAlignmentCore(BamAlignment& alignment)
143 \brief Retrieves next available alignment, without populating the alignment's string data fields.
145 Equivalent to GetNextAlignment() with respect to what is a valid overlapping alignment.
147 However, this method does NOT populate the alignment's string data fields
148 (read name, bases, qualities, tags, filename). This provides a boost in speed
149 when these fields are not required for every alignment. These fields can be
150 populated 'lazily' (as needed) by calling BamAlignment::BuildCharData() later.
152 \param alignment destination for alignment record data
153 \returns \c true if a valid alignment was found
156 bool BamReader::GetNextAlignmentCore(BamAlignment& alignment) {
157 return d->GetNextAlignmentCore(alignment);
160 /*! \fn int BamReader::GetReferenceCount(void) const
161 \brief Returns number of reference sequences.
163 int BamReader::GetReferenceCount(void) const {
164 return d->GetReferenceCount();
167 /*! \fn const RefVector& BamReader::GetReferenceData(void) const
168 \brief Returns all reference sequence entries.
171 const RefVector& BamReader::GetReferenceData(void) const {
172 return d->GetReferenceData();
175 /*! \fn int BamReader::GetReferenceID(const std::string& refName) const
176 \brief Returns the ID of the reference with this name.
178 If \a refName is not found, returns -1.
180 int BamReader::GetReferenceID(const std::string& refName) const {
181 return d->GetReferenceID(refName);
184 /*! \fn bool BamReader::HasIndex(void) const
185 \brief Returns \c true if index data is available.
187 bool BamReader::HasIndex(void) const {
188 return d->HasIndex();
191 /*! \fn bool BamReader::IsIndexLoaded(void) const
192 \brief Returns \c true if index data is available.
194 \deprecated Instead use HasIndex()
196 Deprecated purely for API semantic clarity - HasIndex() should be clearer
197 than IsIndexLoaded() in light of the new caching modes that may clear the
198 index data from memory, but leave the index file open for later random access.
201 For example, what would (IsIndexLoaded() == true) mean when cacheMode has been
202 explicitly set to NoIndexCaching? This is confusing at best, misleading about
203 current memory behavior at worst.
206 bool BamReader::IsIndexLoaded(void) const {
207 return d->HasIndex();
210 /*! \fn bool BamReader::IsOpen(void) const
211 \brief Returns \c true if a BAM file is open for reading.
213 bool BamReader::IsOpen(void) const {
217 /*! \fn bool BamReader::Jump(int refID, int position)
218 \brief Performs a random-access jump within BAM file.
220 This is a convenience method, equivalent to calling SetRegion()
221 with only a left boundary specified.
223 \returns \c true if jump was successful
226 bool BamReader::Jump(int refID, int position) {
227 return d->SetRegion( BamRegion(refID, position) );
230 /*! \fn bool BamReader::LocateIndex(const BamIndex::IndexType& preferredType)
231 \brief Looks in BAM file's directory for a matching index file.
233 Use this function when you need an index file, and perhaps have a
234 preferred index format, but do not depend heavily on which format
235 actually gets loaded at runtime.
237 This function will defer to your \a preferredType whenever possible.
238 However, if an index file of \a preferredType can not be found, then
239 it will look for any other index file that corresponds to this BAM file.
241 If you want precise control over which index file is loaded, use OpenIndex()
242 with the desired index filename. If that function returns false, you can use
243 CreateIndex() to then build an index of the exact requested format.
245 \param preferredType desired index file format, see BamIndex::IndexType for available formats
246 \returns \c true if (any) index file could be found
248 bool BamReader::LocateIndex(const BamIndex::IndexType& preferredType) {
249 return d->LocateIndex(preferredType);
252 /*! \fn bool BamReader::Open(const std::string& filename)
253 \brief Opens a BAM file.
255 If BamReader is already opened on another file, this function closes
256 that file, then attempts to open requested \a filename.
258 \param filename name of BAM file to open
259 \returns \c true if BAM file was opened successfully
260 \sa Close(), IsOpen(), OpenIndex()
262 bool BamReader::Open(const std::string& filename) {
263 return d->Open(filename);
266 /*! \fn bool BamReader::OpenIndex(const std::string& indexFilename)
267 \brief Opens a BAM index file.
269 \param indexFilename name of BAM index file
271 \returns \c true if BAM index file was opened & data loaded successfully
272 \sa LocateIndex(), Open(), SetIndex()
274 bool BamReader::OpenIndex(const std::string& indexFilename) {
275 return d->OpenIndex(indexFilename);
278 /*! \fn bool BamReader::Rewind(void)
279 \brief Returns the internal file pointer to the first alignment record.
281 Useful for performing multiple sequential passes through a BAM file.
282 Calling this function clears any prior region that may have been set.
284 N.B. - Note that this function sets the file pointer to first alignment record
285 in the BAM file, NOT the beginning of the file.
287 \returns \c true if rewind operation was successful
288 \sa Jump(), SetRegion()
290 bool BamReader::Rewind(void) {
294 /*! \fn void BamReader::SetIndex(BamIndex* index)
295 \brief Sets a custom BamIndex on this reader.
297 Only necessary for custom BamIndex subclasses. Most clients should
298 never have to use this function.
303 reader.SetIndex(new MyCustomBamIndex);
306 N.B. - BamReader takes ownership of \a index - i.e. BamReader will
307 take care of deleting the pointer when the reader is destructed,
308 when the current BAM file is closed, or when a new index is requested.
310 \param index custom BamIndex subclass created by client
311 \sa CreateIndex(), LocateIndex(), OpenIndex()
313 void BamReader::SetIndex(BamIndex* index) {
317 /*! \fn void BamReader::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode)
318 \brief Changes the caching behavior of the index data.
320 Default mode is BamIndex::LimitedIndexCaching.
322 \param mode desired cache mode for index, see BamIndex::IndexCacheMode for
323 description of the available cache modes
326 void BamReader::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode) {
327 d->SetIndexCacheMode(mode);
330 /*! \fn bool BamReader::SetRegion(const BamRegion& region)
331 \brief Sets a target region of interest
333 Requires that index data be available. Attempts a random-access
334 jump in the BAM file, near \a region left boundary position.
336 Subsequent calls to GetNextAlignment() or GetNextAlignmentCore()
337 will only return \c true when alignments can be found that overlap this \a region.
340 A \a region with no right boundary is considered open-ended, meaning
341 that all alignments that lie downstream of the left boundary are
342 considered valid, continuing to the end of the BAM file.
344 \param region desired region-of-interest to activate
345 \returns \c true if reader was able to jump successfully to the region's left boundary
346 \sa HasIndex(), Jump()
348 bool BamReader::SetRegion(const BamRegion& region) {
349 return d->SetRegion(region);
352 /*! \fn bool BamReader::SetRegion(const int& leftRefID,
353 const int& leftPosition,
354 const int& rightRefID,
355 const int& rightPosition)
356 \brief Sets a target region of interest.
358 This is an overloaded function.
360 \param leftRefID reference ID of region's left boundary
361 \param leftPosition position of region's left boundary
362 \param rightRefID reference ID of region's right boundary
363 \param rightPosition position of region's right boundary
365 \returns \c true if reader was able to jump successfully to the region's left boundary
366 \sa HasIndex(), Jump()
368 bool BamReader::SetRegion(const int& leftRefID,
369 const int& leftBound,
370 const int& rightRefID,
371 const int& rightBound)
373 return d->SetRegion( BamRegion(leftRefID, leftBound, rightRefID, rightBound) );