1 // ***************************************************************************
2 // BamReader.cpp (c) 2009 Derek Barnett, Michael Strömberg
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 4 March 2011 (DB)
6 // ---------------------------------------------------------------------------
7 // Provides read access to BAM files.
8 // ***************************************************************************
10 #include <api/BamReader.h>
11 #include <api/internal/BamReader_p.h>
12 using namespace BamTools;
13 using namespace BamTools::Internal;
22 /*! \class BamTools::BamReader
23 \brief Provides read access to BAM files.
26 /*! \fn BamReader::BamReader(void)
29 BamReader::BamReader(void)
30 : d(new BamReaderPrivate(this))
33 /*! \fn BamReader::~BamReader(void)
36 BamReader::~BamReader(void) {
41 /*! \fn void BamReader::Close(void)
42 \brief Closes the current BAM file.
44 Also clears out all header and reference data.
48 void BamReader::Close(void) {
52 /*! \fn bool BamReader::CreateIndex(const BamIndex::IndexType& type)
53 \brief Creates an index file for current BAM file.
55 \param type file format to create, see BamIndex::IndexType for available formats
56 \return \c true if index created OK
57 \sa LocateIndex(), OpenIndex()
59 bool BamReader::CreateIndex(const BamIndex::IndexType& type) {
60 return d->CreateIndex(type);
63 /*! \fn const std::string BamReader::GetFilename(void) const
64 \brief Returns name of current BAM file.
66 Retrieved filename will contain whatever was passed via Open().
67 If you need full directory paths here, be sure to include them
68 when you open the BAM file.
70 \returns name of open BAM file. If no file is open, returns an empty string.
73 const std::string BamReader::GetFilename(void) const {
77 /*! \fn SamHeader BamReader::GetHeader(void) const
78 \brief Returns SAM header data.
80 Header data is wrapped in a SamHeader object that can be conveniently queried & modified.
82 N.B. - Modifying the retrieved SamHeader object does NOT affect the
83 current BAM file. This file has been opened in a read-only mode.
84 However, your modified SamHeader object can be used in conjunction with
85 BamWriter to generate a new BAM file with the appropriate header information.
87 \returns header data object
90 SamHeader BamReader::GetHeader(void) const {
91 return d->GetSamHeader();
94 /*! \fn std::string BamReader::GetHeaderText(void) const
95 \brief Returns SAM header data, as SAM-formatted text.
97 N.B. - Modifying the retrieved text does NOT affect the current
98 BAM file. This file has been opened in a read-only mode. However,
99 your modified header text can be used in conjunction with BamWriter
100 to generate a new BAM file with the appropriate header information.
102 \returns SAM-formatted header text
105 std::string BamReader::GetHeaderText(void) const {
106 return d->GetHeaderText();
109 /*! \fn bool BamReader::GetNextAlignment(BamAlignment& alignment)
110 \brief Retrieves next available alignment.
112 Attempts to read the next alignment record from BAM file, and checks to see
113 if it overlaps the current region. If no region is currently set, then the
114 next alignment available is always considered valid.
116 If a region has been set, via Jump() or SetRegion(), an alignment is only
117 considered valid if it overlaps the region. If the actual 'next' alignment record
118 in the BAM file does not overlap this region, then this function will read sequentially
119 through the file until the next alignment that overlaps this region is found.
120 Once the region has been exhausted (i.e. the next alignment loaded is beyond the region),
121 the function aborts and returns \c false. In this case, there is no point to continue
122 reading, assuming properly sorted alignments.
124 This function fully populates all of the alignment's available data fields,
125 including the string data fields (read name, bases, qualities, tags, filename).
126 If only positional data (refID, position, CIGAR ops, alignment flags, etc.)
127 are required, consider using GetNextAlignmentCore() for a significant
130 \param alignment destination for alignment record data
131 \returns \c true if a valid alignment was found
133 bool BamReader::GetNextAlignment(BamAlignment& alignment) {
134 return d->GetNextAlignment(alignment);
137 /*! \fn bool BamReader::GetNextAlignmentCore(BamAlignment& alignment)
138 \brief Retrieves next available alignment, without populating the alignment's string data fields.
140 Equivalent to GetNextAlignment() with respect to what is a valid overlapping alignment.
142 However, this method does NOT populate the alignment's string data fields
143 (read name, bases, qualities, tags, filename). This provides a boost in speed
144 when these fields are not required for every alignment. These fields can be
145 populated 'lazily' (as needed) by calling BamAlignment::BuildCharData() later.
147 \param alignment destination for alignment record data
148 \returns \c true if a valid alignment was found
151 bool BamReader::GetNextAlignmentCore(BamAlignment& alignment) {
152 return d->GetNextAlignmentCore(alignment);
155 /*! \fn int BamReader::GetReferenceCount(void) const
156 \brief Returns number of reference sequences.
158 int BamReader::GetReferenceCount(void) const {
159 return d->GetReferenceCount();
162 /*! \fn const RefVector& BamReader::GetReferenceData(void) const
163 \brief Returns all reference sequence entries.
166 const RefVector& BamReader::GetReferenceData(void) const {
167 return d->GetReferenceData();
170 /*! \fn int BamReader::GetReferenceID(const std::string& refName) const
171 \brief Returns the ID of the reference with this name.
173 If \a refName is not found, returns -1.
175 int BamReader::GetReferenceID(const std::string& refName) const {
176 return d->GetReferenceID(refName);
179 /*! \fn bool BamReader::HasIndex(void) const
180 \brief Returns \c true if index data is available.
182 bool BamReader::HasIndex(void) const {
183 return d->HasIndex();
186 /*! \fn bool BamReader::IsIndexLoaded(void) const
187 \brief Returns \c true if index data is available.
189 \deprecated Instead use HasIndex()
191 Deprecated purely for API semantic clarity - HasIndex() should be clearer
192 than IsIndexLoaded() in light of the new caching modes that may clear the
193 index data from memory, but leave the index file open for later random access
196 For example, what would (IsIndexLoaded() == true) mean when cacheMode has been
197 explicitly set to NoIndexCaching? This is confusing at best, misleading about
198 current memory behavior at worst.
201 bool BamReader::IsIndexLoaded(void) const {
202 return d->HasIndex();
205 /*! \fn bool BamReader::IsOpen(void) const
206 \brief Returns \c true if a BAM file is open for reading.
208 bool BamReader::IsOpen(void) const {
212 /*! \fn bool BamReader::Jump(int refID, int position)
213 \brief Performs a random-access jump within BAM file.
215 This is a convenience method, equivalent to calling SetRegion()
216 with only a left boundary specified.
218 \returns \c true if jump was successful
221 bool BamReader::Jump(int refID, int position) {
222 return d->SetRegion( BamRegion(refID, position) );
225 /*! \fn bool BamReader::LocateIndex(const BamIndex::IndexType& preferredType)
226 \brief Looks in BAM file's directory for a matching index file.
228 Use this function when you need an index file, and perhaps have a
229 preferred index format, but do not depend heavily on which format
230 actually gets loaded at runtime.
232 This function will defer to your \a preferredType whenever possible.
233 However, if an index file of \a preferredType can not be found, then
234 it will look for any other index file that corresponds to this BAM file.
236 If you want precise control over which index file is loaded, use OpenIndex()
237 with the desired index filename. If that function returns false, you can use
238 CreateIndex() to then build an index of the exact requested format.
240 \param preferredType desired index file format, see BamIndex::IndexType for available formats
241 \returns \c true if (any) index file could be found
243 bool BamReader::LocateIndex(const BamIndex::IndexType& preferredType) {
244 return d->LocateIndex(preferredType);
247 /*! \fn bool BamReader::Open(const std::string& filename)
248 \brief Opens a BAM file.
250 If BamReader is already opened on another file, this function closes
251 that file, then attempts to open requested \a filename.
253 \param filename name of BAM file to open
254 \returns \c true if BAM file was opened successfully
255 \sa Close(), IsOpen(), OpenIndex()
257 bool BamReader::Open(const std::string& filename) {
258 return d->Open(filename);
261 /*! \fn bool BamReader::OpenIndex(const std::string& indexFilename)
262 \brief Opens a BAM index file.
264 \param indexFilename name of BAM index file
266 \returns \c true if BAM index file was opened & data loaded successfully
267 \sa LocateIndex(), Open(), SetIndex()
269 bool BamReader::OpenIndex(const std::string& indexFilename) {
270 return d->OpenIndex(indexFilename);
273 /*! \fn bool BamReader::Rewind(void)
274 \brief Returns the internal file pointer to the first alignment record.
276 Useful for performing multiple sequential passes through a BAM file.
277 Calling this function clears any prior region that may have been set.
279 N.B. - This function sets the file pointer to the first alignment record
280 in the BAM file, NOT to the beginning of the file.
282 \returns \c true if rewind operation was successful
283 \sa Jump(), SetRegion()
285 bool BamReader::Rewind(void) {
289 /*! \fn void BamReader::SetIndex(BamIndex* index)
290 \brief Sets a custom BamIndex on this reader.
292 Only necessary for custom BamIndex subclasses. Most clients should
293 never have to use this function.
298 reader.SetIndex(new MyCustomBamIndex);
301 N.B. - BamReader takes ownership of \a index - i.e. BamReader will
302 take care of deleting the pointer when the reader is destructed,
303 when the current BAM file is closed, or when a new index is requested.
305 \param index custom BamIndex subclass created by client
306 \sa CreateIndex(), LocateIndex(), OpenIndex()
308 void BamReader::SetIndex(BamIndex* index) {
312 /*! \fn void BamReader::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode)
313 \brief Changes the caching behavior of the index data.
315 Default mode is BamIndex::LimitedIndexCaching.
317 \param mode desired cache mode for index, see BamIndex::IndexCacheMode for
318 description of the available cache modes
321 void BamReader::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode) {
322 d->SetIndexCacheMode(mode);
325 /*! \fn bool BamReader::SetRegion(const BamRegion& region)
326 \brief Sets a target region of interest
328 Requires that index data be available. Attempts a random-access
329 jump in the BAM file, near \a region left boundary position.
331 Subsequent calls to GetNextAlignment() or GetNextAlignmentCore()
332 will only return \c true when alignments can be found that overlap
335 A \a region with no right boundary is considered open-ended, meaning
336 that all alignments that lie downstream of the left boundary are
337 considered valid, continuing to the end of the BAM file.
339 \param region desired region-of-interest to activate
340 \returns \c true if reader was able to jump successfully to the region's left boundary
341 \sa HasIndex(), Jump()
343 bool BamReader::SetRegion(const BamRegion& region) {
344 return d->SetRegion(region);
347 /*! \fn bool BamReader::SetRegion(const int& leftRefID,
348 const int& leftPosition,
349 const int& rightRefID,
350 const int& rightPosition)
351 \brief Sets a target region of interest.
353 This is an overloaded function.
355 \param leftRefID referenceID of region's left boundary
356 \param leftPosition position of region's left boundary
357 \param rightRefID reference ID of region's right boundary
358 \param rightPosition position of region's right boundary
360 \returns \c true if reader was able to jump successfully to the region's left boundary
361 \sa HasIndex(), Jump()
363 bool BamReader::SetRegion(const int& leftRefID,
364 const int& leftBound,
365 const int& rightRefID,
366 const int& rightBound)
368 return d->SetRegion( BamRegion(leftRefID, leftBound, rightRefID, rightBound) );