1 // ***************************************************************************
2 // BamReader.cpp (c) 2009 Derek Barnett, Michael Strömberg
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 10 October 2011 (DB)
6 // ---------------------------------------------------------------------------
7 // Provides read access to BAM files.
8 // ***************************************************************************
10 #include "api/BamReader.h"
11 #include "api/internal/BamReader_p.h"
12 using namespace BamTools;
13 using namespace BamTools::Internal;
/*! \class BamTools::BamReader
    \brief Provides read access to BAM files.
*/
/*! \fn BamReader::BamReader(void)
*/
29 BamReader::BamReader(void)
30 : d(new BamReaderPrivate(this))
/*! \fn BamReader::~BamReader(void)
*/
36 BamReader::~BamReader(void) {
/*! \fn bool BamReader::Close(void)
    \brief Closes the current BAM file.

    Also clears out all header and reference data.

    \return \c true if file closed OK
*/
49 bool BamReader::Close(void) {
53 /*! \fn bool BamReader::CreateIndex(const BamIndex::IndexType& type)
54 \brief Creates an index file for current BAM file.
56 \param[in] type file format to create, see BamIndex::IndexType for available formats
57 \return \c true if index created OK
58 \sa LocateIndex(), OpenIndex()
60 bool BamReader::CreateIndex(const BamIndex::IndexType& type) {
61 return d->CreateIndex(type);
64 /*! \fn std::string BamReader::GetErrorString(void) const
65 \brief Returns a human-readable description of the last error that occurred
67 This method allows elimination of STDERR pollution. Developers of client code
68 may choose how the messages are displayed to the user, if at all.
70 \return error description
72 string BamReader::GetErrorString(void) const {
73 return d->GetErrorString();
/*! \fn const std::string BamReader::GetFilename(void) const
    \brief Returns name of current BAM file.

    Retrieved filename will contain whatever was passed via Open().
    If you need full directory paths here, be sure to include them
    when you open the BAM file.

    \returns name of open BAM file. If no file is open, returns an empty string.
*/
86 const std::string BamReader::GetFilename(void) const {
90 /*! \fn SamHeader BamReader::GetHeader(void) const
91 \brief Returns SAM header data.
93 Header data is wrapped in a SamHeader object that can be conveniently queried & modified.
95 \note Modifying the retrieved SamHeader object does NOT affect the
96 current BAM file. This file has been opened in a read-only mode.
97 However, your modified SamHeader object can be used in conjunction with
98 BamWriter to generate a new BAM file with the appropriate header information.
100 \returns header data object
103 SamHeader BamReader::GetHeader(void) const {
104 return d->GetSamHeader();
107 /*! \fn std::string BamReader::GetHeaderText(void) const
108 \brief Returns SAM header data, as SAM-formatted text.
110 \note Modifying the retrieved text does NOT affect the current
111 BAM file. This file has been opened in a read-only mode. However,
112 your modified header text can be used in conjunction with BamWriter
113 to generate a new BAM file with the appropriate header information.
115 \returns SAM-formatted header text
118 std::string BamReader::GetHeaderText(void) const {
119 return d->GetHeaderText();
122 /*! \fn bool BamReader::GetNextAlignment(BamAlignment& alignment)
123 \brief Retrieves next available alignment.
125 Attempts to read the next alignment record from BAM file, and checks to see
126 if it overlaps the current region. If no region is currently set, then the
127 next alignment available is always considered valid.
129 If a region has been set, via Jump() or SetRegion(), an alignment is only
130 considered valid if it overlaps the region. If the actual 'next' alignment record
131 in the BAM file does not overlap this region, then this function will read sequentially
132 through the file until the next alignment that overlaps this region is found.
133 Once the region has been exhausted (i.e. the next alignment loaded is beyond the region),
134 the function aborts and returns \c false. In this case, there is no point to continue
135 reading, assuming properly sorted alignments.
137 This function fully populates all of the alignment's available data fields,
138 including the string data fields (read name, bases, qualities, tags, filename).
139 If only positional data (refID, position, CIGAR ops, alignment flags, etc.)
140 are required, consider using GetNextAlignmentCore() for a significant
143 \param[out] alignment destination for alignment record data
144 \returns \c true if a valid alignment was found
146 bool BamReader::GetNextAlignment(BamAlignment& alignment) {
147 return d->GetNextAlignment(alignment);
150 /*! \fn bool BamReader::GetNextAlignmentCore(BamAlignment& alignment)
151 \brief Retrieves next available alignment, without populating the alignment's string data fields.
153 Equivalent to GetNextAlignment() with respect to what is a valid overlapping alignment.
155 However, this method does NOT populate the alignment's string data fields
156 (read name, bases, qualities, tags, filename). This provides a boost in speed
157 when these fields are not required for every alignment. These fields can be
158 populated 'lazily' (as needed) by calling BamAlignment::BuildCharData() later.
160 \param[out] alignment destination for alignment record data
161 \returns \c true if a valid alignment was found
164 bool BamReader::GetNextAlignmentCore(BamAlignment& alignment) {
165 return d->GetNextAlignmentCore(alignment);
168 /*! \fn int BamReader::GetReferenceCount(void) const
169 \brief Returns number of reference sequences.
171 int BamReader::GetReferenceCount(void) const {
172 return d->GetReferenceCount();
175 /*! \fn const RefVector& BamReader::GetReferenceData(void) const
176 \brief Returns all reference sequence entries.
179 const RefVector& BamReader::GetReferenceData(void) const {
180 return d->GetReferenceData();
183 /*! \fn int BamReader::GetReferenceID(const std::string& refName) const
184 \brief Returns the ID of the reference with this name.
186 If \a refName is not found, returns -1.
188 \param[in] refName name of reference to look up
190 int BamReader::GetReferenceID(const std::string& refName) const {
191 return d->GetReferenceID(refName);
194 /*! \fn bool BamReader::HasIndex(void) const
195 \brief Returns \c true if index data is available.
197 bool BamReader::HasIndex(void) const {
198 return d->HasIndex();
/*! \fn bool BamReader::IsOpen(void) const
    \brief Returns \c true if a BAM file is open for reading.
*/
204 bool BamReader::IsOpen(void) const {
208 /*! \fn bool BamReader::Jump(int refID, int position)
209 \brief Performs a random-access jump within BAM file.
211 This is a convenience method, equivalent to calling SetRegion()
212 with only a left boundary specified.
214 \param[in] refID left-bound reference ID
215 \param[in] position left-bound position
217 \returns \c true if jump was successful
220 bool BamReader::Jump(int refID, int position) {
221 return d->SetRegion( BamRegion(refID, position) );
224 /*! \fn bool BamReader::LocateIndex(const BamIndex::IndexType& preferredType)
225 \brief Looks in BAM file's directory for a matching index file.
227 Use this function when you need an index file, and perhaps have a
228 preferred index format, but do not depend heavily on which format
229 actually gets loaded at runtime.
231 This function will defer to your \a preferredType whenever possible.
232 However, if an index file of \a preferredType can not be found, then
233 it will look for any other index file that corresponds to this BAM file.
235 If you want precise control over which index file is loaded, use OpenIndex()
236 with the desired index filename. If that function returns false, you can use
237 CreateIndex() to then build an index of the exact requested format.
239 \param[in] preferredType desired index file format, see BamIndex::IndexType for available formats
241 \returns \c true if (any) index file could be found
243 bool BamReader::LocateIndex(const BamIndex::IndexType& preferredType) {
244 return d->LocateIndex(preferredType);
247 /*! \fn bool BamReader::Open(const std::string& filename)
248 \brief Opens a BAM file.
250 If BamReader is already opened on another file, this function closes
251 that file, then attempts to open requested \a filename.
253 \param[in] filename name of BAM file to open
255 \returns \c true if BAM file was opened successfully
256 \sa Close(), IsOpen(), OpenIndex()
258 bool BamReader::Open(const std::string& filename) {
259 return d->Open(filename);
262 /*! \fn bool BamReader::OpenIndex(const std::string& indexFilename)
263 \brief Opens a BAM index file.
265 \param[in] indexFilename name of BAM index file to open
267 \returns \c true if BAM index file was opened & data loaded successfully
268 \sa LocateIndex(), Open(), SetIndex()
270 bool BamReader::OpenIndex(const std::string& indexFilename) {
271 return d->OpenIndex(indexFilename);
/*! \fn bool BamReader::Rewind(void)
    \brief Returns the internal file pointer to the first alignment record.

    Useful for performing multiple sequential passes through a BAM file.
    Calling this function clears any prior region that may have been set.

    \note This function sets the file pointer to first alignment record
          in the BAM file, NOT the beginning of the file.

    \returns \c true if rewind operation was successful
    \sa Jump(), SetRegion()
*/
286 bool BamReader::Rewind(void) {
/*! \fn void BamReader::SetIndex(BamIndex* index)
    \brief Sets a custom BamIndex on this reader.

    Only necessary for custom BamIndex subclasses. Most clients should
    never have to use this function.

    Example:
    \code
        reader.SetIndex(new MyCustomBamIndex);
    \endcode

    \note BamReader takes ownership of \a index - i.e. the BamReader will
          take care of deleting it when the reader is destructed, when the current
          BAM file is closed, or when a new index is requested.

    \param[in] index custom BamIndex subclass created by client
    \sa CreateIndex(), LocateIndex(), OpenIndex()
*/
309 void BamReader::SetIndex(BamIndex* index) {
313 /*! \fn bool BamReader::SetRegion(const BamRegion& region)
314 \brief Sets a target region of interest
316 Requires that index data be available. Attempts a random-access
317 jump in the BAM file, near \a region left boundary position.
319 Subsequent calls to GetNextAlignment() or GetNextAlignmentCore()
320 will only return \c true when alignments can be found that overlap
323 A \a region with no right boundary is considered open-ended, meaning
324 that all alignments that lie downstream of the left boundary are
325 considered valid, continuing to the end of the BAM file.
327 \warning BamRegion now represents a zero-based, HALF-OPEN interval.
328 In previous versions of BamTools (0.x & 1.x) all intervals were treated
329 as zero-based, CLOSED.
331 \param[in] region desired region-of-interest to activate
333 \returns \c true if reader was able to jump successfully to the region's left boundary
334 \sa HasIndex(), Jump()
336 bool BamReader::SetRegion(const BamRegion& region) {
337 return d->SetRegion(region);
340 /*! \fn bool BamReader::SetRegion(const int& leftRefID,
341 const int& leftPosition,
342 const int& rightRefID,
343 const int& rightPosition)
344 \brief Sets a target region of interest.
346 This is an overloaded function.
348 \warning This function expects a zero-based, HALF-OPEN interval.
349 In previous versions of BamTools (0.x & 1.x) all intervals were treated
350 as zero-based, CLOSED.
352 \param[in] leftRefID referenceID of region's left boundary
353 \param[in] leftPosition position of region's left boundary
354 \param[in] rightRefID reference ID of region's right boundary
355 \param[in] rightPosition position of region's right boundary
357 \returns \c true if reader was able to jump successfully to the region's left boundary
358 \sa HasIndex(), Jump()
360 bool BamReader::SetRegion(const int& leftRefID,
361 const int& leftBound,
362 const int& rightRefID,
363 const int& rightBound)
365 return d->SetRegion( BamRegion(leftRefID, leftBound, rightRefID, rightBound) );