1 // ***************************************************************************
2 // BamReader.cpp (c) 2009 Derek Barnett, Michael Strömberg
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 18 November 2012 (DB)
6 // ---------------------------------------------------------------------------
7 // Provides read access to BAM files.
8 // ***************************************************************************
10 #include "api/BamReader.h"
11 #include "api/internal/bam/BamReader_p.h"
12 using namespace BamTools;
13 using namespace BamTools::Internal;
22 /*! \class BamTools::BamReader
23 \brief Provides read access to BAM files.
26 /*! \fn BamReader::BamReader(void)
29 BamReader::BamReader(void)
30 : d(new BamReaderPrivate(this))
33 /*! \fn BamReader::~BamReader(void)
36 BamReader::~BamReader(void) {
41 /*! \fn bool BamReader::Close(void)
42 \brief Closes the current BAM file.
44 Also clears out all header and reference data.
46 \return \c true if file closed OK
49 bool BamReader::Close(void) {
53 /*! \fn bool BamReader::CreateIndex(const BamIndex::IndexType& type)
54 \brief Creates an index file for current BAM file.
56 \param[in] type file format to create, see BamIndex::IndexType for available formats
57 \return \c true if index created OK
58 \sa LocateIndex(), OpenIndex()
60 bool BamReader::CreateIndex(const BamIndex::IndexType& type) {
61 return d->CreateIndex(type);
64 /*! \fn const SamHeader& BamReader::GetConstSamHeader(void) const
65 \brief Returns const reference to SAM header data.
67 Allows for read-only queries of SAM header data.
69 If you do not need to modify the SAM header, use this method to avoid the
70 potentially expensive copy used by GetHeader().
73 \returns const reference to header data object
74 \sa GetHeader(), GetHeaderText()
76 const SamHeader& BamReader::GetConstSamHeader(void) const {
77 return d->GetConstSamHeader();
80 /*! \fn std::string BamReader::GetErrorString(void) const
81 \brief Returns a human-readable description of the last error that occurred
83 This method allows elimination of STDERR pollution. Developers of client code
84 may choose how the messages are displayed to the user, if at all.
86 \return error description
88 string BamReader::GetErrorString(void) const {
89 return d->GetErrorString();
92 /*! \fn const std::string BamReader::GetFilename(void) const
93 \brief Returns name of current BAM file.
95 Retrieved filename will contain whatever was passed via Open().
96 If you need full directory paths here, be sure to include them
97 when you open the BAM file.
99 \returns name of open BAM file. If no file is open, returns an empty string.
102 const std::string BamReader::GetFilename(void) const {
103 return d->Filename();
106 /*! \fn SamHeader BamReader::GetHeader(void) const
107 \brief Returns SAM header data.
109 Header data is wrapped in a SamHeader object that can be conveniently queried and/or modified.
110 If you only need read access, consider using GetConstSamHeader() instead.
112 \note Modifying the retrieved SamHeader object does NOT affect the
113 current BAM file. This file has been opened in a read-only mode.
114 However, your modified SamHeader object can be used in conjunction with
115 BamWriter to generate a new BAM file with the appropriate header information.
117 \returns header data object
118 \sa GetConstSamHeader(), GetHeaderText()
120 SamHeader BamReader::GetHeader(void) const {
121 return d->GetSamHeader();
124 /*! \fn std::string BamReader::GetHeaderText(void) const
125 \brief Returns SAM header data, as SAM-formatted text.
127 \note Modifying the retrieved text does NOT affect the current
128 BAM file. This file has been opened in a read-only mode. However,
129 your modified header text can be used in conjunction with BamWriter
130 to generate a new BAM file with the appropriate header information.
132 \returns SAM-formatted header text
135 std::string BamReader::GetHeaderText(void) const {
136 return d->GetHeaderText();
139 /*! \fn bool BamReader::GetNextAlignment(BamAlignment& alignment)
140 \brief Retrieves next available alignment.
142 Attempts to read the next alignment record from BAM file, and checks to see
143 if it overlaps the current region. If no region is currently set, then the
144 next alignment available is always considered valid.
146 If a region has been set, via Jump() or SetRegion(), an alignment is only
147 considered valid if it overlaps the region. If the actual 'next' alignment record
148 in the BAM file does not overlap this region, then this function will read sequentially
149 through the file until the next alignment that overlaps this region is found.
150 Once the region has been exhausted (i.e. the next alignment loaded is beyond the region),
151 the function aborts and returns \c false. In this case, there is no point to continue
152 reading, assuming properly sorted alignments.
154 This function fully populates all of the alignment's available data fields,
155 including the string data fields (read name, bases, qualities, tags, filename).
156 If only positional data (refID, position, CIGAR ops, alignment flags, etc.)
157 are required, consider using GetNextAlignmentCore() for a significant
160 \param[out] alignment destination for alignment record data
161 \returns \c true if a valid alignment was found
163 bool BamReader::GetNextAlignment(BamAlignment& alignment) {
164 return d->GetNextAlignment(alignment);
167 /*! \fn bool BamReader::GetNextAlignmentCore(BamAlignment& alignment)
168 \brief Retrieves next available alignment, without populating the alignment's string data fields.
170 Equivalent to GetNextAlignment() with respect to what is a valid overlapping alignment.
172 However, this method does NOT populate the alignment's string data fields
173 (read name, bases, qualities, tags, filename). This provides a boost in speed
174 when these fields are not required for every alignment. These fields can be
175 populated 'lazily' (as needed) by calling BamAlignment::BuildCharData() later.
177 \param[out] alignment destination for alignment record data
178 \returns \c true if a valid alignment was found
181 bool BamReader::GetNextAlignmentCore(BamAlignment& alignment) {
182 return d->GetNextAlignmentCore(alignment);
185 /*! \fn int BamReader::GetReferenceCount(void) const
186 \brief Returns number of reference sequences.
188 int BamReader::GetReferenceCount(void) const {
189 return d->GetReferenceCount();
192 /*! \fn const RefVector& BamReader::GetReferenceData(void) const
193 \brief Returns all reference sequence entries.
196 const RefVector& BamReader::GetReferenceData(void) const {
197 return d->GetReferenceData();
200 /*! \fn int BamReader::GetReferenceID(const std::string& refName) const
201 \brief Returns the ID of the reference with this name.
203 If \a refName is not found, returns -1.
205 \param[in] refName name of reference to look up
207 int BamReader::GetReferenceID(const std::string& refName) const {
208 return d->GetReferenceID(refName);
211 /*! \fn bool BamReader::HasIndex(void) const
212 \brief Returns \c true if index data is available.
214 bool BamReader::HasIndex(void) const {
215 return d->HasIndex();
218 /*! \fn bool BamReader::IsOpen(void) const
219 \brief Returns \c true if a BAM file is open for reading.
221 bool BamReader::IsOpen(void) const {
225 /*! \fn bool BamReader::Jump(int refID, int position)
226 \brief Performs a random-access jump within BAM file.
228 This is a convenience method, equivalent to calling SetRegion()
229 with only a left boundary specified.
231 \param[in] refID left-bound reference ID
232 \param[in] position left-bound position
234 \returns \c true if jump was successful
237 bool BamReader::Jump(int refID, int position) {
238 return d->SetRegion( BamRegion(refID, position) );
241 /*! \fn bool BamReader::LocateIndex(const BamIndex::IndexType& preferredType)
242 \brief Looks in BAM file's directory for a matching index file.
244 Use this function when you need an index file, and perhaps have a
245 preferred index format, but do not depend heavily on which format
246 actually gets loaded at runtime.
248 This function will defer to your \a preferredType whenever possible.
249 However, if an index file of \a preferredType can not be found, then
250 it will look for any other index file that corresponds to this BAM file.
252 If you want precise control over which index file is loaded, use OpenIndex()
253 with the desired index filename. If that function returns false, you can use
254 CreateIndex() to then build an index of the exact requested format.
256 \param[in] preferredType desired index file format, see BamIndex::IndexType for available formats
258 \returns \c true if (any) index file could be found
260 bool BamReader::LocateIndex(const BamIndex::IndexType& preferredType) {
261 return d->LocateIndex(preferredType);
264 /*! \fn bool BamReader::Open(const std::string& filename)
265 \brief Opens a BAM file.
267 If BamReader is already opened on another file, this function closes
268 that file, then attempts to open requested \a filename.
270 \param[in] filename name of BAM file to open
272 \returns \c true if BAM file was opened successfully
273 \sa Close(), IsOpen(), OpenIndex()
275 bool BamReader::Open(const std::string& filename) {
276 return d->Open(filename);
279 /*! \fn bool BamReader::OpenIndex(const std::string& indexFilename)
280 \brief Opens a BAM index file.
282 \param[in] indexFilename name of BAM index file to open
284 \returns \c true if BAM index file was opened & data loaded successfully
285 \sa LocateIndex(), Open(), SetIndex()
287 bool BamReader::OpenIndex(const std::string& indexFilename) {
288 return d->OpenIndex(indexFilename);
291 /*! \fn bool BamReader::Rewind(void)
292 \brief Returns the internal file pointer to the first alignment record.
294 Useful for performing multiple sequential passes through a BAM file.
295 Calling this function clears any prior region that may have been set.
297 \note This function sets the file pointer to first alignment record
298 in the BAM file, NOT the beginning of the file.
300 \returns \c true if rewind operation was successful
301 \sa Jump(), SetRegion()
303 bool BamReader::Rewind(void) {
307 /*! \fn void BamReader::SetIndex(BamIndex* index)
308 \brief Sets a custom BamIndex on this reader.
310 Only necessary for custom BamIndex subclasses. Most clients should
311 never have to use this function.
316 reader.SetIndex(new MyCustomBamIndex);
319 \note BamReader takes ownership of \a index - i.e. the BamReader will
320 take care of deleting it when the reader is destructed, when the current
321 BAM file is closed, or when a new index is requested.
323 \param[in] index custom BamIndex subclass created by client
324 \sa CreateIndex(), LocateIndex(), OpenIndex()
326 void BamReader::SetIndex(BamIndex* index) {
330 /*! \fn bool BamReader::SetRegion(const BamRegion& region)
331 \brief Sets a target region of interest
333 Requires that index data be available. Attempts a random-access
334 jump in the BAM file, near \a region left boundary position.
336 Subsequent calls to GetNextAlignment() or GetNextAlignmentCore()
337 will only return \c true when alignments can be found that overlap
340 A \a region with no right boundary is considered open-ended, meaning
341 that all alignments that lie downstream of the left boundary are
342 considered valid, continuing to the end of the BAM file.
344 \warning BamRegion now represents a zero-based, HALF-OPEN interval.
345 In previous versions of BamTools (0.x & 1.x) all intervals were treated
346 as zero-based, CLOSED.
348 \param[in] region desired region-of-interest to activate
350 \returns \c true if reader was able to jump successfully to the region's left boundary
351 \sa HasIndex(), Jump()
353 bool BamReader::SetRegion(const BamRegion& region) {
354 return d->SetRegion(region);
357 /*! \fn bool BamReader::SetRegion(const int& leftRefID,
358 const int& leftPosition,
359 const int& rightRefID,
360 const int& rightPosition)
361 \brief Sets a target region of interest.
363 This is an overloaded function.
365 \warning This function expects a zero-based, HALF-OPEN interval.
366 In previous versions of BamTools (0.x & 1.x) all intervals were treated
367 as zero-based, CLOSED.
369 \param[in] leftRefID referenceID of region's left boundary
370 \param[in] leftPosition position of region's left boundary
371 \param[in] rightRefID reference ID of region's right boundary
372 \param[in] rightPosition position of region's right boundary
374 \returns \c true if reader was able to jump successfully to the region's left boundary
375 \sa HasIndex(), Jump()
377 bool BamReader::SetRegion(const int& leftRefID,
378 const int& leftBound,
379 const int& rightRefID,
380 const int& rightBound)
382 return d->SetRegion( BamRegion(leftRefID, leftBound, rightRefID, rightBound) );