// ***************************************************************************
// BamMultiReader.cpp (c) 2010 Erik Garrison, Derek Barnett
// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
// ---------------------------------------------------------------------------
- // Last modified: 15 March 2011 (DB)
+ // Last modified: 1 October 2011 (DB)
// ---------------------------------------------------------------------------
// Convenience class for reading multiple BAM files.
//
// ***************************************************************************
// BamMultiReader.h (c) 2010 Erik Garrison, Derek Barnett
// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
// ---------------------------------------------------------------------------
- // Last modified: 15 March 2011 (DB)
+ // Last modified: 1 October 2011 (DB)
// ---------------------------------------------------------------------------
// Convenience class for reading multiple BAM files.
// ***************************************************************************
// ***************************************************************************
// SamReadGroupDictionary.cpp (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
// ---------------------------------------------------------------------------
- // Last modified: 18 April 2011 (DB)
+ // Last modified: 1 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides methods for operating on a collection of SamReadGroup entries.
// ***************************************************************************
// ***************************************************************************
// SamReadGroupDictionary.h (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
// ---------------------------------------------------------------------------
- // Last modified: 18 April 2011 (DB)
+ // Last modified: 1 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides methods for operating on a collection of SamReadGroup entries.
// ***************************************************************************
// ***************************************************************************
// SamSequenceDictionary.cpp (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
// ---------------------------------------------------------------------------
- // Last modified: 18 April 2011 (DB)
+ // Last modified: 1 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides methods for operating on a collection of SamSequence entries.
// ***************************************************************************
// ***************************************************************************
// SamSequenceDictionary.h (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
// ---------------------------------------------------------------------------
- // Last modified: 18 April 2011
+ // Last modified: 1 October 2011
// ---------------------------------------------------------------------------
// Provides methods for operating on a collection of SamSequence entries.
// ***************************************************************************
// ***************************************************************************
// BamMultiMerger_p.h (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
// ---------------------------------------------------------------------------
- // Last modified: 28 September 2011 (DB)
++// Last modified: 3 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides merging functionality for BamMultiReader. At this point, supports
// sorting results by (refId, position) or by read name.
// opens BAM files
bool BamMultiReaderPrivate::Open(const vector<string>& filenames) {
- // create alignment cache if necessary
- if ( m_alignments == 0 ) {
- m_alignments = CreateMergerForCurrentSortOrder();
- if ( m_alignments == 0 ) return false;
- }
+ bool openedOk = true;
+
+ // put all current readers back at beginning
+ openedOk &= Rewind();
+ // put all current readers back at beginning (refreshes alignment cache)
+ Rewind();
+
// iterate over filenames
vector<string>::const_iterator filenameIter = filenames.begin();
vector<string>::const_iterator filenameEnd = filenames.end();
return result;
}
- ReaderAlignment BamMultiReaderPrivate::OpenReader(const string& filename, bool* ok) {
+
- // clear status flag
- *ok = false;
-
- // create new BamReader & BamAlignment
- BamReader* reader = new BamReader;
- BamAlignment* alignment = new BamAlignment;
+ bool BamMultiReaderPrivate::PopNextCachedAlignment(BamAlignment& al, const bool needCharData) {
- // if reader opens OK
- if ( reader->Open(filename) ) {
+ // bail out (leaving 'al' untouched) when there is no alignment cache or the cache is empty
+ if ( m_alignmentCache == 0 || m_alignmentCache->IsEmpty() )
+ return false;
- // if first alignment reads OK
- if ( LoadNextAlignment(reader, alignment) ) {
- *ok = true;
- return make_pair(reader, alignment);
- }
+ // take the first merge item off the cache & unpack its reader and alignment pointers
+ MergeItem item = m_alignmentCache->TakeFirst();
+ BamReader* reader = item.Reader;
+ BamAlignment* alignment = item.Alignment;
+ if ( reader == 0 || alignment == 0 )
+ return false; // NOTE(review): the popped item is not passed to SaveNextAlignment on this path - confirm that is intended
- // could not read alignment
- else {
- cerr << "BamMultiReader WARNING: Could not read first alignment from "
- << filename << ", ignoring file" << endl;
- }
+ // if requested, build char data & record the source filename on the cached alignment (done before the copy below)
+ if ( needCharData ) {
+ alignment->BuildCharData();
+ alignment->Filename = reader->GetFilename();
}
- // reader could not open
- else {
- cerr << "BamMultiReader WARNING: Could not open "
- << filename << ", ignoring file" << endl;
- }
+ // store cached alignment into destination parameter (by copy)
+ al = *alignment;
- // if we get here, there was a problem with this BAM file (opening or reading)
- // clean up memory allocation & return null pointer
- delete reader;
- delete alignment;
- return ReaderAlignment();
- }
+ // load next alignment from reader & store in cache (same reader/alignment pointers are handed back)
+ SaveNextAlignment(reader, alignment);
- // print associated filenames to stdout
- void BamMultiReaderPrivate::PrintFilenames(void) const {
- const vector<string>& filenames = Filenames();
- vector<string>::const_iterator filenameIter = filenames.begin();
- vector<string>::const_iterator filenameEnd = filenames.end();
- for ( ; filenameIter != filenameEnd; ++filenameIter )
- cout << (*filenameIter) << endl;
+ // return success ('al' now holds a copy of the popped alignment)
+ return true;
}
+bool BamMultiReaderPrivate::PopNextCachedAlignment(BamAlignment& al, const bool needCharData) {
+
+ // bail out if no more data to process (nothing is written to 'al' in that case)
+ if ( !HasAlignmentData() )
+ return false;
+
+ // take the first (reader, alignment) pair off the m_alignments cache
+ ReaderAlignment nextReaderAlignment = m_alignments->TakeFirst();
+ BamReader* reader = nextReaderAlignment.first;
+ BamAlignment* alignment = nextReaderAlignment.second;
+
+ // store cached alignment into destination parameter (by copy)
+ al = *alignment;
+
+ // if requested, build char data & record the source filename on the caller's copy (not on the cached object)
+ if ( needCharData ) {
+ al.BuildCharData();
+ al.Filename = reader->GetFilename();
+ }
+
+ // load next alignment from reader & store in cache (same reader/alignment pointers are handed back)
+ SaveNextAlignment(reader, alignment);
+
+ // return success ('al' now holds a copy of the popped alignment)
+ return true;
+}
+
// returns BAM file pointers to beginning of alignment data & resets alignment cache
bool BamMultiReaderPrivate::Rewind(void) {
namespace BamTools {
namespace Internal {
- class IBamMultiMerger;
-
class BamMultiReaderPrivate {
+ // typedefs
+ public:
+ typedef std::pair<BamReader*, BamAlignment*> ReaderAlignment;
+
// constructor / destructor
public:
BamMultiReaderPrivate(void);
// 'internal' methods
public:
- IBamMultiMerger* CreateMergerForCurrentSortOrder(void) const;
- const std::string ExtractReadGroup(const std::string& headerLine) const;
- bool HasAlignmentData(void) const;
- bool LoadNextAlignment(BamReader* reader, BamAlignment* alignment);
- ReaderAlignment OpenReader(const std::string& filename, bool* ok);
++
+ IMultiMerger* CreateAlignmentCache(void) const;
bool PopNextCachedAlignment(BamAlignment& al, const bool needCharData);
bool RewindReaders(void);
void SaveNextAlignment(BamReader* reader, BamAlignment* alignment);
// ***************************************************************************
// bamtools_sort.cpp (c) 2010 Derek Barnett, Erik Garrison
// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
// ---------------------------------------------------------------------------
- // Last modified: 28 September 2011 (DB)
+ // Last modified: 3 October 2011 (DB)
// ---------------------------------------------------------------------------
// Sorts an input BAM file
// ***************************************************************************