X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Fapi%2FBamMultiReader.h;h=27745628cb0f0dabf3c819d2eb659b15aca5a33b;hb=574a2bfb36f7107529e7ccda0f75e70a493460e5;hp=fa28bcd39fd3193f103c83ad09215829c62a8b2f;hpb=89028c85b3db7b4eb55c40fabfcb9c55a0d168d9;p=bamtools.git diff --git a/src/api/BamMultiReader.h b/src/api/BamMultiReader.h index fa28bcd..2774562 100644 --- a/src/api/BamMultiReader.h +++ b/src/api/BamMultiReader.h @@ -1,135 +1,127 @@ -// *************************************************************************** -// BamMultiReader.h (c) 2010 Erik Garrison, Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 18 September 2010 (DB) -// --------------------------------------------------------------------------- -// Functionality for simultaneously reading multiple BAM files -// *************************************************************************** - -#ifndef BAMMULTIREADER_H -#define BAMMULTIREADER_H - -#include -#include -#include -#include -#include "BamReader.h" - -namespace BamTools { - -// index mapping reference/position pairings to bamreaders and their alignments -typedef std::multimap, std::pair > AlignmentIndex; - -class BamMultiReader { - - // constructor / destructor - public: - BamMultiReader(void); - ~BamMultiReader(void); - - // public interface - public: - - // positioning - int CurrentRefID; - int CurrentLeft; - - // region under analysis, specified using SetRegion - BamRegion Region; - - // ---------------------- - // BAM file operations - // ---------------------- - - // close BAM files - void Close(void); - - // opens BAM files (and optional BAM index files, if provided) - // @openIndexes - triggers index opening, useful for suppressing - // error messages during merging of files in which we may not have - // indexes. 
- // @coreMode - setup our first alignments using GetNextAlignmentCore(); - // also useful for merging - // @preferStandardIndex - look for standard BAM index ".bai" first. If false, - // will look for BamTools index ".bti". - bool Open(const std::vector& filenames, bool openIndexes = true, bool coreMode = false, bool preferStandardIndex = false); - - // returns whether underlying BAM readers ALL have an index loaded - // this is useful to indicate whether Jump() or SetRegion() are possible - bool IsIndexLoaded(void) const; - - // performs random-access jump to reference, position - bool Jump(int refID, int position = 0); - - // sets the target region - bool SetRegion(const BamRegion& region); - bool SetRegion(const int&, const int&, const int&, const int&); // convenience function to above - - // returns file pointers to beginning of alignments - bool Rewind(void); - - // ---------------------- - // access alignment data - // ---------------------- - // updates the reference id marker to match the lower limit of our readers - void UpdateReferenceID(void); - - // retrieves next available alignment (returns success/fail) from all files - bool GetNextAlignment(BamAlignment&); - // retrieves next available alignment (returns success/fail) from all files - // and populates the support data with information about the alignment - // *** BUT DOES NOT PARSE CHARACTER DATA FROM THE ALIGNMENT - bool GetNextAlignmentCore(BamAlignment&); - // ... should this be private? 
- bool HasOpenReaders(void); - - // ---------------------- - // access auxiliary data - // ---------------------- - - // returns unified SAM header text for all files - const std::string GetHeaderText(void) const; - // returns number of reference sequences - const int GetReferenceCount(void) const; - // returns vector of reference objects - const BamTools::RefVector GetReferenceData(void) const; - // returns reference id (used for BamMultiReader::Jump()) for the given reference name - const int GetReferenceID(const std::string& refName) const; - // validates that we have a congruent set of BAM files that are aligned against the same reference sequences - void ValidateReaders() const; - - // ---------------------- - // BAM index operations - // ---------------------- - - // creates index for BAM files which lack them, saves to files (default = bamFilename + ".bai") - bool CreateIndexes(bool useStandardIndex = true); - - // sets the index caching mode for the readers - void SetIndexCacheMode(const BamIndex::BamIndexCacheMode mode); - - //const int GetReferenceID(const string& refName) const; - - // utility - void PrintFilenames(void); - void DumpAlignmentIndex(void); - void UpdateAlignments(void); // updates our alignment cache - - // private implementation - private: - - // the set of readers and alignments which we operate on, maintained throughout the life of this class - std::vector > readers; - - // readers and alignments sorted by reference id and position, to keep track of the lowest (next) alignment - // when a reader reaches EOF, its entry is removed from this index - AlignmentIndex alignments; - - std::vector fileNames; -}; - -} // namespace BamTools - -#endif // BAMMULTIREADER_H +// *************************************************************************** +// BamMultiReader.h (c) 2010 Erik Garrison, Derek Barnett +// Marth Lab, Department of Biology, Boston College +// --------------------------------------------------------------------------- +// Last 
modified: 14 January 2013 (DB) +// --------------------------------------------------------------------------- +// Convenience class for reading multiple BAM files. +// *************************************************************************** + +#ifndef BAMMULTIREADER_H +#define BAMMULTIREADER_H + +#include "api/api_global.h" +#include "api/BamReader.h" +#include +#include +#include +#include + +namespace BamTools { + +namespace Internal { + class BamMultiReaderPrivate; +} // namespace Internal + +class API_EXPORT BamMultiReader { + + // enums + public: + // possible merge order strategies + enum MergeOrder { RoundRobinMerge = 0 + , MergeByCoordinate + , MergeByName + }; + + // constructor / destructor + public: + BamMultiReader(void); + ~BamMultiReader(void); + + // public interface + public: + + // ---------------------- + // BAM file operations + // ---------------------- + + // closes all open BAM files + bool Close(void); + // closes only the requested BAM file + bool CloseFile(const std::string& filename); + // returns list of filenames for all open BAM files + const std::vector Filenames(void) const; + // returns current merge order strategy + BamMultiReader::MergeOrder GetMergeOrder(void) const; + // returns true if multireader has any open BAM files + bool HasOpenReaders(void) const; + // performs random-access jump within current BAM files + bool Jump(int refID, int position = 0); + // opens BAM files + bool Open(const std::vector& filenames); + // opens a single BAM file, adding to any other current BAM files + bool OpenFile(const std::string& filename); + // returns file pointers to beginning of alignments + bool Rewind(void); + // sets an explicit merge order, regardless of the BAM files' SO header tag + void SetExplicitMergeOrder(BamMultiReader::MergeOrder order); + // sets the target region of interest + bool SetRegion(const BamRegion& region); + // sets the target region of interest (overload taking left/right reference IDs and positions) + bool SetRegion(const int& leftRefID, + const int& leftPosition, + 
const int& rightRefID, + const int& rightPosition); + + // ---------------------- + // access alignment data + // ---------------------- + + // retrieves next available alignment + bool GetNextAlignment(BamAlignment& alignment); + // retrieves next available alignment (without populating the alignment's string data fields) + bool GetNextAlignmentCore(BamAlignment& alignment); + + // ---------------------- + // access auxiliary data + // ---------------------- + + // returns unified SAM header for all files + SamHeader GetHeader(void) const; + // returns unified SAM header text for all files + std::string GetHeaderText(void) const; + // returns number of reference sequences + int GetReferenceCount(void) const; + // returns all reference sequence entries. + const BamTools::RefVector GetReferenceData(void) const; + // returns the ID of the reference with this name. + int GetReferenceID(const std::string& refName) const; + + // ---------------------- + // BAM index operations + // ---------------------- + + // creates index files for current BAM files + bool CreateIndexes(const BamIndex::IndexType& type = BamIndex::STANDARD); + // returns true if all BAM files have index data available + bool HasIndexes(void) const; + // looks for index files that match current BAM files + bool LocateIndexes(const BamIndex::IndexType& preferredType = BamIndex::STANDARD); + // opens index files for current BAM files. + bool OpenIndexes(const std::vector& indexFilenames); + + // ---------------------- + // error handling + // ---------------------- + + // returns a human-readable description of the last error that occurred + std::string GetErrorString(void) const; + + // private implementation: pointer to the forward-declared Internal::BamMultiReaderPrivate + private: + Internal::BamMultiReaderPrivate* d; +}; + +} // namespace BamTools + +#endif // BAMMULTIREADER_H