#include "bamtools_convert.h"
#include "bamtools_options.h"
+#include "bamtools_pileup.h"
#include "bamtools_utilities.h"
#include "BGZF.h"
#include "BamReader.h"
static const string FORMAT_FASTQ = "fastq";
static const string FORMAT_JSON = "json";
static const string FORMAT_SAM = "sam";
+ static const string FORMAT_PILEUP = "pileup";
static const string FORMAT_WIGGLE = "wig";
// other constants
bool HasFormat;
bool HasRegion;
+ // pileup flags
+ bool HasFastaFilename;
+ bool IsPrintingPileupMapQualities;
+
// options
vector<string> InputFiles;
string OutputFilename;
string Format;
string Region;
+
+ // pileup options
+ string FastaFilename;
// constructor
ConvertSettings(void)
, HasOutputFilename(false)
, HasFormat(false)
, HasRegion(false)
+ , HasFastaFilename(false)
+ , IsPrintingPileupMapQualities(false)
, OutputFilename(Options::StandardOut())
{ }
};
OptionGroup* FilterOpts = Options::CreateOptionGroup("Filters");
Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is read automatically if it exists as <filename>.bai. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, FilterOpts);
+
+ OptionGroup* PileupOpts = Options::CreateOptionGroup("Pileup Options");
+ Options::AddValueOption("-fasta", "FASTA filename", "FASTA reference file", "", m_settings->HasFastaFilename, m_settings->FastaFilename, PileupOpts, "");
+ Options::AddOption("-mapqual", "print the mapping qualities", m_settings->IsPrintingPileupMapQualities, PileupOpts);
}
ConvertTool::~ConvertTool(void) {
bool ConvertTool::ConvertToolPrivate::Run(void) {
+ bool convertedOk = true;
+
// ------------------------------------
// initialize conversion input/output
if ( !m_settings->HasInputFilenames )
m_settings->InputFiles.push_back(Options::StandardIn());
- // open files
+ // open input files
BamMultiReader reader;
reader.Open(m_settings->InputFiles);
m_references = reader.GetReferenceData();
+ // set region if specified
BamRegion region;
- if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
- if ( !reader.SetRegion(region) ) {
- cerr << "Could not set BamReader region to REGION: " << m_settings->Region << endl;
+ if ( m_settings->HasRegion ) {
+ if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
+ if ( !reader.SetRegion(region) )
+ cerr << "Could not set BamReader region to REGION: " << m_settings->Region << endl;
}
}
- // if an output filename given, open outfile
+ // if output file given
ofstream outFile;
- if ( m_settings->HasOutputFilename ) {
+ if ( m_settings->HasOutputFilename ) {
+
+ // open output file stream
outFile.open(m_settings->OutputFilename.c_str());
- if (!outFile) { cerr << "Could not open " << m_settings->OutputFilename << " for output" << endl; return false; }
- m_out.rdbuf(outFile.rdbuf()); // set m_out to file's streambuf
+ if ( !outFile ) {
+ cerr << "Could not open " << m_settings->OutputFilename << " for output" << endl;
+ return false;
+ }
+
+ // set m_out to file's streambuf
+ m_out.rdbuf(outFile.rdbuf());
}
// ------------------------
- // determine format type
- bool formatError = false;
- bool convertedOk = true;
- void (BamTools::ConvertTool::ConvertToolPrivate::*pFunction)(const BamAlignment&) = 0;
- if ( m_settings->Format == FORMAT_BED ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintBed;
- else if ( m_settings->Format == FORMAT_BEDGRAPH ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintBedGraph;
- else if ( m_settings->Format == FORMAT_FASTA ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintFasta;
- else if ( m_settings->Format == FORMAT_FASTQ ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintFastq;
- else if ( m_settings->Format == FORMAT_JSON ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintJson;
- else if ( m_settings->Format == FORMAT_SAM ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintSam;
- else if ( m_settings->Format == FORMAT_WIGGLE ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintWiggle;
- else {
- cerr << "Unrecognized format: " << m_settings->Format << endl;
- cerr << "Please see help|README (?) for details on supported formats " << endl;
- formatError = true;
- convertedOk = false;
+ // pileup is special case
+ if ( m_settings->Format == FORMAT_PILEUP ) {
+
+ // initialize pileup input/output
+ Pileup pileup(&reader, &m_out);
+
+ // ---------------------------
+ // configure pileup settings
+
+ if ( m_settings->HasRegion )
+ pileup.SetRegion(region);
+
+ if ( m_settings->HasFastaFilename )
+ pileup.SetFastaFilename(m_settings->FastaFilename);
+
+ pileup.SetIsPrintingMapQualities( m_settings->IsPrintingPileupMapQualities );
+
+ // run pileup
+ convertedOk = pileup.Run();
}
- // ------------------------
- // do conversion
- if ( !formatError ) {
- BamAlignment a;
- while ( reader.GetNextAlignment(a) ) {
- (this->*pFunction)(a);
+ // -------------------------------------
+ // else determine 'simpler' format type
+ else {
+
+ bool formatError = false;
+ void (BamTools::ConvertTool::ConvertToolPrivate::*pFunction)(const BamAlignment&) = 0;
+ if ( m_settings->Format == FORMAT_BED ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintBed;
+ else if ( m_settings->Format == FORMAT_BEDGRAPH ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintBedGraph;
+ else if ( m_settings->Format == FORMAT_FASTA ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintFasta;
+ else if ( m_settings->Format == FORMAT_FASTQ ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintFastq;
+ else if ( m_settings->Format == FORMAT_JSON ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintJson;
+ else if ( m_settings->Format == FORMAT_SAM ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintSam;
+ else if ( m_settings->Format == FORMAT_WIGGLE ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintWiggle;
+ else {
+ cerr << "Unrecognized format: " << m_settings->Format << endl;
+ cerr << "Please see help|README (?) for details on supported formats " << endl;
+ formatError = true;
+ convertedOk = false;
+ }
+
+ // ------------------------
+ // do conversion
+ if ( !formatError ) {
+ BamAlignment a;
+ while ( reader.GetNextAlignment(a) ) {
+ (this->*pFunction)(a);
+ }
}
}
--- /dev/null
+#include <vector>
+#include "BamMultiReader.h"
+#include "bamtools_pileup.h"
+using namespace std;
+using namespace BamTools;
+
+struct Pileup::PileupPrivate {
+
+ // ---------------------
+ // data members
+
+ // IO & settings
+ BamMultiReader* Reader;
+ ostream* OutStream;
+ string FastaFilename;
+ bool IsPrintingMapQualities;
+ BamRegion Region;
+
+ // parsing data
+ int CurrentId;
+ int CurrentPosition;
+ vector<BamAlignment> CurrentData;
+ RefVector References;
+
+ // ----------------------
+ // ctor
+
+ PileupPrivate(BamMultiReader* reader, ostream* outStream)
+ : Reader(reader)
+ , OutStream(outStream)
+ , FastaFilename("")
+ , IsPrintingMapQualities(false)
+ { }
+
+ // ----------------------
+ // internal methods
+
+ void PrintCurrentData(void);
+ bool Run(void);
+};
+
+void Pileup::PileupPrivate::PrintCurrentData(void) {
+
+ // remove any data that ends before CurrentPosition
+ size_t i = 0;
+ while ( i < CurrentData.size() ) {
+ if ( CurrentData[i].GetEndPosition() < CurrentPosition )
+ CurrentData.erase(CurrentData.begin() + i);
+ else
+ ++i;
+ }
+
+ // if not data remains, return
+ if ( CurrentData.empty() ) return;
+
+ // initialize empty strings
+ string bases = "";
+ string baseQuals = "";
+ string mapQuals = "";
+
+ // iterate over alignments
+ vector<BamAlignment>::const_iterator dataIter = CurrentData.begin();
+ vector<BamAlignment>::const_iterator dataEnd = CurrentData.end();
+ for ( ; dataIter != dataEnd; ++dataIter ) {
+
+ // retrieve alignment
+ const BamAlignment& al = (*dataIter);
+
+ // determine current base character & store
+ const char base = al.AlignedBases[CurrentPosition -al.Position];
+ if ( al.IsReverseStrand() )
+ bases.push_back( tolower(base) );
+ else
+ bases.push_back( toupper(base) );
+
+ // determine current base quality & store
+ baseQuals.push_back( al.Qualities[CurrentPosition - al.Position] );
+
+ // if using mapQuals, determine current mapQual & store
+ if ( IsPrintingMapQualities ) {
+ int mapQuality = (int)(al.MapQuality + 33);
+ if ( mapQuality > 126 ) mapQuality = 126;
+ mapQuals.push_back((char)mapQuality);
+ }
+ }
+
+ // print results to OutStream
+ const string& refName = References[CurrentId].RefName;
+ const char refBase = 'N';
+
+ *OutStream << refName << "\t" << CurrentPosition << "\t" << refBase << "\t" << CurrentData.size() << "\t" << bases << "\t" << baseQuals;
+ if ( IsPrintingMapQualities ) *OutStream << "\t" << mapQuals;
+ *OutStream << endl;
+}
+
+bool Pileup::PileupPrivate::Run(void) {
+
+ // -----------------------------
+ // validate input & output
+
+ if ( !Reader ) {
+ cerr << "Pileup::Run() : Invalid multireader" << endl;
+ return false;
+ }
+
+ if ( !OutStream) {
+ cerr << "Pileup::Run() : Invalid output stream" << endl;
+ return false;
+ }
+
+ References = Reader->GetReferenceData();
+
+ // -----------------------------
+ // process input data
+
+ // get first entry
+ BamAlignment al;
+ if ( !Reader->GetNextAlignment(al) ) {
+ cerr << "Pileup::Run() : Could not read from multireader" << endl;
+ return false;
+ }
+
+ // set initial markers & store first entry
+ CurrentId = al.RefID;
+ CurrentPosition = al.Position;
+ CurrentData.clear();
+ CurrentData.push_back(al);
+
+ // iterate over remaining data
+ while ( Reader->GetNextAlignment(al) ) {
+
+ // if same reference
+ if ( al.RefID == CurrentId ) {
+
+ // if same position, store and move on
+ if ( al.Position == CurrentPosition )
+ CurrentData.push_back(al);
+
+ // if less than CurrentPosition - sorting error => ABORT
+ else if ( al.Position < CurrentPosition ) {
+ cerr << "Pileup::Run() : Data not sorted correctly!" << endl;
+ return false;
+ }
+
+ // else print pileup data until 'catching up' to CurrentPosition
+ else {
+ while ( al.Position > CurrentPosition ) {
+ PrintCurrentData();
+ ++CurrentPosition;
+ }
+ CurrentData.push_back(al);
+ }
+ }
+
+ // if reference ID less than CurrentID - sorting error => ABORT
+ else if ( al.RefID < CurrentId ) {
+ cerr << "Pileup::Run() : Data not sorted correctly!" << endl;
+ return false;
+ }
+
+ // else moved forward onto next reference
+ else {
+
+ // print any remaining pileup data from previous reference
+ while ( !CurrentData.empty() ) {
+ PrintCurrentData();
+ ++CurrentPosition;
+ }
+
+ // store first entry on this new reference, update markers
+ CurrentData.clear();
+ CurrentData.push_back(al);
+ CurrentId = al.RefID;
+ CurrentPosition = al.Position;
+ }
+ }
+
+ // ------------------------------------
+ // handle any remaining data entries
+
+ while ( !CurrentData.empty() ) {
+ PrintCurrentData();
+ ++CurrentPosition;
+ }
+
+ // -------------------------
+ // return success
+
+ return true;
+}
+
+// ----------------------------------------------------------
+// Pileup implementation
+
+Pileup::Pileup(BamMultiReader* reader, ostream* outStream) {
+ d = new PileupPrivate(reader, outStream);
+}
+
+Pileup::~Pileup(void) {
+ delete d;
+ d = 0;
+}
+
+bool Pileup::Run(void) {
+ return d->Run();
+}
+
+void Pileup::SetFastaFilename(const string& filename) {
+ d->FastaFilename = filename;
+}
+
+void Pileup::SetIsPrintingMapQualities(bool ok) {
+ d->IsPrintingMapQualities = ok;
+}
+
+void Pileup::SetRegion(const BamRegion& region) {
+ d->Region = region;
+}