+// Print BamAlignment in YAML format
+void ConvertTool::ConvertToolPrivate::PrintYaml(const BamAlignment& a) {
+
+ // write alignment name
+ m_out << "---" << endl;
+ m_out << a.Name << ":" << endl;
+
+ // write alignment data
+ m_out << " " << "AlndBases: " << a.AlignedBases << endl;
+ m_out << " " << "Qualities: " << a.Qualities << endl;
+ m_out << " " << "Name: " << a.Name << endl;
+ m_out << " " << "Length: " << a.Length << endl;
+ m_out << " " << "TagData: " << a.TagData << endl;
+ m_out << " " << "RefID: " << a.RefID << endl;
+ m_out << " " << "RefName: " << m_references[a.RefID].RefName << endl;
+ m_out << " " << "Position: " << a.Position << endl;
+ m_out << " " << "Bin: " << a.Bin << endl;
+ m_out << " " << "MapQuality: " << a.MapQuality << endl;
+ m_out << " " << "AlignmentFlag: " << a.AlignmentFlag << endl;
+ m_out << " " << "MateRefID: " << a.MateRefID << endl;
+ m_out << " " << "MatePosition: " << a.MatePosition << endl;
+ m_out << " " << "InsertSize: " << a.InsertSize << endl;
+ m_out << " " << "Filename: " << a.Filename << endl;
+
+ // write Cigar data
+ const vector<CigarOp>& cigarData = a.CigarData;
+ if ( !cigarData.empty() ) {
+ m_out << " " << "Cigar: ";
+ vector<CigarOp>::const_iterator cigarBegin = cigarData.begin();
+ vector<CigarOp>::const_iterator cigarIter = cigarBegin;
+ vector<CigarOp>::const_iterator cigarEnd = cigarData.end();
+ for ( ; cigarIter != cigarEnd; ++cigarIter ) {
+ const CigarOp& op = (*cigarIter);
+ m_out << op.Length << op.Type;
+ }
+ m_out << endl;
+ }
+}
+
+bool ConvertTool::ConvertToolPrivate::RunPileupConversion(BamMultiReader* reader) {
+
+ // check for valid BamMultiReader
+ if ( reader == 0 ) return false;
+
+ // set up our pileup format 'visitor'
+ ConvertPileupFormatVisitor* v = new ConvertPileupFormatVisitor(m_references,
+ m_settings->FastaFilename,
+ m_settings->IsPrintingPileupMapQualities,
+ &m_out);
+
+ // set up PileupEngine
+ PileupEngine pileup;
+ pileup.AddVisitor(v);
+
+ // iterate through data
+ BamAlignment al;
+ while ( reader->GetNextAlignment(al) )
+ pileup.AddAlignment(al);
+ pileup.Flush();
+
+ // clean up
+ delete v;
+ v = 0;
+
+ // return success
+ return true;
+}
+
+// ---------------------------------------------
+// ConvertTool implementation
+
+ConvertTool::ConvertTool(void)
+ : AbstractTool()
+ , m_settings(new ConvertSettings)
+ , m_impl(0)
+{
+ // set program details
+ Options::SetProgramInfo("bamtools convert", "converts BAM to a number of other formats", "-format <FORMAT> [-in <filename> -in <filename> ...] [-out <filename>] [-region <REGION>] [format-specific options]");
+
+ // set up options
+ OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
+ Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn());
+ Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
+ Options::AddValueOption("-format", "FORMAT", "the output file format - see README for recognized formats", "", m_settings->HasFormat, m_settings->Format, IO_Opts);
+ Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
+
+ OptionGroup* PileupOpts = Options::CreateOptionGroup("Pileup Options");
+ Options::AddValueOption("-fasta", "FASTA filename", "FASTA reference file", "", m_settings->HasFastaFilename, m_settings->FastaFilename, PileupOpts);
+ Options::AddOption("-mapqual", "print the mapping qualities", m_settings->IsPrintingPileupMapQualities, PileupOpts);
+
+ OptionGroup* SamOpts = Options::CreateOptionGroup("SAM Options");
+ Options::AddOption("-noheader", "omit the SAM header from output", m_settings->IsOmittingSamHeader, SamOpts);
+}
+
+ConvertTool::~ConvertTool(void) {
+
+ delete m_settings;
+ m_settings = 0;
+
+ delete m_impl;
+ m_impl = 0;
+}
+
+int ConvertTool::Help(void) {
+ Options::DisplayHelp();
+ return 0;
+}
+
+int ConvertTool::Run(int argc, char* argv[]) {
+
+ // parse command line arguments
+ Options::Parse(argc, argv, 1);
+
+ // initialize ConvertTool with settings
+ m_impl = new ConvertToolPrivate(m_settings);
+
+ // run ConvertTool, return success/fail
+ if ( m_impl->Run() )
+ return 0;
+ else
+ return 1;
+}
+
+// ---------------------------------------------
+// ConvertPileupFormatVisitor implementation
+
+ConvertPileupFormatVisitor::ConvertPileupFormatVisitor(const RefVector& references,
+ const string& fastaFilename,
+ const bool isPrintingMapQualities,
+ ostream* out)
+ : PileupVisitor()
+ , m_hasFasta(false)
+ , m_isPrintingMapQualities(isPrintingMapQualities)
+ , m_out(out)
+ , m_references(references)
+{
+ // set up Fasta reader if file is provided
+ if ( !fastaFilename.empty() ) {
+
+ // check for FASTA index
+ string indexFilename = "";
+ if ( Utilities::FileExists(fastaFilename + ".fai") )
+ indexFilename = fastaFilename + ".fai";
+
+ // open FASTA file
+ if ( m_fasta.Open(fastaFilename, indexFilename) )
+ m_hasFasta = true;
+ }
+}
+
+ConvertPileupFormatVisitor::~ConvertPileupFormatVisitor(void) {
+ // be sure to close Fasta reader
+ if ( m_hasFasta ) {
+ m_fasta.Close();
+ m_hasFasta = false;
+ }
+}
+
+void ConvertPileupFormatVisitor::Visit(const PileupPosition& pileupData ) {
+
+ // skip if no alignments at this position
+ if ( pileupData.PileupAlignments.empty() ) return;
+
+ // retrieve reference name
+ const string& referenceName = m_references[pileupData.RefId].RefName;
+ const int& position = pileupData.Position;
+
+ // retrieve reference base from FASTA file, if one provided; otherwise default to 'N'
+ char referenceBase('N');
+ if ( m_hasFasta && (pileupData.Position < m_references[pileupData.RefId].RefLength) ) {
+ if ( !m_fasta.GetBase(pileupData.RefId, pileupData.Position, referenceBase ) ) {
+ cerr << "bamtools convert ERROR: pileup conversion - could not read reference base from FASTA file" << endl;
+ return;
+ }
+ }
+
+ // get count of alleles at this position
+ const int numberAlleles = pileupData.PileupAlignments.size();
+
+ // -----------------------------------------------------------
+ // build strings based on alleles at this positionInAlignment
+
+ stringstream bases("");
+ stringstream baseQualities("");
+ stringstream mapQualities("");
+
+ // iterate over alignments at this pileup position
+ vector<PileupAlignment>::const_iterator pileupIter = pileupData.PileupAlignments.begin();
+ vector<PileupAlignment>::const_iterator pileupEnd = pileupData.PileupAlignments.end();
+ for ( ; pileupIter != pileupEnd; ++pileupIter ) {
+ const PileupAlignment pa = (*pileupIter);
+ const BamAlignment& ba = pa.Alignment;
+
+ // if beginning of read segment
+ if ( pa.IsSegmentBegin )
+ bases << '^' << ( ((int)ba.MapQuality > 93) ? (char)126 : (char)((int)ba.MapQuality+33) );
+
+ // if current base is not a DELETION
+ if ( !pa.IsCurrentDeletion ) {
+
+ // get base at current position
+ char base = ba.QueryBases.at(pa.PositionInAlignment);
+
+ // if base matches reference
+ if ( base == '=' ||
+ toupper(base) == toupper(referenceBase) ||
+ tolower(base) == tolower(referenceBase) )
+ {
+ base = ( ba.IsReverseStrand() ? ',' : '.' );
+ }
+
+ // mismatches reference
+ else base = ( ba.IsReverseStrand() ? tolower(base) : toupper(base) );
+
+ // store base
+ bases << base;
+
+ // if next position contains insertion
+ if ( pa.IsNextInsertion ) {
+ bases << '+' << pa.InsertionLength;
+ for (int i = 1; i <= pa.InsertionLength; ++i) {
+ char insertedBase = (char)ba.QueryBases.at(pa.PositionInAlignment + i);
+ bases << (ba.IsReverseStrand() ? (char)tolower(insertedBase) : (char)toupper(insertedBase) );
+ }
+ }
+
+ // if next position contains DELETION
+ else if ( pa.IsNextDeletion ) {
+ bases << '-' << pa.DeletionLength;
+ for (int i = 1; i <= pa.DeletionLength; ++i) {
+ char deletedBase('N');
+ if ( m_hasFasta && (pileupData.Position+i < m_references[pileupData.RefId].RefLength) ) {
+ if ( !m_fasta.GetBase(pileupData.RefId, pileupData.Position+i, deletedBase ) ) {
+ cerr << "bamtools convert ERROR: pileup conversion - could not read reference base from FASTA file" << endl;
+ return;
+ }
+ }
+ bases << (ba.IsReverseStrand() ? (char)tolower(deletedBase) : (char)toupper(deletedBase) );
+ }
+ }
+ }
+
+ // otherwise, DELETION
+ else bases << '*';
+
+ // if end of read segment
+ if ( pa.IsSegmentEnd )
+ bases << '$';
+
+ // store current base quality
+ baseQualities << ba.Qualities.at(pa.PositionInAlignment);
+
+ // save alignment map quality if desired
+ if ( m_isPrintingMapQualities )
+ mapQualities << ( ((int)ba.MapQuality > 93) ? (char)126 : (char)((int)ba.MapQuality+33) );
+ }
+
+ // ----------------------
+ // print results
+
+ // tab-delimited
+ // <refName> <1-based pos> <refBase> <numberAlleles> <bases> <qualities> [mapQuals]
+
+ const string TAB = "\t";
+ *m_out << referenceName << TAB
+ << position + 1 << TAB
+ << referenceBase << TAB
+ << numberAlleles << TAB
+ << bases.str() << TAB
+ << baseQualities.str() << TAB
+ << mapQualities.str() << endl;