1 // ***************************************************************************
2 // bamtools_convert.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 10 December 2012
6 // ---------------------------------------------------------------------------
7 // Converts between BAM and a number of other formats
8 // ***************************************************************************
10 #include "bamtools_convert.h"
12 #include <api/BamConstants.h>
13 #include <api/BamMultiReader.h>
14 #include <utils/bamtools_fasta.h>
15 #include <utils/bamtools_options.h>
16 #include <utils/bamtools_pileup_engine.h>
17 #include <utils/bamtools_utilities.h>
18 using namespace BamTools;
29 // ---------------------------------------------
30 // ConvertTool constants
32 // supported conversion format command-line names
33 static const string FORMAT_BED = "bed";
34 static const string FORMAT_FASTA = "fasta";
35 static const string FORMAT_FASTQ = "fastq";
36 static const string FORMAT_JSON = "json";
37 static const string FORMAT_SAM = "sam";
38 static const string FORMAT_PILEUP = "pileup";
39 static const string FORMAT_YAML = "yaml";
42 static const unsigned int FASTA_LINE_MAX = 50;
44 // ---------------------------------------------
45 // ConvertPileupFormatVisitor declaration
47 class ConvertPileupFormatVisitor : public PileupVisitor {
51 ConvertPileupFormatVisitor(const RefVector& references,
52 const string& fastaFilename,
53 const bool isPrintingMapQualities,
55 ~ConvertPileupFormatVisitor(void);
57 // PileupVisitor interface implementation
59 void Visit(const PileupPosition& pileupData);
65 bool m_isPrintingMapQualities;
67 RefVector m_references;
70 } // namespace BamTools
72 // ---------------------------------------------
73 // ConvertSettings implementation
75 struct ConvertTool::ConvertSettings {
79 bool HasInputFilelist;
85 bool HasFastaFilename;
86 bool IsOmittingSamHeader;
87 bool IsPrintingPileupMapQualities;
90 vector<string> InputFiles;
92 string OutputFilename;
100 ConvertSettings(void)
102 , HasInputFilelist(false)
106 , HasFastaFilename(false)
107 , IsOmittingSamHeader(false)
108 , IsPrintingPileupMapQualities(false)
109 , OutputFilename(Options::StandardOut())
114 // ---------------------------------------------
115 // ConvertToolPrivate implementation
117 struct ConvertTool::ConvertToolPrivate {
121 ConvertToolPrivate(ConvertTool::ConvertSettings* settings)
122 : m_settings(settings)
123 , m_out(cout.rdbuf())
126 ~ConvertToolPrivate(void) { }
134 void PrintBed(const BamAlignment& a);
135 void PrintFasta(const BamAlignment& a);
136 void PrintFastq(const BamAlignment& a);
137 void PrintJson(const BamAlignment& a);
138 void PrintSam(const BamAlignment& a);
139 void PrintYaml(const BamAlignment& a);
141 // special case - uses the PileupEngine
142 bool RunPileupConversion(BamMultiReader* reader);
146 ConvertTool::ConvertSettings* m_settings;
147 RefVector m_references;
151 bool ConvertTool::ConvertToolPrivate::Run(void) {
153 // ------------------------------------
154 // initialize conversion input/output
156 // set to default input if none provided
157 if ( !m_settings->HasInput && !m_settings->HasInputFilelist )
158 m_settings->InputFiles.push_back(Options::StandardIn());
160 // add files in the filelist to the input file list
161 if ( m_settings->HasInputFilelist ) {
163 ifstream filelist(m_settings->InputFilelist.c_str(), ios::in);
164 if ( !filelist.is_open() ) {
165 cerr << "bamtools convert ERROR: could not open input BAM file list... Aborting." << endl;
170 while ( getline(filelist, line) )
171 m_settings->InputFiles.push_back(line);
175 BamMultiReader reader;
176 if ( !reader.Open(m_settings->InputFiles) ) {
177 cerr << "bamtools convert ERROR: could not open input BAM file(s)... Aborting." << endl;
181 // if input is not stdin & a region is provided, look for index files
182 if ( m_settings->HasInput && m_settings->HasRegion ) {
183 if ( !reader.LocateIndexes() ) {
184 cerr << "bamtools convert ERROR: could not locate index file(s)... Aborting." << endl;
189 // retrieve reference data
190 m_references = reader.GetReferenceData();
192 // set region if specified
194 if ( m_settings->HasRegion ) {
195 if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
197 if ( reader.HasIndexes() ) {
198 if ( !reader.SetRegion(region) ) {
199 cerr << "bamtools convert ERROR: set region failed. Check that REGION describes a valid range" << endl;
206 cerr << "bamtools convert ERROR: could not parse REGION: " << m_settings->Region << endl;
207 cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid"
214 // if output file given
216 if ( m_settings->HasOutput ) {
218 // open output file stream
219 outFile.open(m_settings->OutputFilename.c_str());
221 cerr << "bamtools convert ERROR: could not open " << m_settings->OutputFilename
222 << " for output" << endl;
226 // set m_out to file's streambuf
227 m_out.rdbuf(outFile.rdbuf());
230 // -------------------------------------
231 // do conversion based on format
233 bool convertedOk = true;
235 // pileup is special case
236 // conversion not done per alignment, like the other formats
237 if ( m_settings->Format == FORMAT_PILEUP )
238 convertedOk = RunPileupConversion(&reader);
243 bool formatError = false;
245 // set function pointer to proper conversion method
246 void (BamTools::ConvertTool::ConvertToolPrivate::*pFunction)(const BamAlignment&) = 0;
247 if ( m_settings->Format == FORMAT_BED ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintBed;
248 else if ( m_settings->Format == FORMAT_FASTA ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintFasta;
249 else if ( m_settings->Format == FORMAT_FASTQ ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintFastq;
250 else if ( m_settings->Format == FORMAT_JSON ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintJson;
251 else if ( m_settings->Format == FORMAT_SAM ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintSam;
252 else if ( m_settings->Format == FORMAT_YAML ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintYaml;
254 cerr << "bamtools convert ERROR: unrecognized format: " << m_settings->Format << endl;
255 cerr << "Please see documentation for list of supported formats " << endl;
260 // if format selected ok
261 if ( !formatError ) {
263 // if SAM format & not omitting header, print SAM header first
264 if ( (m_settings->Format == FORMAT_SAM) && !m_settings->IsOmittingSamHeader )
265 m_out << reader.GetHeaderText();
267 // iterate through file, doing conversion
269 while ( reader.GetNextAlignment(a) )
270 (this->*pFunction)(a);
272 // set flag for successful conversion
277 // ------------------------
280 if ( m_settings->HasOutput )
285 // ----------------------------------------------------------
286 // Conversion/output methods
287 // ----------------------------------------------------------
289 void ConvertTool::ConvertToolPrivate::PrintBed(const BamAlignment& a) {
291 // tab-delimited, 0-based half-open
292 // (e.g. a 50-base read aligned to pos 10 could have BED coordinates (10, 60) instead of BAM coordinates (10, 59) )
293 // <chromName> <chromStart> <chromEnd> <readName> <score> <strand>
295 m_out << m_references.at(a.RefID).RefName << "\t"
296 << a.Position << "\t"
297 << a.GetEndPosition() << "\t"
299 << a.MapQuality << "\t"
300 << (a.IsReverseStrand() ? "-" : "+") << endl;
303 // print BamAlignment in FASTA format
304 // N.B. - uses QueryBases NOT AlignedBases
305 void ConvertTool::ConvertToolPrivate::PrintFasta(const BamAlignment& a) {
307 // >BamAlignment.Name
308 // BamAlignment.QueryBases (up to FASTA_LINE_MAX bases per line)
311 // N.B. - QueryBases are reverse-complemented if aligned to reverse strand
314 m_out << ">" << a.Name << endl;
316 // handle reverse strand alignment - bases
317 string sequence = a.QueryBases;
318 if ( a.IsReverseStrand() )
319 Utilities::ReverseComplement(sequence);
321 // if sequence fits on single line
322 if ( sequence.length() <= FASTA_LINE_MAX )
323 m_out << sequence << endl;
325 // else split over multiple lines
329 size_t seqLength = sequence.length(); // handle reverse strand alignment - bases & qualitiesth();
331 // write subsequences to each line
332 while ( position < (seqLength - FASTA_LINE_MAX) ) {
333 m_out << sequence.substr(position, FASTA_LINE_MAX) << endl;
334 position += FASTA_LINE_MAX;
337 // write final subsequence
338 m_out << sequence.substr(position) << endl;
342 // print BamAlignment in FASTQ format
343 // N.B. - uses QueryBases NOT AlignedBases
344 void ConvertTool::ConvertToolPrivate::PrintFastq(const BamAlignment& a) {
346 // @BamAlignment.Name
347 // BamAlignment.QueryBases
349 // BamAlignment.Qualities
351 // N.B. - QueryBases are reverse-complemented (& Qualities reversed) if aligned to reverse strand .
352 // Name is appended "/1" or "/2" if paired-end, to reflect which mate this entry is.
354 // handle paired-end alignments
355 string name = a.Name;
357 name.append( (a.IsFirstMate() ? "/1" : "/2") );
359 // handle reverse strand alignment - bases & qualities
360 string qualities = a.Qualities;
361 string sequence = a.QueryBases;
362 if ( a.IsReverseStrand() ) {
363 Utilities::Reverse(qualities);
364 Utilities::ReverseComplement(sequence);
367 // write to output stream
368 m_out << "@" << name << endl
371 << qualities << endl;
374 // print BamAlignment in JSON format
375 void ConvertTool::ConvertToolPrivate::PrintJson(const BamAlignment& a) {
377 // write name & alignment flag
378 m_out << "{\"name\":\"" << a.Name << "\",\"alignmentFlag\":\"" << a.AlignmentFlag << "\",";
380 // write reference name
381 if ( (a.RefID >= 0) && (a.RefID < (int)m_references.size()) )
382 m_out << "\"reference\":\"" << m_references[a.RefID].RefName << "\",";
384 // write position & map quality
385 m_out << "\"position\":" << a.Position+1 << ",\"mapQuality\":" << a.MapQuality << ",";
388 const vector<CigarOp>& cigarData = a.CigarData;
389 if ( !cigarData.empty() ) {
390 m_out << "\"cigar\":[";
391 vector<CigarOp>::const_iterator cigarBegin = cigarData.begin();
392 vector<CigarOp>::const_iterator cigarIter = cigarBegin;
393 vector<CigarOp>::const_iterator cigarEnd = cigarData.end();
394 for ( ; cigarIter != cigarEnd; ++cigarIter ) {
395 const CigarOp& op = (*cigarIter);
396 if (cigarIter != cigarBegin)
398 m_out << "\"" << op.Length << op.Type << "\"";
403 // write mate reference name, mate position, & insert size
404 if ( a.IsPaired() && (a.MateRefID >= 0) && (a.MateRefID < (int)m_references.size()) ) {
405 m_out << "\"mate\":{"
406 << "\"reference\":\"" << m_references[a.MateRefID].RefName << "\","
407 << "\"position\":" << a.MatePosition+1
408 << ",\"insertSize\":" << a.InsertSize << "},";
412 if ( !a.QueryBases.empty() )
413 m_out << "\"queryBases\":\"" << a.QueryBases << "\",";
416 if ( !a.Qualities.empty() && a.Qualities.at(0) != (char)0xFF ) {
417 string::const_iterator s = a.Qualities.begin();
418 m_out << "\"qualities\":[" << static_cast<short>(*s) - 33;
420 for ( ; s != a.Qualities.end(); ++s )
421 m_out << "," << static_cast<short>(*s) - 33;
425 // write alignment's source BAM file
426 m_out << "\"filename\":\"" << a.Filename << "\",";
429 const char* tagData = a.TagData.c_str();
430 const size_t tagDataLength = a.TagData.length();
432 if ( index < tagDataLength ) {
434 m_out << "\"tags\":{";
436 while ( index < tagDataLength ) {
442 m_out << "\"" << a.TagData.substr(index, 2) << "\":";
446 char type = a.TagData.at(index);
449 case (Constants::BAM_TAG_TYPE_ASCII) :
450 m_out << "\"" << tagData[index] << "\"";
454 case (Constants::BAM_TAG_TYPE_INT8) :
455 // force value into integer-type (instead of char value)
456 m_out << static_cast<int16_t>(tagData[index]);
460 case (Constants::BAM_TAG_TYPE_UINT8) :
461 // force value into integer-type (instead of char value)
462 m_out << static_cast<uint16_t>(tagData[index]);
466 case (Constants::BAM_TAG_TYPE_INT16) :
467 m_out << BamTools::UnpackSignedShort(&tagData[index]);
468 index += sizeof(int16_t);
471 case (Constants::BAM_TAG_TYPE_UINT16) :
472 m_out << BamTools::UnpackUnsignedShort(&tagData[index]);
473 index += sizeof(uint16_t);
476 case (Constants::BAM_TAG_TYPE_INT32) :
477 m_out << BamTools::UnpackSignedInt(&tagData[index]);
478 index += sizeof(int32_t);
481 case (Constants::BAM_TAG_TYPE_UINT32) :
482 m_out << BamTools::UnpackUnsignedInt(&tagData[index]);
483 index += sizeof(uint32_t);
486 case (Constants::BAM_TAG_TYPE_FLOAT) :
487 m_out << BamTools::UnpackFloat(&tagData[index]);
488 index += sizeof(float);
491 case (Constants::BAM_TAG_TYPE_HEX) :
492 case (Constants::BAM_TAG_TYPE_STRING) :
494 while (tagData[index]) {
495 if (tagData[index] == '\"')
496 m_out << "\\\""; // escape for json
498 m_out << tagData[index];
506 if ( tagData[index] == '\0')
513 m_out << "}" << endl;
516 // print BamAlignment in SAM format
517 void ConvertTool::ConvertToolPrivate::PrintSam(const BamAlignment& a) {
520 // <QNAME> <FLAG> <RNAME> <POS> <MAPQ> <CIGAR> <MRNM> <MPOS> <ISIZE> <SEQ> <QUAL> [ <TAG>:<VTYPE>:<VALUE> [...] ]
522 // write name & alignment flag
523 m_out << a.Name << "\t" << a.AlignmentFlag << "\t";
525 // write reference name
526 if ( (a.RefID >= 0) && (a.RefID < (int)m_references.size()) )
527 m_out << m_references[a.RefID].RefName << "\t";
531 // write position & map quality
532 m_out << a.Position+1 << "\t" << a.MapQuality << "\t";
535 const vector<CigarOp>& cigarData = a.CigarData;
536 if ( cigarData.empty() ) m_out << "*\t";
538 vector<CigarOp>::const_iterator cigarIter = cigarData.begin();
539 vector<CigarOp>::const_iterator cigarEnd = cigarData.end();
540 for ( ; cigarIter != cigarEnd; ++cigarIter ) {
541 const CigarOp& op = (*cigarIter);
542 m_out << op.Length << op.Type;
547 // write mate reference name, mate position, & insert size
548 if ( a.IsPaired() && (a.MateRefID >= 0) && (a.MateRefID < (int)m_references.size()) ) {
549 if ( a.MateRefID == a.RefID )
552 m_out << m_references[a.MateRefID].RefName << "\t";
553 m_out << a.MatePosition+1 << "\t" << a.InsertSize << "\t";
556 m_out << "*\t0\t0\t";
559 if ( a.QueryBases.empty() )
562 m_out << a.QueryBases << "\t";
565 if ( a.Qualities.empty() || (a.Qualities.at(0) == (char)0xFF) )
568 m_out << a.Qualities;
571 const char* tagData = a.TagData.c_str();
572 const size_t tagDataLength = a.TagData.length();
575 while ( index < tagDataLength ) {
578 string tagName = a.TagData.substr(index, 2);
579 m_out << "\t" << tagName << ":";
583 char type = a.TagData.at(index);
586 case (Constants::BAM_TAG_TYPE_ASCII) :
587 m_out << "A:" << tagData[index];
591 case (Constants::BAM_TAG_TYPE_INT8) :
592 // force value into integer-type (instead of char value)
593 m_out << "i:" << static_cast<int16_t>(tagData[index]);
597 case (Constants::BAM_TAG_TYPE_UINT8) :
598 // force value into integer-type (instead of char value)
599 m_out << "i:" << static_cast<uint16_t>(tagData[index]);
603 case (Constants::BAM_TAG_TYPE_INT16) :
604 m_out << "i:" << BamTools::UnpackSignedShort(&tagData[index]);
605 index += sizeof(int16_t);
608 case (Constants::BAM_TAG_TYPE_UINT16) :
609 m_out << "i:" << BamTools::UnpackUnsignedShort(&tagData[index]);
610 index += sizeof(uint16_t);
613 case (Constants::BAM_TAG_TYPE_INT32) :
614 m_out << "i:" << BamTools::UnpackSignedInt(&tagData[index]);
615 index += sizeof(int32_t);
618 case (Constants::BAM_TAG_TYPE_UINT32) :
619 m_out << "i:" << BamTools::UnpackUnsignedInt(&tagData[index]);
620 index += sizeof(uint32_t);
623 case (Constants::BAM_TAG_TYPE_FLOAT) :
624 m_out << "f:" << BamTools::UnpackFloat(&tagData[index]);
625 index += sizeof(float);
628 case (Constants::BAM_TAG_TYPE_HEX) : // fall-through
629 case (Constants::BAM_TAG_TYPE_STRING) :
630 m_out << type << ":";
631 while (tagData[index]) {
632 m_out << tagData[index];
639 if ( tagData[index] == '\0' )
646 // Print BamAlignment in YAML format
647 void ConvertTool::ConvertToolPrivate::PrintYaml(const BamAlignment& a) {
649 // write alignment name
650 m_out << "---" << endl;
651 m_out << a.Name << ":" << endl;
653 // write alignment data
654 m_out << " " << "AlndBases: " << a.AlignedBases << endl;
655 m_out << " " << "Qualities: " << a.Qualities << endl;
656 m_out << " " << "Name: " << a.Name << endl;
657 m_out << " " << "Length: " << a.Length << endl;
658 m_out << " " << "TagData: " << a.TagData << endl;
659 m_out << " " << "RefID: " << a.RefID << endl;
660 m_out << " " << "RefName: " << m_references[a.RefID].RefName << endl;
661 m_out << " " << "Position: " << a.Position << endl;
662 m_out << " " << "Bin: " << a.Bin << endl;
663 m_out << " " << "MapQuality: " << a.MapQuality << endl;
664 m_out << " " << "AlignmentFlag: " << a.AlignmentFlag << endl;
665 m_out << " " << "MateRefID: " << a.MateRefID << endl;
666 m_out << " " << "MatePosition: " << a.MatePosition << endl;
667 m_out << " " << "InsertSize: " << a.InsertSize << endl;
668 m_out << " " << "Filename: " << a.Filename << endl;
671 const vector<CigarOp>& cigarData = a.CigarData;
672 if ( !cigarData.empty() ) {
673 m_out << " " << "Cigar: ";
674 vector<CigarOp>::const_iterator cigarBegin = cigarData.begin();
675 vector<CigarOp>::const_iterator cigarIter = cigarBegin;
676 vector<CigarOp>::const_iterator cigarEnd = cigarData.end();
677 for ( ; cigarIter != cigarEnd; ++cigarIter ) {
678 const CigarOp& op = (*cigarIter);
679 m_out << op.Length << op.Type;
685 bool ConvertTool::ConvertToolPrivate::RunPileupConversion(BamMultiReader* reader) {
687 // check for valid BamMultiReader
688 if ( reader == 0 ) return false;
690 // set up our pileup format 'visitor'
691 ConvertPileupFormatVisitor* v = new ConvertPileupFormatVisitor(m_references,
692 m_settings->FastaFilename,
693 m_settings->IsPrintingPileupMapQualities,
696 // set up PileupEngine
698 pileup.AddVisitor(v);
700 // iterate through data
702 while ( reader->GetNextAlignment(al) )
703 pileup.AddAlignment(al);
714 // ---------------------------------------------
715 // ConvertTool implementation
717 ConvertTool::ConvertTool(void)
719 , m_settings(new ConvertSettings)
722 // set program details
723 Options::SetProgramInfo("bamtools convert", "converts BAM to a number of other formats",
724 "-format <FORMAT> [-in <filename> -in <filename> ... | -list <filelist>] [-out <filename>] [-region <REGION>] [format-specific options]");
727 OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
728 Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn());
729 Options::AddValueOption("-list", "filename", "the input BAM file list, one line per file", "", m_settings->HasInputFilelist, m_settings->InputFilelist, IO_Opts);
730 Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
731 Options::AddValueOption("-format", "FORMAT", "the output file format - see README for recognized formats", "", m_settings->HasFormat, m_settings->Format, IO_Opts);
732 Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
734 OptionGroup* PileupOpts = Options::CreateOptionGroup("Pileup Options");
735 Options::AddValueOption("-fasta", "FASTA filename", "FASTA reference file", "", m_settings->HasFastaFilename, m_settings->FastaFilename, PileupOpts);
736 Options::AddOption("-mapqual", "print the mapping qualities", m_settings->IsPrintingPileupMapQualities, PileupOpts);
738 OptionGroup* SamOpts = Options::CreateOptionGroup("SAM Options");
739 Options::AddOption("-noheader", "omit the SAM header from output", m_settings->IsOmittingSamHeader, SamOpts);
742 ConvertTool::~ConvertTool(void) {
751 int ConvertTool::Help(void) {
752 Options::DisplayHelp();
756 int ConvertTool::Run(int argc, char* argv[]) {
758 // parse command line arguments
759 Options::Parse(argc, argv, 1);
761 // initialize ConvertTool with settings
762 m_impl = new ConvertToolPrivate(m_settings);
764 // run ConvertTool, return success/fail
771 // ---------------------------------------------
772 // ConvertPileupFormatVisitor implementation
774 ConvertPileupFormatVisitor::ConvertPileupFormatVisitor(const RefVector& references,
775 const string& fastaFilename,
776 const bool isPrintingMapQualities,
780 , m_isPrintingMapQualities(isPrintingMapQualities)
782 , m_references(references)
784 // set up Fasta reader if file is provided
785 if ( !fastaFilename.empty() ) {
787 // check for FASTA index
788 string indexFilename = "";
789 if ( Utilities::FileExists(fastaFilename + ".fai") )
790 indexFilename = fastaFilename + ".fai";
793 if ( m_fasta.Open(fastaFilename, indexFilename) )
798 ConvertPileupFormatVisitor::~ConvertPileupFormatVisitor(void) {
799 // be sure to close Fasta reader
806 void ConvertPileupFormatVisitor::Visit(const PileupPosition& pileupData ) {
808 // skip if no alignments at this position
809 if ( pileupData.PileupAlignments.empty() ) return;
811 // retrieve reference name
812 const string& referenceName = m_references[pileupData.RefId].RefName;
813 const int& position = pileupData.Position;
815 // retrieve reference base from FASTA file, if one provided; otherwise default to 'N'
816 char referenceBase('N');
817 if ( m_hasFasta && (pileupData.Position < m_references[pileupData.RefId].RefLength) ) {
818 if ( !m_fasta.GetBase(pileupData.RefId, pileupData.Position, referenceBase ) ) {
819 cerr << "bamtools convert ERROR: pileup conversion - could not read reference base from FASTA file" << endl;
824 // get count of alleles at this position
825 const int numberAlleles = pileupData.PileupAlignments.size();
827 // -----------------------------------------------------------
828 // build strings based on alleles at this positionInAlignment
830 stringstream bases("");
831 stringstream baseQualities("");
832 stringstream mapQualities("");
834 // iterate over alignments at this pileup position
835 vector<PileupAlignment>::const_iterator pileupIter = pileupData.PileupAlignments.begin();
836 vector<PileupAlignment>::const_iterator pileupEnd = pileupData.PileupAlignments.end();
837 for ( ; pileupIter != pileupEnd; ++pileupIter ) {
838 const PileupAlignment pa = (*pileupIter);
839 const BamAlignment& ba = pa.Alignment;
841 // if beginning of read segment
842 if ( pa.IsSegmentBegin )
843 bases << '^' << ( ((int)ba.MapQuality > 93) ? (char)126 : (char)((int)ba.MapQuality+33) );
845 // if current base is not a DELETION
846 if ( !pa.IsCurrentDeletion ) {
848 // get base at current position
849 char base = ba.QueryBases.at(pa.PositionInAlignment);
851 // if base matches reference
853 toupper(base) == toupper(referenceBase) ||
854 tolower(base) == tolower(referenceBase) )
856 base = ( ba.IsReverseStrand() ? ',' : '.' );
859 // mismatches reference
860 else base = ( ba.IsReverseStrand() ? tolower(base) : toupper(base) );
865 // if next position contains insertion
866 if ( pa.IsNextInsertion ) {
867 bases << '+' << pa.InsertionLength;
868 for (int i = 1; i <= pa.InsertionLength; ++i) {
869 char insertedBase = (char)ba.QueryBases.at(pa.PositionInAlignment + i);
870 bases << (ba.IsReverseStrand() ? (char)tolower(insertedBase) : (char)toupper(insertedBase) );
874 // if next position contains DELETION
875 else if ( pa.IsNextDeletion ) {
876 bases << '-' << pa.DeletionLength;
877 for (int i = 1; i <= pa.DeletionLength; ++i) {
878 char deletedBase('N');
879 if ( m_hasFasta && (pileupData.Position+i < m_references[pileupData.RefId].RefLength) ) {
880 if ( !m_fasta.GetBase(pileupData.RefId, pileupData.Position+i, deletedBase ) ) {
881 cerr << "bamtools convert ERROR: pileup conversion - could not read reference base from FASTA file" << endl;
885 bases << (ba.IsReverseStrand() ? (char)tolower(deletedBase) : (char)toupper(deletedBase) );
890 // otherwise, DELETION
893 // if end of read segment
894 if ( pa.IsSegmentEnd )
897 // store current base quality
898 baseQualities << ba.Qualities.at(pa.PositionInAlignment);
900 // save alignment map quality if desired
901 if ( m_isPrintingMapQualities )
902 mapQualities << ( ((int)ba.MapQuality > 93) ? (char)126 : (char)((int)ba.MapQuality+33) );
905 // ----------------------
909 // <refName> <1-based pos> <refBase> <numberAlleles> <bases> <qualities> [mapQuals]
911 const string TAB = "\t";
912 *m_out << referenceName << TAB
913 << position + 1 << TAB
914 << referenceBase << TAB
915 << numberAlleles << TAB
916 << bases.str() << TAB
917 << baseQualities.str() << TAB
918 << mapQualities.str() << endl;