1 // ***************************************************************************
2 // bamtools_convert.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 21 March 2011
7 // ---------------------------------------------------------------------------
8 // Converts between BAM and a number of other formats
9 // ***************************************************************************
11 #include "bamtools_convert.h"
13 #include <api/BamConstants.h>
14 #include <api/BamMultiReader.h>
15 #include <utils/bamtools_fasta.h>
16 #include <utils/bamtools_options.h>
17 #include <utils/bamtools_pileup_engine.h>
18 #include <utils/bamtools_utilities.h>
19 using namespace BamTools;
30 // ---------------------------------------------
31 // ConvertTool constants
33 // supported conversion format command-line names
34 static const string FORMAT_BED = "bed";
35 static const string FORMAT_FASTA = "fasta";
36 static const string FORMAT_FASTQ = "fastq";
37 static const string FORMAT_JSON = "json";
38 static const string FORMAT_SAM = "sam";
39 static const string FORMAT_PILEUP = "pileup";
40 static const string FORMAT_YAML = "yaml";
43 static const unsigned int FASTA_LINE_MAX = 50;
45 // ---------------------------------------------
46 // ConvertPileupFormatVisitor declaration
48 class ConvertPileupFormatVisitor : public PileupVisitor {
52 ConvertPileupFormatVisitor(const RefVector& references,
53 const string& fastaFilename,
54 const bool isPrintingMapQualities,
56 ~ConvertPileupFormatVisitor(void);
58 // PileupVisitor interface implementation
60 void Visit(const PileupPosition& pileupData);
66 bool m_isPrintingMapQualities;
68 RefVector m_references;
71 } // namespace BamTools
73 // ---------------------------------------------
74 // ConvertSettings implementation
76 struct ConvertTool::ConvertSettings {
85 bool HasFastaFilename;
86 bool IsOmittingSamHeader;
87 bool IsPrintingPileupMapQualities;
90 vector<string> InputFiles;
91 string OutputFilename;
104 , HasFastaFilename(false)
105 , IsOmittingSamHeader(false)
106 , IsPrintingPileupMapQualities(false)
107 , OutputFilename(Options::StandardOut())
112 // ---------------------------------------------
113 // ConvertToolPrivate implementation
115 struct ConvertTool::ConvertToolPrivate {
119 ConvertToolPrivate(ConvertTool::ConvertSettings* settings);
120 ~ConvertToolPrivate(void);
128 void PrintBed(const BamAlignment& a);
129 void PrintFasta(const BamAlignment& a);
130 void PrintFastq(const BamAlignment& a);
131 void PrintJson(const BamAlignment& a);
132 void PrintSam(const BamAlignment& a);
133 void PrintYaml(const BamAlignment& a);
135 // special case - uses the PileupEngine
136 bool RunPileupConversion(BamMultiReader* reader);
140 ConvertTool::ConvertSettings* m_settings;
141 RefVector m_references;
145 ConvertTool::ConvertToolPrivate::ConvertToolPrivate(ConvertTool::ConvertSettings* settings)
146 : m_settings(settings)
147 , m_out(cout.rdbuf()) // default output to cout
150 ConvertTool::ConvertToolPrivate::~ConvertToolPrivate(void) { }
152 bool ConvertTool::ConvertToolPrivate::Run(void) {
154 // ------------------------------------
155 // initialize conversion input/output
157 // set to default input if none provided
158 if ( !m_settings->HasInput )
159 m_settings->InputFiles.push_back(Options::StandardIn());
162 BamMultiReader reader;
163 if ( !reader.Open(m_settings->InputFiles) ) {
164 cerr << "bamtools convert ERROR: could not open input BAM file(s)... Aborting." << endl;
168 // if input is not stdin & a region is provided, look for index files
169 if ( m_settings->HasInput && m_settings->HasRegion ) {
170 if ( !reader.LocateIndexes() ) {
171 cerr << "bamtools convert ERROR: could not locate index file(s)... Aborting." << endl;
176 // retrieve reference data
177 m_references = reader.GetReferenceData();
179 // set region if specified
181 if ( m_settings->HasRegion ) {
182 if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
184 if ( reader.HasIndexes() ) {
185 if ( !reader.SetRegion(region) ) {
186 cerr << "bamtools convert ERROR: set region failed. Check that REGION describes a valid range" << endl;
193 cerr << "bamtools convert ERROR: could not parse REGION: " << m_settings->Region << endl;
194 cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid"
201 // if output file given
203 if ( m_settings->HasOutput ) {
205 // open output file stream
206 outFile.open(m_settings->OutputFilename.c_str());
208 cerr << "bamtools convert ERROR: could not open " << m_settings->OutputFilename
209 << " for output" << endl;
213 // set m_out to file's streambuf
214 m_out.rdbuf(outFile.rdbuf());
217 // -------------------------------------
218 // do conversion based on format
220 bool convertedOk = true;
222 // pileup is special case
223 // conversion not done per alignment, like the other formats
224 if ( m_settings->Format == FORMAT_PILEUP )
225 convertedOk = RunPileupConversion(&reader);
230 bool formatError = false;
232 // set function pointer to proper conversion method
233 void (BamTools::ConvertTool::ConvertToolPrivate::*pFunction)(const BamAlignment&) = 0;
234 if ( m_settings->Format == FORMAT_BED ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintBed;
235 else if ( m_settings->Format == FORMAT_FASTA ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintFasta;
236 else if ( m_settings->Format == FORMAT_FASTQ ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintFastq;
237 else if ( m_settings->Format == FORMAT_JSON ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintJson;
238 else if ( m_settings->Format == FORMAT_SAM ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintSam;
239 else if ( m_settings->Format == FORMAT_YAML ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintYaml;
241 cerr << "bamtools convert ERROR: unrecognized format: " << m_settings->Format << endl;
242 cerr << "Please see documentation for list of supported formats " << endl;
247 // if format selected ok
248 if ( !formatError ) {
250 // if SAM format & not omitting header, print SAM header first
251 if ( (m_settings->Format == FORMAT_SAM) && !m_settings->IsOmittingSamHeader )
252 m_out << reader.GetHeaderText();
254 // iterate through file, doing conversion
256 while ( reader.GetNextAlignment(a) )
257 (this->*pFunction)(a);
259 // set flag for successful conversion
264 // ------------------------
267 if ( m_settings->HasOutput )
272 // ----------------------------------------------------------
273 // Conversion/output methods
274 // ----------------------------------------------------------
276 void ConvertTool::ConvertToolPrivate::PrintBed(const BamAlignment& a) {
278 // tab-delimited, 0-based half-open
279 // (e.g. a 50-base read aligned to pos 10 could have BED coordinates (10, 60) instead of BAM coordinates (10, 59) )
280 // <chromName> <chromStart> <chromEnd> <readName> <score> <strand>
282 m_out << m_references.at(a.RefID).RefName << "\t"
283 << a.Position << "\t"
284 << a.GetEndPosition() + 1 << "\t"
286 << a.MapQuality << "\t"
287 << (a.IsReverseStrand() ? "-" : "+") << endl;
290 // print BamAlignment in FASTA format
291 // N.B. - uses QueryBases NOT AlignedBases
292 void ConvertTool::ConvertToolPrivate::PrintFasta(const BamAlignment& a) {
294 // >BamAlignment.Name
295 // BamAlignment.QueryBases (up to FASTA_LINE_MAX bases per line)
298 // N.B. - QueryBases are reverse-complemented if aligned to reverse strand
301 m_out << "> " << a.Name << endl;
303 // handle reverse strand alignment - bases
304 string sequence = a.QueryBases;
305 if ( a.IsReverseStrand() )
306 Utilities::ReverseComplement(sequence);
308 // if sequence fits on single line
309 if ( sequence.length() <= FASTA_LINE_MAX )
310 m_out << sequence << endl;
312 // else split over multiple lines
316 size_t seqLength = sequence.length(); // handle reverse strand alignment - bases & qualitiesth();
318 // write subsequences to each line
319 while ( position < (seqLength - FASTA_LINE_MAX) ) {
320 m_out << sequence.substr(position, FASTA_LINE_MAX) << endl;
321 position += FASTA_LINE_MAX;
324 // write final subsequence
325 m_out << sequence.substr(position) << endl;
329 // print BamAlignment in FASTQ format
330 // N.B. - uses QueryBases NOT AlignedBases
331 void ConvertTool::ConvertToolPrivate::PrintFastq(const BamAlignment& a) {
333 // @BamAlignment.Name
334 // BamAlignment.QueryBases
336 // BamAlignment.Qualities
338 // N.B. - QueryBases are reverse-complemented (& Qualities reversed) if aligned to reverse strand .
339 // Name is appended "/1" or "/2" if paired-end, to reflect which mate this entry is.
341 // handle paired-end alignments
342 string name = a.Name;
344 name.append( (a.IsFirstMate() ? "/1" : "/2") );
346 // handle reverse strand alignment - bases & qualities
347 string qualities = a.Qualities;
348 string sequence = a.QueryBases;
349 if ( a.IsReverseStrand() ) {
350 Utilities::Reverse(qualities);
351 Utilities::ReverseComplement(sequence);
354 // write to output stream
355 m_out << "@" << name << endl
358 << qualities << endl;
361 // print BamAlignment in JSON format
362 void ConvertTool::ConvertToolPrivate::PrintJson(const BamAlignment& a) {
364 // write name & alignment flag
365 m_out << "{\"name\":\"" << a.Name << "\",\"alignmentFlag\":\"" << a.AlignmentFlag << "\",";
367 // write reference name
368 if ( (a.RefID >= 0) && (a.RefID < (int)m_references.size()) )
369 m_out << "\"reference\":\"" << m_references[a.RefID].RefName << "\",";
371 // write position & map quality
372 m_out << "\"position\":" << a.Position+1 << ",\"mapQuality\":" << a.MapQuality << ",";
375 const vector<CigarOp>& cigarData = a.CigarData;
376 if ( !cigarData.empty() ) {
377 m_out << "\"cigar\":[";
378 vector<CigarOp>::const_iterator cigarBegin = cigarData.begin();
379 vector<CigarOp>::const_iterator cigarIter = cigarBegin;
380 vector<CigarOp>::const_iterator cigarEnd = cigarData.end();
381 for ( ; cigarIter != cigarEnd; ++cigarIter ) {
382 const CigarOp& op = (*cigarIter);
383 if (cigarIter != cigarBegin)
385 m_out << "\"" << op.Length << op.Type << "\"";
390 // write mate reference name, mate position, & insert size
391 if ( a.IsPaired() && (a.MateRefID >= 0) && (a.MateRefID < (int)m_references.size()) ) {
392 m_out << "\"mate\":{"
393 << "\"reference\":\"" << m_references[a.MateRefID].RefName << "\","
394 << "\"position\":" << a.MatePosition+1
395 << ",\"insertSize\":" << a.InsertSize << "},";
399 if ( !a.QueryBases.empty() )
400 m_out << "\"queryBases\":\"" << a.QueryBases << "\",";
403 if ( !a.Qualities.empty() ) {
404 string::const_iterator s = a.Qualities.begin();
405 m_out << "\"qualities\":[" << static_cast<short>(*s) - 33;
407 for ( ; s != a.Qualities.end(); ++s )
408 m_out << "," << static_cast<short>(*s) - 33;
412 // write alignment's source BAM file
413 m_out << "\"filename\":" << a.Filename << ",";
416 const char* tagData = a.TagData.c_str();
417 const size_t tagDataLength = a.TagData.length();
419 if ( index < tagDataLength ) {
421 m_out << "\"tags\":{";
423 while ( index < tagDataLength ) {
429 m_out << "\"" << a.TagData.substr(index, 2) << "\":";
433 char type = a.TagData.at(index);
436 case (Constants::BAM_TAG_TYPE_ASCII) :
437 m_out << "\"" << tagData[index] << "\"";
441 case (Constants::BAM_TAG_TYPE_INT8) :
442 case (Constants::BAM_TAG_TYPE_UINT8) :
443 m_out << (int)tagData[index];
447 case (Constants::BAM_TAG_TYPE_INT16) :
448 m_out << BamTools::UnpackSignedShort(&tagData[index]);
449 index += sizeof(int16_t);
452 case (Constants::BAM_TAG_TYPE_UINT16) :
453 m_out << BamTools::UnpackUnsignedShort(&tagData[index]);
454 index += sizeof(uint16_t);
457 case (Constants::BAM_TAG_TYPE_INT32) :
458 m_out << BamTools::UnpackSignedInt(&tagData[index]);
459 index += sizeof(int32_t);
462 case (Constants::BAM_TAG_TYPE_UINT32) :
463 m_out << BamTools::UnpackUnsignedInt(&tagData[index]);
464 index += sizeof(uint32_t);
467 case (Constants::BAM_TAG_TYPE_FLOAT) :
468 m_out << BamTools::UnpackFloat(&tagData[index]);
469 index += sizeof(float);
472 case (Constants::BAM_TAG_TYPE_HEX) :
473 case (Constants::BAM_TAG_TYPE_STRING) :
475 while (tagData[index]) {
476 if (tagData[index] == '\"')
477 m_out << "\\\""; // escape for json
479 m_out << tagData[index];
487 if ( tagData[index] == '\0')
494 m_out << "}" << endl;
497 // print BamAlignment in SAM format
498 void ConvertTool::ConvertToolPrivate::PrintSam(const BamAlignment& a) {
501 // <QNAME> <FLAG> <RNAME> <POS> <MAPQ> <CIGAR> <MRNM> <MPOS> <ISIZE> <SEQ> <QUAL> [ <TAG>:<VTYPE>:<VALUE> [...] ]
503 // write name & alignment flag
504 m_out << a.Name << "\t" << a.AlignmentFlag << "\t";
506 // write reference name
507 if ( (a.RefID >= 0) && (a.RefID < (int)m_references.size()) )
508 m_out << m_references[a.RefID].RefName << "\t";
512 // write position & map quality
513 m_out << a.Position+1 << "\t" << a.MapQuality << "\t";
516 const vector<CigarOp>& cigarData = a.CigarData;
517 if ( cigarData.empty() ) m_out << "*\t";
519 vector<CigarOp>::const_iterator cigarIter = cigarData.begin();
520 vector<CigarOp>::const_iterator cigarEnd = cigarData.end();
521 for ( ; cigarIter != cigarEnd; ++cigarIter ) {
522 const CigarOp& op = (*cigarIter);
523 m_out << op.Length << op.Type;
528 // write mate reference name, mate position, & insert size
529 if ( a.IsPaired() && (a.MateRefID >= 0) && (a.MateRefID < (int)m_references.size()) ) {
530 if ( a.MateRefID == a.RefID )
533 m_out << m_references[a.MateRefID].RefName << "\t";
534 m_out << a.MatePosition+1 << "\t" << a.InsertSize << "\t";
537 m_out << "*\t0\t0\t";
540 if ( a.QueryBases.empty() )
543 m_out << a.QueryBases << "\t";
546 if ( a.Qualities.empty() )
549 m_out << a.Qualities;
552 const char* tagData = a.TagData.c_str();
553 const size_t tagDataLength = a.TagData.length();
556 while ( index < tagDataLength ) {
559 string tagName = a.TagData.substr(index, 2);
560 m_out << "\t" << tagName << ":";
564 char type = a.TagData.at(index);
567 case (Constants::BAM_TAG_TYPE_ASCII) :
568 m_out << "A:" << tagData[index];
572 case (Constants::BAM_TAG_TYPE_INT8) :
573 case (Constants::BAM_TAG_TYPE_UINT8) :
574 m_out << "i:" << (int)tagData[index];
578 case (Constants::BAM_TAG_TYPE_INT16) :
579 m_out << "i:" << BamTools::UnpackSignedShort(&tagData[index]);
580 index += sizeof(int16_t);
583 case (Constants::BAM_TAG_TYPE_UINT16) :
584 m_out << "i:" << BamTools::UnpackUnsignedShort(&tagData[index]);
585 index += sizeof(uint16_t);
588 case (Constants::BAM_TAG_TYPE_INT32) :
589 m_out << "i:" << BamTools::UnpackSignedInt(&tagData[index]);
590 index += sizeof(int32_t);
593 case (Constants::BAM_TAG_TYPE_UINT32) :
594 m_out << "i:" << BamTools::UnpackUnsignedInt(&tagData[index]);
595 index += sizeof(uint32_t);
598 case (Constants::BAM_TAG_TYPE_FLOAT) :
599 m_out << "f:" << BamTools::UnpackFloat(&tagData[index]);
600 index += sizeof(float);
603 case (Constants::BAM_TAG_TYPE_HEX) :
604 case (Constants::BAM_TAG_TYPE_STRING) :
605 m_out << type << ":";
606 while (tagData[index]) {
607 m_out << tagData[index];
614 if ( tagData[index] == '\0')
621 // Print BamAlignment in YAML format
622 void ConvertTool::ConvertToolPrivate::PrintYaml(const BamAlignment& a) {
624 // write alignment name
625 m_out << "---" << endl;
626 m_out << a.Name << ":" << endl;
628 // write alignment data
629 m_out << " " << "AlndBases: " << a.AlignedBases << endl;
630 m_out << " " << "Qualities: " << a.Qualities << endl;
631 m_out << " " << "Name: " << a.Name << endl;
632 m_out << " " << "Length: " << a.Length << endl;
633 m_out << " " << "TagData: " << a.TagData << endl;
634 m_out << " " << "RefID: " << a.RefID << endl;
635 m_out << " " << "RefName: " << m_references[a.RefID].RefName << endl;
636 m_out << " " << "Position: " << a.Position << endl;
637 m_out << " " << "Bin: " << a.Bin << endl;
638 m_out << " " << "MapQuality: " << a.MapQuality << endl;
639 m_out << " " << "AlignmentFlag: " << a.AlignmentFlag << endl;
640 m_out << " " << "MateRefID: " << a.MateRefID << endl;
641 m_out << " " << "MatePosition: " << a.MatePosition << endl;
642 m_out << " " << "InsertSize: " << a.InsertSize << endl;
643 m_out << " " << "Filename: " << a.Filename << endl;
646 const vector<CigarOp>& cigarData = a.CigarData;
647 if ( !cigarData.empty() ) {
648 m_out << " " << "Cigar: ";
649 vector<CigarOp>::const_iterator cigarBegin = cigarData.begin();
650 vector<CigarOp>::const_iterator cigarIter = cigarBegin;
651 vector<CigarOp>::const_iterator cigarEnd = cigarData.end();
652 for ( ; cigarIter != cigarEnd; ++cigarIter ) {
653 const CigarOp& op = (*cigarIter);
654 m_out << op.Length << op.Type;
660 bool ConvertTool::ConvertToolPrivate::RunPileupConversion(BamMultiReader* reader) {
662 // check for valid BamMultiReader
663 if ( reader == 0 ) return false;
665 // set up our pileup format 'visitor'
666 ConvertPileupFormatVisitor* v = new ConvertPileupFormatVisitor(m_references,
667 m_settings->FastaFilename,
668 m_settings->IsPrintingPileupMapQualities,
671 // set up PileupEngine
673 pileup.AddVisitor(v);
675 // iterate through data
677 while ( reader->GetNextAlignment(al) )
678 pileup.AddAlignment(al);
689 // ---------------------------------------------
690 // ConvertTool implementation
692 ConvertTool::ConvertTool(void)
694 , m_settings(new ConvertSettings)
697 // set program details
698 Options::SetProgramInfo("bamtools convert", "converts BAM to a number of other formats", "-format <FORMAT> [-in <filename> -in <filename> ...] [-out <filename>] [-region <REGION>] [format-specific options]");
701 OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
702 Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn());
703 Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
704 Options::AddValueOption("-format", "FORMAT", "the output file format - see README for recognized formats", "", m_settings->HasFormat, m_settings->Format, IO_Opts);
705 Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
707 OptionGroup* PileupOpts = Options::CreateOptionGroup("Pileup Options");
708 Options::AddValueOption("-fasta", "FASTA filename", "FASTA reference file", "", m_settings->HasFastaFilename, m_settings->FastaFilename, PileupOpts);
709 Options::AddOption("-mapqual", "print the mapping qualities", m_settings->IsPrintingPileupMapQualities, PileupOpts);
711 OptionGroup* SamOpts = Options::CreateOptionGroup("SAM Options");
712 Options::AddOption("-noheader", "omit the SAM header from output", m_settings->IsOmittingSamHeader, SamOpts);
715 ConvertTool::~ConvertTool(void) {
723 int ConvertTool::Help(void) {
724 Options::DisplayHelp();
728 int ConvertTool::Run(int argc, char* argv[]) {
730 // parse command line arguments
731 Options::Parse(argc, argv, 1);
733 // run internal ConvertTool implementation, return success/fail
734 m_impl = new ConvertToolPrivate(m_settings);
742 // ---------------------------------------------
743 // ConvertPileupFormatVisitor implementation
745 ConvertPileupFormatVisitor::ConvertPileupFormatVisitor(const RefVector& references,
746 const string& fastaFilename,
747 const bool isPrintingMapQualities,
751 , m_isPrintingMapQualities(isPrintingMapQualities)
753 , m_references(references)
755 // set up Fasta reader if file is provided
756 if ( !fastaFilename.empty() ) {
758 // check for FASTA index
759 string indexFilename = "";
760 if ( Utilities::FileExists(fastaFilename + ".fai") )
761 indexFilename = fastaFilename + ".fai";
764 if ( m_fasta.Open(fastaFilename, indexFilename) )
769 ConvertPileupFormatVisitor::~ConvertPileupFormatVisitor(void) {
770 // be sure to close Fasta reader
777 void ConvertPileupFormatVisitor::Visit(const PileupPosition& pileupData ) {
779 // skip if no alignments at this position
780 if ( pileupData.PileupAlignments.empty() ) return;
782 // retrieve reference name
783 const string& referenceName = m_references[pileupData.RefId].RefName;
784 const int& position = pileupData.Position;
786 // retrieve reference base from FASTA file, if one provided; otherwise default to 'N'
787 char referenceBase('N');
788 if ( m_hasFasta && (pileupData.Position < m_references[pileupData.RefId].RefLength) ) {
789 if ( !m_fasta.GetBase(pileupData.RefId, pileupData.Position, referenceBase ) ) {
790 cerr << "bamtools convert ERROR: pileup conversion - could not read reference base from FASTA file" << endl;
795 // get count of alleles at this position
796 const int numberAlleles = pileupData.PileupAlignments.size();
798 // -----------------------------------------------------------
799 // build strings based on alleles at this positionInAlignment
801 stringstream bases("");
802 stringstream baseQualities("");
803 stringstream mapQualities("");
805 // iterate over alignments at this pileup position
806 vector<PileupAlignment>::const_iterator pileupIter = pileupData.PileupAlignments.begin();
807 vector<PileupAlignment>::const_iterator pileupEnd = pileupData.PileupAlignments.end();
808 for ( ; pileupIter != pileupEnd; ++pileupIter ) {
809 const PileupAlignment pa = (*pileupIter);
810 const BamAlignment& ba = pa.Alignment;
812 // if beginning of read segment
813 if ( pa.IsSegmentBegin )
814 bases << '^' << ( ((int)ba.MapQuality > 93) ? (char)126 : (char)((int)ba.MapQuality+33) );
816 // if current base is not a DELETION
817 if ( !pa.IsCurrentDeletion ) {
819 // get base at current position
820 char base = ba.QueryBases.at(pa.PositionInAlignment);
822 // if base matches reference
824 toupper(base) == toupper(referenceBase) ||
825 tolower(base) == tolower(referenceBase) )
827 base = ( ba.IsReverseStrand() ? ',' : '.' );
830 // mismatches reference
831 else base = ( ba.IsReverseStrand() ? tolower(base) : toupper(base) );
836 // if next position contains insertion
837 if ( pa.IsNextInsertion ) {
838 bases << '+' << pa.InsertionLength;
839 for (int i = 1; i <= pa.InsertionLength; ++i) {
840 char insertedBase = (char)ba.QueryBases.at(pa.PositionInAlignment + i);
841 bases << (ba.IsReverseStrand() ? (char)tolower(insertedBase) : (char)toupper(insertedBase) );
845 // if next position contains DELETION
846 else if ( pa.IsNextDeletion ) {
847 bases << '-' << pa.DeletionLength;
848 for (int i = 1; i <= pa.DeletionLength; ++i) {
849 char deletedBase('N');
850 if ( m_hasFasta && (pileupData.Position+i < m_references[pileupData.RefId].RefLength) ) {
851 if ( !m_fasta.GetBase(pileupData.RefId, pileupData.Position+i, deletedBase ) ) {
852 cerr << "bamtools convert ERROR: pileup conversion - could not read reference base from FASTA file" << endl;
856 bases << (ba.IsReverseStrand() ? (char)tolower(deletedBase) : (char)toupper(deletedBase) );
861 // otherwise, DELETION
864 // if end of read segment
865 if ( pa.IsSegmentEnd )
868 // store current base quality
869 baseQualities << ba.Qualities.at(pa.PositionInAlignment);
871 // save alignment map quality if desired
872 if ( m_isPrintingMapQualities )
873 mapQualities << ( ((int)ba.MapQuality > 93) ? (char)126 : (char)((int)ba.MapQuality+33) );
876 // ----------------------
880 // <refName> <1-based pos> <refBase> <numberAlleles> <bases> <qualities> [mapQuals]
882 const string TAB = "\t";
883 *m_out << referenceName << TAB
884 << position + 1 << TAB
885 << referenceBase << TAB
886 << numberAlleles << TAB
887 << bases.str() << TAB
888 << baseQualities.str() << TAB
889 << mapQualities.str() << endl;