// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 16 September 2010
+// Last modified: 4 October 2010
// ---------------------------------------------------------------------------
// Converts between BAM and a number of other formats
// ***************************************************************************
static const string FORMAT_SAM = "sam";
static const string FORMAT_PILEUP = "pileup";
static const string FORMAT_WIGGLE = "wig";
+ static const string FORMAT_YAML = "yaml";
// other constants
static const unsigned int FASTA_LINE_MAX = 50;
// PileupVisitor interface implementation
public:
-// void Visit(const int& refId, const int& position, const vector<BamAlignment>& alignments);
-
void Visit(const PileupPosition& pileupData);
// data members
void PrintJson(const BamAlignment& a);
void PrintSam(const BamAlignment& a);
void PrintWiggle(const BamAlignment& a);
+ void PrintYaml(const BamAlignment& a);
// special case - uses the PileupEngine
bool RunPileupConversion(BamMultiReader* reader);
// open input files
BamMultiReader reader;
- reader.Open(m_settings->InputFiles);
+ if ( !m_settings->HasInput ) { // don't attempt to open index for stdin
+ if ( !reader.Open(m_settings->InputFiles, false) ) {
+ cerr << "Could not open input files" << endl;
+ return false;
+ }
+ } else {
+ if ( !reader.Open(m_settings->InputFiles, true) ) {
+ if ( !reader.Open(m_settings->InputFiles, false) ) {
+ cerr << "Could not open input files" << endl;
+ return false;
+ } else {
+ cerr << "Opened reader without index file, jumping is disabled." << endl;
+ }
+ }
+ }
m_references = reader.GetReferenceData();
// set region if specified
else if ( m_settings->Format == FORMAT_JSON ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintJson;
else if ( m_settings->Format == FORMAT_SAM ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintSam;
else if ( m_settings->Format == FORMAT_WIGGLE ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintWiggle;
+ else if ( m_settings->Format == FORMAT_YAML ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintYaml;
else {
cerr << "Unrecognized format: " << m_settings->Format << endl;
cerr << "Please see help|README (?) for details on supported formats " << endl;
// >BamAlignment.Name
// BamAlignment.QueryBases (up to FASTA_LINE_MAX bases per line)
// ...
+ //
+ // N.B. - QueryBases are reverse-complemented if aligned to reverse strand
// print header
m_out << "> " << a.Name << endl;
+ // handle reverse strand alignment - bases
+ string sequence = a.QueryBases;
+ if ( a.IsReverseStrand() )
+ Utilities::ReverseComplement(sequence);
+
// if sequence fits on single line
- if ( a.QueryBases.length() <= FASTA_LINE_MAX )
- m_out << a.QueryBases << endl;
+ if ( sequence.length() <= FASTA_LINE_MAX )
+ m_out << sequence << endl;
// else split over multiple lines
else {
size_t position = 0;
- size_t seqLength = a.QueryBases.length();
+ size_t seqLength = sequence.length(); // handle reverse strand alignment - bases & qualitiesth();
// write subsequences to each line
while ( position < (seqLength - FASTA_LINE_MAX) ) {
- m_out << a.QueryBases.substr(position, FASTA_LINE_MAX) << endl;
+ m_out << sequence.substr(position, FASTA_LINE_MAX) << endl;
position += FASTA_LINE_MAX;
}
// write final subsequence
- m_out << a.QueryBases.substr(position) << endl;
+ m_out << sequence.substr(position) << endl;
}
}
// BamAlignment.QueryBases
// +
// BamAlignment.Qualities
+ //
+ // N.B. - QueryBases are reverse-complemented (& Qualities reversed) if aligned to reverse strand .
+ // Name is appended "/1" or "/2" if paired-end, to reflect which mate this entry is.
- m_out << "@" << a.Name << endl
- << a.QueryBases << endl
- << "+" << endl
- << a.Qualities << endl;
+ // handle paired-end alignments
+ string name = a.Name;
+ if ( a.IsPaired() )
+ name.append( (a.IsFirstMate() ? "/1" : "/2") );
+
+ // handle reverse strand alignment - bases & qualities
+ string qualities = a.Qualities;
+ string sequence = a.QueryBases;
+ if ( a.IsReverseStrand() ) {
+ Utilities::Reverse(qualities);
+ Utilities::ReverseComplement(sequence);
+ }
+
+ // write to output stream
+ m_out << "@" << name << endl
+ << sequence << endl
+ << "+" << endl
+ << qualities << endl;
}
// print BamAlignment in JSON format
case('H') :
m_out << "\"";
while (tagData[index]) {
- m_out << tagData[index];
+ if (tagData[index] == '\"')
+ m_out << "\\\""; // escape for json
+ else
+ m_out << tagData[index];
++index;
}
m_out << "\"";
;
}
+// Print BamAlignment in YAML format
+void ConvertTool::ConvertToolPrivate::PrintYaml(const BamAlignment& a) {
+
+ // write alignment name
+ m_out << "---" << endl;
+ m_out << a.Name << ":" << endl;
+
+ // write alignment data
+ m_out << " " << "AlndBases: " << a.AlignedBases << endl;
+ m_out << " " << "Qualities: " << a.Qualities << endl;
+ m_out << " " << "Name: " << a.Name << endl;
+ m_out << " " << "Length: " << a.Length << endl;
+ m_out << " " << "TagData: " << a.TagData << endl;
+ m_out << " " << "RefID: " << a.RefID << endl;
+ m_out << " " << "RefName: " << m_references[a.RefID].RefName << endl;
+ m_out << " " << "Position: " << a.Position << endl;
+ m_out << " " << "Bin: " << a.Bin << endl;
+ m_out << " " << "MapQuality: " << a.MapQuality << endl;
+ m_out << " " << "AlignmentFlag: " << a.AlignmentFlag << endl;
+ m_out << " " << "MateRefID: " << a.MateRefID << endl;
+ m_out << " " << "MatePosition: " << a.MatePosition << endl;
+ m_out << " " << "InsertSize: " << a.InsertSize << endl;
+
+ // write Cigar data
+ const vector<CigarOp>& cigarData = a.CigarData;
+ if ( !cigarData.empty() ) {
+ m_out << " " << "Cigar: ";
+ vector<CigarOp>::const_iterator cigarBegin = cigarData.begin();
+ vector<CigarOp>::const_iterator cigarIter = cigarBegin;
+ vector<CigarOp>::const_iterator cigarEnd = cigarData.end();
+ for ( ; cigarIter != cigarEnd; ++cigarIter ) {
+ const CigarOp& op = (*cigarIter);
+ m_out << op.Length << op.Type;
+ }
+ m_out << endl;
+ }
+}
+
bool ConvertTool::ConvertToolPrivate::RunPileupConversion(BamMultiReader* reader) {
// check for valid BamMultiReader
, m_impl(0)
{
// set program details
- Options::SetProgramInfo("bamtools convert", "converts BAM to a number of other formats", "-format <FORMAT> [-in <filename> -in <filename> ...] [-out <filename>] [other options]");
+ Options::SetProgramInfo("bamtools convert", "converts BAM to a number of other formats", "-format <FORMAT> [-in <filename> -in <filename> ...] [-out <filename>] [-region <REGION>] [format-specific options]");
// set up options
OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn());
Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
Options::AddValueOption("-format", "FORMAT", "the output file format - see README for recognized formats", "", m_settings->HasFormat, m_settings->Format, IO_Opts);
-
- OptionGroup* FilterOpts = Options::CreateOptionGroup("Filters");
- Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is read automatically if it exists as <filename>.bai. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, FilterOpts);
+ Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
OptionGroup* PileupOpts = Options::CreateOptionGroup("Pileup Options");
- Options::AddValueOption("-fasta", "FASTA filename", "FASTA reference file", "", m_settings->HasFastaFilename, m_settings->FastaFilename, PileupOpts, "");
+ Options::AddValueOption("-fasta", "FASTA filename", "FASTA reference file", "", m_settings->HasFastaFilename, m_settings->FastaFilename, PileupOpts);
Options::AddOption("-mapqual", "print the mapping qualities", m_settings->IsPrintingPileupMapQualities, PileupOpts);
OptionGroup* SamOpts = Options::CreateOptionGroup("SAM Options");