From 82ded957c29023424742e76084b8aa6f2ee0cb2c Mon Sep 17 00:00:00 2001
From: Derek
Date: Mon, 7 Jun 2010 13:58:46 -0400
Subject: [PATCH] Began rolling over bamtools_sam into bamtools_convert. Kinda hacky setup for now, but ok to get conversion routines tested

---
 bamtools_convert.cpp | 200 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 196 insertions(+), 4 deletions(-)

diff --git a/bamtools_convert.cpp b/bamtools_convert.cpp
index bf54b03..ace8191 100644
--- a/bamtools_convert.cpp
+++ b/bamtools_convert.cpp
@@ -3,24 +3,42 @@
 // Marth Lab, Department of Biology, Boston College
 // All rights reserved.
 // ---------------------------------------------------------------------------
-// Last modified: 2 June 2010
+// Last modified: 7 June 2010
 // ---------------------------------------------------------------------------
 // Converts between BAM and a number of other formats
 // ***************************************************************************
 
 #include <iostream>
+#include <sstream>
 #include <string>
 #include <vector>
 
 #include "bamtools_convert.h"
 #include "bamtools_format.h"
 #include "bamtools_options.h"
+#include "BGZF.h"
 #include "BamReader.h"
 #include "BamMultiReader.h"
 
 using namespace std;
 using namespace BamTools;
 
+static RefVector references;
+
+namespace BamTools {
+
+    static const string FORMAT_FASTA = "fasta";
+    static const string FORMAT_FASTQ = "fastq";
+    static const string FORMAT_JSON  = "json";
+    static const string FORMAT_SAM   = "sam";
+
+    void PrintFASTA(const BamAlignment& a);
+    void PrintFASTQ(const BamAlignment& a);
+    void PrintJSON(const BamAlignment& a);
+    void PrintSAM(const BamAlignment& a);
+
+} // namespace BamTools
+
 // ---------------------------------------------
 // ConvertSettings implementation
 
@@ -74,19 +92,193 @@ int ConvertTool::Help(void) {
 
 int ConvertTool::Run(int argc, char* argv[]) {
 
+    bool convertedOk = true;
+
     // parse command line arguments
     Options::Parse(argc, argv, 1);
 
     // open files
     BamReader reader;
     reader.Open(m_settings->InputFilename);
+    references = reader.GetReferenceData();
 
 
-    // do conversion
+    // ----------------------------------------
+    // do conversion, depending on desired output format
+
+    // FASTA
+    if ( m_settings->Format == FORMAT_FASTA ) {
+        cout << "Converting to FASTA" << endl;
+    }
+    // FASTQ
+    else if ( m_settings->Format == FORMAT_FASTQ) {
+        cout << "Converting to FASTQ" << endl;
+    }
+    // JSON
+    else if ( m_settings->Format == FORMAT_JSON ) {
+        cout << "Converting to JSON" << endl;
+    }
+    // SAM
+    else if ( m_settings->Format == FORMAT_SAM ) {
+        BamAlignment alignment;
+        while ( reader.GetNextAlignment(alignment) ) {
+            PrintSAM(alignment);
+        }
+    }
+    // unrecognized format
+    else {
+        cerr << "Unrecognized format: " << m_settings->Format << endl;
+        cerr << "Please see help|README (?) for details on supported formats " << endl;
+        convertedOk = false;
+    }
+
+    // ------------------------
 
 
 
     // clean up & exit
     reader.Close();
-    return 0;
-}
\ No newline at end of file
+    return convertedOk ? 0 : 1;  // 0 on success, like the 'return 0;' this replaces; non-zero on failure
+}
+
+// ----------------------------------------------------------
+// Conversion/output methods
+// ----------------------------------------------------------
+
+// print BamAlignment in FASTA format
+void BamTools::PrintFASTA(const BamAlignment& a) {
+
+}
+
+// print BamAlignment in FASTQ format
+void BamTools::PrintFASTQ(const BamAlignment& a) {
+
+}
+
+// print BamAlignment in JSON format
+void BamTools::PrintJSON(const BamAlignment& a) {
+
+}
+
+// print BamAlignment in SAM format
+void BamTools::PrintSAM(const BamAlignment& a) {
+
+    // tab-delimited
+    // <QNAME> <FLAG> <RNAME> <POS> <MAPQ> <CIGAR> <MRNM> <MPOS> <ISIZE> <SEQ> <QUAL> [ <TAG>:<VTYPE>:<VALUE> [...] ]
+
+    ostringstream sb("");
+
+    // write name & alignment flag
+    cout << a.Name << "\t" << a.AlignmentFlag << "\t";
+
+    // write reference name
+    if ( (a.RefID >= 0) && (a.RefID < (int)references.size()) ) cout << references[a.RefID].RefName << "\t";
+    else cout << "*\t";
+
+    // write position & map quality
+    cout << a.Position+1 << "\t" << a.MapQuality << "\t";
+
+    // write CIGAR
+    const vector<CigarOp>& cigarData = a.CigarData;
+    if ( cigarData.empty() ) cout << "*\t";
+    else {
+        vector<CigarOp>::const_iterator cigarIter = cigarData.begin();
+        vector<CigarOp>::const_iterator cigarEnd  = cigarData.end();
+        for ( ; cigarIter != cigarEnd; ++cigarIter ) {
+            const CigarOp& op = (*cigarIter);
+            cout << op.Length << op.Type;
+        }
+        cout << "\t";
+    }
+
+    // write mate reference name, mate position, & insert size
+    if ( a.IsPaired() && (a.MateRefID >= 0) && (a.MateRefID < (int)references.size()) ) {
+        if ( a.MateRefID == a.RefID ) cout << "=\t";
+        else cout << references[a.MateRefID].RefName << "\t";
+        cout << a.MatePosition+1 << "\t" << a.InsertSize << "\t";
+    }
+    else cout << "*\t0\t0\t";
+
+    // write sequence
+    if ( a.QueryBases.empty() ) cout << "*\t";
+    else cout << a.QueryBases << "\t";
+
+    // write qualities
+    if ( a.Qualities.empty() ) cout << "*";
+    else cout << a.Qualities;
+
+    // write tag data (each entry is a 2-byte name, a 1-byte type code, then the value)
+    const char* tagData = a.TagData.c_str();
+    const size_t tagDataLength = a.TagData.length();
+    size_t index = 0;
+    while ( index + 3 <= tagDataLength ) {  // name + type must be present, else at() below would throw on truncated data
+
+        // write tag name
+        cout << "\t" << a.TagData.substr(index, 2) << ":";
+        index += 2;
+
+        // get data type
+        char type = a.TagData.at(index);
+        ++index;
+
+        switch (type) {
+            case('A') :
+                cout << "A:" << tagData[index];
+                ++index;
+                break;
+
+            case('C') :
+                cout << "i:" << static_cast<int>(static_cast<unsigned char>(tagData[index]));  // one raw unsigned byte, not ASCII digits
+                ++index;
+                break;
+
+            case('c') :
+                cout << "i:" << static_cast<int>(static_cast<signed char>(tagData[index]));  // one raw signed byte, not ASCII digits
+                ++index;
+                break;
+
+            case('S') :
+                cout << "i:" << BgzfData::UnpackUnsignedShort(&tagData[index]);
+                index += 2;
+                break;
+
+            case('s') :
+                cout << "i:" << BgzfData::UnpackSignedShort(&tagData[index]);
+                index += 2;
+                break;
+
+            case('I') :
+                cout << "i:" << BgzfData::UnpackUnsignedInt(&tagData[index]);
+                index += 4;
+                break;
+
+            case('i') :
+                cout << "i:" << BgzfData::UnpackSignedInt(&tagData[index]);
+                index += 4;
+                break;
+
+            case('f') :
+                cout << "f:" << BgzfData::UnpackFloat(&tagData[index]);
+                index += 4;
+                break;
+
+            case('d') :
+                cout << "d:" << BgzfData::UnpackDouble(&tagData[index]);
+                index += 8;
+                break;
+
+            case('Z') :
+            case('H') :
+                cout << type << ":";
+                while (tagData[index]) {
+                    cout << tagData[index];
+                    ++index;
+                }
+                ++index;
+                break;
+        }
+    }
+
+    // write stream to stdout
+    cout << sb.str() << endl;
+}
-- 
2.39.5