1 // ***************************************************************************
2 // bamtools_convert.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 7 June 2010
7 // ---------------------------------------------------------------------------
8 // Converts between BAM and a number of other formats
9 // ***************************************************************************
16 #include "bamtools_convert.h"
17 #include "bamtools_format.h"
18 #include "bamtools_options.h"
20 #include "BamReader.h"
21 #include "BamMultiReader.h"
24 using namespace BamTools;
// File-local reference sequence list. ConvertTool::Run() assigns it from
// reader.GetReferenceData(); PrintSAM() indexes it by RefID / MateRefID to
// obtain reference names.
26 static RefVector references;
// Values of the -format option that ConvertTool::Run() compares against.
30 static const string FORMAT_FASTA = "fasta";
31 static const string FORMAT_FASTQ = "fastq";
32 static const string FORMAT_JSON = "json";
33 static const string FORMAT_SAM = "sam";
// Per-format output routines, declared here and defined at the end of the
// file. Each one takes a single alignment by const reference.
35 void PrintFASTA(const BamAlignment& a);
36 void PrintFASTQ(const BamAlignment& a);
37 void PrintJSON(const BamAlignment& a);
38 void PrintSAM(const BamAlignment& a);
40 } // namespace BamTools
42 // ---------------------------------------------
43 // ConvertSettings implementation
// Settings for the convert tool. The ConvertTool constructor binds each
// "Has..." flag / value pair to a command-line option via
// Options::AddValueOption().
// NOTE(review): presumably Options::Parse() sets the flag when the option is
// present and stores its argument in the paired string - confirm against the
// Options implementation.
45 struct ConvertTool::ConvertSettings {
// flags
48 bool HasInputBamFilename;
49 bool HasOutputBamFilename;
// filenames
54 string OutputFilename;
// Initial state: both flags start false; InputFilename starts as
// Options::StandardIn() and OutputFilename as Options::StandardOut().
59 : HasInputBamFilename(false)
60 , HasOutputBamFilename(false)
61 , InputFilename(Options::StandardIn())
62 , OutputFilename(Options::StandardOut())
66 // ---------------------------------------------
67 // ConvertTool implementation
// Constructs the tool: allocates the settings object with new, then registers
// the program description and the -in / -out / -format options with Options.
69 ConvertTool::ConvertTool(void)
71 , m_settings(new ConvertSettings)
73 // set program details
74 Options::SetProgramInfo("bamtools convert", "converts between BAM and a number of other formats", "-in <filename> -out <filename> -format <FORMAT>");
// Set up options. Each value option is bound to a flag and a string in
// m_settings; -in and -out also pass Options::StandardIn() /
// Options::StandardOut() as a trailing argument (presumably the default
// value - confirm against Options::AddValueOption).
77 OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
78 Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename, m_settings->InputFilename, IO_Opts, Options::StandardIn());
// NOTE(review): the help text below calls the output a "BAM file", but this
// tool converts to FASTA/FASTQ/JSON/SAM. Also, PrintSAM() writes to cout;
// confirm whether OutputFilename is honoured anywhere.
79 Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
80 Options::AddValueOption("-format", "FORMAT", "the output file format - see README for recognized formats", "", m_settings->HasFormat, m_settings->Format, IO_Opts);
// Destructor.
// NOTE(review): the constructor allocates m_settings with new; confirm that
// it is released here.
83 ConvertTool::~ConvertTool(void) {
// Shows the tool's help by delegating to Options::DisplayHelp(), which
// presumably prints the program info and options registered in the
// constructor - confirm against the Options implementation.
88 int ConvertTool::Help(void) {
89 Options::DisplayHelp();
// Runs the conversion.
//
// Parses the command line, opens the input named by m_settings->InputFilename,
// stores the reader's reference data in the file-level 'references', then
// dispatches on m_settings->Format. For "sam", every alignment returned by
// reader.GetNextAlignment() is processed in a loop; an unknown format reports
// an error on cerr.
//
// argc, argv: the command line, forwarded to Options::Parse().
// Returns convertedOk converted to int.
// NOTE(review): that is 1 while convertedOk is still true. If the caller uses
// this value as the process exit status, success is reported as non-zero -
// confirm the intended convention.
93 int ConvertTool::Run(int argc, char* argv[]) {
95 bool convertedOk = true;
97 // parse command line arguments
98 Options::Parse(argc, argv, 1);
// NOTE(review): no check of whether Open() succeeded appears on this call -
// confirm that a missing or unreadable input is handled.
102 reader.Open(m_settings->InputFilename);
103 references = reader.GetReferenceData();
105 // ----------------------------------------
106 // do conversion, depending on desired output format
// NOTE(review): the status banners below go to cout, the same stream
// PrintSAM() writes records to; cerr may be the better destination.
109 if ( m_settings->Format == FORMAT_FASTA ) {
110 cout << "Converting to FASTA" << endl;
114 else if ( m_settings->Format == FORMAT_FASTQ) {
115 cout << "Converting to FASTQ" << endl;
119 else if ( m_settings->Format == FORMAT_JSON ) {
120 cout << "Converting to JSON" << endl;
124 else if ( m_settings->Format == FORMAT_SAM ) {
125 BamAlignment alignment;
126 while ( reader.GetNextAlignment(alignment) ) {
131 // unrecognized format
133 cerr << "Unrecognized format: " << m_settings->Format << endl;
134 cerr << "Please see help|README (?) for details on supported formats " << endl;
138 // ------------------------
141 return (int)convertedOk;
144 // ----------------------------------------------------------
145 // Conversion/output methods
146 // ----------------------------------------------------------
148 // print BamAlignment in FASTA format
// a: the alignment to print.
// NOTE(review): ConvertTool::Run() only shows a status message for the
// "fasta" format; confirm whether this routine is called yet.
149 void BamTools::PrintFASTA(const BamAlignment& a) {
153 // print BamAlignment in FASTQ format
// a: the alignment to print.
// NOTE(review): ConvertTool::Run() only shows a status message for the
// "fastq" format; confirm whether this routine is called yet.
154 void BamTools::PrintFASTQ(const BamAlignment& a) {
158 // print BamAlignment in JSON format
// a: the alignment to print.
// NOTE(review): ConvertTool::Run() only shows a status message for the
// "json" format; confirm whether this routine is called yet.
159 void BamTools::PrintJSON(const BamAlignment& a) {
163 // print BamAlignment in SAM format
// Writes one alignment to cout as a single tab-delimited record, using the
// file-level 'references' list to turn RefID / MateRefID into names.
// a: the alignment to print.
164 void BamTools::PrintSAM(const BamAlignment& a) {
167 // <QNAME> <FLAG> <RNAME> <POS> <MAPQ> <CIGAR> <MRNM> <MPOS> <ISIZE> <SEQ> <QUAL> [ <TAG>:<VTYPE>:<VALUE> [...] ]
// NOTE(review): sb is constructed empty and nothing below appears to write
// to it; every field goes straight to cout. It looks like leftover
// scaffolding - confirm and remove, or route the output through it.
169 ostringstream sb("");
171 // write name & alignment flag
172 cout << a.Name << "\t" << a.AlignmentFlag << "\t";
174 // write reference name
// The name is looked up only when RefID is a valid index into 'references'.
175 if ( (a.RefID >= 0) && (a.RefID < (int)references.size()) ) cout << references[a.RefID].RefName << "\t";
178 // write position & map quality
// Position is printed plus one (presumably converting a 0-based stored
// position to SAM's 1-based POS - confirm).
179 cout << a.Position+1 << "\t" << a.MapQuality << "\t";
// write CIGAR: "*" when there are no operations, otherwise each operation
// as <Length><Type> with no separator
182 const vector<CigarOp>& cigarData = a.CigarData;
183 if ( cigarData.empty() ) cout << "*\t";
185 vector<CigarOp>::const_iterator cigarIter = cigarData.begin();
186 vector<CigarOp>::const_iterator cigarEnd = cigarData.end();
187 for ( ; cigarIter != cigarEnd; ++cigarIter ) {
188 const CigarOp& op = (*cigarIter);
189 cout << op.Length << op.Type;
194 // write mate reference name, mate position, & insert size
// Mate fields are written only for paired reads whose MateRefID is a valid
// index; "=" stands for "same reference as this read". Otherwise the three
// fields are written as "*", 0, 0.
195 if ( a.IsPaired() && (a.MateRefID >= 0) && (a.MateRefID < (int)references.size()) ) {
196 if ( a.MateRefID == a.RefID ) cout << "=\t";
197 else cout << references[a.MateRefID].RefName << "\t";
198 cout << a.MatePosition+1 << "\t" << a.InsertSize << "\t";
200 else cout << "*\t0\t0\t";
// write sequence ("*" when empty)
203 if ( a.QueryBases.empty() ) cout << "*\t";
204 else cout << a.QueryBases << "\t";
// write qualities ("*" when empty); no trailing tab, each tag adds its own
207 if ( a.Qualities.empty() ) cout << "*";
208 else cout << a.Qualities;
// write tag data: walk the raw TagData buffer; each entry starts with a
// two-character tag name followed by a one-character type code
211 const char* tagData = a.TagData.c_str();
212 const size_t tagDataLength = a.TagData.length();
214 while ( index < tagDataLength ) {
217 cout << "\t" << a.TagData.substr(index, 2) << ":";
// at() throws std::out_of_range if the buffer ends right after a tag name
221 char type = a.TagData.at(index);
// single printable character
226 cout << "A:" << tagData[index];
// NOTE(review): atoi() parses a NUL-terminated decimal string, but the
// sibling branches treat tagData as packed binary (Unpack*). If these two
// lines handle the one-byte integer types, they should print the byte's
// numeric value (e.g. a cast to int) rather than call atoi - confirm.
231 cout << "i:" << atoi(&tagData[index]);
236 cout << "i:" << atoi(&tagData[index]);
// multi-byte numeric values are decoded with the BgzfData::Unpack* helpers
241 cout << "i:" << BgzfData::UnpackUnsignedShort(&tagData[index]);
246 cout << "i:" << BgzfData::UnpackSignedShort(&tagData[index]);
251 cout << "i:" << BgzfData::UnpackUnsignedInt(&tagData[index]);
256 cout << "i:" << BgzfData::UnpackSignedInt(&tagData[index]);
261 cout << "f:" << BgzfData::UnpackFloat(&tagData[index]);
266 cout << "d:" << BgzfData::UnpackDouble(&tagData[index]);
// string-like value: copy characters up to the terminating NUL
// NOTE(review): this condition does not compare index with tagDataLength; it
// relies on a NUL being present (c_str() guarantees one at the very end).
273 while (tagData[index]) {
274 cout << tagData[index];
282 // write stream to stdout
// NOTE(review): sb.str() looks to be always empty (see the note on sb
// above), so this only ends the record. endl also flushes cout once per
// alignment; "\n" would avoid that cost on large inputs.
283 cout << sb.str() << endl;