// ***************************************************************************
// bamtools_convert.cpp (c) 2010 Derek Barnett, Erik Garrison
// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 7 April 2011
+// Last modified: 10 December 2012
// ---------------------------------------------------------------------------
// Converts between BAM and a number of other formats
// ***************************************************************************
// flag
bool HasInput;
+ bool HasInputFilelist;
bool HasOutput;
bool HasFormat;
bool HasRegion;
// options
vector<string> InputFiles;
+ string InputFilelist;
string OutputFilename;
string Format;
string Region;
// constructor
ConvertSettings(void)
: HasInput(false)
+ , HasInputFilelist(false)
, HasOutput(false)
, HasFormat(false)
, HasRegion(false)
// initialize conversion input/output
// set to default input if none provided
- if ( !m_settings->HasInput )
+ if ( !m_settings->HasInput && !m_settings->HasInputFilelist )
m_settings->InputFiles.push_back(Options::StandardIn());
+ // add files in the filelist to the input file list
+ // (one filename per line; blank lines are ignored)
+ if ( m_settings->HasInputFilelist ) {
+
+ ifstream filelist(m_settings->InputFilelist.c_str(), ios::in);
+ if ( !filelist.is_open() ) {
+ cerr << "bamtools convert ERROR: could not open input BAM file list... Aborting." << endl;
+ return false;
+ }
+
+ string line;
+ while ( getline(filelist, line) ) {
+
+ // drop a trailing carriage return left behind by CRLF line endings
+ if ( !line.empty() && line[line.size()-1] == '\r' )
+ line.erase(line.size()-1);
+
+ // an empty entry is not a filename; skipping it keeps a blank or
+ // trailing line from being handed to the reader as a file to open
+ if ( line.empty() )
+ continue;
+
+ m_settings->InputFiles.push_back(line);
+ }
+ }
+
// open input files
BamMultiReader reader;
if ( !reader.Open(m_settings->InputFiles) ) {
m_out << m_references.at(a.RefID).RefName << "\t"
<< a.Position << "\t"
- << a.GetEndPosition() + 1 << "\t"
+ << a.GetEndPosition() << "\t"
<< a.Name << "\t"
<< a.MapQuality << "\t"
<< (a.IsReverseStrand() ? "-" : "+") << endl;
// N.B. - QueryBases are reverse-complemented if aligned to reverse strand
// print header
- m_out << "> " << a.Name << endl;
+ m_out << ">" << a.Name << endl;
// handle reverse strand alignment - bases
string sequence = a.QueryBases;
m_out << "\"queryBases\":\"" << a.QueryBases << "\",";
// write qualities
- if ( !a.Qualities.empty() ) {
+ if ( !a.Qualities.empty() && a.Qualities.at(0) != (char)0xFF ) {
string::const_iterator s = a.Qualities.begin();
m_out << "\"qualities\":[" << static_cast<short>(*s) - 33;
++s;
++index;
break;
- case (Constants::BAM_TAG_TYPE_INT8) :
+ case (Constants::BAM_TAG_TYPE_INT8) :
+ // Print the tag byte as a number rather than as a character.
+ // Narrow to int8_t before widening so a negative value prints
+ // correctly whether or not the element type of tagData is signed.
+ // NOTE(review): tagData is declared outside this excerpt - confirm its type.
+ m_out << static_cast<int16_t>(static_cast<int8_t>(tagData[index]));
+ ++index;
+ break;
+
case (Constants::BAM_TAG_TYPE_UINT8) :
- m_out << (int)tagData[index];
+ // Print the tag byte as a number rather than as a character.
+ // Go through uint8_t before widening so a byte >= 0x80 is not
+ // sign-extended (0xFF must print as 255, not 65535).
+ m_out << static_cast<uint16_t>(static_cast<uint8_t>(tagData[index]));
++index;
break;
// <QNAME> <FLAG> <RNAME> <POS> <MAPQ> <CIGAR> <MRNM> <MPOS> <ISIZE> <SEQ> <QUAL> [ <TAG>:<VTYPE>:<VALUE> [...] ]
// write name & alignment flag
- m_out << a.Name << "\t" << a.AlignmentFlag << "\t";
-
+ m_out << a.Name << "\t" << a.AlignmentFlag << "\t";
+
// write reference name
if ( (a.RefID >= 0) && (a.RefID < (int)m_references.size()) )
m_out << m_references[a.RefID].RefName << "\t";
if ( a.MateRefID == a.RefID )
m_out << "=\t";
else
- m_out << m_references[a.MateRefID].RefName << "\t";
+ m_out << m_references[a.MateRefID].RefName << "\t";
m_out << a.MatePosition+1 << "\t" << a.InsertSize << "\t";
}
else
m_out << a.QueryBases << "\t";
// write qualities
- if ( a.Qualities.empty() )
+ if ( a.Qualities.empty() || (a.Qualities.at(0) == (char)0xFF) )
m_out << "*";
else
m_out << a.Qualities;
++index;
break;
- case (Constants::BAM_TAG_TYPE_INT8) :
+ case (Constants::BAM_TAG_TYPE_INT8) :
+ // Print the tag byte as a number rather than as a character.
+ // Narrow to int8_t before widening so a negative value prints
+ // correctly whether or not the element type of tagData is signed.
+ // NOTE(review): tagData is declared outside this excerpt - confirm its type.
+ m_out << "i:" << static_cast<int16_t>(static_cast<int8_t>(tagData[index]));
+ ++index;
+ break;
+
case (Constants::BAM_TAG_TYPE_UINT8) :
- m_out << "i:" << (int)tagData[index];
+ // Print the tag byte as a number rather than as a character.
+ // Go through uint8_t before widening so a byte >= 0x80 is not
+ // sign-extended (0xFF must print as 255, not 65535).
+ m_out << "i:" << static_cast<uint16_t>(static_cast<uint8_t>(tagData[index]));
++index;
break;
index += sizeof(float);
break;
- case (Constants::BAM_TAG_TYPE_HEX) :
+ case (Constants::BAM_TAG_TYPE_HEX) : // fall-through
case (Constants::BAM_TAG_TYPE_STRING) :
m_out << type << ":";
while (tagData[index]) {
break;
}
- if ( tagData[index] == '\0')
+ if ( tagData[index] == '\0' )
break;
}
, m_impl(0)
{
// set program details
- Options::SetProgramInfo("bamtools convert", "converts BAM to a number of other formats", "-format <FORMAT> [-in <filename> -in <filename> ...] [-out <filename>] [-region <REGION>] [format-specific options]");
+ Options::SetProgramInfo("bamtools convert", "converts BAM to a number of other formats",
+ "-format <FORMAT> [-in <filename> -in <filename> ... | -list <filelist>] [-out <filename>] [-region <REGION>] [format-specific options]");
// set up options
OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn());
+ Options::AddValueOption("-list", "filename", "the input BAM file list, one line per file", "", m_settings->HasInputFilelist, m_settings->InputFilelist, IO_Opts);
Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
Options::AddValueOption("-format", "FORMAT", "the output file format - see README for recognized formats", "", m_settings->HasFormat, m_settings->Format, IO_Opts);
Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts);