X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Ftoolkit%2Fbamtools_convert.cpp;h=54820e7d01c719b621d4aff9c5c2dd7822bd1d5a;hb=2126ee0d204be8293df9492b48bce076a41a2a25;hp=907c4bafc7357cde60940f3737f120ff6a4aebf8;hpb=d776d518237008a804656ff27e9f06707d032ae2;p=bamtools.git diff --git a/src/toolkit/bamtools_convert.cpp b/src/toolkit/bamtools_convert.cpp index 907c4ba..54820e7 100644 --- a/src/toolkit/bamtools_convert.cpp +++ b/src/toolkit/bamtools_convert.cpp @@ -2,7 +2,7 @@ // bamtools_convert.cpp (c) 2010 Derek Barnett, Erik Garrison // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 8 October 2011 +// Last modified: 10 December 2012 // --------------------------------------------------------------------------- // Converts between BAM and a number of other formats // *************************************************************************** @@ -76,6 +76,7 @@ struct ConvertTool::ConvertSettings { // flag bool HasInput; + bool HasInputFilelist; bool HasOutput; bool HasFormat; bool HasRegion; @@ -87,6 +88,7 @@ struct ConvertTool::ConvertSettings { // options vector InputFiles; + string InputFilelist; string OutputFilename; string Format; string Region; @@ -97,6 +99,7 @@ struct ConvertTool::ConvertSettings { // constructor ConvertSettings(void) : HasInput(false) + , HasInputFilelist(false) , HasOutput(false) , HasFormat(false) , HasRegion(false) @@ -151,9 +154,23 @@ bool ConvertTool::ConvertToolPrivate::Run(void) { // initialize conversion input/output // set to default input if none provided - if ( !m_settings->HasInput ) + if ( !m_settings->HasInput && !m_settings->HasInputFilelist ) m_settings->InputFiles.push_back(Options::StandardIn()); + // add files in the filelist to the input file list + if ( m_settings->HasInputFilelist ) { + + ifstream filelist(m_settings->InputFilelist.c_str(), ios::in); + if ( !filelist.is_open() ) { + cerr << "bamtools convert ERROR: could not open input BAM file list... Aborting." << endl; + return false; + } + + string line; + while ( getline(filelist, line) ) + m_settings->InputFiles.push_back(line); + } + // open input files BamMultiReader reader; if ( !reader.Open(m_settings->InputFiles) ) { @@ -396,7 +413,7 @@ void ConvertTool::ConvertToolPrivate::PrintJson(const BamAlignment& a) { m_out << "\"queryBases\":\"" << a.QueryBases << "\","; // write qualities - if ( !a.Qualities.empty() ) { + if ( !a.Qualities.empty() && a.Qualities.at(0) != (char)0xFF ) { string::const_iterator s = a.Qualities.begin(); m_out << "\"qualities\":[" << static_cast(*s) - 33; ++s; @@ -406,7 +423,7 @@ void ConvertTool::ConvertToolPrivate::PrintJson(const BamAlignment& a) { } // write alignment's source BAM file - m_out << "\"filename\":" << a.Filename << ","; + m_out << "\"filename\":\"" << a.Filename << "\","; // write tag data const char* tagData = a.TagData.c_str(); @@ -434,9 +451,15 @@ void ConvertTool::ConvertToolPrivate::PrintJson(const BamAlignment& a) { ++index; break; - case (Constants::BAM_TAG_TYPE_INT8) : + case (Constants::BAM_TAG_TYPE_INT8) : + // force value into integer-type (instead of char value) + m_out << static_cast(tagData[index]); + ++index; + break; + case (Constants::BAM_TAG_TYPE_UINT8) : - m_out << (int)tagData[index]; + // force value into integer-type (instead of char value) + m_out << static_cast(tagData[index]); ++index; break; @@ -497,8 +520,8 @@ void ConvertTool::ConvertToolPrivate::PrintSam(const BamAlignment& a) { // [ :: [...] ] // write name & alignment flag - m_out << a.Name << "\t" << a.AlignmentFlag << "\t"; - + m_out << a.Name << "\t" << a.AlignmentFlag << "\t"; + // write reference name if ( (a.RefID >= 0) && (a.RefID < (int)m_references.size()) ) m_out << m_references[a.RefID].RefName << "\t"; @@ -526,7 +549,7 @@ void ConvertTool::ConvertToolPrivate::PrintSam(const BamAlignment& a) { if ( a.MateRefID == a.RefID ) m_out << "=\t"; else - m_out << m_references[a.MateRefID].RefName << "\t"; + m_out << m_references[a.MateRefID].RefName << "\t"; m_out << a.MatePosition+1 << "\t" << a.InsertSize << "\t"; } else @@ -539,7 +562,7 @@ void ConvertTool::ConvertToolPrivate::PrintSam(const BamAlignment& a) { m_out << a.QueryBases << "\t"; // write qualities - if ( a.Qualities.empty() ) + if ( a.Qualities.empty() || (a.Qualities.at(0) == (char)0xFF) ) m_out << "*"; else m_out << a.Qualities; @@ -565,9 +588,15 @@ void ConvertTool::ConvertToolPrivate::PrintSam(const BamAlignment& a) { ++index; break; - case (Constants::BAM_TAG_TYPE_INT8) : + case (Constants::BAM_TAG_TYPE_INT8) : + // force value into integer-type (instead of char value) + m_out << "i:" << static_cast(tagData[index]); + ++index; + break; + case (Constants::BAM_TAG_TYPE_UINT8) : - m_out << "i:" << (int)tagData[index]; + // force value into integer-type (instead of char value) + m_out << "i:" << static_cast(tagData[index]); ++index; break; @@ -596,7 +625,7 @@ void ConvertTool::ConvertToolPrivate::PrintSam(const BamAlignment& a) { index += sizeof(float); break; - case (Constants::BAM_TAG_TYPE_HEX) : + case (Constants::BAM_TAG_TYPE_HEX) : // fall-through case (Constants::BAM_TAG_TYPE_STRING) : m_out << type << ":"; while (tagData[index]) { @@ -607,7 +636,7 @@ void ConvertTool::ConvertToolPrivate::PrintSam(const BamAlignment& a) { break; } - if ( tagData[index] == '\0') + if ( tagData[index] == '\0' ) break; } @@ -691,11 +720,13 @@ ConvertTool::ConvertTool(void) , m_impl(0) { // set program details - Options::SetProgramInfo("bamtools convert", "converts BAM to a number of other formats", "-format [-in -in ...] [-out ] [-region ] [format-specific options]"); + Options::SetProgramInfo("bamtools convert", "converts BAM to a number of other formats", + "-format [-in -in ... | -list ] [-out ] [-region ] [format-specific options]"); // set up options OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn()); + Options::AddValueOption("-list", "filename", "the input BAM file list, one line per file", "", m_settings->HasInputFilelist, m_settings->InputFilelist, IO_Opts); Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut()); Options::AddValueOption("-format", "FORMAT", "the output file format - see README for recognized formats", "", m_settings->HasFormat, m_settings->Format, IO_Opts); Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts);