X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Ftoolkit%2Fbamtools_split.cpp;h=e6602a9e590e250d3dd204fea990db579db20db4;hb=e7d0ca0a024a13d405872be90f381ff397cdaeec;hp=f4d3db821ed45901a6bd28cf27a41bfbcce99ccd;hpb=8c80d760637f8df39262683cd2570f0589423d36;p=bamtools.git diff --git a/src/toolkit/bamtools_split.cpp b/src/toolkit/bamtools_split.cpp index f4d3db8..e6602a9 100644 --- a/src/toolkit/bamtools_split.cpp +++ b/src/toolkit/bamtools_split.cpp @@ -1,12 +1,11 @@ // *************************************************************************** // bamtools_split.cpp (c) 2010 Derek Barnett, Erik Garrison // Marth Lab, Department of Biology, Boston College -// All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 21 March 2011 (DB) +// Last modified: 8 December 2011 (DB) // --------------------------------------------------------------------------- // Splits a BAM file on user-specified property, creating a new BAM output -// file for each value found. +// file for each value found // *************************************************************************** #include "bamtools_split.h" @@ -70,6 +69,7 @@ struct SplitTool::SplitSettings { // flags bool HasInputFilename; bool HasCustomOutputStub; + bool HasCustomRefPrefix; bool IsSplittingMapped; bool IsSplittingPaired; bool IsSplittingReference; @@ -77,6 +77,7 @@ struct SplitTool::SplitSettings { // string args string CustomOutputStub; + string CustomRefPrefix; string InputFilename; string TagToSplit; @@ -84,11 +85,13 @@ struct SplitTool::SplitSettings { SplitSettings(void) : HasInputFilename(false) , HasCustomOutputStub(false) + , HasCustomRefPrefix(false) , IsSplittingMapped(false) , IsSplittingPaired(false) , IsSplittingReference(false) , IsSplittingTag(false) , CustomOutputStub("") + , CustomRefPrefix("") , InputFilename(Options::StandardIn()) , TagToSplit("") { } @@ -101,8 +104,13 @@ class SplitTool::SplitToolPrivate { // ctor & dtor public: - SplitToolPrivate(SplitTool::SplitSettings* settings); - ~SplitToolPrivate(void); + SplitToolPrivate(SplitTool::SplitSettings* settings) + : m_settings(settings) + { } + + ~SplitToolPrivate(void) { + m_reader.Close(); + } // 'public' interface public: @@ -140,16 +148,6 @@ class SplitTool::SplitToolPrivate { RefVector m_references; }; -// constructor -SplitTool::SplitToolPrivate::SplitToolPrivate(SplitTool::SplitSettings* settings) - : m_settings(settings) -{ } - -// destructor -SplitTool::SplitToolPrivate::~SplitToolPrivate(void) { - m_reader.Close(); -} - void SplitTool::SplitToolPrivate::DetermineOutputFilenameStub(void) { // if user supplied output filename stub, use that @@ -304,6 +302,16 @@ bool SplitTool::SplitToolPrivate::SplitReference(void) { map outputFiles; map::iterator writerIter; + // determine reference prefix + string refPrefix = SPLIT_REFERENCE_TOKEN; + if ( m_settings->HasCustomRefPrefix ) + refPrefix = m_settings->CustomRefPrefix; + + // make sure prefix starts with '.' + const size_t dotFound = refPrefix.find('.'); + if ( dotFound != 0 ) + refPrefix = string(".") + refPrefix; + // iterate through alignments BamAlignment al; BamWriter* writer; @@ -317,9 +325,17 @@ bool SplitTool::SplitToolPrivate::SplitReference(void) { // if no writer associated with this value if ( writerIter == outputFiles.end() ) { + // fetch reference name for ID + string refName; + if ( currentRefId == -1 ) + refName = "unmapped"; + else + refName = m_references.at(currentRefId).RefName; + + // construct new output filename + const string outputFilename = m_outputFilenameStub + refPrefix + refName + ".bam"; + // open new BamWriter - const string refName = m_references.at(currentRefId).RefName; - const string outputFilename = m_outputFilenameStub + SPLIT_REFERENCE_TOKEN + refName + ".bam"; writer = new BamWriter; if ( !writer->Open(outputFilename, m_header, m_references) ) { cerr << "bamtools split ERROR: could not open " << outputFilename @@ -379,9 +395,13 @@ bool SplitTool::SplitToolPrivate::SplitTag(void) { case (Constants::BAM_TAG_TYPE_STRING) : case (Constants::BAM_TAG_TYPE_HEX) : return SplitTagImpl(al); + + case (Constants::BAM_TAG_TYPE_ARRAY) : + cerr << "bamtools split ERROR: array tag types are not supported" << endl; + return false; default: - fprintf(stderr, "bamtools split ERROR: unknown tag type encountered: [%c]\n", tagType); + cerr << "bamtools split ERROR: unknown tag type encountered: " << tagType << endl; return false; } } @@ -392,8 +412,9 @@ bool SplitTool::SplitToolPrivate::SplitTag(void) { // -------------------------------------------------------------------------------- // template method implementation -// N.B. - *technical note* - use of template methods defined in ".cpp" goes against normal practices -// but works here because these are purely internal (no one can call from outside this file) +// *Technical Note* - use of template methods declared & defined in ".cpp" file +// goes against normal practices, but works here because these +// are purely internal (no one can call from outside this file) // close BamWriters & delete pointers template @@ -407,13 +428,17 @@ void SplitTool::SplitToolPrivate::CloseWriters(map& writers) { WriterMapIterator writerEnd = writers.end(); for ( ; writerIter != writerEnd; ++writerIter ) { BamWriter* writer = (*writerIter).second; - if (writer == 0 ) continue; + if ( writer == 0 ) continue; - // close & delete writer + // close BamWriter writer->Close(); + + // destroy BamWriter delete writer; writer = 0; } + + // clear the container (destroying the items doesn't remove them) writers.clear(); } @@ -507,12 +532,18 @@ SplitTool::SplitTool(void) , m_impl(0) { // set program details - Options::SetProgramInfo("bamtools split", "splits a BAM file on user-specified property, creating a new BAM output file for each value found", "[-in ] [-stub ] < -mapped | -paired | -reference | -tag > "); + const string name = "bamtools split"; + const string description = "splits a BAM file on user-specified property, creating a new BAM output file for each value found"; + const string args = "[-in ] [-stub ] < -mapped | -paired | -reference [-refPrefix ] | -tag > "; + Options::SetProgramInfo(name, description, args); // set up options OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInputFilename, m_settings->InputFilename, IO_Opts, Options::StandardIn()); - Options::AddValueOption("-stub", "filename stub", "prefix stub for output BAM files (default behavior is to use input filename, without .bam extension, as stub). If input is stdin and no stub provided, a timestamp is generated as the stub.", "", m_settings->HasCustomOutputStub, m_settings->CustomOutputStub, IO_Opts); + Options::AddValueOption("-refPrefix", "string", "custom prefix for splitting by references. Currently files end with REF_.bam. This option allows you to replace \"REF_\" with a prefix of your choosing.", "", + m_settings->HasCustomRefPrefix, m_settings->CustomRefPrefix, IO_Opts); + Options::AddValueOption("-stub", "filename stub", "prefix stub for output BAM files (default behavior is to use input filename, without .bam extension, as stub). If input is stdin and no stub provided, a timestamp is generated as the stub.", "", + m_settings->HasCustomOutputStub, m_settings->CustomOutputStub, IO_Opts); OptionGroup* SplitOpts = Options::CreateOptionGroup("Split Options"); Options::AddOption("-mapped", "split mapped/unmapped alignments", m_settings->IsSplittingMapped, SplitOpts); @@ -541,10 +572,10 @@ int SplitTool::Run(int argc, char* argv[]) { // parse command line arguments Options::Parse(argc, argv, 1); - // initialize internal implementation + // initialize SplitTool with settings m_impl = new SplitToolPrivate(m_settings); - // run tool, return success/fail + // run SplitTool, return success/fail if ( m_impl->Run() ) return 0; else