// ***************************************************************************
// bamtools_split.cpp (c) 2010 Derek Barnett, Erik Garrison
// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 21 March 2011 (DB)
+// Last modified: 8 December 2011 (DB)
// ---------------------------------------------------------------------------
// Splits a BAM file on user-specified property, creating a new BAM output
-// file for each value found.
+// file for each value found
// ***************************************************************************
#include "bamtools_split.h"
// flags
bool HasInputFilename;
bool HasCustomOutputStub;
+ bool HasCustomRefPrefix;
bool IsSplittingMapped;
bool IsSplittingPaired;
bool IsSplittingReference;
// string args
string CustomOutputStub;
+ string CustomRefPrefix;
string InputFilename;
string TagToSplit;
// Default constructor: every flag starts false and every string argument starts
// empty, except InputFilename, which is initialized from Options::StandardIn().
SplitSettings(void)
: HasInputFilename(false)
, HasCustomOutputStub(false)
+ , HasCustomRefPrefix(false) // flag handed to Options::AddValueOption for "-refPrefix"
, IsSplittingMapped(false)
, IsSplittingPaired(false)
, IsSplittingReference(false)
, IsSplittingTag(false)
, CustomOutputStub("")
+ , CustomRefPrefix("") // value for "-refPrefix"; only read when HasCustomRefPrefix is true
, InputFilename(Options::StandardIn())
, TagToSplit("")
{ }
// ctor & dtor
public:
- SplitToolPrivate(SplitTool::SplitSettings* settings);
- ~SplitToolPrivate(void);
+ SplitToolPrivate(SplitTool::SplitSettings* settings) // ctor (now defined inline): stores the caller's settings pointer
+ : m_settings(settings) // pointer is copied, not the settings object
+ { }
+
+ ~SplitToolPrivate(void) { // dtor (now defined inline): only closes the reader; m_settings is not freed here
+ m_reader.Close();
+ }
// 'public' interface
public:
RefVector m_references;
};
-// constructor
-SplitTool::SplitToolPrivate::SplitToolPrivate(SplitTool::SplitSettings* settings)
- : m_settings(settings)
-{ }
-
-// destructor
-SplitTool::SplitToolPrivate::~SplitToolPrivate(void) {
- m_reader.Close();
-}
-
void SplitTool::SplitToolPrivate::DetermineOutputFilenameStub(void) {
// if user supplied output filename stub, use that
map<int32_t, BamWriter*> outputFiles;
map<int32_t, BamWriter*>::iterator writerIter;
+ // determine reference prefix: default token, overridden by the user's -refPrefix value if one was supplied
+ string refPrefix = SPLIT_REFERENCE_TOKEN;
+ if ( m_settings->HasCustomRefPrefix )
+ refPrefix = m_settings->CustomRefPrefix;
+
+ // make sure prefix starts with '.' (it is appended directly after the output filename stub)
+ const size_t dotFound = refPrefix.find('.'); // index of the first '.', or string::npos when there is none
+ if ( dotFound != 0 ) // true unless the very first character is '.'; a '.' later in the prefix does not count
+ refPrefix = string(".") + refPrefix;
+
// iterate through alignments
BamAlignment al;
BamWriter* writer;
// if no writer associated with this value
if ( writerIter == outputFiles.end() ) {
+ // fetch reference name for ID
+ string refName;
+ if ( currentRefId == -1 ) // -1 has no entry in m_references; presumably the "no reference" ID of unaligned reads - TODO confirm
+ refName = "unmapped";
+ else
+ refName = m_references.at(currentRefId).RefName; // NOTE(review): at() is bounds-checked if RefVector is a std::vector - confirm
+
+ // construct new output filename: <stub><refPrefix><refName>.bam
+ const string outputFilename = m_outputFilenameStub + refPrefix + refName + ".bam";
+
// open new BamWriter
- const string refName = m_references.at(currentRefId).RefName;
- const string outputFilename = m_outputFilenameStub + SPLIT_REFERENCE_TOKEN + refName + ".bam";
writer = new BamWriter;
if ( !writer->Open(outputFilename, m_header, m_references) ) {
cerr << "bamtools split ERROR: could not open " << outputFilename
case (Constants::BAM_TAG_TYPE_STRING) :
case (Constants::BAM_TAG_TYPE_HEX) :
return SplitTagImpl<string>(al);
+
+ case (Constants::BAM_TAG_TYPE_ARRAY) :
+ cerr << "bamtools split ERROR: array tag types are not supported" << endl;
+ return false;
default:
- fprintf(stderr, "bamtools split ERROR: unknown tag type encountered: [%c]\n", tagType);
+ cerr << "bamtools split ERROR: unknown tag type encountered: " << tagType << endl;
return false;
}
}
// --------------------------------------------------------------------------------
// template method implementation
-// N.B. - *technical note* - use of template methods defined in ".cpp" goes against normal practices
-// but works here because these are purely internal (no one can call from outside this file)
+// *Technical Note* - use of template methods declared & defined in ".cpp" file
+// goes against normal practices, but works here because these
+// are purely internal (no one can call from outside this file)
// close BamWriters & delete pointers
template<typename T>
WriterMapIterator writerEnd = writers.end();
for ( ; writerIter != writerEnd; ++writerIter ) {
BamWriter* writer = (*writerIter).second;
- if (writer == 0 ) continue;
+ if ( writer == 0 ) continue;
- // close & delete writer
+ // close BamWriter
writer->Close();
+
+ // destroy BamWriter
delete writer;
writer = 0;
}
+
+ // clear the container (destroying the items doesn't remove them)
writers.clear();
}
, m_impl(0)
{
// set program details
- Options::SetProgramInfo("bamtools split", "splits a BAM file on user-specified property, creating a new BAM output file for each value found", "[-in <filename>] [-stub <filename stub>] < -mapped | -paired | -reference | -tag <TAG> > ");
+ const string name = "bamtools split";
+ const string description = "splits a BAM file on user-specified property, creating a new BAM output file for each value found";
+ const string args = "[-in <filename>] [-stub <filename stub>] < -mapped | -paired | -reference [-refPrefix <prefix>] | -tag <TAG> > ";
+ Options::SetProgramInfo(name, description, args);
// set up options
OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInputFilename, m_settings->InputFilename, IO_Opts, Options::StandardIn());
- Options::AddValueOption("-stub", "filename stub", "prefix stub for output BAM files (default behavior is to use input filename, without .bam extension, as stub). If input is stdin and no stub provided, a timestamp is generated as the stub.", "", m_settings->HasCustomOutputStub, m_settings->CustomOutputStub, IO_Opts);
+ Options::AddValueOption("-refPrefix", "string", "custom prefix for splitting by references. Currently files end with REF_<refName>.bam. This option allows you to replace \"REF_\" with a prefix of your choosing.", "",
+ m_settings->HasCustomRefPrefix, m_settings->CustomRefPrefix, IO_Opts);
+ Options::AddValueOption("-stub", "filename stub", "prefix stub for output BAM files (default behavior is to use input filename, without .bam extension, as stub). If input is stdin and no stub provided, a timestamp is generated as the stub.", "",
+ m_settings->HasCustomOutputStub, m_settings->CustomOutputStub, IO_Opts);
OptionGroup* SplitOpts = Options::CreateOptionGroup("Split Options");
Options::AddOption("-mapped", "split mapped/unmapped alignments", m_settings->IsSplittingMapped, SplitOpts);
// parse command line arguments
Options::Parse(argc, argv, 1);
- // initialize internal implementation
+ // initialize SplitTool with settings
m_impl = new SplitToolPrivate(m_settings);
- // run tool, return success/fail
+ // run SplitTool, return success/fail
if ( m_impl->Run() )
return 0;
else