// bamtools_split.cpp (c) 2010 Derek Barnett, Erik Garrison
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 7 April 2011 (DB)
+// Last modified: 8 December 2011 (DB)
// ---------------------------------------------------------------------------
// Splits a BAM file on user-specified property, creating a new BAM output
// file for each value found
// flags
bool HasInputFilename;
bool HasCustomOutputStub;
+ bool HasCustomRefPrefix;
bool IsSplittingMapped;
bool IsSplittingPaired;
bool IsSplittingReference;
// string args
string CustomOutputStub;
+ string CustomRefPrefix;
string InputFilename;
string TagToSplit;
SplitSettings(void)
: HasInputFilename(false)
, HasCustomOutputStub(false)
+ , HasCustomRefPrefix(false)
, IsSplittingMapped(false)
, IsSplittingPaired(false)
, IsSplittingReference(false)
, IsSplittingTag(false)
, CustomOutputStub("")
+ , CustomRefPrefix("")
, InputFilename(Options::StandardIn())
, TagToSplit("")
{ }
map<int32_t, BamWriter*> outputFiles;
map<int32_t, BamWriter*>::iterator writerIter;
+ // determine reference prefix
+ string refPrefix = SPLIT_REFERENCE_TOKEN;
+ if ( m_settings->HasCustomRefPrefix )
+ refPrefix = m_settings->CustomRefPrefix;
+
+ // make sure prefix starts with '.'
+ const size_t dotFound = refPrefix.find('.');
+ if ( dotFound != 0 )
+ refPrefix = string(".") + refPrefix;
+
// iterate through alignments
BamAlignment al;
BamWriter* writer;
// if no writer associated with this value
if ( writerIter == outputFiles.end() ) {
+ // fetch reference name for ID
+ string refName;
+ if ( currentRefId == -1 )
+ refName = "unmapped";
+ else
+ refName = m_references.at(currentRefId).RefName;
+
+ // construct new output filename
+ const string outputFilename = m_outputFilenameStub + refPrefix + refName + ".bam";
+
// open new BamWriter
- const string refName = m_references.at(currentRefId).RefName;
- const string outputFilename = m_outputFilenameStub + SPLIT_REFERENCE_TOKEN + refName + ".bam";
writer = new BamWriter;
if ( !writer->Open(outputFilename, m_header, m_references) ) {
cerr << "bamtools split ERROR: could not open " << outputFilename
case (Constants::BAM_TAG_TYPE_STRING) :
case (Constants::BAM_TAG_TYPE_HEX) :
return SplitTagImpl<string>(al);
+
+ case (Constants::BAM_TAG_TYPE_ARRAY) :
+ cerr << "bamtools split ERROR: array tag types are not supported" << endl;
+ return false;
default:
- fprintf(stderr, "bamtools split ERROR: unknown tag type encountered: [%c]\n", tagType);
+ cerr << "bamtools split ERROR: unknown tag type encountered: " << tagType << endl;
return false;
}
}
, m_impl(0)
{
// set program details
- Options::SetProgramInfo("bamtools split", "splits a BAM file on user-specified property, creating a new BAM output file for each value found", "[-in <filename>] [-stub <filename stub>] < -mapped | -paired | -reference | -tag <TAG> > ");
+ const string name = "bamtools split";
+ const string description = "splits a BAM file on user-specified property, creating a new BAM output file for each value found";
+ const string args = "[-in <filename>] [-stub <filename stub>] < -mapped | -paired | -reference [-refPrefix <prefix>] | -tag <TAG> > ";
+ Options::SetProgramInfo(name, description, args);
// set up options
OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInputFilename, m_settings->InputFilename, IO_Opts, Options::StandardIn());
- Options::AddValueOption("-stub", "filename stub", "prefix stub for output BAM files (default behavior is to use input filename, without .bam extension, as stub). If input is stdin and no stub provided, a timestamp is generated as the stub.", "", m_settings->HasCustomOutputStub, m_settings->CustomOutputStub, IO_Opts);
+ Options::AddValueOption("-refPrefix", "string", "custom prefix for splitting by references. Currently files end with REF_<refName>.bam. This option allows you to replace \"REF_\" with a prefix of your choosing.", "",
+ m_settings->HasCustomRefPrefix, m_settings->CustomRefPrefix, IO_Opts);
+ Options::AddValueOption("-stub", "filename stub", "prefix stub for output BAM files (default behavior is to use input filename, without .bam extension, as stub). If input is stdin and no stub provided, a timestamp is generated as the stub.", "",
+ m_settings->HasCustomOutputStub, m_settings->CustomOutputStub, IO_Opts);
OptionGroup* SplitOpts = Options::CreateOptionGroup("Split Options");
Options::AddOption("-mapped", "split mapped/unmapped alignments", m_settings->IsSplittingMapped, SplitOpts);