// bamtools_random.cpp (c) 2010 Derek Barnett, Erik Garrison
// Marth Lab, Department of Biology, Boston College
// ---------------------------------------------------------------------------
-// Last modified: 7 April 2011 (DB)
+// Last modified: 24 July 2013 (DB)
// ---------------------------------------------------------------------------
// Grab a random subset of alignments (testing tool)
// ***************************************************************************
#include <ctime>
#include <cstdlib>
+#include <fstream>
#include <iostream>
#include <string>
#include <vector>
// flags
bool HasAlignmentCount;
bool HasInput;
+ bool HasInputFilelist;
bool HasOutput;
+ bool HasRandomNumberSeed;
bool HasRegion;
bool IsForceCompression;
// parameters
unsigned int AlignmentCount;
vector<string> InputFiles;
+ string InputFilelist;
string OutputFilename;
+ unsigned int RandomNumberSeed;
string Region;
// constructor
RandomSettings(void)
: HasAlignmentCount(false)
, HasInput(false)
+ , HasInputFilelist(false)
, HasOutput(false)
+ , HasRandomNumberSeed(false)
, HasRegion(false)
, IsForceCompression(false)
, AlignmentCount(RANDOM_MAX_ALIGNMENT_COUNT)
, OutputFilename(Options::StandardOut())
+ , RandomNumberSeed(0)
{ }
};
bool RandomTool::RandomToolPrivate::Run(void) {
// set to default stdin if neither input files nor an input file list were provided
- if ( !m_settings->HasInput )
+ if ( !m_settings->HasInput && !m_settings->HasInputFilelist )
m_settings->InputFiles.push_back(Options::StandardIn());
+ // add files in the filelist to the input file list
+ if ( m_settings->HasInputFilelist ) {
+
+     ifstream filelist(m_settings->InputFilelist.c_str(), ios::in);
+     if ( !filelist.is_open() ) {
+         cerr << "bamtools random ERROR: could not open input BAM file list... Aborting." << endl;
+         return false;
+     }
+
+     // the list holds one filename per line; tolerate CRLF line endings and
+     // blank lines so they are not handed to the reader as bogus filenames
+     string line;
+     while ( getline(filelist, line) ) {
+
+         // drop a trailing carriage return left over from a CRLF-terminated line
+         if ( !line.empty() && line[line.size()-1] == '\r' )
+             line.erase(line.size()-1);
+
+         // an empty entry can never name an input file, so skip it
+         if ( line.empty() )
+             continue;
+
+         m_settings->InputFiles.push_back(line);
+     }
+ }
+
// open our reader
BamMultiReader reader;
if ( !reader.Open(m_settings->InputFiles) ) {
}
// seed our random number generator
- srand( time(NULL) );
+ if ( m_settings->HasRandomNumberSeed )
+ srand( m_settings->RandomNumberSeed );
+ else
+ srand( time(NULL) );
// grab random alignments
BamAlignment al;
, m_impl(0)
{
// set program details
- Options::SetProgramInfo("bamtools random", "grab a random subset of alignments", "[-in <filename> -in <filename> ...] [-out <filename>] [-forceCompression] [-n] [-region <REGION>]");
+ Options::SetProgramInfo("bamtools random", "grab a random subset of alignments",
+ "[-in <filename> -in <filename> ... | -list <filelist>] [-out <filename>] [-forceCompression] [-n] [-region <REGION>]");
// set up options
OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
- Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn());
- Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
+ Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn());
+ Options::AddValueOption("-list", "filename", "the input BAM file list, one line per file", "", m_settings->HasInputFilelist, m_settings->InputFilelist, IO_Opts);
+ Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
+ Options::AddValueOption("-region", "REGION", "only pull random alignments from within this genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts);
- Options::AddValueOption("-region", "REGION", "only pull random alignments from within this genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
OptionGroup* SettingsOpts = Options::CreateOptionGroup("Settings");
- Options::AddValueOption("-n", "count", "number of alignments to grab. Note - no duplicate checking is performed", "", m_settings->HasAlignmentCount, m_settings->AlignmentCount, SettingsOpts, RANDOM_MAX_ALIGNMENT_COUNT);
+ Options::AddValueOption("-n", "count", "number of alignments to grab. Note - no duplicate checking is performed", "",
+ m_settings->HasAlignmentCount, m_settings->AlignmentCount, SettingsOpts, RANDOM_MAX_ALIGNMENT_COUNT);
+ Options::AddValueOption("-seed", "unsigned integer", "random number generator seed (for repeatable results). Current time is used if no seed value is provided.", "",
+ m_settings->HasRandomNumberSeed, m_settings->RandomNumberSeed, SettingsOpts);
}
RandomTool::~RandomTool(void) {