// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 28 June 2011
+// Last modified: 6 July 2011
// ---------------------------------------------------------------------------
// Resolves paired-end reads (marking the IsProperPair flag as needed).
// ***************************************************************************
// general ResolveTool constants
// --------------------------------------------------------------------------
-static const int NUM_MODELS = 8;
-static const string READ_GROUP_TAG = "RG";
-static const double DEFAULT_CONFIDENCE_INTERVAL = 0.9973;
-static const double DEFAULT_UNUSEDMODEL_THRESHOLD = 0.1;
+static const int NUM_MODELS = 8;
+static const string READ_GROUP_TAG = "RG";
+static const double DEFAULT_CONFIDENCE_INTERVAL = 0.9973;
+static const uint16_t DEFAULT_MIN_MAPQUALITY = 1;
+static const double DEFAULT_UNUSEDMODEL_THRESHOLD = 0.1;
// --------------------------------------------------------------------------
// stats file constants
// option keywords
static const string OPTION_CONFIDENCEINTERVAL = "ConfidenceInterval";
+static const string OPTION_MINIMUMMAPQUALITY = "MinimumMapQuality";
static const string OPTION_UNUSEDMODELTHRESHOLD = "UnusedModelThreshold";
static const string OPTION_FORCEMARKREADGROUPS = "ForceMarkReadGroups";
struct ResolveTool::ResolveSettings {
- // flags
- bool HasInputBamFile;
- bool HasOutputBamFile;
- bool IsForceCompression;
- bool HasStatsFile;
+ // modes
bool IsMakeStats;
bool IsMarkPairs;
bool IsTwoPass;
+
+ // I/O flags
+ bool HasInputBamFile;
+ bool HasOutputBamFile;
+ bool HasStatsFile;
+ bool IsForceCompression;
+
+ // resolve option flags
bool HasConfidenceInterval;
+ bool HasForceMarkReadGroups;
+ bool HasMinimumMapQuality;
bool HasUnusedModelThreshold;
- // filenames
+ // I/O filenames
string InputBamFilename;
string OutputBamFilename;
string StatsFilename;
string ReadNamesFilename; // ** N.B. - Only used internally, not set from cmdline **
- // 'resolve options'
- double ConfidenceInterval;
- double UnusedModelThreshold;
- bool HasForceMarkReadGroups;
+ // resolve options
+ double ConfidenceInterval;
+ uint16_t MinimumMapQuality;
+ double UnusedModelThreshold;
// constructor
ResolveSettings(void)
- : HasInputBamFile(false)
- , HasOutputBamFile(false)
- , IsForceCompression(false)
- , HasStatsFile(false)
- , IsMakeStats(false)
+ : IsMakeStats(false)
, IsMarkPairs(false)
, IsTwoPass(false)
+ , HasInputBamFile(false)
+ , HasOutputBamFile(false)
+ , HasStatsFile(false)
+ , IsForceCompression(false)
, HasConfidenceInterval(false)
+ , HasForceMarkReadGroups(false)
+ , HasMinimumMapQuality(false)
, HasUnusedModelThreshold(false)
, InputBamFilename(Options::StandardIn())
, OutputBamFilename(Options::StandardOut())
, StatsFilename("")
, ReadNamesFilename(DEFAULT_READNAME_FILE)
, ConfidenceInterval(DEFAULT_CONFIDENCE_INTERVAL)
+ , MinimumMapQuality(DEFAULT_MIN_MAPQUALITY)
, UnusedModelThreshold(DEFAULT_UNUSEDMODEL_THRESHOLD)
- , HasForceMarkReadGroups(false)
{ }
};
return true;
}
+ // ForceMarkReadGroups
+ if ( option == OPTION_FORCEMARKREADGROUPS ) {
+ value >> settings->HasForceMarkReadGroups;
+ return true;
+ }
+
+ // MinimumMapQuality
+ if ( option == OPTION_MINIMUMMAPQUALITY ) {
+ value >> settings->MinimumMapQuality;
+ settings->HasMinimumMapQuality = true;
+ return true;
+ }
+
// UnusedModelThreshold
if ( option == OPTION_UNUSEDMODELTHRESHOLD ) {
value >> settings->UnusedModelThreshold;
return true;
}
- // ForceMarkReadGroups
- if ( option == OPTION_FORCEMARKREADGROUPS ) {
- value >> settings->HasForceMarkReadGroups;
- return true;
- }
-
// otherwise unknown option
cerr << "bamtools resolve ERROR - unrecognized option: " << option << " in stats file" << endl;
return false;
// [Options]
// ConfidenceInterval=<double>
- // UnusedModelThreshold=<double>
// ForceMarkReadGroups=<true|false>
+ // MinimumMapQuality=<uint16_t>
+ // UnusedModelThreshold=<double>
// \n
m_stream << OPTIONS_TOKEN << endl
<< OPTION_CONFIDENCEINTERVAL << EQUAL_CHAR << settings->ConfidenceInterval << endl
- << OPTION_UNUSEDMODELTHRESHOLD << EQUAL_CHAR << settings->UnusedModelThreshold << endl
+ << OPTION_MINIMUMMAPQUALITY << EQUAL_CHAR << settings->MinimumMapQuality << endl
<< OPTION_FORCEMARKREADGROUPS << EQUAL_CHAR << boolalpha << settings->HasForceMarkReadGroups << endl
+ << OPTION_UNUSEDMODELTHRESHOLD << EQUAL_CHAR << settings->UnusedModelThreshold << endl
<< endl;
}
if ( m_settings->ConfidenceInterval < 0.0 || m_settings->ConfidenceInterval > 1.0 )
errors.push_back("Invalid confidence interval. Must be between 0 and 1");
}
+ if ( m_settings->HasMinimumMapQuality ) {
+ if ( m_settings->MinimumMapQuality >= 256 )
+ errors.push_back("Invalid minimum map quality. Must be between 0 and 255");
+ }
if ( m_settings->HasUnusedModelThreshold ) {
if ( m_settings->UnusedModelThreshold < 0.0 || m_settings->UnusedModelThreshold > 1.0 )
errors.push_back("Invalid unused model threshold. Must be between 0 and 1");
ReadGroupResolver& resolver = (*rgIter).second;
// determine unique-ness of current alignment
- const bool isCurrentMateUnique = ( al.MapQuality != 0 );
+ const bool isCurrentMateUnique = ( al.MapQuality >= m_settings->MinimumMapQuality );
// look up read name
readNameIter = resolver.ReadNames.find(al.Name);
// quit check if alignment & its mate are on differenct references
if ( al.RefID != al.MateRefID ) return;
- // quit check if map quality is 0
- if ( al.MapQuality == 0 ) return;
+ // quit check if map quality less than cutoff
+ if ( al.MapQuality < m_settings->MinimumMapQuality ) return;
// get read group from alignment
// empty string if not found, this is OK - we handle empty read group case
"All MakeStats & MarkPairs Mode Settings are available. "
"The intermediate stats file is not necessary, but if the -stats options is used, then one will be generated. "
"You may find this useful for documentation purposes.";
+ const string minMapQualDescription = "minimum map quality. Used in -makeStats mode as a heuristic for determining a mate's "
+ "uniqueness. Used in -markPairs mode as a filter for marking candidate proper pairs.";
const string confidenceIntervalDescription = "confidence interval. Set min/max fragment lengths such that we capture "
"this fraction of pairs";
const string unusedModelThresholdDescription = "unused model threshold. The resolve tool considers 8 possible orientation models "
Options::AddOption("-markPairs", markPairsDescription, m_settings->IsMarkPairs, ModeOpts);
Options::AddOption("-twoPass", twoPassDescription, m_settings->IsTwoPass, ModeOpts);
+ OptionGroup* GeneralOpts = Options::CreateOptionGroup("General Resolve Options (available in all modes)");
+ Options::AddValueOption("-minMQ", "unsigned short", minMapQualDescription, "",
+ m_settings->HasMinimumMapQuality, m_settings->MinimumMapQuality, GeneralOpts);
+
OptionGroup* MakeStatsOpts = Options::CreateOptionGroup("MakeStats Mode Options (disabled in -markPairs mode)");
Options::AddValueOption("-ci", "double", confidenceIntervalDescription, "",
m_settings->HasConfidenceInterval, m_settings->ConfidenceInterval, MakeStatsOpts);