// ***************************************************************************
// bamtools_resolve.cpp (c) 2011
// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 11 June 2011
+// Last modified: 24 July 2013 (DB)
// ---------------------------------------------------------------------------
// Resolves paired-end reads (marking the IsProperPair flag as needed).
// ***************************************************************************
#include <algorithm>
#include <cassert>
#include <cctype>
+#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
// general ResolveTool constants
// --------------------------------------------------------------------------
-static const int NUM_MODELS = 8;
-static const string READ_GROUP_TAG = "RG";
-static const double DEFAULT_CONFIDENCE_INTERVAL = 0.9973;
-static const double DEFAULT_UNUSEDMODEL_THRESHOLD = 0.1;
+static const int NUM_MODELS = 8;
+static const string READ_GROUP_TAG = "RG";
+static const double DEFAULT_CONFIDENCE_INTERVAL = 0.9973;
+static const uint16_t DEFAULT_MIN_MAPQUALITY = 1;
+static const double DEFAULT_UNUSEDMODEL_THRESHOLD = 0.1;
// --------------------------------------------------------------------------
// stats file constants
// option keywords
static const string OPTION_CONFIDENCEINTERVAL = "ConfidenceInterval";
+static const string OPTION_MINIMUMMAPQUALITY = "MinimumMapQuality";
static const string OPTION_UNUSEDMODELTHRESHOLD = "UnusedModelThreshold";
static const string OPTION_FORCEMARKREADGROUPS = "ForceMarkReadGroups";
+// other string constants
+static const string RG_FIELD_DESCRIPTION =
+ "#<name> <medianFL> <minFL> <maxFL> <topModelID> <nextTopModelID> <isAmbiguous?>";
+
+static const string MODEL_DESCRIPTION =
+ "# ------------- Model Types Description ---------------\n"
+ "#\n"
+ "# ID Position Orientation \n"
+ "# 1 mate1 < mate2 mate1:forward, mate2:forward \n"
+ "# 2 mate1 < mate2 mate1:forward, mate2:reverse \n"
+ "# 3 mate1 < mate2 mate1:reverse, mate2:forward \n"
+ "# 4 mate1 < mate2 mate1:reverse, mate2:reverse \n"
+ "# 5 mate2 < mate1 mate2:forward, mate1:forward \n"
+ "# 6 mate2 < mate1 mate2:forward, mate1:reverse \n"
+ "# 7 mate2 < mate1 mate2:reverse, mate1:forward \n"
+ "# 8 mate2 < mate1 mate2:reverse, mate1:reverse \n"
+ "# -----------------------------------------------------\n";
+
+// --------------------------------------------------------------------------
+// unique readname file constants
+// --------------------------------------------------------------------------
+
+static const string READNAME_FILE_SUFFIX = ".uniq_names.txt";
+static const string DEFAULT_READNAME_FILE = "bt_resolve_TEMP" + READNAME_FILE_SUFFIX;
+
// --------------------------------------------------------------------------
// ModelType implementation
// data members
uint16_t ID;
- vector<uint32_t> FragmentLengths;
+ vector<int32_t> FragmentLengths;
// ctor
ModelType(const uint16_t id)
}
// convenience access to internal fragment lengths vector
- vector<uint32_t>::iterator begin(void) { return FragmentLengths.begin(); }
- vector<uint32_t>::const_iterator begin(void) const { return FragmentLengths.begin(); }
+ vector<int32_t>::iterator begin(void) { return FragmentLengths.begin(); }
+ vector<int32_t>::const_iterator begin(void) const { return FragmentLengths.begin(); }
void clear(void) { FragmentLengths.clear(); }
- vector<uint32_t>::iterator end(void) { return FragmentLengths.end(); }
- vector<uint32_t>::const_iterator end(void) const { return FragmentLengths.end(); }
- void push_back(const uint32_t& x) { FragmentLengths.push_back(x); }
+ vector<int32_t>::iterator end(void) { return FragmentLengths.end(); }
+ vector<int32_t>::const_iterator end(void) const { return FragmentLengths.end(); }
+ void push_back(const int32_t& x) { FragmentLengths.push_back(x); }
size_t size(void) const { return FragmentLengths.size(); }
// constants
// determine 'model type'
if ( m1_begin < m2_begin ) {
- if ( !m1_isReverseStrand && !m2_isReverseStrand ) return 0;
- if ( !m1_isReverseStrand && m2_isReverseStrand ) return 1;
- if ( m1_isReverseStrand && !m2_isReverseStrand ) return 2;
- if ( m1_isReverseStrand && m2_isReverseStrand ) return 3;
+ if ( !m1_isReverseStrand && !m2_isReverseStrand ) return 0; // ID: 1
+ if ( !m1_isReverseStrand && m2_isReverseStrand ) return 1; // ID: 2
+ if ( m1_isReverseStrand && !m2_isReverseStrand ) return 2; // ID: 3
+ if ( m1_isReverseStrand && m2_isReverseStrand ) return 3; // ID: 4
} else {
- if ( !m2_isReverseStrand && !m1_isReverseStrand ) return 4;
- if ( !m2_isReverseStrand && m1_isReverseStrand ) return 5;
- if ( m2_isReverseStrand && !m1_isReverseStrand ) return 6;
- if ( m2_isReverseStrand && m1_isReverseStrand ) return 7;
+ if ( !m2_isReverseStrand && !m1_isReverseStrand ) return 4; // ID: 5
+ if ( !m2_isReverseStrand && m1_isReverseStrand ) return 5; // ID: 6
+ if ( m2_isReverseStrand && !m1_isReverseStrand ) return 6; // ID: 7
+ if ( m2_isReverseStrand && m1_isReverseStrand ) return 7; // ID: 8
}
// unknown model
bool IsAmbiguous;
bool HasData;
vector<ModelType> Models;
+ map<string, bool> ReadNames;
// ctor
ReadGroupResolver(void);
Models.push_back( ModelType(i+1) );
}
-bool ReadGroupResolver::IsValidInsertSize(const BamAlignment& al) const {
- return ( al.InsertSize >= MinFragmentLength &&
- al.InsertSize <= MaxFragmentLength );
+bool ReadGroupResolver::IsValidInsertSize(const BamAlignment& al) const {
+ const int32_t absInsertSize = abs(al.InsertSize);
+ return ( absInsertSize >= MinFragmentLength &&
+ absInsertSize <= MaxFragmentLength );
}
bool ReadGroupResolver::IsValidOrientation(const BamAlignment& al) const {
- const uint16_t currentModel = CalculateModelType(al);
- return ( currentModel == TopModelId || currentModel == NextTopModelId );
+ const uint16_t currentModelId = CalculateModelType(al) + 1; // convert model type (array index) to ID number
+ return ( currentModelId == TopModelId || currentModelId == NextTopModelId );
}
void ReadGroupResolver::DetermineTopModels(const string& readGroupName) {
- // sort models (most common -> least common)
+ // sort models (from most common to least common)
sort( Models.begin(), Models.end(), std::greater<ModelType>() );
// store top 2 models for later
- TopModelId = Models[0].ID;
+ TopModelId = Models[0].ID;
NextTopModelId = Models[1].ID;
// make sure that the 2 most common models are some threshold more common
}
// store only the fragments from the best alignment models, then sort
- vector<uint32_t> fragments;
+ vector<int32_t> fragments;
fragments.reserve( Models[0].size() + Models[1].size() );
fragments.insert( fragments.end(), Models[0].begin(), Models[0].end() );
fragments.insert( fragments.end(), Models[1].begin(), Models[1].end() );
// clear out Model fragment data, not needed anymore
Models.clear();
- // determine min,median, & max fragment lengths for each read group
- const double halfNonConfidenceInterval = (1.0 - ReadGroupResolver::ConfidenceInterval)/2.0;
- const unsigned int numFragmentLengths = fragments.size();
- if ( numFragmentLengths == 0 ) return;
+ // skip if no fragments found for this read group
+ if ( fragments.empty() ) {
+ HasData = false;
+ return;
+ } else
+ HasData = true;
+ // calculate & store the min,median, & max fragment lengths
+ const unsigned int numFragmentLengths = fragments.size();
+ const double halfNonConfidenceInterval = (1.0 - ReadGroupResolver::ConfidenceInterval)/2.0;
const unsigned int minIndex = (unsigned int)(numFragmentLengths * halfNonConfidenceInterval);
const unsigned int medianIndex = (unsigned int)(numFragmentLengths * 0.5);
const unsigned int maxIndex = (unsigned int)(numFragmentLengths * (1.0-halfNonConfidenceInterval));
MinFragmentLength = fragments[minIndex];
MedianFragmentLength = fragments[medianIndex];
MaxFragmentLength = fragments[maxIndex];
- HasData = true;
}
void ReadGroupResolver::SetConfidenceInterval(const double& ci) {
struct ResolveTool::ResolveSettings {
- // flags
- bool HasInputBamFile;
- bool HasOutputBamFile;
- bool IsForceCompression;
- bool HasStatsFile;
+ // modes
bool IsMakeStats;
bool IsMarkPairs;
bool IsTwoPass;
+
+ // I/O flags
+ bool HasInputBamFile;
+ bool HasOutputBamFile;
+ bool HasStatsFile;
+ bool IsForceCompression;
+
+ // resolve option flags
bool HasConfidenceInterval;
+ bool HasForceMarkReadGroups;
+ bool HasMinimumMapQuality;
bool HasUnusedModelThreshold;
- // filenames
+ // I/O filenames
string InputBamFilename;
string OutputBamFilename;
string StatsFilename;
+ string ReadNamesFilename; // ** N.B. - Only used internally, not set from cmdline **
- // 'resolve options'
- double ConfidenceInterval;
- double UnusedModelThreshold;
- bool HasForceMarkReadGroups;
+ // resolve options
+ double ConfidenceInterval;
+ uint16_t MinimumMapQuality;
+ double UnusedModelThreshold;
// constructor
ResolveSettings(void)
- : HasInputBamFile(false)
- , HasOutputBamFile(false)
- , IsForceCompression(false)
- , HasStatsFile(false)
- , IsMakeStats(false)
+ : IsMakeStats(false)
, IsMarkPairs(false)
, IsTwoPass(false)
+ , HasInputBamFile(false)
+ , HasOutputBamFile(false)
+ , HasStatsFile(false)
+ , IsForceCompression(false)
, HasConfidenceInterval(false)
+ , HasForceMarkReadGroups(false)
+ , HasMinimumMapQuality(false)
, HasUnusedModelThreshold(false)
, InputBamFilename(Options::StandardIn())
, OutputBamFilename(Options::StandardOut())
, StatsFilename("")
+ , ReadNamesFilename(DEFAULT_READNAME_FILE)
, ConfidenceInterval(DEFAULT_CONFIDENCE_INTERVAL)
+ , MinimumMapQuality(DEFAULT_MIN_MAPQUALITY)
, UnusedModelThreshold(DEFAULT_UNUSEDMODEL_THRESHOLD)
- , HasForceMarkReadGroups(false)
{ }
};
+// --------------------------------------------------------------------------
+// ReadNamesFileReader implementation
+
+struct ResolveTool::ReadNamesFileReader {
+
+ // ctor & dtor
+ ReadNamesFileReader(void) { }
+ ~ReadNamesFileReader(void) { Close(); }
+
+ // main reader interface
+ public:
+ void Close(void);
+ bool Open(const string& filename);
+ bool Read(map<string, ReadGroupResolver>& readGroups);
+
+ // data members
+ private:
+ ifstream m_stream;
+};
+
+void ResolveTool::ReadNamesFileReader::Close(void) {
+ if ( m_stream.is_open() )
+ m_stream.close();
+}
+
+bool ResolveTool::ReadNamesFileReader::Open(const string& filename) {
+
+ // make sure stream is fresh
+ Close();
+
+ // attempt to open filename, return status
+ m_stream.open(filename.c_str(), ifstream::in);
+ return m_stream.good();
+}
+
+bool ResolveTool::ReadNamesFileReader::Read(map<string, ReadGroupResolver>& readGroups) {
+
+ // up-front sanity check
+ if ( !m_stream.is_open() ) return false;
+
+ // parse read names file
+ string line;
+ vector<string> fields;
+ map<string, ReadGroupResolver>::iterator rgIter;
+ map<string, ReadGroupResolver>::iterator rgEnd = readGroups.end();
+ while ( getline(m_stream, line) ) {
+
+ // skip if empty line
+ if ( line.empty() ) continue;
+
+ // split line on '\t'
+ fields = Utilities::Split(line, TAB_CHAR);
+ if ( fields.size() != 2 ) continue;
+
+ // look up resolver for read group
+ rgIter = readGroups.find( fields[0] );
+ if ( rgIter == rgEnd ) return false;
+ ReadGroupResolver& resolver = (*rgIter).second;
+
+ // store read name with resolver
+ resolver.ReadNames.insert( make_pair<string,bool>(fields[1], true) ) ;
+ }
+
+ // if here, return success
+ return true;
+}
+
+// --------------------------------------------------------------------------
+// ReadNamesFileWriter implementation
+
+struct ResolveTool::ReadNamesFileWriter {
+
+ // ctor & dtor
+ ReadNamesFileWriter(void) { }
+ ~ReadNamesFileWriter(void) { Close(); }
+
+ // main reader interface
+ public:
+ void Close(void);
+ bool Open(const string& filename);
+ void Write(const string& readGroupName, const string& readName);
+
+ // data members
+ private:
+ ofstream m_stream;
+};
+
+void ResolveTool::ReadNamesFileWriter::Close(void) {
+ if ( m_stream.is_open() )
+ m_stream.close();
+}
+
+bool ResolveTool::ReadNamesFileWriter::Open(const string& filename) {
+
+ // make sure stream is fresh
+ Close();
+
+ // attempt to open filename, return status
+ m_stream.open(filename.c_str(), ofstream::out);
+ return m_stream.good();
+}
+
+void ResolveTool::ReadNamesFileWriter::Write(const string& readGroupName,
+ const string& readName)
+{
+ m_stream << readGroupName << TAB_CHAR << readName << endl;
+}
+
// --------------------------------------------------------------------------
// StatsFileReader implementation
// main reader interface
public:
void Close(void);
- bool Open(const std::string& filename);
+ bool Open(const string& filename);
bool Read(ResolveTool::ResolveSettings* settings,
map<string, ReadGroupResolver>& readGroups);
return true;
}
+ // ForceMarkReadGroups
+ if ( option == OPTION_FORCEMARKREADGROUPS ) {
+ value >> settings->HasForceMarkReadGroups;
+ return true;
+ }
+
+ // MinimumMapQuality
+ if ( option == OPTION_MINIMUMMAPQUALITY ) {
+ value >> settings->MinimumMapQuality;
+ settings->HasMinimumMapQuality = true;
+ return true;
+ }
+
// UnusedModelThreshold
if ( option == OPTION_UNUSEDMODELTHRESHOLD ) {
value >> settings->UnusedModelThreshold;
return true;
}
- // ForceMarkReadGroups
- if ( option == OPTION_FORCEMARKREADGROUPS ) {
- value >> settings->HasForceMarkReadGroups;
- return true;
- }
-
// otherwise unknown option
cerr << "bamtools resolve ERROR - unrecognized option: " << option << " in stats file" << endl;
return false;
// main reader interface
public:
void Close(void);
- bool Open(const std::string& filename);
+ bool Open(const string& filename);
bool Write(ResolveTool::ResolveSettings* settings,
const map<string, ReadGroupResolver>& readGroups);
<< BAMTOOLS_VERSION_BUILD;
// # bamtools resolve (vX.Y.Z)
+ // #
+ // # MODEL DESCRIPTION - see above for actual text
// \n
m_stream << COMMENT_CHAR << " bamtools resolve (" << versionStream.str() << ")" << endl
+ << COMMENT_CHAR << endl
+ << MODEL_DESCRIPTION
<< endl;
}
// [Options]
// ConfidenceInterval=<double>
- // UnusedModelThreshold=<double>
// ForceMarkReadGroups=<true|false>
+ // MinimumMapQuality=<uint16_t>
+ // UnusedModelThreshold=<double>
// \n
m_stream << OPTIONS_TOKEN << endl
<< OPTION_CONFIDENCEINTERVAL << EQUAL_CHAR << settings->ConfidenceInterval << endl
- << OPTION_UNUSEDMODELTHRESHOLD << EQUAL_CHAR << settings->UnusedModelThreshold << endl
<< OPTION_FORCEMARKREADGROUPS << EQUAL_CHAR << boolalpha << settings->HasForceMarkReadGroups << endl
+ << OPTION_MINIMUMMAPQUALITY << EQUAL_CHAR << settings->MinimumMapQuality << endl
+ << OPTION_UNUSEDMODELTHRESHOLD << EQUAL_CHAR << settings->UnusedModelThreshold << endl
<< endl;
}
void ResolveTool::StatsFileWriter::WriteReadGroups(const map<string, ReadGroupResolver>& readGroups) {
// [ReadGroups]
- m_stream << READGROUPS_TOKEN << endl;
+ // #<name> <medianFL> <minFL> <maxFL> <topModelID> <nextTopModelID> <isAmbiguous?>
+ m_stream << READGROUPS_TOKEN << endl
+ << RG_FIELD_DESCRIPTION << endl;
// iterate over read groups
map<string, ReadGroupResolver>::const_iterator rgIter = readGroups.begin();
continue;
// write read group data
- // <name> <medianFragmentLength> <minFragmentLength> <maxFragmentLength> <topModelId> <nextTopModelId> <isAmbiguous?>
m_stream << name << TAB_CHAR
<< resolver.MedianFragmentLength << TAB_CHAR
<< resolver.MinFragmentLength << TAB_CHAR
errors.push_back("Cannot use -forceMarkReadGroups. -markPairs options are DISABLED in -makeStats mode.");
}
- // if UseStats mode
+ // if MarkPairs mode
else if ( m_settings->IsMarkPairs ) {
// ensure mutex mode
if ( m_settings->ConfidenceInterval < 0.0 || m_settings->ConfidenceInterval > 1.0 )
errors.push_back("Invalid confidence interval. Must be between 0 and 1");
}
+ if ( m_settings->HasMinimumMapQuality ) {
+ if ( m_settings->MinimumMapQuality >= 256 )
+ errors.push_back("Invalid minimum map quality. Must be between 0 and 255");
+ }
if ( m_settings->HasUnusedModelThreshold ) {
if ( m_settings->UnusedModelThreshold < 0.0 || m_settings->UnusedModelThreshold > 1.0 )
errors.push_back("Invalid unused model threshold. Must be between 0 and 1");
ReadGroupResolver::SetUnusedModelThreshold(m_settings->UnusedModelThreshold);
// open our BAM reader
- BamReader reader;
- if ( !reader.Open(m_settings->InputBamFilename) ) {
+ BamReader bamReader;
+ if ( !bamReader.Open(m_settings->InputBamFilename) ) {
cerr << "bamtools resolve ERROR: could not open input BAM file: "
<< m_settings->InputBamFilename << endl;
return false;
}
- // retrieve header & reference dictionary info
- const SamHeader& header = reader.GetHeader();
-
- // parse BAM header for read groups
+ // retrieve header & parse for read groups
+ const SamHeader& header = bamReader.GetHeader();
ParseHeader(header);
+ // open ReadNamesFileWriter
+ ResolveTool::ReadNamesFileWriter readNamesWriter;
+ if ( !readNamesWriter.Open(m_settings->ReadNamesFilename) ) {
+ cerr << "bamtools resolve ERROR: could not open (temp) output read names file: "
+ << m_settings->ReadNamesFilename << endl;
+ bamReader.Close();
+ return false;
+ }
+
// read through BAM file
BamAlignment al;
string readGroup("");
map<string, ReadGroupResolver>::iterator rgIter;
- while ( reader.GetNextAlignmentCore(al) ) {
+ map<string, bool>::iterator readNameIter;
+ while ( bamReader.GetNextAlignmentCore(al) ) {
// skip if alignment is not paired, mapped, nor mate is mapped
if ( !al.IsPaired() || !al.IsMapped() || !al.IsMateMapped() )
continue;
- // determine model type, skip if model unknown
- const uint16_t currentModelType = CalculateModelType(al);
- assert( currentModelType != ModelType::DUMMY_ID );
+ // skip if alignment & mate not on same reference sequence
+ if ( al.RefID != al.MateRefID ) continue;
// flesh out the char data, so we can retrieve its read group ID
al.BuildCharData();
if ( rgIter == m_readGroups.end() ) {
cerr << "bamtools resolve ERROR - unable to calculate stats, unknown read group encountered: "
<< readGroup << endl;
- reader.Close();
+ bamReader.Close();
return false;
}
ReadGroupResolver& resolver = (*rgIter).second;
- // update stats for current read group, current model type
- resolver.Models[currentModelType].push_back(al.InsertSize);
+ // determine unique-ness of current alignment
+ const bool isCurrentMateUnique = ( al.MapQuality >= m_settings->MinimumMapQuality );
+
+ // look up read name
+ readNameIter = resolver.ReadNames.find(al.Name);
+
+ // if read name found (current alignment's mate already parsed)
+ if ( readNameIter != resolver.ReadNames.end() ) {
+
+ // if both unique mates are unique, store read name & insert size for later
+ const bool isStoredMateUnique = (*readNameIter).second;
+ if ( isCurrentMateUnique && isStoredMateUnique ) {
+
+ // save read name in temp file as candidates for later pair marking
+ readNamesWriter.Write(readGroup, al.Name);
+
+ // determine model type & store fragment length for stats calculation
+ const uint16_t currentModelType = CalculateModelType(al);
+ assert( currentModelType != ModelType::DUMMY_ID );
+ resolver.Models[currentModelType].push_back( abs(al.InsertSize) );
+ }
+
+ // unique or not, remove read name from map
+ resolver.ReadNames.erase(readNameIter);
+ }
+
+ // if read name not found, store new entry
+ else resolver.ReadNames.insert( make_pair<string, bool>(al.Name, isCurrentMateUnique) );
}
+ // close files
+ readNamesWriter.Close();
+ bamReader.Close();
+
// iterate back through read groups
map<string, ReadGroupResolver>::iterator rgEnd = m_readGroups.end();
for ( rgIter = m_readGroups.begin(); rgIter != rgEnd; ++rgIter ) {
// calculate acceptable orientation & insert sizes for this read group
resolver.DetermineTopModels(name);
+
+ // clear out left over read names
+ // (these have mates that did not pass filters or were already removed as non-unique)
+ resolver.ReadNames.clear();
}
- // close reader & return success
- reader.Close();
+ // if we get here, return success
return true;
}
// quit check if either alignment or its mate are unmapped
if ( !al.IsMapped() || !al.IsMateMapped() ) return;
- // quit check if map quality is 0
- if ( al.MapQuality == 0 ) return;
+ // quit check if alignment & its mate are on differenct references
+ if ( al.RefID != al.MateRefID ) return;
+
+ // quit check if map quality less than cutoff
+ if ( al.MapQuality < m_settings->MinimumMapQuality ) return;
// get read group from alignment
// empty string if not found, this is OK - we handle empty read group case
// quit check if pairs are not within "reasonable" distance (can differ for each RG)
if ( !resolver.IsValidInsertSize(al) ) return;
+ // quit check if alignment is not a "candidate proper pair"
+ map<string, bool>::const_iterator readNameIter;
+ readNameIter = resolver.ReadNames.find(al.Name);
+ if ( readNameIter == resolver.ReadNames.end() )
+ return;
+
// if we get here, alignment is OK - set 'proper pair' flag
al.SetIsProperPair(true);
}
bool ResolveTool::ResolveToolPrivate::ResolvePairs(void) {
+ // open file containing read names of candidate proper pairs
+ ResolveTool::ReadNamesFileReader readNamesReader;
+ if ( !readNamesReader.Open(m_settings->ReadNamesFilename) ) {
+ cerr << "bamtools resolve ERROR: could not open (temp) inputput read names file: "
+ << m_settings->ReadNamesFilename << endl;
+ return false;
+ }
+
+ // parse read names (matching with corresponding read groups)
+ if ( !readNamesReader.Read(m_readGroups) ) {
+ cerr << "bamtools resolve ERROR: could not read candidate read names from file: "
+ << m_settings->ReadNamesFilename << endl;
+ readNamesReader.Close();
+ return false;
+ }
+
+ // close read name file reader & delete temp file
+ readNamesReader.Close();
+ if ( remove(m_settings->ReadNamesFilename.c_str()) != 0 ) {
+ cerr << "bamtools resolve WARNING: could not delete temp file: "
+ << m_settings->ReadNamesFilename << endl;
+ }
+
// open our BAM reader
BamReader reader;
if ( !reader.Open(m_settings->InputBamFilename) ) {
// initialize read group map with default (empty name) read group
m_readGroups.insert( make_pair<string, ReadGroupResolver>("", ReadGroupResolver()) );
+ // init readname filename
+ // uses (adjusted) stats filename if provided (req'd for makeStats, markPairs modes; optional for twoPass)
+ // else keep default filename
+ if ( m_settings->HasStatsFile )
+ m_settings->ReadNamesFilename = m_settings->StatsFilename + READNAME_FILE_SUFFIX;
+
// -makeStats mode
if ( m_settings->IsMakeStats ) {
"All MakeStats & MarkPairs Mode Settings are available. "
"The intermediate stats file is not necessary, but if the -stats options is used, then one will be generated. "
"You may find this useful for documentation purposes.";
+ const string minMapQualDescription = "minimum map quality. Used in -makeStats mode as a heuristic for determining a mate's "
+ "uniqueness. Used in -markPairs mode as a filter for marking candidate proper pairs.";
const string confidenceIntervalDescription = "confidence interval. Set min/max fragment lengths such that we capture "
"this fraction of pairs";
const string unusedModelThresholdDescription = "unused model threshold. The resolve tool considers 8 possible orientation models "
Options::AddOption("-markPairs", markPairsDescription, m_settings->IsMarkPairs, ModeOpts);
Options::AddOption("-twoPass", twoPassDescription, m_settings->IsTwoPass, ModeOpts);
+ OptionGroup* GeneralOpts = Options::CreateOptionGroup("General Resolve Options (available in all modes)");
+ Options::AddValueOption("-minMQ", "unsigned short", minMapQualDescription, "",
+ m_settings->HasMinimumMapQuality, m_settings->MinimumMapQuality, GeneralOpts);
+
OptionGroup* MakeStatsOpts = Options::CreateOptionGroup("MakeStats Mode Options (disabled in -markPairs mode)");
Options::AddValueOption("-ci", "double", confidenceIntervalDescription, "",
m_settings->HasConfidenceInterval, m_settings->ConfidenceInterval, MakeStatsOpts);