+};
+
+// ---------------------------------------------
+// FilterToolPrivate declaration
+
+class FilterTool::FilterToolPrivate {
+
+ // ctor & dtor
+ public:
+ FilterToolPrivate(FilterTool::FilterSettings* settings);
+ ~FilterToolPrivate(void);
+
+ // 'public' interface
+ public:
+ bool Run(void);
+
+ // internal methods
+ private:
+ bool AddPropertyTokensToFilter(const string& filterName, const map<string, string>& propertyTokens);
+ bool CheckAlignment(const BamAlignment& al);
+ const string GetScriptContents(void);
+ void InitProperties(void);
+ bool ParseCommandLine(void);
+ bool ParseFilterObject(const string& filterName, const Json::Value& filterObject);
+ bool ParseScript(void);
+ bool SetupFilters(void);
+
+ // data members
+ private:
+ vector<string> m_propertyNames;
+ FilterTool::FilterSettings* m_settings;
+ FilterEngine<BamAlignmentChecker> m_filterEngine;
+};
+
+// ---------------------------------------------
+// FilterToolPrivate implementation
+
+// constructor
+FilterTool::FilterToolPrivate::FilterToolPrivate(FilterTool::FilterSettings* settings)
+ : m_settings(settings)
+{ }
+
+// destructor
+FilterTool::FilterToolPrivate::~FilterToolPrivate(void) { }
+
+bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filterName,
+ const map<string,
+ string>& propertyTokens)
+{
+ // dummy temp values for token parsing
+ bool boolValue;
+ int32_t int32Value;
+ uint16_t uint16Value;
+ uint32_t uint32Value;
+ string stringValue;
+ PropertyFilterValue::ValueCompareType type;
+
+ // iterate over property token map
+ map<string, string>::const_iterator mapIter = propertyTokens.begin();
+ map<string, string>::const_iterator mapEnd = propertyTokens.end();
+ for ( ; mapIter != mapEnd; ++mapIter ) {
+
+ const string& propertyName = (*mapIter).first;
+ const string& token = (*mapIter).second;
+
+ // ------------------------------
+ // convert token to value & compare type
+ // then add to filter engine
+
+ // bool conversion
+ if ( propertyName == ISDUPLICATE_PROPERTY ||
+ propertyName == ISFAILEDQC_PROPERTY ||
+ propertyName == ISFIRSTMATE_PROPERTY ||
+ propertyName == ISMAPPED_PROPERTY ||
+ propertyName == ISMATEMAPPED_PROPERTY ||
+ propertyName == ISMATEREVERSESTRAND_PROPERTY ||
+ propertyName == ISPAIRED_PROPERTY ||
+ propertyName == ISPRIMARYALIGNMENT_PROPERTY ||
+ propertyName == ISPROPERPAIR_PROPERTY ||
+ propertyName == ISREVERSESTRAND_PROPERTY ||
+ propertyName == ISSECONDMATE_PROPERTY ||
+ propertyName == ISSINGLETON_PROPERTY
+ )
+ {
+ FilterEngine<BamAlignmentChecker>::parseToken(token, boolValue, type);
+ m_filterEngine.setProperty(filterName, propertyName, boolValue, type);
+ }
+
+ // int32_t conversion
+ else if ( propertyName == INSERTSIZE_PROPERTY ||
+ propertyName == LENGTH_PROPERTY ||
+ propertyName == MATEPOSITION_PROPERTY ||
+ propertyName == POSITION_PROPERTY
+ )
+ {
+ FilterEngine<BamAlignmentChecker>::parseToken(token, int32Value, type);
+ m_filterEngine.setProperty(filterName, propertyName, int32Value, type);
+ }
+
+ // uint16_t conversion
+ else if ( propertyName == MAPQUALITY_PROPERTY )
+ {
+ FilterEngine<BamAlignmentChecker>::parseToken(token, uint16Value, type);
+ m_filterEngine.setProperty(filterName, propertyName, uint16Value, type);
+ }
+
+ // uint32_t conversion
+ else if ( propertyName == ALIGNMENTFLAG_PROPERTY )
+ {
+ FilterEngine<BamAlignmentChecker>::parseToken(token, uint32Value, type);
+ m_filterEngine.setProperty(filterName, propertyName, uint32Value, type);
+ }
+
+ // string conversion
+ else if ( propertyName == CIGAR_PROPERTY ||
+ propertyName == MATEREFERENCE_PROPERTY ||
+ propertyName == NAME_PROPERTY ||
+ propertyName == QUERYBASES_PROPERTY ||
+ propertyName == REFERENCE_PROPERTY
+ )
+ {
+ FilterEngine<BamAlignmentChecker>::parseToken(token, stringValue, type);
+ m_filterEngine.setProperty(filterName, propertyName, stringValue, type);
+ }
+
+ else if ( propertyName == TAG_PROPERTY ) {
+ // this will be stored directly as the TAG:VALUE token
+ // (VALUE may contain compare ops, will be parsed out later)
+ m_filterEngine.setProperty(filterName, propertyName, token, PropertyFilterValue::EXACT);
+ }
+
+ // else unknown property
+ else {
+ cerr << "bamtools filter ERROR: unknown property - " << propertyName << endl;
+ return false;
+ }
+ }
+ return true;
+}
+
+bool FilterTool::FilterToolPrivate::CheckAlignment(const BamAlignment& al) {
+ return m_filterEngine.check(al);
+}
+
+const string FilterTool::FilterToolPrivate::GetScriptContents(void) {
+
+ // open script for reading
+ FILE* inFile = fopen(m_settings->ScriptFilename.c_str(), "rb");
+ if ( !inFile ) {
+ cerr << "bamtools filter ERROR: could not open script: "
+ << m_settings->ScriptFilename << " for reading" << endl;
+ return string();
+ }
+
+ // read in entire script contents
+ char buffer[1024];
+ ostringstream docStream("");
+ while ( true ) {
+
+ // peek ahead, make sure there is data available
+ char ch = fgetc(inFile);
+ ungetc(ch, inFile);
+ if( feof(inFile) )
+ break;
+
+ // read next block of data
+ if ( fgets(buffer, 1024, inFile) == 0 ) {
+ cerr << "bamtools filter ERROR: could not read script contents" << endl;
+ return string();
+ }
+
+ docStream << buffer;
+ }
+
+ // close script file
+ fclose(inFile);
+
+ // import buffer contents to document, return
+ return docStream.str();
+}
+
+void FilterTool::FilterToolPrivate::InitProperties(void) {
+
+ // store property names in vector
+ m_propertyNames.push_back(ALIGNMENTFLAG_PROPERTY);
+ m_propertyNames.push_back(CIGAR_PROPERTY);
+ m_propertyNames.push_back(INSERTSIZE_PROPERTY);
+ m_propertyNames.push_back(ISDUPLICATE_PROPERTY);
+ m_propertyNames.push_back(ISFAILEDQC_PROPERTY);
+ m_propertyNames.push_back(ISFIRSTMATE_PROPERTY);
+ m_propertyNames.push_back(ISMAPPED_PROPERTY);
+ m_propertyNames.push_back(ISMATEMAPPED_PROPERTY);
+ m_propertyNames.push_back(ISMATEREVERSESTRAND_PROPERTY);
+ m_propertyNames.push_back(ISPAIRED_PROPERTY);
+ m_propertyNames.push_back(ISPRIMARYALIGNMENT_PROPERTY);
+ m_propertyNames.push_back(ISPROPERPAIR_PROPERTY);
+ m_propertyNames.push_back(ISREVERSESTRAND_PROPERTY);
+ m_propertyNames.push_back(ISSECONDMATE_PROPERTY);
+ m_propertyNames.push_back(ISSINGLETON_PROPERTY);
+ m_propertyNames.push_back(LENGTH_PROPERTY);
+ m_propertyNames.push_back(MAPQUALITY_PROPERTY);
+ m_propertyNames.push_back(MATEPOSITION_PROPERTY);
+ m_propertyNames.push_back(MATEREFERENCE_PROPERTY);
+ m_propertyNames.push_back(NAME_PROPERTY);
+ m_propertyNames.push_back(POSITION_PROPERTY);
+ m_propertyNames.push_back(QUERYBASES_PROPERTY);
+ m_propertyNames.push_back(REFERENCE_PROPERTY);
+ m_propertyNames.push_back(TAG_PROPERTY);
+
+ // add vector contents to FilterEngine<BamAlignmentChecker>
+ vector<string>::const_iterator propertyNameIter = m_propertyNames.begin();
+ vector<string>::const_iterator propertyNameEnd = m_propertyNames.end();
+ for ( ; propertyNameIter != propertyNameEnd; ++propertyNameIter )
+ m_filterEngine.addProperty((*propertyNameIter));
+}
+
+bool FilterTool::FilterToolPrivate::ParseCommandLine(void) {
+
+ // add a rule set to filter engine
+ const string CMD = "COMMAND_LINE";
+ m_filterEngine.addFilter(CMD);
+
+ // map property names to command line args
+ map<string, string> propertyTokens;
+ if ( m_settings->HasAlignmentFlagFilter ) propertyTokens.insert( make_pair(ALIGNMENTFLAG_PROPERTY, m_settings->AlignmentFlagFilter) );
+ if ( m_settings->HasInsertSizeFilter ) propertyTokens.insert( make_pair(INSERTSIZE_PROPERTY, m_settings->InsertSizeFilter) );
+ if ( m_settings->HasIsDuplicateFilter ) propertyTokens.insert( make_pair(ISDUPLICATE_PROPERTY, m_settings->IsDuplicateFilter) );
+ if ( m_settings->HasIsFailedQCFilter ) propertyTokens.insert( make_pair(ISFAILEDQC_PROPERTY, m_settings->IsFailedQCFilter) );
+ if ( m_settings->HasIsFirstMateFilter ) propertyTokens.insert( make_pair(ISFIRSTMATE_PROPERTY, m_settings->IsFirstMateFilter) );
+ if ( m_settings->HasIsMappedFilter ) propertyTokens.insert( make_pair(ISMAPPED_PROPERTY, m_settings->IsMappedFilter) );
+ if ( m_settings->HasIsMateMappedFilter ) propertyTokens.insert( make_pair(ISMATEMAPPED_PROPERTY, m_settings->IsMateMappedFilter) );
+ if ( m_settings->HasIsMateReverseStrandFilter ) propertyTokens.insert( make_pair(ISMATEREVERSESTRAND_PROPERTY, m_settings->IsMateReverseStrandFilter) );
+ if ( m_settings->HasIsPairedFilter ) propertyTokens.insert( make_pair(ISPAIRED_PROPERTY, m_settings->IsPairedFilter) );
+ if ( m_settings->HasIsPrimaryAlignmentFilter ) propertyTokens.insert( make_pair(ISPRIMARYALIGNMENT_PROPERTY, m_settings->IsPrimaryAlignmentFilter) );
+ if ( m_settings->HasIsProperPairFilter ) propertyTokens.insert( make_pair(ISPROPERPAIR_PROPERTY, m_settings->IsProperPairFilter) );
+ if ( m_settings->HasIsReverseStrandFilter ) propertyTokens.insert( make_pair(ISREVERSESTRAND_PROPERTY, m_settings->IsReverseStrandFilter) );
+ if ( m_settings->HasIsSecondMateFilter ) propertyTokens.insert( make_pair(ISSECONDMATE_PROPERTY, m_settings->IsSecondMateFilter) );
+ if ( m_settings->HasIsSingletonFilter ) propertyTokens.insert( make_pair(ISSINGLETON_PROPERTY, m_settings->IsSingletonFilter) );
+ if ( m_settings->HasLengthFilter ) propertyTokens.insert( make_pair(LENGTH_PROPERTY, m_settings->LengthFilter) );
+ if ( m_settings->HasMapQualityFilter ) propertyTokens.insert( make_pair(MAPQUALITY_PROPERTY, m_settings->MapQualityFilter) );
+ if ( m_settings->HasNameFilter ) propertyTokens.insert( make_pair(NAME_PROPERTY, m_settings->NameFilter) );
+ if ( m_settings->HasQueryBasesFilter ) propertyTokens.insert( make_pair(QUERYBASES_PROPERTY, m_settings->QueryBasesFilter) );
+ if ( m_settings->HasTagFilter ) propertyTokens.insert( make_pair(TAG_PROPERTY, m_settings->TagFilter) );
+
+ // send add these properties to filter set "COMMAND_LINE"
+ return AddPropertyTokensToFilter(CMD, propertyTokens);
+}
+
+bool FilterTool::FilterToolPrivate::ParseFilterObject(const string& filterName, const Json::Value& filterObject) {
+
+ // filter object parsing variables
+ Json::Value null(Json::nullValue);
+ Json::Value propertyValue;
+
+ // store results
+ map<string, string> propertyTokens;
+
+ // iterate over known properties
+ vector<string>::const_iterator propertyNameIter = m_propertyNames.begin();
+ vector<string>::const_iterator propertyNameEnd = m_propertyNames.end();
+ for ( ; propertyNameIter != propertyNameEnd; ++propertyNameIter ) {
+ const string& propertyName = (*propertyNameIter);
+
+ // if property defined in filter, add to token list
+ propertyValue = filterObject.get(propertyName, null);
+ if ( propertyValue != null )
+ propertyTokens.insert( make_pair(propertyName, propertyValue.asString()) );
+ }
+
+ // add this filter to engin
+ m_filterEngine.addFilter(filterName);
+
+ // add token list to this filter
+ return AddPropertyTokensToFilter(filterName, propertyTokens);
+}
+
+bool FilterTool::FilterToolPrivate::ParseScript(void) {
+
+ // read in script contents from file
+ const string document = GetScriptContents();
+
+ // set up JsonCPP reader and attempt to parse script
+ Json::Value root;
+ Json::Reader reader;
+ if ( !reader.parse(document, root) ) {
+ // use built-in error reporting mechanism to alert user what was wrong with the script
+ cerr << "bamtools filter ERROR: failed to parse script - see error message(s) below" << endl
+ << reader.getFormatedErrorMessages();
+ return false;
+ }
+
+ // initialize return status
+ bool success = true;
+
+ // see if root object contains multiple filters
+ const Json::Value filters = root["filters"];
+ if ( !filters.isNull() ) {
+
+ // iterate over any filters found
+ int filterIndex = 0;
+ Json::Value::const_iterator filtersIter = filters.begin();
+ Json::Value::const_iterator filtersEnd = filters.end();
+ for ( ; filtersIter != filtersEnd; ++filtersIter, ++filterIndex ) {
+ Json::Value filter = (*filtersIter);
+
+ // convert filter index to string
+ string filterName;
+
+ // if id tag supplied
+ const Json::Value id = filter["id"];
+ if ( !id.isNull() )
+ filterName = id.asString();
+
+ // use array index
+ else {
+ stringstream convert;
+ convert << filterIndex;
+ filterName = convert.str();
+ }
+
+ // create & parse filter
+ success &= ParseFilterObject(filterName, filter);
+ }
+
+ // see if user defined a "rule" for these filters
+ // otherwise, use filter engine's default rule behavior
+ string ruleString("");
+ const Json::Value rule = root["rule"];
+ if ( rule.isString() )
+ ruleString = rule.asString();
+ m_filterEngine.setRule(ruleString);
+
+ // return success/fail
+ return success;
+ }
+
+ // otherwise, root is the only filter (just contains properties)
+ // create & parse filter named "ROOT"
+ else success = ParseFilterObject("ROOT", root);
+
+ // return success/failure
+ return success;
+}
+
+bool FilterTool::FilterToolPrivate::Run(void) {
+
+ // set to default input if none provided
+ if ( !m_settings->HasInput && !m_settings->HasInputFilelist )
+ m_settings->InputFiles.push_back(Options::StandardIn());
+
+ // add files in the filelist to the input file list
+ if ( m_settings->HasInputFilelist ) {
+
+ ifstream filelist(m_settings->InputFilelist.c_str(), ios::in);
+ if ( !filelist.is_open() ) {
+ cerr << "bamtools filter ERROR: could not open input BAM file list... Aborting." << endl;
+ return false;
+ }
+
+ string line;
+ while ( getline(filelist, line) )
+ m_settings->InputFiles.push_back(line);
+ }
+
+ // initialize defined properties & user-specified filters
+ // quit if failed
+ if ( !SetupFilters() )
+ return false;
+
+ // open reader without index
+ BamMultiReader reader;
+ if ( !reader.Open(m_settings->InputFiles) ) {
+ cerr << "bamtools filter ERROR: could not open input files for reading." << endl;
+ return false;
+ }
+
+ // retrieve reader header & reference data
+ const string headerText = reader.GetHeaderText();
+ filterToolReferences = reader.GetReferenceData();
+
+ // determine compression mode for BamWriter
+ bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() &&
+ !m_settings->IsForceCompression );
+ BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
+ if ( writeUncompressed ) compressionMode = BamWriter::Uncompressed;
+
+ // open BamWriter
+ BamWriter writer;
+ writer.SetCompressionMode(compressionMode);
+ if ( !writer.Open(m_settings->OutputFilename, headerText, filterToolReferences) ) {
+ cerr << "bamtools filter ERROR: could not open " << m_settings->OutputFilename << " for writing." << endl;
+ reader.Close();
+ return false;
+ }
+
+ // if no region specified, filter entire file
+ BamAlignment al;
+ if ( !m_settings->HasRegion ) {
+ while ( reader.GetNextAlignment(al) ) {
+ if ( CheckAlignment(al) )
+ writer.SaveAlignment(al);
+ }
+ }
+
+ // otherwise attempt to use region as constraint
+ else {
+
+ // if region string parses OK
+ BamRegion region;
+ if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
+
+ // attempt to find index files
+ reader.LocateIndexes();
+
+ // if index data available for all BAM files, we can use SetRegion
+ if ( reader.HasIndexes() ) {
+
+ // attempt to use SetRegion(), if failed report error
+ if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) {
+ cerr << "bamtools filter ERROR: set region failed. Check that REGION describes a valid range" << endl;
+ reader.Close();
+ return false;
+ }
+
+ // everything checks out, just iterate through specified region, filtering alignments
+ while ( reader.GetNextAlignment(al) )
+ if ( CheckAlignment(al) )
+ writer.SaveAlignment(al);
+ }
+
+ // no index data available, we have to iterate through until we
+ // find overlapping alignments
+ else {
+ while ( reader.GetNextAlignment(al) ) {
+ if ( (al.RefID >= region.LeftRefID) && ((al.Position + al.Length) >= region.LeftPosition) &&
+ (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) )
+ {
+ if ( CheckAlignment(al) )
+ writer.SaveAlignment(al);
+ }
+ }
+ }
+ }
+
+ // error parsing REGION string
+ else {
+ cerr << "bamtools filter ERROR: could not parse REGION: " << m_settings->Region << endl;
+ cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid"
+ << endl;
+ reader.Close();
+ return false;
+ }
+ }
+
+ // clean up & exit
+ reader.Close();
+ writer.Close();
+ return true;
+}
+
+bool FilterTool::FilterToolPrivate::SetupFilters(void) {
+
+ // set up filter engine with supported properties
+ InitProperties();
+
+ // parse script for filter rules, if given
+ if ( m_settings->HasScript )
+ return ParseScript();
+
+ // otherwise check command line for filters
+ else return ParseCommandLine();
+}