]> git.donarmstrong.com Git - bamtools.git/blobdiff - src/toolkit/bamtools_filter.cpp
Major update to BamTools version 1.0
[bamtools.git] / src / toolkit / bamtools_filter.cpp
index 6941da2d56d7719d2950f7fa8cf7250f9f30e35f..023fbc99d053f15ef217d9903cba7714255b340f 100644 (file)
@@ -3,34 +3,29 @@
 // Marth Lab, Department of Biology, Boston College
 // All rights reserved.
 // ---------------------------------------------------------------------------
-// Last modified: 30 August 2010
+// Last modified: 21 March 2011
 // ---------------------------------------------------------------------------
-// Filters a single BAM file (or filters multiple BAM files and merges) 
-// according to some user-specified criteria.
+// Filters BAM file(s) according to some user-specified criteria.
 // ***************************************************************************
 
-// std includes
+#include "bamtools_filter.h"
+
+#include <api/BamMultiReader.h>
+#include <api/BamWriter.h>
+#include <utils/bamtools_filter_engine.h>
+#include <utils/bamtools_options.h>
+#include <utils/bamtools_utilities.h>
+using namespace BamTools;
+
+#include <jsoncpp/json.h>
+using namespace Json;
+
 #include <cstdio>
 #include <iostream>
 #include <sstream>
 #include <string>
 #include <vector>
-
-// BamTools includes
-#include "bamtools_filter.h"
-#include "bamtools_filter_engine.h"
-#include "bamtools_options.h"
-#include "bamtools_utilities.h"
-#include "BamReader.h"
-#include "BamMultiReader.h"
-#include "BamWriter.h"
-
-//JsonCPP includes
-#include "jsoncpp/json.h"
-
 using namespace std;
-using namespace BamTools; 
-using namespace Json;
   
 namespace BamTools {
   
@@ -39,6 +34,7 @@ namespace BamTools {
 
 // property names
 const string ALIGNMENTFLAG_PROPERTY       = "alignmentFlag";
+const string CIGAR_PROPERTY               = "cigar";
 const string INSERTSIZE_PROPERTY          = "insertSize";
 const string ISDUPLICATE_PROPERTY         = "isDuplicate";
 const string ISFAILEDQC_PROPERTY          = "isFailedQC";
@@ -58,11 +54,173 @@ const string NAME_PROPERTY                = "name";
 const string POSITION_PROPERTY            = "position";
 const string QUERYBASES_PROPERTY          = "queryBases";
 const string REFERENCE_PROPERTY           = "reference";
+const string TAG_PROPERTY                 = "tag";
 
 // boolalpha
 const string TRUE_STR  = "true";
 const string FALSE_STR = "false";
     
+RefVector filterToolReferences;    
+    
+struct BamAlignmentChecker {
+    bool check(const PropertyFilter& filter, const BamAlignment& al) {
+      
+        bool keepAlignment = true;
+        const PropertyMap& properties = filter.Properties;
+        PropertyMap::const_iterator propertyIter = properties.begin();
+        PropertyMap::const_iterator propertyEnd  = properties.end();
+        for ( ; propertyIter != propertyEnd; ++propertyIter ) {
+          
+            // check alignment data field depending on propertyName
+            const string& propertyName = (*propertyIter).first;
+            const PropertyFilterValue& valueFilter = (*propertyIter).second;
+            
+            if      ( propertyName == ALIGNMENTFLAG_PROPERTY )  keepAlignment &= valueFilter.check(al.AlignmentFlag);
+            else if ( propertyName == CIGAR_PROPERTY ) {
+                stringstream cigarSs;
+                const vector<CigarOp>& cigarData = al.CigarData;
+                if ( !cigarData.empty() ) {
+                    vector<CigarOp>::const_iterator cigarBegin = cigarData.begin();
+                    vector<CigarOp>::const_iterator cigarIter = cigarBegin;
+                    vector<CigarOp>::const_iterator cigarEnd  = cigarData.end();
+                    for ( ; cigarIter != cigarEnd; ++cigarIter ) {
+                        const CigarOp& op = (*cigarIter);
+                        cigarSs << op.Length << op.Type;
+                    }
+                    keepAlignment &= valueFilter.check(cigarSs.str());
+                }
+            }
+            else if ( propertyName == INSERTSIZE_PROPERTY )           keepAlignment &= valueFilter.check(al.InsertSize);
+            else if ( propertyName == ISDUPLICATE_PROPERTY )          keepAlignment &= valueFilter.check(al.IsDuplicate());
+            else if ( propertyName == ISFAILEDQC_PROPERTY )           keepAlignment &= valueFilter.check(al.IsFailedQC());
+            else if ( propertyName == ISFIRSTMATE_PROPERTY )          keepAlignment &= valueFilter.check(al.IsFirstMate());
+            else if ( propertyName == ISMAPPED_PROPERTY )             keepAlignment &= valueFilter.check(al.IsMapped());
+            else if ( propertyName == ISMATEMAPPED_PROPERTY )         keepAlignment &= valueFilter.check(al.IsMateMapped());
+            else if ( propertyName == ISMATEREVERSESTRAND_PROPERTY )  keepAlignment &= valueFilter.check(al.IsMateReverseStrand());
+            else if ( propertyName == ISPAIRED_PROPERTY )             keepAlignment &= valueFilter.check(al.IsPaired());
+            else if ( propertyName == ISPRIMARYALIGNMENT_PROPERTY )   keepAlignment &= valueFilter.check(al.IsPrimaryAlignment());
+            else if ( propertyName == ISPROPERPAIR_PROPERTY )         keepAlignment &= valueFilter.check(al.IsProperPair());
+            else if ( propertyName == ISREVERSESTRAND_PROPERTY )      keepAlignment &= valueFilter.check(al.IsReverseStrand());
+            else if ( propertyName == ISSECONDMATE_PROPERTY )         keepAlignment &= valueFilter.check(al.IsSecondMate());
+            else if ( propertyName == MAPQUALITY_PROPERTY )           keepAlignment &= valueFilter.check(al.MapQuality);
+            else if ( propertyName == MATEPOSITION_PROPERTY )         keepAlignment &= ( al.IsPaired() && al.IsMateMapped() && valueFilter.check(al.MateRefID) );
+            else if ( propertyName == MATEREFERENCE_PROPERTY ) {
+                if ( !al.IsPaired() || !al.IsMateMapped() ) return false;
+                BAMTOOLS_ASSERT_MESSAGE( (al.MateRefID>=0 && (al.MateRefID<(int)filterToolReferences.size())), "Invalid MateRefID");
+                const string& refName = filterToolReferences.at(al.MateRefID).RefName;
+                keepAlignment &= valueFilter.check(refName);
+            }
+            else if ( propertyName == NAME_PROPERTY )                 keepAlignment &= valueFilter.check(al.Name);
+            else if ( propertyName == POSITION_PROPERTY )             keepAlignment &= valueFilter.check(al.Position);
+            else if ( propertyName == QUERYBASES_PROPERTY )           keepAlignment &= valueFilter.check(al.QueryBases);
+            else if ( propertyName == REFERENCE_PROPERTY ) {
+                BAMTOOLS_ASSERT_MESSAGE( (al.RefID>=0 && (al.RefID<(int)filterToolReferences.size())), "Invalid RefID");
+                const string& refName = filterToolReferences.at(al.RefID).RefName;
+                keepAlignment &= valueFilter.check(refName);
+            }
+            else if ( propertyName == TAG_PROPERTY ) keepAlignment &= checkAlignmentTag(valueFilter, al);
+            else BAMTOOLS_ASSERT_UNREACHABLE;
+            
+            // if alignment fails at ANY point, just quit and return false
+            if ( !keepAlignment ) return false;
+        }
+      
+        BAMTOOLS_ASSERT_MESSAGE( keepAlignment, "Error in BamAlignmentChecker... keepAlignment should be true here");
+        return keepAlignment;
+    }
+    
+    bool checkAlignmentTag(const PropertyFilterValue& valueFilter, const BamAlignment& al) {
+     
+        // ensure filter contains string data
+        Variant entireTagFilter = valueFilter.Value;
+        if ( !entireTagFilter.is_type<string>() ) return false;
+
+        // localize string from variant
+        const string& entireTagFilterString = entireTagFilter.get<string>();
+
+        // ensure we have at least "XX:x"
+        if ( entireTagFilterString.length() < 4 ) return false;
+
+        // get tagName & lookup in alignment
+        // if found, set tagType to tag type character
+        // if not found, return false
+        const string& tagName = entireTagFilterString.substr(0,2);
+        char tagType = '\0';
+        if ( !al.GetTagType(tagName, tagType) ) return false;
+
+        // remove tagName & ":" from beginning tagFilter
+        string tagFilterString = entireTagFilterString.substr(3);
+
+        // switch on tag type to set tag query value & parse filter token
+        int32_t  intFilterValue,    intQueryValue;
+        uint32_t uintFilterValue,   uintQueryValue;
+        float    realFilterValue,   realQueryValue;
+        string   stringFilterValue, stringQueryValue;
+
+        PropertyFilterValue tagFilter;
+        PropertyFilterValue::ValueCompareType compareType;
+        bool keepAlignment = false;
+        switch (tagType) {
+
+            // signed int tag type
+            case 'c' :
+            case 's' :
+            case 'i' :
+                if ( al.GetTag(tagName, intQueryValue) ) {
+                    if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, intFilterValue, compareType) ) {
+                        tagFilter.Value = intFilterValue;
+                        tagFilter.Type  = compareType;
+                        keepAlignment   = tagFilter.check(intQueryValue);
+                    }
+                }
+                break;
+
+            // unsigned int tag type
+            case 'C' :
+            case 'S' :
+            case 'I' :
+                if ( al.GetTag(tagName, uintQueryValue) ) {
+                    if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, uintFilterValue, compareType) ) {
+                        tagFilter.Value = uintFilterValue;
+                        tagFilter.Type  = compareType;
+                        keepAlignment   = tagFilter.check(uintQueryValue);
+                    }
+                }
+                break;
+
+            // 'real' tag type
+            case 'f' :
+                if ( al.GetTag(tagName, realQueryValue) ) {
+                    if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, realFilterValue, compareType) ) {
+                        tagFilter.Value = realFilterValue;
+                        tagFilter.Type  = compareType;
+                        keepAlignment   = tagFilter.check(realQueryValue);
+                    }
+                }
+                break;
+
+            // string tag type
+            case 'A':
+            case 'Z':
+            case 'H':
+                if ( al.GetTag(tagName, stringQueryValue) ) {
+                    if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, stringFilterValue, compareType) ) {
+                        tagFilter.Value = stringFilterValue;
+                        tagFilter.Type  = compareType;
+                        keepAlignment   = tagFilter.check(stringQueryValue);
+                    }
+                }
+                break;
+
+            // unknown tag type
+            default :
+                keepAlignment = false;
+        }
+
+        return keepAlignment;
+    }
+};    
+    
 } // namespace BamTools
   
 // ---------------------------------------------
@@ -94,7 +252,7 @@ class FilterTool::FilterToolPrivate {
     private:
         vector<string> m_propertyNames;
         FilterTool::FilterSettings* m_settings;
-        RefVector m_references;
+        FilterEngine<BamAlignmentChecker> m_filterEngine;
 };
   
 // ---------------------------------------------
@@ -110,6 +268,7 @@ struct FilterTool::FilterSettings {
     bool HasOutputBamFilename;
     bool HasRegion;
     bool HasScriptFilename;
+    bool IsForceCompression;
     
     // filenames
     vector<string> InputFiles;
@@ -126,8 +285,7 @@ struct FilterTool::FilterSettings {
     bool HasMapQualityFilter;
     bool HasNameFilter;
     bool HasQueryBasesFilter;
-    
-//     bool HasTagFilters;
+    bool HasTagFilter; //(s)
 
     // filters
     string AlignmentFlagFilter;
@@ -135,8 +293,7 @@ struct FilterTool::FilterSettings {
     string NameFilter;
     string MapQualityFilter;
     string QueryBasesFilter;
-    
-//     vector<string> TagFilters;
+    string TagFilter;  // support multiple ?
 
     // -----------------------------------
     // AlignmentFlag filter opts
@@ -175,13 +332,14 @@ struct FilterTool::FilterSettings {
         , HasOutputBamFilename(false)
         , HasRegion(false)
         , HasScriptFilename(false)
+        , IsForceCompression(false)
         , OutputFilename(Options::StandardOut())
         , HasAlignmentFlagFilter(false)
         , HasInsertSizeFilter(false)
         , HasMapQualityFilter(false)
         , HasNameFilter(false)
         , HasQueryBasesFilter(false)
-//         , HasTagFilters(false)
+        , HasTagFilter(false)
         , HasIsDuplicateFilter(false)
         , HasIsFailedQCFilter(false)
         , HasIsFirstMateFilter(false)
@@ -216,22 +374,22 @@ FilterTool::FilterTool(void)
     , m_impl(0)
 {
     // set program details
-    Options::SetProgramInfo("bamtools filter", "filters BAM file(s)", "-in <filename> [-in <filename> ... ] -out <filename> -region <REGION> [ [-script <filename] | [filterOptions] ]");
+    Options::SetProgramInfo("bamtools filter", "filters BAM file(s)", "[-in <filename> -in <filename> ...] [-out <filename> | [-forceCompression]] [-region <REGION>] [ [-script <filename] | [filterOptions] ]");
     
     OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
     Options::AddValueOption("-in",     "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename,  m_settings->InputFiles,     IO_Opts, Options::StandardIn());
     Options::AddValueOption("-out",    "BAM filename", "the output BAM file",   "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
-    Options::AddValueOption("-region", "REGION",       "only read data from this genomic region (see README for more details)", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
-    Options::AddValueOption("-script", "filename",     "the filter script file (see README for more details)", "", m_settings->HasScriptFilename, m_settings->ScriptFilename, IO_Opts);
+    Options::AddValueOption("-region", "REGION",       "only read data from this genomic region (see documentation for more details)", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
+    Options::AddValueOption("-script", "filename",     "the filter script file (see documentation for more details)", "", m_settings->HasScriptFilename, m_settings->ScriptFilename, IO_Opts);
+    Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts);
     
     OptionGroup* FilterOpts = Options::CreateOptionGroup("General Filters");
-    Options::AddValueOption("-alignmentFlag", "int",     "keep reads with this *exact* alignment flag (for more detailed queries, see below)", "", m_settings->HasAlignmentFlagFilter, m_settings->AlignmentFlagFilter, FilterOpts);
-    Options::AddValueOption("-insertSize",    "int",     "keep reads with insert size that mathces pattern",             "", m_settings->HasInsertSizeFilter, m_settings->InsertSizeFilter, FilterOpts);
-    Options::AddValueOption("-mapQuality",    "[0-255]", "keep reads with map quality that matches pattern",             "", m_settings->HasMapQualityFilter, m_settings->MapQualityFilter, FilterOpts);
-    Options::AddValueOption("-name",          "string",  "keep reads with name that matches pattern",                    "", m_settings->HasNameFilter,       m_settings->NameFilter,       FilterOpts);
-    Options::AddValueOption("-queryBases",    "string",  "keep reads with motif that mathces pattern",                   "", m_settings->HasQueryBasesFilter, m_settings->QueryBasesFilter, FilterOpts);
-    
-//     Options::AddValueOption("-tag", "TAG:VALUE", "keep reads with this key=>value pair. If multiple tags are given, reads must match all", "", m_settings->HasTagFilters, m_settings->TagFilters, FilterOpts);
+    Options::AddValueOption("-alignmentFlag", "int",        "keep reads with this *exact* alignment flag (for more detailed queries, see below)", "", m_settings->HasAlignmentFlagFilter, m_settings->AlignmentFlagFilter, FilterOpts);
+    Options::AddValueOption("-insertSize",    "int",        "keep reads with insert size that mathces pattern",             "", m_settings->HasInsertSizeFilter, m_settings->InsertSizeFilter, FilterOpts);
+    Options::AddValueOption("-mapQuality",    "[0-255]",    "keep reads with map quality that matches pattern",             "", m_settings->HasMapQualityFilter, m_settings->MapQualityFilter, FilterOpts);
+    Options::AddValueOption("-name",          "string",     "keep reads with name that matches pattern",                    "", m_settings->HasNameFilter,       m_settings->NameFilter,       FilterOpts);
+    Options::AddValueOption("-queryBases",    "string",     "keep reads with motif that mathces pattern",                   "", m_settings->HasQueryBasesFilter, m_settings->QueryBasesFilter, FilterOpts);
+    Options::AddValueOption("-tag",           "TAG:VALUE",  "keep reads with this key=>value pair",                         "", m_settings->HasTagFilter,        m_settings->TagFilter,        FilterOpts);
     
     OptionGroup* AlignmentFlagOpts = Options::CreateOptionGroup("Alignment Flag Filters");
     Options::AddValueOption("-isDuplicate",         "true/false", "keep only alignments that are marked as duplicate?", "", m_settings->HasIsDuplicateFilter,         m_settings->IsDuplicateFilter,         AlignmentFlagOpts, TRUE_STR);
@@ -283,9 +441,10 @@ FilterTool::FilterToolPrivate::FilterToolPrivate(FilterTool::FilterSettings* set
 // destructor
 FilterTool::FilterToolPrivate::~FilterToolPrivate(void) { }
 
-bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filterName, const map<string, string>& propertyTokens) {
-
-  
+bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filterName,
+                                                              const map<string,
+                                                              string>& propertyTokens)
+{
     // dummy temp values for token parsing
     bool boolValue;
     int32_t int32Value;
@@ -320,8 +479,8 @@ bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filt
              propertyName == ISSECONDMATE_PROPERTY
            ) 
         {
-            FilterEngine::parseToken(token, boolValue, type);
-            FilterEngine::setProperty(filterName, propertyName, boolValue, type);
+            FilterEngine<BamAlignmentChecker>::parseToken(token, boolValue, type);
+            m_filterEngine.setProperty(filterName, propertyName, boolValue, type);
         }
         
         // int32_t conversion
@@ -330,38 +489,45 @@ bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filt
                   propertyName == POSITION_PROPERTY 
                 ) 
         {
-            FilterEngine::parseToken(token, int32Value, type);
-            FilterEngine::setProperty(filterName, propertyName, int32Value, type);
+            FilterEngine<BamAlignmentChecker>::parseToken(token, int32Value, type);
+            m_filterEngine.setProperty(filterName, propertyName, int32Value, type);
         }
         
         // uint16_t conversion
         else if ( propertyName == MAPQUALITY_PROPERTY )
         {
-            FilterEngine::parseToken(token, uint16Value, type);
-            FilterEngine::setProperty(filterName, propertyName, uint16Value, type);
+            FilterEngine<BamAlignmentChecker>::parseToken(token, uint16Value, type);
+            m_filterEngine.setProperty(filterName, propertyName, uint16Value, type);
         }
         
         // uint32_t conversion
         else if ( propertyName == ALIGNMENTFLAG_PROPERTY )
         {
-            FilterEngine::parseToken(token, uint32Value, type);
-            FilterEngine::setProperty(filterName, propertyName, uint32Value, type);
+            FilterEngine<BamAlignmentChecker>::parseToken(token, uint32Value, type);
+            m_filterEngine.setProperty(filterName, propertyName, uint32Value, type);
         }
         
         // string conversion
-        else if ( propertyName == MATEREFERENCE_PROPERTY ||
+        else if ( propertyName == CIGAR_PROPERTY || 
+                 propertyName == MATEREFERENCE_PROPERTY ||
                   propertyName == NAME_PROPERTY ||
                   propertyName == QUERYBASES_PROPERTY ||
-                  propertyName == REFERENCE_PROPERTY
+                  propertyName == REFERENCE_PROPERTY 
                 ) 
         {
-            FilterEngine::parseToken(token, stringValue, type);
-            FilterEngine::setProperty(filterName, propertyName, stringValue, type);
+            FilterEngine<BamAlignmentChecker>::parseToken(token, stringValue, type);
+            m_filterEngine.setProperty(filterName, propertyName, stringValue, type);
         }
       
+    else if ( propertyName == TAG_PROPERTY ) {
+           // this will be stored directly as the TAG:VALUE token 
+           // (VALUE may contain compare ops, will be parsed out later)
+           m_filterEngine.setProperty(filterName, propertyName, token, PropertyFilterValue::EXACT);
+       }
+      
         // else unknown property 
         else {
-            cerr << "Unknown property: " << propertyName << "!" << endl;
+            cerr << "bamtools filter ERROR: unknown property - " << propertyName << endl;
             return false;
         }
     }
@@ -369,55 +535,7 @@ bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filt
 }
 
 bool FilterTool::FilterToolPrivate::CheckAlignment(const BamAlignment& al) {
-
-    bool keepAlignment = true;
-  
-    // only consider properties that are actually enabled
-    // iterate over these enabled properties
-    const vector<string> enabledProperties = FilterEngine::enabledPropertyNames();
-    vector<string>::const_iterator propIter = enabledProperties.begin();
-    vector<string>::const_iterator propEnd  = enabledProperties.end();
-    for ( ; propIter != propEnd; ++propIter ) {
-     
-        // check alignment data field depending on propertyName
-        const string& propertyName = (*propIter);
-        if      ( propertyName == ALIGNMENTFLAG_PROPERTY )        keepAlignment &= FilterEngine::check(ALIGNMENTFLAG_PROPERTY,       al.AlignmentFlag);
-        else if ( propertyName == INSERTSIZE_PROPERTY )           keepAlignment &= FilterEngine::check(INSERTSIZE_PROPERTY,          al.InsertSize);
-        else if ( propertyName == ISDUPLICATE_PROPERTY )          keepAlignment &= FilterEngine::check(ISDUPLICATE_PROPERTY,         al.IsDuplicate());
-        else if ( propertyName == ISFAILEDQC_PROPERTY )           keepAlignment &= FilterEngine::check(ISFAILEDQC_PROPERTY,          al.IsFailedQC());
-        else if ( propertyName == ISFIRSTMATE_PROPERTY )          keepAlignment &= FilterEngine::check(ISFIRSTMATE_PROPERTY,         al.IsFirstMate());
-        else if ( propertyName == ISMAPPED_PROPERTY )             keepAlignment &= FilterEngine::check(ISMAPPED_PROPERTY,            al.IsMapped());
-        else if ( propertyName == ISMATEMAPPED_PROPERTY )         keepAlignment &= FilterEngine::check(ISMATEMAPPED_PROPERTY,        al.IsMateMapped());
-        else if ( propertyName == ISMATEREVERSESTRAND_PROPERTY )  keepAlignment &= FilterEngine::check(ISMATEREVERSESTRAND_PROPERTY, al.IsMateReverseStrand());
-        else if ( propertyName == ISPAIRED_PROPERTY )             keepAlignment &= FilterEngine::check(ISPAIRED_PROPERTY,            al.IsPaired());
-        else if ( propertyName == ISPRIMARYALIGNMENT_PROPERTY )   keepAlignment &= FilterEngine::check(ISPRIMARYALIGNMENT_PROPERTY,  al.IsPrimaryAlignment());
-        else if ( propertyName == ISPROPERPAIR_PROPERTY )         keepAlignment &= FilterEngine::check(ISPROPERPAIR_PROPERTY,        al.IsProperPair());
-        else if ( propertyName == ISREVERSESTRAND_PROPERTY )      keepAlignment &= FilterEngine::check(ISREVERSESTRAND_PROPERTY,     al.IsReverseStrand());
-        else if ( propertyName == ISSECONDMATE_PROPERTY )         keepAlignment &= FilterEngine::check(ISSECONDMATE_PROPERTY,        al.IsSecondMate());
-        else if ( propertyName == MAPQUALITY_PROPERTY )           keepAlignment &= FilterEngine::check(MAPQUALITY_PROPERTY,          al.MapQuality);
-        else if ( propertyName == MATEPOSITION_PROPERTY )         keepAlignment &= ( al.IsPaired() && al.IsMateMapped() && FilterEngine::check(MATEPOSITION_PROPERTY, al.MateRefID) );
-        else if ( propertyName == MATEREFERENCE_PROPERTY ) {
-            if ( !al.IsPaired() || !al.IsMateMapped() ) return false;
-            BAMTOOLS_ASSERT_MESSAGE( (al.MateRefID>=0 && (al.MateRefID<(int)m_references.size())), "Invalid MateRefID");
-            const string& refName = m_references.at(al.MateRefID).RefName;
-            keepAlignment &= FilterEngine::check(MATEREFERENCE_PROPERTY, refName);
-        }
-        else if ( propertyName == NAME_PROPERTY )                 keepAlignment &= FilterEngine::check(NAME_PROPERTY,                al.Name);
-        else if ( propertyName == POSITION_PROPERTY )             keepAlignment &= FilterEngine::check(POSITION_PROPERTY,            al.Position);
-        else if ( propertyName == QUERYBASES_PROPERTY )           keepAlignment &= FilterEngine::check(QUERYBASES_PROPERTY,          al.QueryBases);
-        else if ( propertyName == REFERENCE_PROPERTY ) {
-            BAMTOOLS_ASSERT_MESSAGE( (al.RefID>=0 && (al.RefID<(int)m_references.size())), "Invalid RefID");
-            const string& refName = m_references.at(al.RefID).RefName;
-            keepAlignment &= FilterEngine::check(REFERENCE_PROPERTY, refName);
-        }
-        else BAMTOOLS_ASSERT_MESSAGE( false, "Unknown property");
-        
-        // if alignment fails at ANY point, just quit and return false
-        if ( !keepAlignment ) return false;
-    }
-  
-    // return success (should still be true at this point)
-    return keepAlignment;
+    return m_filterEngine.check(al);
 }
 
 const string FilterTool::FilterToolPrivate::GetScriptContents(void) {
@@ -425,7 +543,8 @@ const string FilterTool::FilterToolPrivate::GetScriptContents(void) {
     // open script for reading
     FILE* inFile = fopen(m_settings->ScriptFilename.c_str(), "rb");
     if ( !inFile ) {
-        cerr << "FilterTool error: Could not open script: " << m_settings->ScriptFilename << " for reading" << endl;
+        cerr << "bamtools filter ERROR: could not open script: "
+             << m_settings->ScriptFilename << " for reading" << endl;
         return false;
     }
     
@@ -441,7 +560,7 @@ const string FilterTool::FilterToolPrivate::GetScriptContents(void) {
         
         // read next block of data
         if ( fgets(buffer, 1024, inFile) == 0 ) {
-            cerr << "FilterTool error : could not read from script" << endl;
+            cerr << "bamtools filter ERROR: could not read script contents" << endl;
             return false;
         }
         
@@ -460,6 +579,7 @@ void FilterTool::FilterToolPrivate::InitProperties(void) {
   
     // store property names in vector 
     m_propertyNames.push_back(ALIGNMENTFLAG_PROPERTY);
+    m_propertyNames.push_back(CIGAR_PROPERTY);
     m_propertyNames.push_back(INSERTSIZE_PROPERTY);
     m_propertyNames.push_back(ISDUPLICATE_PROPERTY);
     m_propertyNames.push_back(ISFAILEDQC_PROPERTY);
@@ -479,19 +599,20 @@ void FilterTool::FilterToolPrivate::InitProperties(void) {
     m_propertyNames.push_back(POSITION_PROPERTY);
     m_propertyNames.push_back(QUERYBASES_PROPERTY);
     m_propertyNames.push_back(REFERENCE_PROPERTY);
+    m_propertyNames.push_back(TAG_PROPERTY);
     
-    // add vector contents to FilterEngine
+    // add vector contents to FilterEngine<BamAlignmentChecker>
     vector<string>::const_iterator propertyNameIter = m_propertyNames.begin();
     vector<string>::const_iterator propertyNameEnd  = m_propertyNames.end();
     for ( ; propertyNameIter != propertyNameEnd; ++propertyNameIter )
-        FilterEngine::addProperty((*propertyNameIter));
+        m_filterEngine.addProperty((*propertyNameIter));
 }
 
 bool FilterTool::FilterToolPrivate::ParseCommandLine(void) {
   
     // add a rule set to filter engine
     const string CMD = "COMMAND_LINE";
-    FilterEngine::addFilter(CMD);
+    m_filterEngine.addFilter(CMD);
 
     // map property names to command line args
     map<string, string> propertyTokens;
@@ -511,7 +632,8 @@ bool FilterTool::FilterToolPrivate::ParseCommandLine(void) {
     if ( m_settings->HasMapQualityFilter )          propertyTokens.insert( make_pair(MAPQUALITY_PROPERTY,          m_settings->MapQualityFilter) );
     if ( m_settings->HasNameFilter )                propertyTokens.insert( make_pair(NAME_PROPERTY,                m_settings->NameFilter) );
     if ( m_settings->HasQueryBasesFilter )          propertyTokens.insert( make_pair(QUERYBASES_PROPERTY,          m_settings->QueryBasesFilter) );
-
+    if ( m_settings->HasTagFilter )                 propertyTokens.insert( make_pair(TAG_PROPERTY,                 m_settings->TagFilter) );
+    
     // send add these properties to filter set "COMMAND_LINE"
     return AddPropertyTokensToFilter(CMD, propertyTokens);
 }
@@ -538,7 +660,7 @@ bool FilterTool::FilterToolPrivate::ParseFilterObject(const string& filterName,
     }
   
     // add this filter to engin
-    FilterEngine::addFilter(filterName);
+    m_filterEngine.addFilter(filterName);
   
     // add token list to this filter
     return AddPropertyTokensToFilter(filterName, propertyTokens);
@@ -554,16 +676,18 @@ bool FilterTool::FilterToolPrivate::ParseScript(void) {
     Json::Reader reader;
     if ( !reader.parse(document, root) ) {
         // use built-in error reporting mechanism to alert user what was wrong with the script
-        cerr  << "Failed to parse configuration\n" << reader.getFormatedErrorMessages();
+        cerr  << "bamtools filter ERROR: failed to parse script - see error message(s) below" << endl
+              << reader.getFormatedErrorMessages();
         return false;     
     }
 
+    // initialize return status
+    bool success = true;
+    
     // see if root object contains multiple filters
     const Json::Value filters = root["filters"];
     if ( !filters.isNull() ) {
       
-        bool success = true;
-      
         // iterate over any filters found
         int filterIndex = 0;
         Json::Value::const_iterator filtersIter = filters.begin();
@@ -572,159 +696,153 @@ bool FilterTool::FilterToolPrivate::ParseScript(void) {
             Json::Value filter = (*filtersIter);
             
             // convert filter index to string
-            stringstream convert;
-            convert << filterIndex;
-            const string filterName = convert.str();
+            string filterName;
+            
+            // if id tag supplied
+            const Json::Value id = filter["id"];
+            if ( !id.isNull() ) 
+                filterName = id.asString();
+            
+            // use array index 
+            else {
+                stringstream convert;
+                convert << filterIndex;
+                filterName = convert.str();
+            }
             
             // create & parse filter 
             success &= ParseFilterObject(filterName, filter);
-            
         }
+        
+        // see if user defined a "rule" for these filters
+        // otherwise, use filter engine's default rule behavior
+        string ruleString("");
+        const Json::Value rule = root["rule"];
+        if ( rule.isString() )
+            ruleString = rule.asString();
+        m_filterEngine.setRule(ruleString);
+          
+        // return success/fail
         return success;
     } 
     
     // otherwise, root is the only filter (just contains properties)
     // create & parse filter named "ROOT"
-    else return ParseFilterObject("ROOT", root);
+    else success = ParseFilterObject("ROOT", root);
+    
+    // return success/failure
+    return success;
 }
 
-
 bool FilterTool::FilterToolPrivate::Run(void) {
     
     // set to default input if none provided
     if ( !m_settings->HasInputBamFilename ) 
         m_settings->InputFiles.push_back(Options::StandardIn());
-    
+
     // initialize defined properties & user-specified filters
     // quit if failed
-    if ( !SetupFilters() ) return 1;
+    if ( !SetupFilters() ) return false;
 
     // open reader without index
     BamMultiReader reader;
-    reader.Open(m_settings->InputFiles, false, true);
+    if ( !reader.Open(m_settings->InputFiles) ) {
+        cerr << "bamtools filter ERROR: could not open input files for reading." << endl;
+        return false;
+    }
+
+    // retrieve reader header & reference data
     const string headerText = reader.GetHeaderText();
-    m_references = reader.GetReferenceData();
+    filterToolReferences = reader.GetReferenceData();
     
-    // open writer
+    // determine compression mode for BamWriter
+    bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() &&
+                              !m_settings->IsForceCompression );
+    BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
+    if ( writeUncompressed ) compressionMode = BamWriter::Uncompressed;
+
+    // open BamWriter
     BamWriter writer;
-    writer.Open(m_settings->OutputFilename, headerText, m_references);
-    
-    // set up error handling
-    ostringstream errorStream("");
-    bool foundError(false);
-    
-    // if no REGION specified, run filter on entire file contents
+    writer.SetCompressionMode(compressionMode);
+    if ( !writer.Open(m_settings->OutputFilename, headerText, filterToolReferences) ) {
+        cerr << "bamtools filter ERROR: could not open " << m_settings->OutputFilename << " for writing." << endl;
+        reader.Close();
+        return false;
+    }
+
+    // if no region specified, filter entire file 
+    BamAlignment al;
     if ( !m_settings->HasRegion ) {
-        BamAlignment al;
         while ( reader.GetNextAlignment(al) ) {
-            // perform main filter check
             if ( CheckAlignment(al) ) 
                 writer.SaveAlignment(al);
         }
     }
     
-    // REGION specified
+    // otherwise attempt to use region as constraint
     else {
-      
-        // attempt to parse string into BamRegion struct
+        
+        // if region string parses OK
         BamRegion region;
         if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
 
-            // check if there are index files *.bai/*.bti corresponding to the input files
-            bool hasDefaultIndex   = false;
-            bool hasBamtoolsIndex  = false;
-            bool hasNoIndex        = false;
-            int defaultIndexCount   = 0;
-            int bamtoolsIndexCount = 0;
-            for (vector<string>::const_iterator f = m_settings->InputFiles.begin(); f != m_settings->InputFiles.end(); ++f) {
+            // attempt to find index files
+            reader.LocateIndexes();
+
+            // if index data available for all BAM files, we can use SetRegion
+            if ( reader.HasIndexes() ) {
+
+                // attempt to use SetRegion(), if failed report error
+                if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) {
+                    cerr << "bamtools filter ERROR: set region failed. Check that REGION describes a valid range" << endl;
+                    reader.Close();
+                    return false;
+                } 
               
-                if ( Utilities::FileExists(*f + ".bai") ) {
-                    hasDefaultIndex = true;
-                    ++defaultIndexCount;
-                }       
-                
-                if ( Utilities::FileExists(*f + ".bti") ) {
-                    hasBamtoolsIndex = true;
-                    ++bamtoolsIndexCount;
+                // everything checks out, just iterate through specified region, filtering alignments
+                while ( reader.GetNextAlignment(al) )
+                    if ( CheckAlignment(al) ) 
+                        writer.SaveAlignment(al);
                 }
-                  
-                if ( !hasDefaultIndex && !hasBamtoolsIndex ) {
-                    hasNoIndex = true;
-                    cerr << "*WARNING - could not find index file for " << *f  
-                         << ", parsing whole file(s) to get alignment counts for target region" 
-                         << " (could be slow)" << endl;
-                    break;
-                }
-            }
             
-            // determine if index file types are heterogeneous
-            bool hasDifferentIndexTypes = false;
-            if ( defaultIndexCount > 0 && bamtoolsIndexCount > 0 ) {
-                hasDifferentIndexTypes = true;
-                cerr << "*WARNING - different index file formats found"  
-                         << ", parsing whole file(s) to get alignment counts for target region" 
-                         << " (could be slow)" << endl;
-            }
-            
-            // if any input file has no index, or if input files use different index formats
-            // can't use BamMultiReader to jump directly (**for now**)
-            if ( hasNoIndex || hasDifferentIndexTypes ) {
-                
-                // read through sequentially, but onlt perform filter on reads overlapping REGION
-                BamAlignment al;
-                while( reader.GetNextAlignment(al) ) {
-                    if ( (al.RefID >= region.LeftRefID)  && ( (al.Position + al.Length) >= region.LeftPosition ) &&
+            // no index data available, we have to iterate through until we
+            // find overlapping alignments
+            else {
+                while ( reader.GetNextAlignment(al) ) {
+                    if ( (al.RefID >= region.LeftRefID)  && ((al.Position + al.Length) >= region.LeftPosition) &&
                          (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) 
                     {
-                        // perform main filter check
                         if ( CheckAlignment(al) ) 
                             writer.SaveAlignment(al);
                     }
                 }
             }
-            
-            // has index file for each input file (and same format)
-            else {
-              
-                // this is kind of a hack...?
-                BamMultiReader reader;
-                reader.Open(m_settings->InputFiles, true, true, hasDefaultIndex );
-              
-                if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) {
-                   foundError = true;
-                   errorStream << "Could not set BamReader region to REGION: " << m_settings->Region << endl;
-                } else {
-                  
-                    // filter only alignments from specified region
-                    BamAlignment al;
-                    while ( reader.GetNextAlignment(al) ) {
-                        // perform main filter check
-                        if ( CheckAlignment(al) ) 
-                            writer.SaveAlignment(al);
-                    }
-                }
-            }
-            
-        } else {
-            foundError = true;
-            errorStream << "Could not parse REGION: " << m_settings->Region << endl;
-            errorStream << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl;
+        } 
+        
+        // error parsing REGION string
+        else {
+            cerr << "bamtools filter ERROR: could not parse REGION: " << m_settings->Region << endl;
+            cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid"
+                 << endl;
+            reader.Close();
+            return false;
         }
     }
 
     // clean up & exit
     reader.Close();
     writer.Close();
-    return 0;
+    return true;
 }
 
 bool FilterTool::FilterToolPrivate::SetupFilters(void) {
   
-    // add known properties to FilterEngine
+    // set up filter engine with supported properties
     InitProperties();
     
     // parse script for filter rules, if given
-    if ( m_settings->HasScriptFilename ) return ParseScript();
+    if ( m_settings->HasScriptFilename )
+        return ParseScript();
     
     // otherwise check command line for filters
     else return ParseCommandLine();