]> git.donarmstrong.com Git - bamtools.git/blobdiff - src/toolkit/bamtools_filter.cpp
Fixed: error in FilterTool - mishandling of ASCII tag values
[bamtools.git] / src / toolkit / bamtools_filter.cpp
index b57cf0534d1ba4dfd15acd26d5f4dadf34148453..8334b59c6d3863b5cda49f9d4697a1a431a73cae 100644 (file)
@@ -1,29 +1,31 @@
 // ***************************************************************************
 // bamtools_filter.cpp (c) 2010 Derek Barnett, Erik Garrison
 // Marth Lab, Department of Biology, Boston College
-// All rights reserved.
 // ---------------------------------------------------------------------------
-// Last modified: 17 November 2010
+// Last modified: 27 April 2013
 // ---------------------------------------------------------------------------
-// Filters BAM file(s) according to some user-specified criteria.
+// Filters BAM file(s) according to some user-specified criteria
 // ***************************************************************************
 
+#include "bamtools_filter.h"
+
+#include <api/BamMultiReader.h>
+#include <api/BamWriter.h>
+#include <utils/bamtools_filter_engine.h>
+#include <utils/bamtools_options.h>
+#include <utils/bamtools_utilities.h>
+using namespace BamTools;
+
+#include <jsoncpp/json.h>
+using namespace Json;
+
 #include <cstdio>
+#include <fstream>
 #include <iostream>
 #include <sstream>
 #include <string>
 #include <vector>
-#include "bamtools_filter.h"
-#include "bamtools_filter_engine.h"
-#include "bamtools_options.h"
-#include "bamtools_utilities.h"
-#include "BamReader.h"
-#include "BamMultiReader.h"
-#include "BamWriter.h"
-#include "jsoncpp/json.h"
 using namespace std;
-using namespace BamTools; 
-using namespace Json;
   
 namespace BamTools {
   
@@ -45,6 +47,7 @@ const string ISPRIMARYALIGNMENT_PROPERTY  = "isPrimaryAlignment";
 const string ISPROPERPAIR_PROPERTY        = "isProperPair";
 const string ISREVERSESTRAND_PROPERTY     = "isReverseStrand";
 const string ISSECONDMATE_PROPERTY        = "isSecondMate";
+const string ISSINGLETON_PROPERTY         = "isSingleton";
 const string MAPQUALITY_PROPERTY          = "mapQuality";
 const string MATEPOSITION_PROPERTY        = "matePosition";
 const string MATEREFERENCE_PROPERTY       = "mateReference";
@@ -74,7 +77,7 @@ struct BamAlignmentChecker {
             const PropertyFilterValue& valueFilter = (*propertyIter).second;
             
             if      ( propertyName == ALIGNMENTFLAG_PROPERTY )  keepAlignment &= valueFilter.check(al.AlignmentFlag);
-           else if ( propertyName == CIGAR_PROPERTY ) {
+            else if ( propertyName == CIGAR_PROPERTY ) {
                 stringstream cigarSs;
                 const vector<CigarOp>& cigarData = al.CigarData;
                 if ( !cigarData.empty() ) {
@@ -100,6 +103,10 @@ struct BamAlignmentChecker {
             else if ( propertyName == ISPROPERPAIR_PROPERTY )         keepAlignment &= valueFilter.check(al.IsProperPair());
             else if ( propertyName == ISREVERSESTRAND_PROPERTY )      keepAlignment &= valueFilter.check(al.IsReverseStrand());
             else if ( propertyName == ISSECONDMATE_PROPERTY )         keepAlignment &= valueFilter.check(al.IsSecondMate());
+            else if ( propertyName == ISSINGLETON_PROPERTY ) {
+                const bool isSingleton = al.IsPaired() && al.IsMapped() && !al.IsMateMapped();
+                keepAlignment &= valueFilter.check(isSingleton);
+            }
             else if ( propertyName == MAPQUALITY_PROPERTY )           keepAlignment &= valueFilter.check(al.MapQuality);
             else if ( propertyName == MATEPOSITION_PROPERTY )         keepAlignment &= ( al.IsPaired() && al.IsMateMapped() && valueFilter.check(al.MateRefID) );
             else if ( propertyName == MATEREFERENCE_PROPERTY ) {
@@ -129,130 +136,110 @@ struct BamAlignmentChecker {
     
     bool checkAlignmentTag(const PropertyFilterValue& valueFilter, const BamAlignment& al) {
      
-       // ensure filter contains string data
-       Variant entireTagFilter = valueFilter.Value;
-       if ( !entireTagFilter.is_type<string>() ) return false;
-       
-       // localize string from variant
-       const string& entireTagFilterString = entireTagFilter.get<string>();
-       
-       // ensure we have at least "XX:x"
-       if ( entireTagFilterString.length() < 4 ) return false;
-       
-       // get tagName & lookup in alignment
-       // if found, set tagType to tag type character
-       // if not found, return false
-       const string& tagName = entireTagFilterString.substr(0,2);
-       char tagType = '\0';
-       if ( !al.GetTagType(tagName, tagType) ) return false;
-         
-       // remove tagName & ":" from beginning tagFilter
-       string tagFilterString = entireTagFilterString.substr(3);
-         
-       // switch on tag type to set tag query value & parse filter token
-       int32_t  intFilterValue,    intQueryValue;
-       uint32_t uintFilterValue,   uintQueryValue;
-       float    realFilterValue,   realQueryValue;
-       string   stringFilterValue, stringQueryValue;
-       
-       PropertyFilterValue tagFilter;
-       PropertyFilterValue::ValueCompareType compareType;
-       bool keepAlignment = false;
-       switch (tagType) {
-            
-           // signed int tag type
-           case 'c' :
-            case 's' : 
+        // ensure filter contains string data
+        Variant entireTagFilter = valueFilter.Value;
+        if ( !entireTagFilter.is_type<string>() ) return false;
+
+        // localize string from variant
+        const string& entireTagFilterString = entireTagFilter.get<string>();
+
+        // ensure we have at least "XX:x"
+        if ( entireTagFilterString.length() < 4 ) return false;
+
+        // get tagName & lookup in alignment
+        // if found, set tagType to tag type character
+        // if not found, return false
+        const string& tagName = entireTagFilterString.substr(0,2);
+        char tagType = '\0';
+        if ( !al.GetTagType(tagName, tagType) ) return false;
+
+        // remove tagName & ":" from beginning tagFilter
+        string tagFilterString = entireTagFilterString.substr(3);
+
+        // switch on tag type to set tag query value & parse filter token
+        int8_t   asciiFilterValue,   asciiQueryValue;
+        int32_t  intFilterValue,    intQueryValue;
+        uint32_t uintFilterValue,   uintQueryValue;
+        float    realFilterValue,   realQueryValue;
+        string   stringFilterValue, stringQueryValue;
+
+        PropertyFilterValue tagFilter;
+        PropertyFilterValue::ValueCompareType compareType;
+        bool keepAlignment = false;
+        switch (tagType) {
+
+            // ASCII tag type
+            case 'A':
+                if ( al.GetTag(tagName, asciiQueryValue) ) {
+                    if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, asciiFilterValue, compareType) ) {
+                        tagFilter.Value = asciiFilterValue;
+                        tagFilter.Type  = compareType;
+                        keepAlignment   = tagFilter.check(asciiQueryValue);
+                    }
+                }
+                break;
+
+            // signed int tag type
+            case 'c' :
+            case 's' :
             case 'i' :
-               if ( al.GetTag(tagName, intQueryValue) ) {
-                   if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, intFilterValue, compareType) ) {
-                       tagFilter.Value = intFilterValue;
-                       tagFilter.Type  = compareType;
-                       keepAlignment   = tagFilter.check(intQueryValue);
-                   }
-               }
-               break;
-               
-           // unsigned int tag type
+                if ( al.GetTag(tagName, intQueryValue) ) {
+                    if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, intFilterValue, compareType) ) {
+                        tagFilter.Value = intFilterValue;
+                        tagFilter.Type  = compareType;
+                        keepAlignment   = tagFilter.check(intQueryValue);
+                    }
+                }
+                break;
+
+            // unsigned int tag type
             case 'C' :
             case 'S' :
-            case 'I' : 
-               if ( al.GetTag(tagName, uintQueryValue) ) {
-                   if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, uintFilterValue, compareType) ) {
-                       tagFilter.Value = uintFilterValue;
-                       tagFilter.Type  = compareType;
-                       keepAlignment   = tagFilter.check(uintQueryValue);
-                   }
-               }
-               break;
-               
-           // 'real' tag type
+            case 'I' :
+                if ( al.GetTag(tagName, uintQueryValue) ) {
+                    if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, uintFilterValue, compareType) ) {
+                        tagFilter.Value = uintFilterValue;
+                        tagFilter.Type  = compareType;
+                        keepAlignment   = tagFilter.check(uintQueryValue);
+                    }
+                }
+                break;
+
+            // 'real' tag type
             case 'f' :
-               if ( al.GetTag(tagName, realQueryValue) ) {
-                   if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, realFilterValue, compareType) ) {
-                       tagFilter.Value = realFilterValue;
-                       tagFilter.Type  = compareType;
-                       keepAlignment   = tagFilter.check(realQueryValue);
-                   }
-               }
+                if ( al.GetTag(tagName, realQueryValue) ) {
+                    if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, realFilterValue, compareType) ) {
+                        tagFilter.Value = realFilterValue;
+                        tagFilter.Type  = compareType;
+                        keepAlignment   = tagFilter.check(realQueryValue);
+                    }
+                }
                 break;
-               
-           // string tag type
-            case 'A':
+
+            // string tag type
+
             case 'Z':
-            case 'H':        
+            case 'H':
                 if ( al.GetTag(tagName, stringQueryValue) ) {
-                   if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, stringFilterValue, compareType) ) {
-                       tagFilter.Value = stringFilterValue;
-                       tagFilter.Type  = compareType;
-                       keepAlignment   = tagFilter.check(stringQueryValue);
-                   }
-               }
-               break;
-               
-           // unknown tag type
-           default : 
-               keepAlignment = false;
-       }
-       
-       return keepAlignment;
+                    if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, stringFilterValue, compareType) ) {
+                        tagFilter.Value = stringFilterValue;
+                        tagFilter.Type  = compareType;
+                        keepAlignment   = tagFilter.check(stringQueryValue);
+                    }
+                }
+                break;
+
+            // unknown tag type
+            default :
+                keepAlignment = false;
+        }
+
+        return keepAlignment;
     }
 };    
     
 } // namespace BamTools
   
-// ---------------------------------------------
-// FilterToolPrivate declaration
-
-class FilterTool::FilterToolPrivate {
-      
-    // ctor & dtor
-    public:
-        FilterToolPrivate(FilterTool::FilterSettings* settings);
-        ~FilterToolPrivate(void);  
-        
-    // 'public' interface
-    public:
-        bool Run(void);
-        
-    // internal methods
-    private:
-        bool AddPropertyTokensToFilter(const string& filterName, const map<string, string>& propertyTokens);
-        bool CheckAlignment(const BamAlignment& al);
-        const string GetScriptContents(void);
-        void InitProperties(void);
-        bool ParseCommandLine(void);
-        bool ParseFilterObject(const string& filterName, const Json::Value& filterObject);
-        bool ParseScript(void);
-        bool SetupFilters(void);
-        
-    // data members
-    private:
-        vector<string> m_propertyNames;
-        FilterTool::FilterSettings* m_settings;
-        FilterEngine<BamAlignmentChecker> m_filterEngine;
-};
-  
 // ---------------------------------------------
 // FilterSettings implementation
 
@@ -260,23 +247,25 @@ struct FilterTool::FilterSettings {
 
     // ----------------------------------
     // IO opts
-  
+
     // flags
-    bool HasInputBamFilename;
-    bool HasOutputBamFilename;
+    bool HasInput;
+    bool HasInputFilelist;
+    bool HasOutput;
     bool HasRegion;
-    bool HasScriptFilename;
+    bool HasScript;
     bool IsForceCompression;
-    
+
     // filenames
     vector<string> InputFiles;
+    string InputFilelist;
     string OutputFilename;
     string Region;
     string ScriptFilename;
-    
+
     // -----------------------------------
     // General filter opts
-    
+
     // flags
     bool HasAlignmentFlagFilter;
     bool HasInsertSizeFilter;
@@ -295,7 +284,7 @@ struct FilterTool::FilterSettings {
 
     // -----------------------------------
     // AlignmentFlag filter opts
-    
+
     // flags
     bool HasIsDuplicateFilter;
     bool HasIsFailedQCFilter;
@@ -308,7 +297,8 @@ struct FilterTool::FilterSettings {
     bool HasIsProperPairFilter;
     bool HasIsReverseStrandFilter;
     bool HasIsSecondMateFilter;
-    
+    bool HasIsSingletonFilter;
+
     // filters
     string IsDuplicateFilter;
     string IsFailedQCFilter;
@@ -321,15 +311,17 @@ struct FilterTool::FilterSettings {
     string IsProperPairFilter;
     string IsReverseStrandFilter;
     string IsSecondMateFilter;
-    
+    string IsSingletonFilter;
+
     // ---------------------------------
     // constructor
-    
+
     FilterSettings(void)
-        : HasInputBamFilename(false)
-        , HasOutputBamFilename(false)
+        : HasInput(false)
+        , HasInputFilelist(false)
+        , HasOutput(false)
         , HasRegion(false)
-        , HasScriptFilename(false)
+        , HasScript(false)
         , IsForceCompression(false)
         , OutputFilename(Options::StandardOut())
         , HasAlignmentFlagFilter(false)
@@ -349,6 +341,7 @@ struct FilterTool::FilterSettings {
         , HasIsProperPairFilter(false)
         , HasIsReverseStrandFilter(false)
         , HasIsSecondMateFilter(false)
+        , HasIsSingletonFilter(false)
         , IsDuplicateFilter(TRUE_STR)
         , IsFailedQCFilter(TRUE_STR)
         , IsFirstMateFilter(TRUE_STR)
@@ -360,73 +353,41 @@ struct FilterTool::FilterSettings {
         , IsProperPairFilter(TRUE_STR)
         , IsReverseStrandFilter(TRUE_STR)
         , IsSecondMateFilter(TRUE_STR)
+        , IsSingletonFilter(TRUE_STR)
     { }
-};  
+};
 
 // ---------------------------------------------
-// FilterTool implementation
-
-FilterTool::FilterTool(void)
-    : AbstractTool()
-    , m_settings(new FilterSettings)
-    , m_impl(0)
-{
-    // set program details
-    Options::SetProgramInfo("bamtools filter", "filters BAM file(s)", "[-in <filename> -in <filename> ...] [-out <filename> | [-forceCompression]] [-region <REGION>] [ [-script <filename] | [filterOptions] ]");
-    
-    OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
-    Options::AddValueOption("-in",     "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename,  m_settings->InputFiles,     IO_Opts, Options::StandardIn());
-    Options::AddValueOption("-out",    "BAM filename", "the output BAM file",   "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
-    Options::AddValueOption("-region", "REGION",       "only read data from this genomic region (see README for more details)", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
-    Options::AddValueOption("-script", "filename",     "the filter script file (see README for more details)", "", m_settings->HasScriptFilename, m_settings->ScriptFilename, IO_Opts);
-    Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts);
-    
-    OptionGroup* FilterOpts = Options::CreateOptionGroup("General Filters");
-    Options::AddValueOption("-alignmentFlag", "int",        "keep reads with this *exact* alignment flag (for more detailed queries, see below)", "", m_settings->HasAlignmentFlagFilter, m_settings->AlignmentFlagFilter, FilterOpts);
-    Options::AddValueOption("-insertSize",    "int",        "keep reads with insert size that mathces pattern",             "", m_settings->HasInsertSizeFilter, m_settings->InsertSizeFilter, FilterOpts);
-    Options::AddValueOption("-mapQuality",    "[0-255]",    "keep reads with map quality that matches pattern",             "", m_settings->HasMapQualityFilter, m_settings->MapQualityFilter, FilterOpts);
-    Options::AddValueOption("-name",          "string",     "keep reads with name that matches pattern",                    "", m_settings->HasNameFilter,       m_settings->NameFilter,       FilterOpts);
-    Options::AddValueOption("-queryBases",    "string",     "keep reads with motif that mathces pattern",                   "", m_settings->HasQueryBasesFilter, m_settings->QueryBasesFilter, FilterOpts);
-    Options::AddValueOption("-tag",            "TAG:VALUE", "keep reads with this key=>value pair",                         "", m_settings->HasTagFilter,        m_settings->TagFilter,        FilterOpts);
-    
-    OptionGroup* AlignmentFlagOpts = Options::CreateOptionGroup("Alignment Flag Filters");
-    Options::AddValueOption("-isDuplicate",         "true/false", "keep only alignments that are marked as duplicate?", "", m_settings->HasIsDuplicateFilter,         m_settings->IsDuplicateFilter,         AlignmentFlagOpts, TRUE_STR);
-    Options::AddValueOption("-isFailedQC",          "true/false", "keep only alignments that failed QC?",               "", m_settings->HasIsFailedQCFilter,          m_settings->IsFailedQCFilter,          AlignmentFlagOpts, TRUE_STR);
-    Options::AddValueOption("-isFirstMate",         "true/false", "keep only alignments marked as first mate?",         "", m_settings->HasIsFirstMateFilter,         m_settings->IsFirstMateFilter,         AlignmentFlagOpts, TRUE_STR);
-    Options::AddValueOption("-isMapped",            "true/false", "keep only alignments that were mapped?",             "", m_settings->HasIsMappedFilter,            m_settings->IsMappedFilter,            AlignmentFlagOpts, TRUE_STR);
-    Options::AddValueOption("-isMateMapped",        "true/false", "keep only alignments with mates that mapped",        "", m_settings->HasIsMateMappedFilter,        m_settings->IsMateMappedFilter,        AlignmentFlagOpts, TRUE_STR);
-    Options::AddValueOption("-isMateReverseStrand", "true/false", "keep only alignments with mate on reverese strand?", "", m_settings->HasIsMateReverseStrandFilter, m_settings->IsMateReverseStrandFilter, AlignmentFlagOpts, TRUE_STR);
-    Options::AddValueOption("-isPaired",            "true/false", "keep only alignments that were sequenced as paired?","", m_settings->HasIsPairedFilter,            m_settings->IsPairedFilter,            AlignmentFlagOpts, TRUE_STR);
-    Options::AddValueOption("-isPrimaryAlignment",  "true/false", "keep only alignments marked as primary?",            "", m_settings->HasIsPrimaryAlignmentFilter,  m_settings->IsPrimaryAlignmentFilter,  AlignmentFlagOpts, TRUE_STR);
-    Options::AddValueOption("-isProperPair",        "true/false", "keep only alignments that passed PE resolution?",    "", m_settings->HasIsProperPairFilter,        m_settings->IsProperPairFilter,        AlignmentFlagOpts, TRUE_STR);
-    Options::AddValueOption("-isReverseStrand",     "true/false", "keep only alignments on reverse strand?",            "", m_settings->HasIsReverseStrandFilter,     m_settings->IsReverseStrandFilter,     AlignmentFlagOpts, TRUE_STR);
-    Options::AddValueOption("-isSecondMate",        "true/false", "keep only alignments marked as second mate?",        "", m_settings->HasIsSecondMateFilter,        m_settings->IsSecondMateFilter,        AlignmentFlagOpts, TRUE_STR);
-}
-
-FilterTool::~FilterTool(void) {
-    delete m_settings;
-    m_settings = 0;
-    
-    delete m_impl;
-    m_impl = 0;
-}
-
-int FilterTool::Help(void) {
-    Options::DisplayHelp();
-    return 0;
-}
+// FilterToolPrivate declaration
 
-int FilterTool::Run(int argc, char* argv[]) {
-  
-    // parse command line arguments
-    Options::Parse(argc, argv, 1);
-    
-    // run internal FilterTool implementation, return success/fail
-    m_impl = new FilterToolPrivate(m_settings);
-    
-    if ( m_impl->Run() ) return 0;
-    else return 1;
-}
+class FilterTool::FilterToolPrivate {
+      
+    // ctor & dtor
+    public:
+        FilterToolPrivate(FilterTool::FilterSettings* settings);
+        ~FilterToolPrivate(void);  
+        
+    // 'public' interface
+    public:
+        bool Run(void);
+        
+    // internal methods
+    private:
+        bool AddPropertyTokensToFilter(const string& filterName, const map<string, string>& propertyTokens);
+        bool CheckAlignment(const BamAlignment& al);
+        const string GetScriptContents(void);
+        void InitProperties(void);
+        bool ParseCommandLine(void);
+        bool ParseFilterObject(const string& filterName, const Json::Value& filterObject);
+        bool ParseScript(void);
+        bool SetupFilters(void);
+        
+    // data members
+    private:
+        vector<string> m_propertyNames;
+        FilterTool::FilterSettings* m_settings;
+        FilterEngine<BamAlignmentChecker> m_filterEngine;
+};
  
 // ---------------------------------------------
 // FilterToolPrivate implementation
@@ -439,8 +400,10 @@ FilterTool::FilterToolPrivate::FilterToolPrivate(FilterTool::FilterSettings* set
 // destructor
 FilterTool::FilterToolPrivate::~FilterToolPrivate(void) { }
 
-bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filterName, const map<string, string>& propertyTokens) {
-
+bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filterName,
+                                                              const map<string,
+                                                              string>& propertyTokens)
+{
     // dummy temp values for token parsing
     bool boolValue;
     int32_t int32Value;
@@ -472,7 +435,8 @@ bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filt
              propertyName == ISPRIMARYALIGNMENT_PROPERTY ||
              propertyName == ISPROPERPAIR_PROPERTY ||
              propertyName == ISREVERSESTRAND_PROPERTY ||
-             propertyName == ISSECONDMATE_PROPERTY
+             propertyName == ISSECONDMATE_PROPERTY ||
+             propertyName == ISSINGLETON_PROPERTY
            ) 
         {
             FilterEngine<BamAlignmentChecker>::parseToken(token, boolValue, type);
@@ -505,7 +469,7 @@ bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filt
         
         // string conversion
         else if ( propertyName == CIGAR_PROPERTY || 
-                 propertyName == MATEREFERENCE_PROPERTY ||
+                  propertyName == MATEREFERENCE_PROPERTY ||
                   propertyName == NAME_PROPERTY ||
                   propertyName == QUERYBASES_PROPERTY ||
                   propertyName == REFERENCE_PROPERTY 
@@ -515,15 +479,15 @@ bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filt
             m_filterEngine.setProperty(filterName, propertyName, stringValue, type);
         }
       
-       else if (propertyName == TAG_PROPERTY ) {
-           // this will be stored directly as the TAG:VALUE token 
-           // (VALUE may contain compare ops, will be parsed out later)
-           m_filterEngine.setProperty(filterName, propertyName, token, PropertyFilterValue::EXACT);
-       }
+        else if ( propertyName == TAG_PROPERTY ) {
+            // this will be stored directly as the TAG:VALUE token
+            // (VALUE may contain compare ops, will be parsed out later)
+            m_filterEngine.setProperty(filterName, propertyName, token, PropertyFilterValue::EXACT);
+        }
       
         // else unknown property 
         else {
-            cerr << "Unknown property: " << propertyName << "!" << endl;
+            cerr << "bamtools filter ERROR: unknown property - " << propertyName << endl;
             return false;
         }
     }
@@ -539,8 +503,9 @@ const string FilterTool::FilterToolPrivate::GetScriptContents(void) {
     // open script for reading
     FILE* inFile = fopen(m_settings->ScriptFilename.c_str(), "rb");
     if ( !inFile ) {
-        cerr << "FilterTool error: Could not open script: " << m_settings->ScriptFilename << " for reading" << endl;
-        return false;
+        cerr << "bamtools filter ERROR: could not open script: "
+             << m_settings->ScriptFilename << " for reading" << endl;
+        return string();
     }
     
     // read in entire script contents  
@@ -551,12 +516,13 @@ const string FilterTool::FilterToolPrivate::GetScriptContents(void) {
         // peek ahead, make sure there is data available
         char ch = fgetc(inFile);
         ungetc(ch, inFile);
-        if( feof(inFile) ) break;       
+        if( feof(inFile) )
+            break;
         
         // read next block of data
         if ( fgets(buffer, 1024, inFile) == 0 ) {
-            cerr << "FilterTool error : could not read from script" << endl;
-            return false;
+            cerr << "bamtools filter ERROR: could not read script contents" << endl;
+            return string();
         }
         
         docStream << buffer;
@@ -566,8 +532,7 @@ const string FilterTool::FilterToolPrivate::GetScriptContents(void) {
     fclose(inFile);
     
     // import buffer contents to document, return
-    string document = docStream.str();
-    return document;
+    return docStream.str();
 }
 
 void FilterTool::FilterToolPrivate::InitProperties(void) {
@@ -587,6 +552,7 @@ void FilterTool::FilterToolPrivate::InitProperties(void) {
     m_propertyNames.push_back(ISPROPERPAIR_PROPERTY);
     m_propertyNames.push_back(ISREVERSESTRAND_PROPERTY);
     m_propertyNames.push_back(ISSECONDMATE_PROPERTY);
+    m_propertyNames.push_back(ISSINGLETON_PROPERTY);
     m_propertyNames.push_back(MAPQUALITY_PROPERTY);
     m_propertyNames.push_back(MATEPOSITION_PROPERTY);
     m_propertyNames.push_back(MATEREFERENCE_PROPERTY);
@@ -624,6 +590,7 @@ bool FilterTool::FilterToolPrivate::ParseCommandLine(void) {
     if ( m_settings->HasIsProperPairFilter )        propertyTokens.insert( make_pair(ISPROPERPAIR_PROPERTY,        m_settings->IsProperPairFilter) );
     if ( m_settings->HasIsReverseStrandFilter )     propertyTokens.insert( make_pair(ISREVERSESTRAND_PROPERTY,     m_settings->IsReverseStrandFilter) );
     if ( m_settings->HasIsSecondMateFilter )        propertyTokens.insert( make_pair(ISSECONDMATE_PROPERTY,        m_settings->IsSecondMateFilter) );
+    if ( m_settings->HasIsSingletonFilter )         propertyTokens.insert( make_pair(ISSINGLETON_PROPERTY,         m_settings->IsSingletonFilter) );
     if ( m_settings->HasMapQualityFilter )          propertyTokens.insert( make_pair(MAPQUALITY_PROPERTY,          m_settings->MapQualityFilter) );
     if ( m_settings->HasNameFilter )                propertyTokens.insert( make_pair(NAME_PROPERTY,                m_settings->NameFilter) );
     if ( m_settings->HasQueryBasesFilter )          propertyTokens.insert( make_pair(QUERYBASES_PROPERTY,          m_settings->QueryBasesFilter) );
@@ -671,7 +638,8 @@ bool FilterTool::FilterToolPrivate::ParseScript(void) {
     Json::Reader reader;
     if ( !reader.parse(document, root) ) {
         // use built-in error reporting mechanism to alert user what was wrong with the script
-        cerr  << "Failed to parse configuration\n" << reader.getFormatedErrorMessages();
+        cerr  << "bamtools filter ERROR: failed to parse script - see error message(s) below" << endl
+              << reader.getFormatedErrorMessages();
         return false;     
     }
 
@@ -731,33 +699,56 @@ bool FilterTool::FilterToolPrivate::ParseScript(void) {
 bool FilterTool::FilterToolPrivate::Run(void) {
     
     // set to default input if none provided
-    if ( !m_settings->HasInputBamFilename ) 
+    if ( !m_settings->HasInput && !m_settings->HasInputFilelist )
         m_settings->InputFiles.push_back(Options::StandardIn());
 
+    // add files in the filelist to the input file list
+    if ( m_settings->HasInputFilelist ) {
+
+        ifstream filelist(m_settings->InputFilelist.c_str(), ios::in);
+        if ( !filelist.is_open() ) {
+            cerr << "bamtools filter ERROR: could not open input BAM file list... Aborting." << endl;
+            return false;
+        }
+
+        string line;
+        while ( getline(filelist, line) )
+            m_settings->InputFiles.push_back(line);
+    }
+
     // initialize defined properties & user-specified filters
     // quit if failed
-    if ( !SetupFilters() ) return 1;
+    if ( !SetupFilters() )
+        return false;
 
     // open reader without index
     BamMultiReader reader;
-    if ( !reader.Open(m_settings->InputFiles, false, false) ) {
-        cerr << "Could not open input files for reading." << endl;
+    if ( !reader.Open(m_settings->InputFiles) ) {
+        cerr << "bamtools filter ERROR: could not open input files for reading." << endl;
         return false;
     }
+
+    // retrieve reader header & reference data
     const string headerText = reader.GetHeaderText();
     filterToolReferences = reader.GetReferenceData();
     
-    // open writer
+    // determine compression mode for BamWriter
+    bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() &&
+                              !m_settings->IsForceCompression );
+    BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
+    if ( writeUncompressed ) compressionMode = BamWriter::Uncompressed;
+
+    // open BamWriter
     BamWriter writer;
-    bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression );
-    if ( !writer.Open(m_settings->OutputFilename, headerText, filterToolReferences, writeUncompressed) ) {
-        cerr << "Could not open " << m_settings->OutputFilename << " for writing." << endl;
+    writer.SetCompressionMode(compressionMode);
+    if ( !writer.Open(m_settings->OutputFilename, headerText, filterToolReferences) ) {
+        cerr << "bamtools filter ERROR: could not open " << m_settings->OutputFilename << " for writing." << endl;
+        reader.Close();
         return false;
     }
-    
-    BamAlignment al;
-    
+
     // if no region specified, filter entire file 
+    BamAlignment al;
     if ( !m_settings->HasRegion ) {
         while ( reader.GetNextAlignment(al) ) {
             if ( CheckAlignment(al) ) 
@@ -772,36 +763,29 @@ bool FilterTool::FilterToolPrivate::Run(void) {
         BamRegion region;
         if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
 
-            // attempt to re-open reader with index files
-            reader.Close();
-           bool openedOK = reader.Open(m_settings->InputFiles, true, false );
-            
-            // if error
-            if ( !openedOK ) {
-                cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl;
-                return 1;
-            }
-            
-            // if index data available, we can use SetRegion
-            if ( reader.IsIndexLoaded() ) {
-              
+            // attempt to find index files
+            reader.LocateIndexes();
+
+            // if index data available for all BAM files, we can use SetRegion
+            if ( reader.HasIndexes() ) {
+
                 // attempt to use SetRegion(), if failed report error
                 if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) {
-                    cerr << "ERROR: Region requested, but could not set BamReader region to REGION: " << m_settings->Region << " Aborting." << endl;
+                    cerr << "bamtools filter ERROR: set region failed. Check that REGION describes a valid range" << endl;
                     reader.Close();
-                    return 1;
+                    return false;
                 } 
               
                 // everything checks out, just iterate through specified region, filtering alignments
-               while ( reader.GetNextAlignment(al) )
+                while ( reader.GetNextAlignment(al) )
                     if ( CheckAlignment(al) ) 
                         writer.SaveAlignment(al);
-            } 
+                }
             
             // no index data available, we have to iterate through until we
             // find overlapping alignments
             else {
-               while ( reader.GetNextAlignment(al) ) {
+                while ( reader.GetNextAlignment(al) ) {
                     if ( (al.RefID >= region.LeftRefID)  && ((al.Position + al.Length) >= region.LeftPosition) &&
                          (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) 
                     {
@@ -814,27 +798,148 @@ bool FilterTool::FilterToolPrivate::Run(void) {
         
         // error parsing REGION string
         else {
-            cerr << "ERROR: Could not parse REGION - " << m_settings->Region << endl;
-            cerr << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl;
+            cerr << "bamtools filter ERROR: could not parse REGION: " << m_settings->Region << endl;
+            cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid"
+                 << endl;
             reader.Close();
-            return 1;
+            return false;
         }
     }
 
     // clean up & exit
     reader.Close();
     writer.Close();
-    return 0;
+    return true;
 }
 
 bool FilterTool::FilterToolPrivate::SetupFilters(void) {
   
-    // add known properties to FilterEngine<BamAlignmentChecker>
+    // set up filter engine with supported properties
     InitProperties();
     
     // parse script for filter rules, if given
-    if ( m_settings->HasScriptFilename ) return ParseScript();
+    if ( m_settings->HasScript )
+        return ParseScript();
     
     // otherwise check command line for filters
     else return ParseCommandLine();
 }
+
+// ---------------------------------------------
+// FilterTool implementation
+
+FilterTool::FilterTool(void)
+    : AbstractTool()
+    , m_settings(new FilterSettings)
+    , m_impl(0)
+{
+    // ----------------------------------
+    // set program details
+
+    const string usage = "[-in <filename> -in <filename> ... | -list <filelist>] "
+                         "[-out <filename> | [-forceCompression]] [-region <REGION>] "
+                         "[ [-script <filename] | [filterOptions] ]";
+
+    Options::SetProgramInfo("bamtools filter", "filters BAM file(s)", usage );
+
+    // ----------------------------------
+    // I/O options
+
+    OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
+
+    const string inDesc     = "the input BAM file(s)";
+    const string listDesc   = "the input BAM file list, one line per file";
+    const string outDesc    = "the output BAM file";
+    const string regionDesc = "only read data from this genomic region (see documentation for more details)";
+    const string scriptDesc = "the filter script file (see documentation for more details)";
+    const string forceDesc  = "if results are sent to stdout (like when piping to another tool), "
+                              "default behavior is to leave output uncompressed. Use this flag to "
+                              "override and force compression";
+
+    Options::AddValueOption("-in",     "BAM filename", inDesc,     "", m_settings->HasInput,  m_settings->InputFiles,     IO_Opts, Options::StandardIn());
+    Options::AddValueOption("-list",   "filename",     listDesc,   "", m_settings->HasInputFilelist,  m_settings->InputFilelist, IO_Opts);
+    Options::AddValueOption("-out",    "BAM filename", outDesc,    "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
+    Options::AddValueOption("-region", "REGION",       regionDesc, "", m_settings->HasRegion, m_settings->Region,         IO_Opts);
+    Options::AddValueOption("-script", "filename",     scriptDesc, "", m_settings->HasScript, m_settings->ScriptFilename, IO_Opts);
+    Options::AddOption("-forceCompression",forceDesc, m_settings->IsForceCompression, IO_Opts);
+
+    // ----------------------------------
+    // general filter options
+
+    OptionGroup* FilterOpts = Options::CreateOptionGroup("General Filters");
+
+    const string flagDesc    = "keep reads with this *exact* alignment flag (for more detailed queries, see below)";
+    const string insertDesc  = "keep reads with insert size that matches pattern";
+    const string mapQualDesc = "keep reads with map quality that matches pattern";
+    const string nameDesc    = "keep reads with name that matches pattern";
+    const string queryDesc   = "keep reads with motif that matches pattern";
+    const string tagDesc     = "keep reads with this key=>value pair";
+
+    Options::AddValueOption("-alignmentFlag", "int",       flagDesc,    "", m_settings->HasAlignmentFlagFilter, m_settings->AlignmentFlagFilter, FilterOpts);
+    Options::AddValueOption("-insertSize",    "int",       insertDesc,  "", m_settings->HasInsertSizeFilter,    m_settings->InsertSizeFilter,    FilterOpts);
+    Options::AddValueOption("-mapQuality",    "[0-255]",   mapQualDesc, "", m_settings->HasMapQualityFilter,    m_settings->MapQualityFilter,    FilterOpts);
+    Options::AddValueOption("-name",          "string",    nameDesc,    "", m_settings->HasNameFilter,          m_settings->NameFilter,          FilterOpts);
+    Options::AddValueOption("-queryBases",    "string",    queryDesc,   "", m_settings->HasQueryBasesFilter,    m_settings->QueryBasesFilter,    FilterOpts);
+    Options::AddValueOption("-tag",           "TAG:VALUE", tagDesc,     "", m_settings->HasTagFilter,           m_settings->TagFilter,           FilterOpts);
+
+    // ----------------------------------
+    // alignment flag filter options
+
+    OptionGroup* AlignmentFlagOpts = Options::CreateOptionGroup("Alignment Flag Filters");
+
+    const string boolArg           = "true/false";
+    const string isDupDesc         = "keep only alignments that are marked as duplicate?";
+    const string isFailQcDesc      = "keep only alignments that failed QC?";
+    const string isFirstMateDesc   = "keep only alignments marked as first mate?";
+    const string isMappedDesc      = "keep only alignments that were mapped?";
+    const string isMateMappedDesc  = "keep only alignments with mates that mapped";
+    const string isMateReverseDesc = "keep only alignments with mate on reverese strand?";
+    const string isPairedDesc      = "keep only alignments that were sequenced as paired?";
+    const string isPrimaryDesc     = "keep only alignments marked as primary?";
+    const string isProperPairDesc  = "keep only alignments that passed PE resolution?";
+    const string isReverseDesc     = "keep only alignments on reverse strand?";
+    const string isSecondMateDesc  = "keep only alignments marked as second mate?";
+    const string isSingletonDesc   = "keep only singletons";
+
+    Options::AddValueOption("-isDuplicate",         boolArg, isDupDesc,         "", m_settings->HasIsDuplicateFilter,         m_settings->IsDuplicateFilter,         AlignmentFlagOpts, TRUE_STR);
+    Options::AddValueOption("-isFailedQC",          boolArg, isFailQcDesc,      "", m_settings->HasIsFailedQCFilter,          m_settings->IsFailedQCFilter,          AlignmentFlagOpts, TRUE_STR);
+    Options::AddValueOption("-isFirstMate",         boolArg, isFirstMateDesc,   "", m_settings->HasIsFirstMateFilter,         m_settings->IsFirstMateFilter,         AlignmentFlagOpts, TRUE_STR);
+    Options::AddValueOption("-isMapped",            boolArg, isMappedDesc,      "", m_settings->HasIsMappedFilter,            m_settings->IsMappedFilter,            AlignmentFlagOpts, TRUE_STR);
+    Options::AddValueOption("-isMateMapped",        boolArg, isMateMappedDesc,  "", m_settings->HasIsMateMappedFilter,        m_settings->IsMateMappedFilter,        AlignmentFlagOpts, TRUE_STR);
+    Options::AddValueOption("-isMateReverseStrand", boolArg, isMateReverseDesc, "", m_settings->HasIsMateReverseStrandFilter, m_settings->IsMateReverseStrandFilter, AlignmentFlagOpts, TRUE_STR);
+    Options::AddValueOption("-isPaired",            boolArg, isPairedDesc,      "", m_settings->HasIsPairedFilter,            m_settings->IsPairedFilter,            AlignmentFlagOpts, TRUE_STR);
+    Options::AddValueOption("-isPrimaryAlignment",  boolArg, isPrimaryDesc,     "", m_settings->HasIsPrimaryAlignmentFilter,  m_settings->IsPrimaryAlignmentFilter,  AlignmentFlagOpts, TRUE_STR);
+    Options::AddValueOption("-isProperPair",        boolArg, isProperPairDesc,  "", m_settings->HasIsProperPairFilter,        m_settings->IsProperPairFilter,        AlignmentFlagOpts, TRUE_STR);
+    Options::AddValueOption("-isReverseStrand",     boolArg, isReverseDesc,     "", m_settings->HasIsReverseStrandFilter,     m_settings->IsReverseStrandFilter,     AlignmentFlagOpts, TRUE_STR);
+    Options::AddValueOption("-isSecondMate",        boolArg, isSecondMateDesc,  "", m_settings->HasIsSecondMateFilter,        m_settings->IsSecondMateFilter,        AlignmentFlagOpts, TRUE_STR);
+    Options::AddValueOption("-isSingleton",         boolArg, isSingletonDesc,   "", m_settings->HasIsSingletonFilter,         m_settings->IsSingletonFilter,         AlignmentFlagOpts, TRUE_STR);
+}
+
+FilterTool::~FilterTool(void) {
+
+    delete m_settings;
+    m_settings = 0;
+
+    delete m_impl;
+    m_impl = 0;
+}
+
+int FilterTool::Help(void) {
+    Options::DisplayHelp();
+    return 0;
+}
+
+int FilterTool::Run(int argc, char* argv[]) {
+
+    // parse command line arguments
+    Options::Parse(argc, argv, 1);
+
+    // initialize FilterTool with settings
+    m_impl = new FilterToolPrivate(m_settings);
+
+    // run FilterTool, return success/fail
+    if ( m_impl->Run() )
+        return 0;
+    else
+        return 1;
+}