]> git.donarmstrong.com Git - bamtools.git/blob - src/toolkit/bamtools_filter.cpp
Major update to BamTools version 1.0
[bamtools.git] / src / toolkit / bamtools_filter.cpp
1 // ***************************************************************************
2 // bamtools_filter.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 21 March 2011
7 // ---------------------------------------------------------------------------
8 // Filters BAM file(s) according to some user-specified criteria.
9 // ***************************************************************************
10
11 #include "bamtools_filter.h"
12
13 #include <api/BamMultiReader.h>
14 #include <api/BamWriter.h>
15 #include <utils/bamtools_filter_engine.h>
16 #include <utils/bamtools_options.h>
17 #include <utils/bamtools_utilities.h>
18 using namespace BamTools;
19
20 #include <jsoncpp/json.h>
21 using namespace Json;
22
23 #include <cstdio>
24 #include <iostream>
25 #include <sstream>
26 #include <string>
27 #include <vector>
28 using namespace std;
29   
30 namespace BamTools {
31   
32 // -------------------------------  
33 // string literal constants  
34
35 // property names
36 const string ALIGNMENTFLAG_PROPERTY       = "alignmentFlag";
37 const string CIGAR_PROPERTY               = "cigar";
38 const string INSERTSIZE_PROPERTY          = "insertSize";
39 const string ISDUPLICATE_PROPERTY         = "isDuplicate";
40 const string ISFAILEDQC_PROPERTY          = "isFailedQC";
41 const string ISFIRSTMATE_PROPERTY         = "isFirstMate";
42 const string ISMAPPED_PROPERTY            = "isMapped";
43 const string ISMATEMAPPED_PROPERTY        = "isMateMapped";
44 const string ISMATEREVERSESTRAND_PROPERTY = "isMateReverseStrand";
45 const string ISPAIRED_PROPERTY            = "isPaired";
46 const string ISPRIMARYALIGNMENT_PROPERTY  = "isPrimaryAlignment";
47 const string ISPROPERPAIR_PROPERTY        = "isProperPair";
48 const string ISREVERSESTRAND_PROPERTY     = "isReverseStrand";
49 const string ISSECONDMATE_PROPERTY        = "isSecondMate";
50 const string MAPQUALITY_PROPERTY          = "mapQuality";
51 const string MATEPOSITION_PROPERTY        = "matePosition";
52 const string MATEREFERENCE_PROPERTY       = "mateReference";
53 const string NAME_PROPERTY                = "name";
54 const string POSITION_PROPERTY            = "position";
55 const string QUERYBASES_PROPERTY          = "queryBases";
56 const string REFERENCE_PROPERTY           = "reference";
57 const string TAG_PROPERTY                 = "tag";
58
59 // boolalpha
60 const string TRUE_STR  = "true";
61 const string FALSE_STR = "false";
62     
63 RefVector filterToolReferences;    
64     
65 struct BamAlignmentChecker {
66     bool check(const PropertyFilter& filter, const BamAlignment& al) {
67       
68         bool keepAlignment = true;
69         const PropertyMap& properties = filter.Properties;
70         PropertyMap::const_iterator propertyIter = properties.begin();
71         PropertyMap::const_iterator propertyEnd  = properties.end();
72         for ( ; propertyIter != propertyEnd; ++propertyIter ) {
73           
74             // check alignment data field depending on propertyName
75             const string& propertyName = (*propertyIter).first;
76             const PropertyFilterValue& valueFilter = (*propertyIter).second;
77             
78             if      ( propertyName == ALIGNMENTFLAG_PROPERTY )  keepAlignment &= valueFilter.check(al.AlignmentFlag);
79             else if ( propertyName == CIGAR_PROPERTY ) {
80                 stringstream cigarSs;
81                 const vector<CigarOp>& cigarData = al.CigarData;
82                 if ( !cigarData.empty() ) {
83                     vector<CigarOp>::const_iterator cigarBegin = cigarData.begin();
84                     vector<CigarOp>::const_iterator cigarIter = cigarBegin;
85                     vector<CigarOp>::const_iterator cigarEnd  = cigarData.end();
86                     for ( ; cigarIter != cigarEnd; ++cigarIter ) {
87                         const CigarOp& op = (*cigarIter);
88                         cigarSs << op.Length << op.Type;
89                     }
90                     keepAlignment &= valueFilter.check(cigarSs.str());
91                 }
92             }
93             else if ( propertyName == INSERTSIZE_PROPERTY )           keepAlignment &= valueFilter.check(al.InsertSize);
94             else if ( propertyName == ISDUPLICATE_PROPERTY )          keepAlignment &= valueFilter.check(al.IsDuplicate());
95             else if ( propertyName == ISFAILEDQC_PROPERTY )           keepAlignment &= valueFilter.check(al.IsFailedQC());
96             else if ( propertyName == ISFIRSTMATE_PROPERTY )          keepAlignment &= valueFilter.check(al.IsFirstMate());
97             else if ( propertyName == ISMAPPED_PROPERTY )             keepAlignment &= valueFilter.check(al.IsMapped());
98             else if ( propertyName == ISMATEMAPPED_PROPERTY )         keepAlignment &= valueFilter.check(al.IsMateMapped());
99             else if ( propertyName == ISMATEREVERSESTRAND_PROPERTY )  keepAlignment &= valueFilter.check(al.IsMateReverseStrand());
100             else if ( propertyName == ISPAIRED_PROPERTY )             keepAlignment &= valueFilter.check(al.IsPaired());
101             else if ( propertyName == ISPRIMARYALIGNMENT_PROPERTY )   keepAlignment &= valueFilter.check(al.IsPrimaryAlignment());
102             else if ( propertyName == ISPROPERPAIR_PROPERTY )         keepAlignment &= valueFilter.check(al.IsProperPair());
103             else if ( propertyName == ISREVERSESTRAND_PROPERTY )      keepAlignment &= valueFilter.check(al.IsReverseStrand());
104             else if ( propertyName == ISSECONDMATE_PROPERTY )         keepAlignment &= valueFilter.check(al.IsSecondMate());
105             else if ( propertyName == MAPQUALITY_PROPERTY )           keepAlignment &= valueFilter.check(al.MapQuality);
106             else if ( propertyName == MATEPOSITION_PROPERTY )         keepAlignment &= ( al.IsPaired() && al.IsMateMapped() && valueFilter.check(al.MateRefID) );
107             else if ( propertyName == MATEREFERENCE_PROPERTY ) {
108                 if ( !al.IsPaired() || !al.IsMateMapped() ) return false;
109                 BAMTOOLS_ASSERT_MESSAGE( (al.MateRefID>=0 && (al.MateRefID<(int)filterToolReferences.size())), "Invalid MateRefID");
110                 const string& refName = filterToolReferences.at(al.MateRefID).RefName;
111                 keepAlignment &= valueFilter.check(refName);
112             }
113             else if ( propertyName == NAME_PROPERTY )                 keepAlignment &= valueFilter.check(al.Name);
114             else if ( propertyName == POSITION_PROPERTY )             keepAlignment &= valueFilter.check(al.Position);
115             else if ( propertyName == QUERYBASES_PROPERTY )           keepAlignment &= valueFilter.check(al.QueryBases);
116             else if ( propertyName == REFERENCE_PROPERTY ) {
117                 BAMTOOLS_ASSERT_MESSAGE( (al.RefID>=0 && (al.RefID<(int)filterToolReferences.size())), "Invalid RefID");
118                 const string& refName = filterToolReferences.at(al.RefID).RefName;
119                 keepAlignment &= valueFilter.check(refName);
120             }
121             else if ( propertyName == TAG_PROPERTY ) keepAlignment &= checkAlignmentTag(valueFilter, al);
122             else BAMTOOLS_ASSERT_UNREACHABLE;
123             
124             // if alignment fails at ANY point, just quit and return false
125             if ( !keepAlignment ) return false;
126         }
127       
128         BAMTOOLS_ASSERT_MESSAGE( keepAlignment, "Error in BamAlignmentChecker... keepAlignment should be true here");
129         return keepAlignment;
130     }
131     
132     bool checkAlignmentTag(const PropertyFilterValue& valueFilter, const BamAlignment& al) {
133      
134         // ensure filter contains string data
135         Variant entireTagFilter = valueFilter.Value;
136         if ( !entireTagFilter.is_type<string>() ) return false;
137
138         // localize string from variant
139         const string& entireTagFilterString = entireTagFilter.get<string>();
140
141         // ensure we have at least "XX:x"
142         if ( entireTagFilterString.length() < 4 ) return false;
143
144         // get tagName & lookup in alignment
145         // if found, set tagType to tag type character
146         // if not found, return false
147         const string& tagName = entireTagFilterString.substr(0,2);
148         char tagType = '\0';
149         if ( !al.GetTagType(tagName, tagType) ) return false;
150
151         // remove tagName & ":" from beginning tagFilter
152         string tagFilterString = entireTagFilterString.substr(3);
153
154         // switch on tag type to set tag query value & parse filter token
155         int32_t  intFilterValue,    intQueryValue;
156         uint32_t uintFilterValue,   uintQueryValue;
157         float    realFilterValue,   realQueryValue;
158         string   stringFilterValue, stringQueryValue;
159
160         PropertyFilterValue tagFilter;
161         PropertyFilterValue::ValueCompareType compareType;
162         bool keepAlignment = false;
163         switch (tagType) {
164
165             // signed int tag type
166             case 'c' :
167             case 's' :
168             case 'i' :
169                 if ( al.GetTag(tagName, intQueryValue) ) {
170                     if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, intFilterValue, compareType) ) {
171                         tagFilter.Value = intFilterValue;
172                         tagFilter.Type  = compareType;
173                         keepAlignment   = tagFilter.check(intQueryValue);
174                     }
175                 }
176                 break;
177
178             // unsigned int tag type
179             case 'C' :
180             case 'S' :
181             case 'I' :
182                 if ( al.GetTag(tagName, uintQueryValue) ) {
183                     if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, uintFilterValue, compareType) ) {
184                         tagFilter.Value = uintFilterValue;
185                         tagFilter.Type  = compareType;
186                         keepAlignment   = tagFilter.check(uintQueryValue);
187                     }
188                 }
189                 break;
190
191             // 'real' tag type
192             case 'f' :
193                 if ( al.GetTag(tagName, realQueryValue) ) {
194                     if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, realFilterValue, compareType) ) {
195                         tagFilter.Value = realFilterValue;
196                         tagFilter.Type  = compareType;
197                         keepAlignment   = tagFilter.check(realQueryValue);
198                     }
199                 }
200                 break;
201
202             // string tag type
203             case 'A':
204             case 'Z':
205             case 'H':
206                 if ( al.GetTag(tagName, stringQueryValue) ) {
207                     if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, stringFilterValue, compareType) ) {
208                         tagFilter.Value = stringFilterValue;
209                         tagFilter.Type  = compareType;
210                         keepAlignment   = tagFilter.check(stringQueryValue);
211                     }
212                 }
213                 break;
214
215             // unknown tag type
216             default :
217                 keepAlignment = false;
218         }
219
220         return keepAlignment;
221     }
222 };    
223     
224 } // namespace BamTools
225   
226 // ---------------------------------------------
227 // FilterToolPrivate declaration
228
229 class FilterTool::FilterToolPrivate {
230       
231     // ctor & dtor
232     public:
233         FilterToolPrivate(FilterTool::FilterSettings* settings);
234         ~FilterToolPrivate(void);  
235         
236     // 'public' interface
237     public:
238         bool Run(void);
239         
240     // internal methods
241     private:
242         bool AddPropertyTokensToFilter(const string& filterName, const map<string, string>& propertyTokens);
243         bool CheckAlignment(const BamAlignment& al);
244         const string GetScriptContents(void);
245         void InitProperties(void);
246         bool ParseCommandLine(void);
247         bool ParseFilterObject(const string& filterName, const Json::Value& filterObject);
248         bool ParseScript(void);
249         bool SetupFilters(void);
250         
251     // data members
252     private:
253         vector<string> m_propertyNames;
254         FilterTool::FilterSettings* m_settings;
255         FilterEngine<BamAlignmentChecker> m_filterEngine;
256 };
257   
258 // ---------------------------------------------
259 // FilterSettings implementation
260
261 struct FilterTool::FilterSettings {
262
263     // ----------------------------------
264     // IO opts
265   
266     // flags
267     bool HasInputBamFilename;
268     bool HasOutputBamFilename;
269     bool HasRegion;
270     bool HasScriptFilename;
271     bool IsForceCompression;
272     
273     // filenames
274     vector<string> InputFiles;
275     string OutputFilename;
276     string Region;
277     string ScriptFilename;
278     
279     // -----------------------------------
280     // General filter opts
281     
282     // flags
283     bool HasAlignmentFlagFilter;
284     bool HasInsertSizeFilter;
285     bool HasMapQualityFilter;
286     bool HasNameFilter;
287     bool HasQueryBasesFilter;
288     bool HasTagFilter; //(s)
289
290     // filters
291     string AlignmentFlagFilter;
292     string InsertSizeFilter;
293     string NameFilter;
294     string MapQualityFilter;
295     string QueryBasesFilter;
296     string TagFilter;  // support multiple ?
297
298     // -----------------------------------
299     // AlignmentFlag filter opts
300     
301     // flags
302     bool HasIsDuplicateFilter;
303     bool HasIsFailedQCFilter;
304     bool HasIsFirstMateFilter;
305     bool HasIsMappedFilter;
306     bool HasIsMateMappedFilter;
307     bool HasIsMateReverseStrandFilter;
308     bool HasIsPairedFilter;
309     bool HasIsPrimaryAlignmentFilter;
310     bool HasIsProperPairFilter;
311     bool HasIsReverseStrandFilter;
312     bool HasIsSecondMateFilter;
313     
314     // filters
315     string IsDuplicateFilter;
316     string IsFailedQCFilter;
317     string IsFirstMateFilter;
318     string IsMappedFilter;
319     string IsMateMappedFilter;
320     string IsMateReverseStrandFilter;
321     string IsPairedFilter;
322     string IsPrimaryAlignmentFilter;
323     string IsProperPairFilter;
324     string IsReverseStrandFilter;
325     string IsSecondMateFilter;
326     
327     // ---------------------------------
328     // constructor
329     
330     FilterSettings(void)
331         : HasInputBamFilename(false)
332         , HasOutputBamFilename(false)
333         , HasRegion(false)
334         , HasScriptFilename(false)
335         , IsForceCompression(false)
336         , OutputFilename(Options::StandardOut())
337         , HasAlignmentFlagFilter(false)
338         , HasInsertSizeFilter(false)
339         , HasMapQualityFilter(false)
340         , HasNameFilter(false)
341         , HasQueryBasesFilter(false)
342         , HasTagFilter(false)
343         , HasIsDuplicateFilter(false)
344         , HasIsFailedQCFilter(false)
345         , HasIsFirstMateFilter(false)
346         , HasIsMappedFilter(false)
347         , HasIsMateMappedFilter(false)
348         , HasIsMateReverseStrandFilter(false)
349         , HasIsPairedFilter(false)
350         , HasIsPrimaryAlignmentFilter(false)
351         , HasIsProperPairFilter(false)
352         , HasIsReverseStrandFilter(false)
353         , HasIsSecondMateFilter(false)
354         , IsDuplicateFilter(TRUE_STR)
355         , IsFailedQCFilter(TRUE_STR)
356         , IsFirstMateFilter(TRUE_STR)
357         , IsMappedFilter(TRUE_STR)
358         , IsMateMappedFilter(TRUE_STR)
359         , IsMateReverseStrandFilter(TRUE_STR)
360         , IsPairedFilter(TRUE_STR)
361         , IsPrimaryAlignmentFilter(TRUE_STR)
362         , IsProperPairFilter(TRUE_STR)
363         , IsReverseStrandFilter(TRUE_STR)
364         , IsSecondMateFilter(TRUE_STR)
365     { }
366 };  
367
368 // ---------------------------------------------
369 // FilterTool implementation
370
371 FilterTool::FilterTool(void)
372     : AbstractTool()
373     , m_settings(new FilterSettings)
374     , m_impl(0)
375 {
376     // set program details
377     Options::SetProgramInfo("bamtools filter", "filters BAM file(s)", "[-in <filename> -in <filename> ...] [-out <filename> | [-forceCompression]] [-region <REGION>] [ [-script <filename] | [filterOptions] ]");
378     
379     OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
380     Options::AddValueOption("-in",     "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename,  m_settings->InputFiles,     IO_Opts, Options::StandardIn());
381     Options::AddValueOption("-out",    "BAM filename", "the output BAM file",   "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
382     Options::AddValueOption("-region", "REGION",       "only read data from this genomic region (see documentation for more details)", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
383     Options::AddValueOption("-script", "filename",     "the filter script file (see documentation for more details)", "", m_settings->HasScriptFilename, m_settings->ScriptFilename, IO_Opts);
384     Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts);
385     
386     OptionGroup* FilterOpts = Options::CreateOptionGroup("General Filters");
387     Options::AddValueOption("-alignmentFlag", "int",        "keep reads with this *exact* alignment flag (for more detailed queries, see below)", "", m_settings->HasAlignmentFlagFilter, m_settings->AlignmentFlagFilter, FilterOpts);
388     Options::AddValueOption("-insertSize",    "int",        "keep reads with insert size that mathces pattern",             "", m_settings->HasInsertSizeFilter, m_settings->InsertSizeFilter, FilterOpts);
389     Options::AddValueOption("-mapQuality",    "[0-255]",    "keep reads with map quality that matches pattern",             "", m_settings->HasMapQualityFilter, m_settings->MapQualityFilter, FilterOpts);
390     Options::AddValueOption("-name",          "string",     "keep reads with name that matches pattern",                    "", m_settings->HasNameFilter,       m_settings->NameFilter,       FilterOpts);
391     Options::AddValueOption("-queryBases",    "string",     "keep reads with motif that mathces pattern",                   "", m_settings->HasQueryBasesFilter, m_settings->QueryBasesFilter, FilterOpts);
392     Options::AddValueOption("-tag",           "TAG:VALUE",  "keep reads with this key=>value pair",                         "", m_settings->HasTagFilter,        m_settings->TagFilter,        FilterOpts);
393     
394     OptionGroup* AlignmentFlagOpts = Options::CreateOptionGroup("Alignment Flag Filters");
395     Options::AddValueOption("-isDuplicate",         "true/false", "keep only alignments that are marked as duplicate?", "", m_settings->HasIsDuplicateFilter,         m_settings->IsDuplicateFilter,         AlignmentFlagOpts, TRUE_STR);
396     Options::AddValueOption("-isFailedQC",          "true/false", "keep only alignments that failed QC?",               "", m_settings->HasIsFailedQCFilter,          m_settings->IsFailedQCFilter,          AlignmentFlagOpts, TRUE_STR);
397     Options::AddValueOption("-isFirstMate",         "true/false", "keep only alignments marked as first mate?",         "", m_settings->HasIsFirstMateFilter,         m_settings->IsFirstMateFilter,         AlignmentFlagOpts, TRUE_STR);
398     Options::AddValueOption("-isMapped",            "true/false", "keep only alignments that were mapped?",             "", m_settings->HasIsMappedFilter,            m_settings->IsMappedFilter,            AlignmentFlagOpts, TRUE_STR);
399     Options::AddValueOption("-isMateMapped",        "true/false", "keep only alignments with mates that mapped",        "", m_settings->HasIsMateMappedFilter,        m_settings->IsMateMappedFilter,        AlignmentFlagOpts, TRUE_STR);
400     Options::AddValueOption("-isMateReverseStrand", "true/false", "keep only alignments with mate on reverese strand?", "", m_settings->HasIsMateReverseStrandFilter, m_settings->IsMateReverseStrandFilter, AlignmentFlagOpts, TRUE_STR);
401     Options::AddValueOption("-isPaired",            "true/false", "keep only alignments that were sequenced as paired?","", m_settings->HasIsPairedFilter,            m_settings->IsPairedFilter,            AlignmentFlagOpts, TRUE_STR);
402     Options::AddValueOption("-isPrimaryAlignment",  "true/false", "keep only alignments marked as primary?",            "", m_settings->HasIsPrimaryAlignmentFilter,  m_settings->IsPrimaryAlignmentFilter,  AlignmentFlagOpts, TRUE_STR);
403     Options::AddValueOption("-isProperPair",        "true/false", "keep only alignments that passed PE resolution?",    "", m_settings->HasIsProperPairFilter,        m_settings->IsProperPairFilter,        AlignmentFlagOpts, TRUE_STR);
404     Options::AddValueOption("-isReverseStrand",     "true/false", "keep only alignments on reverse strand?",            "", m_settings->HasIsReverseStrandFilter,     m_settings->IsReverseStrandFilter,     AlignmentFlagOpts, TRUE_STR);
405     Options::AddValueOption("-isSecondMate",        "true/false", "keep only alignments marked as second mate?",        "", m_settings->HasIsSecondMateFilter,        m_settings->IsSecondMateFilter,        AlignmentFlagOpts, TRUE_STR);
406 }
407
408 FilterTool::~FilterTool(void) {
409     delete m_settings;
410     m_settings = 0;
411     
412     delete m_impl;
413     m_impl = 0;
414 }
415
416 int FilterTool::Help(void) {
417     Options::DisplayHelp();
418     return 0;
419 }
420
421 int FilterTool::Run(int argc, char* argv[]) {
422   
423     // parse command line arguments
424     Options::Parse(argc, argv, 1);
425     
426     // run internal FilterTool implementation, return success/fail
427     m_impl = new FilterToolPrivate(m_settings);
428     
429     if ( m_impl->Run() ) return 0;
430     else return 1;
431 }
432  
433 // ---------------------------------------------
434 // FilterToolPrivate implementation
435   
436 // constructor  
437 FilterTool::FilterToolPrivate::FilterToolPrivate(FilterTool::FilterSettings* settings)
438     : m_settings(settings)
439 { }  
440   
441 // destructor
442 FilterTool::FilterToolPrivate::~FilterToolPrivate(void) { }
443
444 bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filterName,
445                                                               const map<string,
446                                                               string>& propertyTokens)
447 {
448     // dummy temp values for token parsing
449     bool boolValue;
450     int32_t int32Value;
451     uint16_t uint16Value;
452     uint32_t uint32Value;
453     string stringValue;
454     PropertyFilterValue::ValueCompareType type;
455   
456     // iterate over property token map
457     map<string, string>::const_iterator mapIter = propertyTokens.begin();
458     map<string, string>::const_iterator mapEnd  = propertyTokens.end();
459     for ( ; mapIter != mapEnd; ++mapIter ) {
460       
461         const string& propertyName = (*mapIter).first;
462         const string& token        = (*mapIter).second;
463         
464         // ------------------------------
465         // convert token to value & compare type 
466         // then add to filter engine
467         
468         // bool conversion
469         if ( propertyName == ISDUPLICATE_PROPERTY ||
470              propertyName == ISFAILEDQC_PROPERTY ||
471              propertyName == ISFIRSTMATE_PROPERTY ||
472              propertyName == ISMAPPED_PROPERTY ||
473              propertyName == ISMATEMAPPED_PROPERTY ||
474              propertyName == ISMATEREVERSESTRAND_PROPERTY ||
475              propertyName == ISPAIRED_PROPERTY ||
476              propertyName == ISPRIMARYALIGNMENT_PROPERTY ||
477              propertyName == ISPROPERPAIR_PROPERTY ||
478              propertyName == ISREVERSESTRAND_PROPERTY ||
479              propertyName == ISSECONDMATE_PROPERTY
480            ) 
481         {
482             FilterEngine<BamAlignmentChecker>::parseToken(token, boolValue, type);
483             m_filterEngine.setProperty(filterName, propertyName, boolValue, type);
484         }
485         
486         // int32_t conversion
487         else if ( propertyName == INSERTSIZE_PROPERTY ||
488                   propertyName == MATEPOSITION_PROPERTY ||
489                   propertyName == POSITION_PROPERTY 
490                 ) 
491         {
492             FilterEngine<BamAlignmentChecker>::parseToken(token, int32Value, type);
493             m_filterEngine.setProperty(filterName, propertyName, int32Value, type);
494         }
495         
496         // uint16_t conversion
497         else if ( propertyName == MAPQUALITY_PROPERTY )
498         {
499             FilterEngine<BamAlignmentChecker>::parseToken(token, uint16Value, type);
500             m_filterEngine.setProperty(filterName, propertyName, uint16Value, type);
501         }
502         
503         // uint32_t conversion
504         else if ( propertyName == ALIGNMENTFLAG_PROPERTY )
505         {
506             FilterEngine<BamAlignmentChecker>::parseToken(token, uint32Value, type);
507             m_filterEngine.setProperty(filterName, propertyName, uint32Value, type);
508         }
509         
510         // string conversion
511         else if ( propertyName == CIGAR_PROPERTY || 
512                   propertyName == MATEREFERENCE_PROPERTY ||
513                   propertyName == NAME_PROPERTY ||
514                   propertyName == QUERYBASES_PROPERTY ||
515                   propertyName == REFERENCE_PROPERTY 
516                 ) 
517         {
518             FilterEngine<BamAlignmentChecker>::parseToken(token, stringValue, type);
519             m_filterEngine.setProperty(filterName, propertyName, stringValue, type);
520         }
521       
522     else if ( propertyName == TAG_PROPERTY ) {
523             // this will be stored directly as the TAG:VALUE token 
524             // (VALUE may contain compare ops, will be parsed out later)
525             m_filterEngine.setProperty(filterName, propertyName, token, PropertyFilterValue::EXACT);
526         }
527       
528         // else unknown property 
529         else {
530             cerr << "bamtools filter ERROR: unknown property - " << propertyName << endl;
531             return false;
532         }
533     }
534     return true;
535 }
536
537 bool FilterTool::FilterToolPrivate::CheckAlignment(const BamAlignment& al) {
538     return m_filterEngine.check(al);
539 }
540
541 const string FilterTool::FilterToolPrivate::GetScriptContents(void) {
542   
543     // open script for reading
544     FILE* inFile = fopen(m_settings->ScriptFilename.c_str(), "rb");
545     if ( !inFile ) {
546         cerr << "bamtools filter ERROR: could not open script: "
547              << m_settings->ScriptFilename << " for reading" << endl;
548         return false;
549     }
550     
551     // read in entire script contents  
552     char buffer[1024];
553     ostringstream docStream("");
554     while ( true ) {
555         
556         // peek ahead, make sure there is data available
557         char ch = fgetc(inFile);
558         ungetc(ch, inFile);
559         if( feof(inFile) ) break;       
560         
561         // read next block of data
562         if ( fgets(buffer, 1024, inFile) == 0 ) {
563             cerr << "bamtools filter ERROR: could not read script contents" << endl;
564             return false;
565         }
566         
567         docStream << buffer;
568     }
569     
570     // close script file
571     fclose(inFile);
572     
573     // import buffer contents to document, return
574     string document = docStream.str();
575     return document;
576 }
577
578 void FilterTool::FilterToolPrivate::InitProperties(void) {
579   
580     // store property names in vector 
581     m_propertyNames.push_back(ALIGNMENTFLAG_PROPERTY);
582     m_propertyNames.push_back(CIGAR_PROPERTY);
583     m_propertyNames.push_back(INSERTSIZE_PROPERTY);
584     m_propertyNames.push_back(ISDUPLICATE_PROPERTY);
585     m_propertyNames.push_back(ISFAILEDQC_PROPERTY);
586     m_propertyNames.push_back(ISFIRSTMATE_PROPERTY);
587     m_propertyNames.push_back(ISMAPPED_PROPERTY);
588     m_propertyNames.push_back(ISMATEMAPPED_PROPERTY);
589     m_propertyNames.push_back(ISMATEREVERSESTRAND_PROPERTY);
590     m_propertyNames.push_back(ISPAIRED_PROPERTY);
591     m_propertyNames.push_back(ISPRIMARYALIGNMENT_PROPERTY);
592     m_propertyNames.push_back(ISPROPERPAIR_PROPERTY);
593     m_propertyNames.push_back(ISREVERSESTRAND_PROPERTY);
594     m_propertyNames.push_back(ISSECONDMATE_PROPERTY);
595     m_propertyNames.push_back(MAPQUALITY_PROPERTY);
596     m_propertyNames.push_back(MATEPOSITION_PROPERTY);
597     m_propertyNames.push_back(MATEREFERENCE_PROPERTY);
598     m_propertyNames.push_back(NAME_PROPERTY);
599     m_propertyNames.push_back(POSITION_PROPERTY);
600     m_propertyNames.push_back(QUERYBASES_PROPERTY);
601     m_propertyNames.push_back(REFERENCE_PROPERTY);
602     m_propertyNames.push_back(TAG_PROPERTY);
603     
604     // add vector contents to FilterEngine<BamAlignmentChecker>
605     vector<string>::const_iterator propertyNameIter = m_propertyNames.begin();
606     vector<string>::const_iterator propertyNameEnd  = m_propertyNames.end();
607     for ( ; propertyNameIter != propertyNameEnd; ++propertyNameIter )
608         m_filterEngine.addProperty((*propertyNameIter));
609 }
610
611 bool FilterTool::FilterToolPrivate::ParseCommandLine(void) {
612   
613     // add a rule set to filter engine
614     const string CMD = "COMMAND_LINE";
615     m_filterEngine.addFilter(CMD);
616
617     // map property names to command line args
618     map<string, string> propertyTokens;
619     if ( m_settings->HasAlignmentFlagFilter )       propertyTokens.insert( make_pair(ALIGNMENTFLAG_PROPERTY,       m_settings->AlignmentFlagFilter) );
620     if ( m_settings->HasInsertSizeFilter )          propertyTokens.insert( make_pair(INSERTSIZE_PROPERTY,          m_settings->InsertSizeFilter) );
621     if ( m_settings->HasIsDuplicateFilter )         propertyTokens.insert( make_pair(ISDUPLICATE_PROPERTY,         m_settings->IsDuplicateFilter) );
622     if ( m_settings->HasIsFailedQCFilter )          propertyTokens.insert( make_pair(ISFAILEDQC_PROPERTY,          m_settings->IsFailedQCFilter) );
623     if ( m_settings->HasIsFirstMateFilter )         propertyTokens.insert( make_pair(ISFIRSTMATE_PROPERTY,         m_settings->IsFirstMateFilter) );
624     if ( m_settings->HasIsMappedFilter )            propertyTokens.insert( make_pair(ISMAPPED_PROPERTY,            m_settings->IsMappedFilter) );
625     if ( m_settings->HasIsMateMappedFilter )        propertyTokens.insert( make_pair(ISMATEMAPPED_PROPERTY,        m_settings->IsMateMappedFilter) );
626     if ( m_settings->HasIsMateReverseStrandFilter ) propertyTokens.insert( make_pair(ISMATEREVERSESTRAND_PROPERTY, m_settings->IsMateReverseStrandFilter) );
627     if ( m_settings->HasIsPairedFilter )            propertyTokens.insert( make_pair(ISPAIRED_PROPERTY,            m_settings->IsPairedFilter) );
628     if ( m_settings->HasIsPrimaryAlignmentFilter )  propertyTokens.insert( make_pair(ISPRIMARYALIGNMENT_PROPERTY,  m_settings->IsPrimaryAlignmentFilter) );
629     if ( m_settings->HasIsProperPairFilter )        propertyTokens.insert( make_pair(ISPROPERPAIR_PROPERTY,        m_settings->IsProperPairFilter) );
630     if ( m_settings->HasIsReverseStrandFilter )     propertyTokens.insert( make_pair(ISREVERSESTRAND_PROPERTY,     m_settings->IsReverseStrandFilter) );
631     if ( m_settings->HasIsSecondMateFilter )        propertyTokens.insert( make_pair(ISSECONDMATE_PROPERTY,        m_settings->IsSecondMateFilter) );
632     if ( m_settings->HasMapQualityFilter )          propertyTokens.insert( make_pair(MAPQUALITY_PROPERTY,          m_settings->MapQualityFilter) );
633     if ( m_settings->HasNameFilter )                propertyTokens.insert( make_pair(NAME_PROPERTY,                m_settings->NameFilter) );
634     if ( m_settings->HasQueryBasesFilter )          propertyTokens.insert( make_pair(QUERYBASES_PROPERTY,          m_settings->QueryBasesFilter) );
635     if ( m_settings->HasTagFilter )                 propertyTokens.insert( make_pair(TAG_PROPERTY,                 m_settings->TagFilter) );
636     
637     // send add these properties to filter set "COMMAND_LINE"
638     return AddPropertyTokensToFilter(CMD, propertyTokens);
639 }
640
641 bool FilterTool::FilterToolPrivate::ParseFilterObject(const string& filterName, const Json::Value& filterObject) {
642   
643     // filter object parsing variables
644     Json::Value null(Json::nullValue);
645     Json::Value propertyValue;
646     
647     // store results
648     map<string, string> propertyTokens;
649     
650     // iterate over known properties
651     vector<string>::const_iterator propertyNameIter = m_propertyNames.begin();
652     vector<string>::const_iterator propertyNameEnd  = m_propertyNames.end();
653     for ( ; propertyNameIter != propertyNameEnd; ++propertyNameIter ) {
654         const string& propertyName = (*propertyNameIter);
655         
656         // if property defined in filter, add to token list
657         propertyValue = filterObject.get(propertyName, null);
658         if ( propertyValue != null ) 
659             propertyTokens.insert( make_pair(propertyName, propertyValue.asString()) );
660     }
661   
662     // add this filter to engin
663     m_filterEngine.addFilter(filterName);
664   
665     // add token list to this filter
666     return AddPropertyTokensToFilter(filterName, propertyTokens);
667 }
668
669 bool FilterTool::FilterToolPrivate::ParseScript(void) {
670   
671     // read in script contents from file
672     const string document = GetScriptContents();
673     
674     // set up JsonCPP reader and attempt to parse script
675     Json::Value root;
676     Json::Reader reader;
677     if ( !reader.parse(document, root) ) {
678         // use built-in error reporting mechanism to alert user what was wrong with the script
679         cerr  << "bamtools filter ERROR: failed to parse script - see error message(s) below" << endl
680               << reader.getFormatedErrorMessages();
681         return false;     
682     }
683
684     // initialize return status
685     bool success = true;
686     
687     // see if root object contains multiple filters
688     const Json::Value filters = root["filters"];
689     if ( !filters.isNull() ) {
690       
691         // iterate over any filters found
692         int filterIndex = 0;
693         Json::Value::const_iterator filtersIter = filters.begin();
694         Json::Value::const_iterator filtersEnd  = filters.end();
695         for ( ; filtersIter != filtersEnd; ++filtersIter, ++filterIndex ) {
696             Json::Value filter = (*filtersIter);
697             
698             // convert filter index to string
699             string filterName;
700             
701             // if id tag supplied
702             const Json::Value id = filter["id"];
703             if ( !id.isNull() ) 
704                 filterName = id.asString();
705             
706             // use array index 
707             else {
708                 stringstream convert;
709                 convert << filterIndex;
710                 filterName = convert.str();
711             }
712             
713             // create & parse filter 
714             success &= ParseFilterObject(filterName, filter);
715         }
716         
717         // see if user defined a "rule" for these filters
718         // otherwise, use filter engine's default rule behavior
719         string ruleString("");
720         const Json::Value rule = root["rule"];
721         if ( rule.isString() )
722             ruleString = rule.asString();
723         m_filterEngine.setRule(ruleString);
724           
725         // return success/fail
726         return success;
727     } 
728     
729     // otherwise, root is the only filter (just contains properties)
730     // create & parse filter named "ROOT"
731     else success = ParseFilterObject("ROOT", root);
732     
733     // return success/failure
734     return success;
735 }
736
737 bool FilterTool::FilterToolPrivate::Run(void) {
738     
739     // set to default input if none provided
740     if ( !m_settings->HasInputBamFilename ) 
741         m_settings->InputFiles.push_back(Options::StandardIn());
742
743     // initialize defined properties & user-specified filters
744     // quit if failed
745     if ( !SetupFilters() ) return false;
746
747     // open reader without index
748     BamMultiReader reader;
749     if ( !reader.Open(m_settings->InputFiles) ) {
750         cerr << "bamtools filter ERROR: could not open input files for reading." << endl;
751         return false;
752     }
753
754     // retrieve reader header & reference data
755     const string headerText = reader.GetHeaderText();
756     filterToolReferences = reader.GetReferenceData();
757     
758     // determine compression mode for BamWriter
759     bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() &&
760                               !m_settings->IsForceCompression );
761     BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
762     if ( writeUncompressed ) compressionMode = BamWriter::Uncompressed;
763
764     // open BamWriter
765     BamWriter writer;
766     writer.SetCompressionMode(compressionMode);
767     if ( !writer.Open(m_settings->OutputFilename, headerText, filterToolReferences) ) {
768         cerr << "bamtools filter ERROR: could not open " << m_settings->OutputFilename << " for writing." << endl;
769         reader.Close();
770         return false;
771     }
772
773     // if no region specified, filter entire file 
774     BamAlignment al;
775     if ( !m_settings->HasRegion ) {
776         while ( reader.GetNextAlignment(al) ) {
777             if ( CheckAlignment(al) ) 
778                 writer.SaveAlignment(al);
779         }
780     }
781     
782     // otherwise attempt to use region as constraint
783     else {
784         
785         // if region string parses OK
786         BamRegion region;
787         if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
788
789             // attempt to find index files
790             reader.LocateIndexes();
791
792             // if index data available for all BAM files, we can use SetRegion
793             if ( reader.HasIndexes() ) {
794
795                 // attempt to use SetRegion(), if failed report error
796                 if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) {
797                     cerr << "bamtools filter ERROR: set region failed. Check that REGION describes a valid range" << endl;
798                     reader.Close();
799                     return false;
800                 } 
801               
802                 // everything checks out, just iterate through specified region, filtering alignments
803                 while ( reader.GetNextAlignment(al) )
804                     if ( CheckAlignment(al) ) 
805                         writer.SaveAlignment(al);
806                 }
807             
808             // no index data available, we have to iterate through until we
809             // find overlapping alignments
810             else {
811                 while ( reader.GetNextAlignment(al) ) {
812                     if ( (al.RefID >= region.LeftRefID)  && ((al.Position + al.Length) >= region.LeftPosition) &&
813                          (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) 
814                     {
815                         if ( CheckAlignment(al) ) 
816                             writer.SaveAlignment(al);
817                     }
818                 }
819             }
820         } 
821         
822         // error parsing REGION string
823         else {
824             cerr << "bamtools filter ERROR: could not parse REGION: " << m_settings->Region << endl;
825             cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid"
826                  << endl;
827             reader.Close();
828             return false;
829         }
830     }
831
832     // clean up & exit
833     reader.Close();
834     writer.Close();
835     return true;
836 }
837
838 bool FilterTool::FilterToolPrivate::SetupFilters(void) {
839   
840     // set up filter engine with supported properties
841     InitProperties();
842     
843     // parse script for filter rules, if given
844     if ( m_settings->HasScriptFilename )
845         return ParseScript();
846     
847     // otherwise check command line for filters
848     else return ParseCommandLine();
849 }