]> git.donarmstrong.com Git - bamtools.git/blob - src/toolkit/bamtools_filter.cpp
Added tag support in filter tool.
[bamtools.git] / src / toolkit / bamtools_filter.cpp
1 // ***************************************************************************
2 // bamtools_filter.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 4 October 2010
7 // ---------------------------------------------------------------------------
8 // Filters BAM file(s) according to some user-specified criteria.
9 // ***************************************************************************
10
11 #include <cstdio>
12 #include <iostream>
13 #include <sstream>
14 #include <string>
15 #include <vector>
16 #include "bamtools_filter.h"
17 #include "bamtools_filter_engine.h"
18 #include "bamtools_options.h"
19 #include "bamtools_utilities.h"
20 #include "BamReader.h"
21 #include "BamMultiReader.h"
22 #include "BamWriter.h"
23 #include "jsoncpp/json.h"
24 using namespace std;
25 using namespace BamTools; 
26 using namespace Json;
27   
28 namespace BamTools {
29   
30 // -------------------------------  
31 // string literal constants  
32
33 // property names
34 const string ALIGNMENTFLAG_PROPERTY       = "alignmentFlag";
35 const string CIGAR_PROPERTY               = "cigar";
36 const string INSERTSIZE_PROPERTY          = "insertSize";
37 const string ISDUPLICATE_PROPERTY         = "isDuplicate";
38 const string ISFAILEDQC_PROPERTY          = "isFailedQC";
39 const string ISFIRSTMATE_PROPERTY         = "isFirstMate";
40 const string ISMAPPED_PROPERTY            = "isMapped";
41 const string ISMATEMAPPED_PROPERTY        = "isMateMapped";
42 const string ISMATEREVERSESTRAND_PROPERTY = "isMateReverseStrand";
43 const string ISPAIRED_PROPERTY            = "isPaired";
44 const string ISPRIMARYALIGNMENT_PROPERTY  = "isPrimaryAlignment";
45 const string ISPROPERPAIR_PROPERTY        = "isProperPair";
46 const string ISREVERSESTRAND_PROPERTY     = "isReverseStrand";
47 const string ISSECONDMATE_PROPERTY        = "isSecondMate";
48 const string MAPQUALITY_PROPERTY          = "mapQuality";
49 const string MATEPOSITION_PROPERTY        = "matePosition";
50 const string MATEREFERENCE_PROPERTY       = "mateReference";
51 const string NAME_PROPERTY                = "name";
52 const string POSITION_PROPERTY            = "position";
53 const string QUERYBASES_PROPERTY          = "queryBases";
54 const string REFERENCE_PROPERTY           = "reference";
55 const string TAG_PROPERTY                 = "tag";
56
57 // boolalpha
58 const string TRUE_STR  = "true";
59 const string FALSE_STR = "false";
60     
61 RefVector filterToolReferences;    
62     
63 struct BamAlignmentChecker {
64     bool check(const PropertyFilter& filter, const BamAlignment& al) {
65       
66         bool keepAlignment = true;
67         const PropertyMap& properties = filter.Properties;
68         PropertyMap::const_iterator propertyIter = properties.begin();
69         PropertyMap::const_iterator propertyEnd  = properties.end();
70         for ( ; propertyIter != propertyEnd; ++propertyIter ) {
71           
72             // check alignment data field depending on propertyName
73             const string& propertyName = (*propertyIter).first;
74             const PropertyFilterValue& valueFilter = (*propertyIter).second;
75             
76             if      ( propertyName == ALIGNMENTFLAG_PROPERTY )  keepAlignment &= valueFilter.check(al.AlignmentFlag);
77             else if ( propertyName == CIGAR_PROPERTY ) {
78                 stringstream cigarSs;
79                 const vector<CigarOp>& cigarData = al.CigarData;
80                 if ( !cigarData.empty() ) {
81                     vector<CigarOp>::const_iterator cigarBegin = cigarData.begin();
82                     vector<CigarOp>::const_iterator cigarIter = cigarBegin;
83                     vector<CigarOp>::const_iterator cigarEnd  = cigarData.end();
84                     for ( ; cigarIter != cigarEnd; ++cigarIter ) {
85                         const CigarOp& op = (*cigarIter);
86                         cigarSs << op.Length << op.Type;
87                     }
88                     keepAlignment &= valueFilter.check(cigarSs.str());
89                 }
90             }
91             else if ( propertyName == INSERTSIZE_PROPERTY )           keepAlignment &= valueFilter.check(al.InsertSize);
92             else if ( propertyName == ISDUPLICATE_PROPERTY )          keepAlignment &= valueFilter.check(al.IsDuplicate());
93             else if ( propertyName == ISFAILEDQC_PROPERTY )           keepAlignment &= valueFilter.check(al.IsFailedQC());
94             else if ( propertyName == ISFIRSTMATE_PROPERTY )          keepAlignment &= valueFilter.check(al.IsFirstMate());
95             else if ( propertyName == ISMAPPED_PROPERTY )             keepAlignment &= valueFilter.check(al.IsMapped());
96             else if ( propertyName == ISMATEMAPPED_PROPERTY )         keepAlignment &= valueFilter.check(al.IsMateMapped());
97             else if ( propertyName == ISMATEREVERSESTRAND_PROPERTY )  keepAlignment &= valueFilter.check(al.IsMateReverseStrand());
98             else if ( propertyName == ISPAIRED_PROPERTY )             keepAlignment &= valueFilter.check(al.IsPaired());
99             else if ( propertyName == ISPRIMARYALIGNMENT_PROPERTY )   keepAlignment &= valueFilter.check(al.IsPrimaryAlignment());
100             else if ( propertyName == ISPROPERPAIR_PROPERTY )         keepAlignment &= valueFilter.check(al.IsProperPair());
101             else if ( propertyName == ISREVERSESTRAND_PROPERTY )      keepAlignment &= valueFilter.check(al.IsReverseStrand());
102             else if ( propertyName == ISSECONDMATE_PROPERTY )         keepAlignment &= valueFilter.check(al.IsSecondMate());
103             else if ( propertyName == MAPQUALITY_PROPERTY )           keepAlignment &= valueFilter.check(al.MapQuality);
104             else if ( propertyName == MATEPOSITION_PROPERTY )         keepAlignment &= ( al.IsPaired() && al.IsMateMapped() && valueFilter.check(al.MateRefID) );
105             else if ( propertyName == MATEREFERENCE_PROPERTY ) {
106                 if ( !al.IsPaired() || !al.IsMateMapped() ) return false;
107                 BAMTOOLS_ASSERT_MESSAGE( (al.MateRefID>=0 && (al.MateRefID<(int)filterToolReferences.size())), "Invalid MateRefID");
108                 const string& refName = filterToolReferences.at(al.MateRefID).RefName;
109                 keepAlignment &= valueFilter.check(refName);
110             }
111             else if ( propertyName == NAME_PROPERTY )                 keepAlignment &= valueFilter.check(al.Name);
112             else if ( propertyName == POSITION_PROPERTY )             keepAlignment &= valueFilter.check(al.Position);
113             else if ( propertyName == QUERYBASES_PROPERTY )           keepAlignment &= valueFilter.check(al.QueryBases);
114             else if ( propertyName == REFERENCE_PROPERTY ) {
115                 BAMTOOLS_ASSERT_MESSAGE( (al.RefID>=0 && (al.RefID<(int)filterToolReferences.size())), "Invalid RefID");
116                 const string& refName = filterToolReferences.at(al.RefID).RefName;
117                 keepAlignment &= valueFilter.check(refName);
118             }
119             else if ( propertyName == TAG_PROPERTY ) keepAlignment &= checkAlignmentTag(valueFilter, al);
120             else BAMTOOLS_ASSERT_UNREACHABLE;
121             
122             // if alignment fails at ANY point, just quit and return false
123             if ( !keepAlignment ) return false;
124         }
125       
126         BAMTOOLS_ASSERT_MESSAGE( keepAlignment, "Error in BamAlignmentChecker... keepAlignment should be true here");
127         return keepAlignment;
128     }
129     
130     bool checkAlignmentTag(const PropertyFilterValue& valueFilter, const BamAlignment& al) {
131      
132         // ensure filter contains string data
133         Variant entireTagFilter = valueFilter.Value;
134         if ( !entireTagFilter.is_type<string>() ) return false;
135         
136         // localize string from variant
137         const string& entireTagFilterString = entireTagFilter.get<string>();
138         
139         // ensure we have at least "XX:x"
140         if ( entireTagFilterString.length() < 4 ) return false;
141         
142         // get tagName & lookup in alignment
143         // if found, set tagType to tag type character
144         // if not found, return false
145         const string& tagName = entireTagFilterString.substr(0,2);
146         char tagType = '\0';
147         if ( !al.GetTagType(tagName, tagType) ) return false;
148           
149         // remove tagName & ":" from beginning tagFilter
150         string tagFilterString = entireTagFilterString.substr(3);
151           
152         // switch on tag type to set tag query value & parse filter token
153         int32_t  intFilterValue,    intQueryValue;
154         uint32_t uintFilterValue,   uintQueryValue;
155         float    realFilterValue,   realQueryValue;
156         string   stringFilterValue, stringQueryValue;
157         
158         PropertyFilterValue tagFilter;
159         PropertyFilterValue::ValueCompareType compareType;
160         bool keepAlignment = false;
161         switch (tagType) {
162             
163             // signed int tag type
164             case 'c' :
165             case 's' : 
166             case 'i' :
167                 if ( al.GetTag(tagName, intQueryValue) ) {
168                     if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, intFilterValue, compareType) ) {
169                         tagFilter.Value = intFilterValue;
170                         tagFilter.Type  = compareType;
171                         keepAlignment   = tagFilter.check(intQueryValue);
172                     }
173                 }
174                 break;
175                 
176             // unsigned int tag type
177             case 'C' :
178             case 'S' :
179             case 'I' : 
180                 if ( al.GetTag(tagName, uintQueryValue) ) {
181                     if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, uintFilterValue, compareType) ) {
182                         tagFilter.Value = uintFilterValue;
183                         tagFilter.Type  = compareType;
184                         keepAlignment   = tagFilter.check(uintQueryValue);
185                     }
186                 }
187                 break;
188                 
189             // 'real' tag type
190             case 'f' :
191                 if ( al.GetTag(tagName, realQueryValue) ) {
192                     if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, realFilterValue, compareType) ) {
193                         tagFilter.Value = realFilterValue;
194                         tagFilter.Type  = compareType;
195                         keepAlignment   = tagFilter.check(realQueryValue);
196                     }
197                 }
198                 break;
199                 
200             // string tag type
201             case 'A':
202             case 'Z':
203             case 'H':         
204                 if ( al.GetTag(tagName, stringQueryValue) ) {
205                     if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, stringFilterValue, compareType) ) {
206                         tagFilter.Value = stringFilterValue;
207                         tagFilter.Type  = compareType;
208                         keepAlignment   = tagFilter.check(stringQueryValue);
209                     }
210                 }
211                 break;
212                 
213             // unknown tag type
214             default : 
215                 keepAlignment = false;
216         }
217         
218         return keepAlignment;
219     }
220 };    
221     
222 } // namespace BamTools
223   
224 // ---------------------------------------------
225 // FilterToolPrivate declaration
226
227 class FilterTool::FilterToolPrivate {
228       
229     // ctor & dtor
230     public:
231         FilterToolPrivate(FilterTool::FilterSettings* settings);
232         ~FilterToolPrivate(void);  
233         
234     // 'public' interface
235     public:
236         bool Run(void);
237         
238     // internal methods
239     private:
240         bool AddPropertyTokensToFilter(const string& filterName, const map<string, string>& propertyTokens);
241         bool CheckAlignment(const BamAlignment& al);
242         const string GetScriptContents(void);
243         void InitProperties(void);
244         bool ParseCommandLine(void);
245         bool ParseFilterObject(const string& filterName, const Json::Value& filterObject);
246         bool ParseScript(void);
247         bool SetupFilters(void);
248         
249     // data members
250     private:
251         vector<string> m_propertyNames;
252         FilterTool::FilterSettings* m_settings;
253         FilterEngine<BamAlignmentChecker> m_filterEngine;
254 };
255   
256 // ---------------------------------------------
257 // FilterSettings implementation
258
259 struct FilterTool::FilterSettings {
260
261     // ----------------------------------
262     // IO opts
263   
264     // flags
265     bool HasInputBamFilename;
266     bool HasOutputBamFilename;
267     bool HasRegion;
268     bool HasScriptFilename;
269     bool IsForceCompression;
270     
271     // filenames
272     vector<string> InputFiles;
273     string OutputFilename;
274     string Region;
275     string ScriptFilename;
276     
277     // -----------------------------------
278     // General filter opts
279     
280     // flags
281     bool HasAlignmentFlagFilter;
282     bool HasInsertSizeFilter;
283     bool HasMapQualityFilter;
284     bool HasNameFilter;
285     bool HasQueryBasesFilter;
286     bool HasTagFilter; //(s)
287
288     // filters
289     string AlignmentFlagFilter;
290     string InsertSizeFilter;
291     string NameFilter;
292     string MapQualityFilter;
293     string QueryBasesFilter;
294     string TagFilter;  // support multiple ?
295
296     // -----------------------------------
297     // AlignmentFlag filter opts
298     
299     // flags
300     bool HasIsDuplicateFilter;
301     bool HasIsFailedQCFilter;
302     bool HasIsFirstMateFilter;
303     bool HasIsMappedFilter;
304     bool HasIsMateMappedFilter;
305     bool HasIsMateReverseStrandFilter;
306     bool HasIsPairedFilter;
307     bool HasIsPrimaryAlignmentFilter;
308     bool HasIsProperPairFilter;
309     bool HasIsReverseStrandFilter;
310     bool HasIsSecondMateFilter;
311     
312     // filters
313     string IsDuplicateFilter;
314     string IsFailedQCFilter;
315     string IsFirstMateFilter;
316     string IsMappedFilter;
317     string IsMateMappedFilter;
318     string IsMateReverseStrandFilter;
319     string IsPairedFilter;
320     string IsPrimaryAlignmentFilter;
321     string IsProperPairFilter;
322     string IsReverseStrandFilter;
323     string IsSecondMateFilter;
324     
325     // ---------------------------------
326     // constructor
327     
328     FilterSettings(void)
329         : HasInputBamFilename(false)
330         , HasOutputBamFilename(false)
331         , HasRegion(false)
332         , HasScriptFilename(false)
333         , IsForceCompression(false)
334         , OutputFilename(Options::StandardOut())
335         , HasAlignmentFlagFilter(false)
336         , HasInsertSizeFilter(false)
337         , HasMapQualityFilter(false)
338         , HasNameFilter(false)
339         , HasQueryBasesFilter(false)
340         , HasTagFilter(false)
341         , HasIsDuplicateFilter(false)
342         , HasIsFailedQCFilter(false)
343         , HasIsFirstMateFilter(false)
344         , HasIsMappedFilter(false)
345         , HasIsMateMappedFilter(false)
346         , HasIsMateReverseStrandFilter(false)
347         , HasIsPairedFilter(false)
348         , HasIsPrimaryAlignmentFilter(false)
349         , HasIsProperPairFilter(false)
350         , HasIsReverseStrandFilter(false)
351         , HasIsSecondMateFilter(false)
352         , IsDuplicateFilter(TRUE_STR)
353         , IsFailedQCFilter(TRUE_STR)
354         , IsFirstMateFilter(TRUE_STR)
355         , IsMappedFilter(TRUE_STR)
356         , IsMateMappedFilter(TRUE_STR)
357         , IsMateReverseStrandFilter(TRUE_STR)
358         , IsPairedFilter(TRUE_STR)
359         , IsPrimaryAlignmentFilter(TRUE_STR)
360         , IsProperPairFilter(TRUE_STR)
361         , IsReverseStrandFilter(TRUE_STR)
362         , IsSecondMateFilter(TRUE_STR)
363     { }
364 };  
365
366 // ---------------------------------------------
367 // FilterTool implementation
368
369 FilterTool::FilterTool(void)
370     : AbstractTool()
371     , m_settings(new FilterSettings)
372     , m_impl(0)
373 {
374     // set program details
375     Options::SetProgramInfo("bamtools filter", "filters BAM file(s)", "-in <filename> [-in <filename> ... ] -out <filename> -region <REGION> [ [-script <filename] | [filterOptions] ]");
376     
377     OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
378     Options::AddValueOption("-in",     "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename,  m_settings->InputFiles,     IO_Opts, Options::StandardIn());
379     Options::AddValueOption("-out",    "BAM filename", "the output BAM file",   "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
380     Options::AddValueOption("-region", "REGION",       "only read data from this genomic region (see README for more details)", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
381     Options::AddValueOption("-script", "filename",     "the filter script file (see README for more details)", "", m_settings->HasScriptFilename, m_settings->ScriptFilename, IO_Opts);
382     Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts);
383     
384     OptionGroup* FilterOpts = Options::CreateOptionGroup("General Filters");
385     Options::AddValueOption("-alignmentFlag", "int",        "keep reads with this *exact* alignment flag (for more detailed queries, see below)", "", m_settings->HasAlignmentFlagFilter, m_settings->AlignmentFlagFilter, FilterOpts);
386     Options::AddValueOption("-insertSize",    "int",        "keep reads with insert size that mathces pattern",             "", m_settings->HasInsertSizeFilter, m_settings->InsertSizeFilter, FilterOpts);
387     Options::AddValueOption("-mapQuality",    "[0-255]",    "keep reads with map quality that matches pattern",             "", m_settings->HasMapQualityFilter, m_settings->MapQualityFilter, FilterOpts);
388     Options::AddValueOption("-name",          "string",     "keep reads with name that matches pattern",                    "", m_settings->HasNameFilter,       m_settings->NameFilter,       FilterOpts);
389     Options::AddValueOption("-queryBases",    "string",     "keep reads with motif that mathces pattern",                   "", m_settings->HasQueryBasesFilter, m_settings->QueryBasesFilter, FilterOpts);
390     Options::AddValueOption("-tag",            "TAG:VALUE", "keep reads with this key=>value pair",                         "", m_settings->HasTagFilter,        m_settings->TagFilter,        FilterOpts);
391     
392     OptionGroup* AlignmentFlagOpts = Options::CreateOptionGroup("Alignment Flag Filters");
393     Options::AddValueOption("-isDuplicate",         "true/false", "keep only alignments that are marked as duplicate?", "", m_settings->HasIsDuplicateFilter,         m_settings->IsDuplicateFilter,         AlignmentFlagOpts, TRUE_STR);
394     Options::AddValueOption("-isFailedQC",          "true/false", "keep only alignments that failed QC?",               "", m_settings->HasIsFailedQCFilter,          m_settings->IsFailedQCFilter,          AlignmentFlagOpts, TRUE_STR);
395     Options::AddValueOption("-isFirstMate",         "true/false", "keep only alignments marked as first mate?",         "", m_settings->HasIsFirstMateFilter,         m_settings->IsFirstMateFilter,         AlignmentFlagOpts, TRUE_STR);
396     Options::AddValueOption("-isMapped",            "true/false", "keep only alignments that were mapped?",             "", m_settings->HasIsMappedFilter,            m_settings->IsMappedFilter,            AlignmentFlagOpts, TRUE_STR);
397     Options::AddValueOption("-isMateMapped",        "true/false", "keep only alignments with mates that mapped",        "", m_settings->HasIsMateMappedFilter,        m_settings->IsMateMappedFilter,        AlignmentFlagOpts, TRUE_STR);
398     Options::AddValueOption("-isMateReverseStrand", "true/false", "keep only alignments with mate on reverese strand?", "", m_settings->HasIsMateReverseStrandFilter, m_settings->IsMateReverseStrandFilter, AlignmentFlagOpts, TRUE_STR);
399     Options::AddValueOption("-isPaired",            "true/false", "keep only alignments that were sequenced as paired?","", m_settings->HasIsPairedFilter,            m_settings->IsPairedFilter,            AlignmentFlagOpts, TRUE_STR);
400     Options::AddValueOption("-isPrimaryAlignment",  "true/false", "keep only alignments marked as primary?",            "", m_settings->HasIsPrimaryAlignmentFilter,  m_settings->IsPrimaryAlignmentFilter,  AlignmentFlagOpts, TRUE_STR);
401     Options::AddValueOption("-isProperPair",        "true/false", "keep only alignments that passed PE resolution?",    "", m_settings->HasIsProperPairFilter,        m_settings->IsProperPairFilter,        AlignmentFlagOpts, TRUE_STR);
402     Options::AddValueOption("-isReverseStrand",     "true/false", "keep only alignments on reverse strand?",            "", m_settings->HasIsReverseStrandFilter,     m_settings->IsReverseStrandFilter,     AlignmentFlagOpts, TRUE_STR);
403     Options::AddValueOption("-isSecondMate",        "true/false", "keep only alignments marked as second mate?",        "", m_settings->HasIsSecondMateFilter,        m_settings->IsSecondMateFilter,        AlignmentFlagOpts, TRUE_STR);
404 }
405
406 FilterTool::~FilterTool(void) {
407     delete m_settings;
408     m_settings = 0;
409     
410     delete m_impl;
411     m_impl = 0;
412 }
413
414 int FilterTool::Help(void) {
415     Options::DisplayHelp();
416     return 0;
417 }
418
419 int FilterTool::Run(int argc, char* argv[]) {
420   
421     // parse command line arguments
422     Options::Parse(argc, argv, 1);
423     
424     // run internal FilterTool implementation, return success/fail
425     m_impl = new FilterToolPrivate(m_settings);
426     
427     if ( m_impl->Run() ) return 0;
428     else return 1;
429 }
430  
431 // ---------------------------------------------
432 // FilterToolPrivate implementation
433   
434 // constructor  
435 FilterTool::FilterToolPrivate::FilterToolPrivate(FilterTool::FilterSettings* settings)
436     : m_settings(settings)
437 { }  
438   
439 // destructor
440 FilterTool::FilterToolPrivate::~FilterToolPrivate(void) { }
441
442 bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filterName, const map<string, string>& propertyTokens) {
443
444     // dummy temp values for token parsing
445     bool boolValue;
446     int32_t int32Value;
447     uint16_t uint16Value;
448     uint32_t uint32Value;
449     string stringValue;
450     PropertyFilterValue::ValueCompareType type;
451   
452     // iterate over property token map
453     map<string, string>::const_iterator mapIter = propertyTokens.begin();
454     map<string, string>::const_iterator mapEnd  = propertyTokens.end();
455     for ( ; mapIter != mapEnd; ++mapIter ) {
456       
457         const string& propertyName = (*mapIter).first;
458         const string& token        = (*mapIter).second;
459         
460         // ------------------------------
461         // convert token to value & compare type 
462         // then add to filter engine
463         
464         // bool conversion
465         if ( propertyName == ISDUPLICATE_PROPERTY ||
466              propertyName == ISFAILEDQC_PROPERTY ||
467              propertyName == ISFIRSTMATE_PROPERTY ||
468              propertyName == ISMAPPED_PROPERTY ||
469              propertyName == ISMATEMAPPED_PROPERTY ||
470              propertyName == ISMATEREVERSESTRAND_PROPERTY ||
471              propertyName == ISPAIRED_PROPERTY ||
472              propertyName == ISPRIMARYALIGNMENT_PROPERTY ||
473              propertyName == ISPROPERPAIR_PROPERTY ||
474              propertyName == ISREVERSESTRAND_PROPERTY ||
475              propertyName == ISSECONDMATE_PROPERTY
476            ) 
477         {
478             FilterEngine<BamAlignmentChecker>::parseToken(token, boolValue, type);
479             m_filterEngine.setProperty(filterName, propertyName, boolValue, type);
480         }
481         
482         // int32_t conversion
483         else if ( propertyName == INSERTSIZE_PROPERTY ||
484                   propertyName == MATEPOSITION_PROPERTY ||
485                   propertyName == POSITION_PROPERTY 
486                 ) 
487         {
488             FilterEngine<BamAlignmentChecker>::parseToken(token, int32Value, type);
489             m_filterEngine.setProperty(filterName, propertyName, int32Value, type);
490         }
491         
492         // uint16_t conversion
493         else if ( propertyName == MAPQUALITY_PROPERTY )
494         {
495             FilterEngine<BamAlignmentChecker>::parseToken(token, uint16Value, type);
496             m_filterEngine.setProperty(filterName, propertyName, uint16Value, type);
497         }
498         
499         // uint32_t conversion
500         else if ( propertyName == ALIGNMENTFLAG_PROPERTY )
501         {
502             FilterEngine<BamAlignmentChecker>::parseToken(token, uint32Value, type);
503             m_filterEngine.setProperty(filterName, propertyName, uint32Value, type);
504         }
505         
506         // string conversion
507         else if ( propertyName == CIGAR_PROPERTY || 
508                   propertyName == MATEREFERENCE_PROPERTY ||
509                   propertyName == NAME_PROPERTY ||
510                   propertyName == QUERYBASES_PROPERTY ||
511                   propertyName == REFERENCE_PROPERTY 
512                 ) 
513         {
514             FilterEngine<BamAlignmentChecker>::parseToken(token, stringValue, type);
515             m_filterEngine.setProperty(filterName, propertyName, stringValue, type);
516         }
517       
518         else if (propertyName == TAG_PROPERTY ) {
519             // this will be stored directly as the TAG:VALUE token 
520             // (VALUE may contain compare ops, will be parsed out later)
521             m_filterEngine.setProperty(filterName, propertyName, token, PropertyFilterValue::EXACT);
522         }
523       
524         // else unknown property 
525         else {
526             cerr << "Unknown property: " << propertyName << "!" << endl;
527             return false;
528         }
529     }
530     return true;
531 }
532
533 bool FilterTool::FilterToolPrivate::CheckAlignment(const BamAlignment& al) {
534     return m_filterEngine.check(al);
535 }
536
537 const string FilterTool::FilterToolPrivate::GetScriptContents(void) {
538   
539     // open script for reading
540     FILE* inFile = fopen(m_settings->ScriptFilename.c_str(), "rb");
541     if ( !inFile ) {
542         cerr << "FilterTool error: Could not open script: " << m_settings->ScriptFilename << " for reading" << endl;
543         return false;
544     }
545     
546     // read in entire script contents  
547     char buffer[1024];
548     ostringstream docStream("");
549     while ( true ) {
550         
551         // peek ahead, make sure there is data available
552         char ch = fgetc(inFile);
553         ungetc(ch, inFile);
554         if( feof(inFile) ) break;       
555         
556         // read next block of data
557         if ( fgets(buffer, 1024, inFile) == 0 ) {
558             cerr << "FilterTool error : could not read from script" << endl;
559             return false;
560         }
561         
562         docStream << buffer;
563     }
564     
565     // close script file
566     fclose(inFile);
567     
568     // import buffer contents to document, return
569     string document = docStream.str();
570     return document;
571 }
572
573 void FilterTool::FilterToolPrivate::InitProperties(void) {
574   
575     // store property names in vector 
576     m_propertyNames.push_back(ALIGNMENTFLAG_PROPERTY);
577     m_propertyNames.push_back(CIGAR_PROPERTY);
578     m_propertyNames.push_back(INSERTSIZE_PROPERTY);
579     m_propertyNames.push_back(ISDUPLICATE_PROPERTY);
580     m_propertyNames.push_back(ISFAILEDQC_PROPERTY);
581     m_propertyNames.push_back(ISFIRSTMATE_PROPERTY);
582     m_propertyNames.push_back(ISMAPPED_PROPERTY);
583     m_propertyNames.push_back(ISMATEMAPPED_PROPERTY);
584     m_propertyNames.push_back(ISMATEREVERSESTRAND_PROPERTY);
585     m_propertyNames.push_back(ISPAIRED_PROPERTY);
586     m_propertyNames.push_back(ISPRIMARYALIGNMENT_PROPERTY);
587     m_propertyNames.push_back(ISPROPERPAIR_PROPERTY);
588     m_propertyNames.push_back(ISREVERSESTRAND_PROPERTY);
589     m_propertyNames.push_back(ISSECONDMATE_PROPERTY);
590     m_propertyNames.push_back(MAPQUALITY_PROPERTY);
591     m_propertyNames.push_back(MATEPOSITION_PROPERTY);
592     m_propertyNames.push_back(MATEREFERENCE_PROPERTY);
593     m_propertyNames.push_back(NAME_PROPERTY);
594     m_propertyNames.push_back(POSITION_PROPERTY);
595     m_propertyNames.push_back(QUERYBASES_PROPERTY);
596     m_propertyNames.push_back(REFERENCE_PROPERTY);
597     m_propertyNames.push_back(TAG_PROPERTY);
598     
599     // add vector contents to FilterEngine<BamAlignmentChecker>
600     vector<string>::const_iterator propertyNameIter = m_propertyNames.begin();
601     vector<string>::const_iterator propertyNameEnd  = m_propertyNames.end();
602     for ( ; propertyNameIter != propertyNameEnd; ++propertyNameIter )
603         m_filterEngine.addProperty((*propertyNameIter));
604 }
605
606 bool FilterTool::FilterToolPrivate::ParseCommandLine(void) {
607   
608     // add a rule set to filter engine
609     const string CMD = "COMMAND_LINE";
610     m_filterEngine.addFilter(CMD);
611
612     // map property names to command line args
613     map<string, string> propertyTokens;
614     if ( m_settings->HasAlignmentFlagFilter )       propertyTokens.insert( make_pair(ALIGNMENTFLAG_PROPERTY,       m_settings->AlignmentFlagFilter) );
615     if ( m_settings->HasInsertSizeFilter )          propertyTokens.insert( make_pair(INSERTSIZE_PROPERTY,          m_settings->InsertSizeFilter) );
616     if ( m_settings->HasIsDuplicateFilter )         propertyTokens.insert( make_pair(ISDUPLICATE_PROPERTY,         m_settings->IsDuplicateFilter) );
617     if ( m_settings->HasIsFailedQCFilter )          propertyTokens.insert( make_pair(ISFAILEDQC_PROPERTY,          m_settings->IsFailedQCFilter) );
618     if ( m_settings->HasIsFirstMateFilter )         propertyTokens.insert( make_pair(ISFIRSTMATE_PROPERTY,         m_settings->IsFirstMateFilter) );
619     if ( m_settings->HasIsMappedFilter )            propertyTokens.insert( make_pair(ISMAPPED_PROPERTY,            m_settings->IsMappedFilter) );
620     if ( m_settings->HasIsMateMappedFilter )        propertyTokens.insert( make_pair(ISMATEMAPPED_PROPERTY,        m_settings->IsMateMappedFilter) );
621     if ( m_settings->HasIsMateReverseStrandFilter ) propertyTokens.insert( make_pair(ISMATEREVERSESTRAND_PROPERTY, m_settings->IsMateReverseStrandFilter) );
622     if ( m_settings->HasIsPairedFilter )            propertyTokens.insert( make_pair(ISPAIRED_PROPERTY,            m_settings->IsPairedFilter) );
623     if ( m_settings->HasIsPrimaryAlignmentFilter )  propertyTokens.insert( make_pair(ISPRIMARYALIGNMENT_PROPERTY,  m_settings->IsPrimaryAlignmentFilter) );
624     if ( m_settings->HasIsProperPairFilter )        propertyTokens.insert( make_pair(ISPROPERPAIR_PROPERTY,        m_settings->IsProperPairFilter) );
625     if ( m_settings->HasIsReverseStrandFilter )     propertyTokens.insert( make_pair(ISREVERSESTRAND_PROPERTY,     m_settings->IsReverseStrandFilter) );
626     if ( m_settings->HasIsSecondMateFilter )        propertyTokens.insert( make_pair(ISSECONDMATE_PROPERTY,        m_settings->IsSecondMateFilter) );
627     if ( m_settings->HasMapQualityFilter )          propertyTokens.insert( make_pair(MAPQUALITY_PROPERTY,          m_settings->MapQualityFilter) );
628     if ( m_settings->HasNameFilter )                propertyTokens.insert( make_pair(NAME_PROPERTY,                m_settings->NameFilter) );
629     if ( m_settings->HasQueryBasesFilter )          propertyTokens.insert( make_pair(QUERYBASES_PROPERTY,          m_settings->QueryBasesFilter) );
630     if ( m_settings->HasTagFilter )                 propertyTokens.insert( make_pair(TAG_PROPERTY,                 m_settings->TagFilter) );
631     
632     // send add these properties to filter set "COMMAND_LINE"
633     return AddPropertyTokensToFilter(CMD, propertyTokens);
634 }
635
636 bool FilterTool::FilterToolPrivate::ParseFilterObject(const string& filterName, const Json::Value& filterObject) {
637   
638     // filter object parsing variables
639     Json::Value null(Json::nullValue);
640     Json::Value propertyValue;
641     
642     // store results
643     map<string, string> propertyTokens;
644     
645     // iterate over known properties
646     vector<string>::const_iterator propertyNameIter = m_propertyNames.begin();
647     vector<string>::const_iterator propertyNameEnd  = m_propertyNames.end();
648     for ( ; propertyNameIter != propertyNameEnd; ++propertyNameIter ) {
649         const string& propertyName = (*propertyNameIter);
650         
651         // if property defined in filter, add to token list
652         propertyValue = filterObject.get(propertyName, null);
653         if ( propertyValue != null ) 
654             propertyTokens.insert( make_pair(propertyName, propertyValue.asString()) );
655     }
656   
657     // add this filter to engin
658     m_filterEngine.addFilter(filterName);
659   
660     // add token list to this filter
661     return AddPropertyTokensToFilter(filterName, propertyTokens);
662 }
663
664 bool FilterTool::FilterToolPrivate::ParseScript(void) {
665   
666     // read in script contents from file
667     const string document = GetScriptContents();
668     
669     // set up JsonCPP reader and attempt to parse script
670     Json::Value root;
671     Json::Reader reader;
672     if ( !reader.parse(document, root) ) {
673         // use built-in error reporting mechanism to alert user what was wrong with the script
674         cerr  << "Failed to parse configuration\n" << reader.getFormatedErrorMessages();
675         return false;     
676     }
677
678     // initialize return status
679     bool success = true;
680     
681     // see if root object contains multiple filters
682     const Json::Value filters = root["filters"];
683     if ( !filters.isNull() ) {
684       
685         // iterate over any filters found
686         int filterIndex = 0;
687         Json::Value::const_iterator filtersIter = filters.begin();
688         Json::Value::const_iterator filtersEnd  = filters.end();
689         for ( ; filtersIter != filtersEnd; ++filtersIter, ++filterIndex ) {
690             Json::Value filter = (*filtersIter);
691             
692             // convert filter index to string
693             string filterName;
694             
695             // if id tag supplied
696             const Json::Value id = filter["id"];
697             if ( !id.isNull() ) 
698                 filterName = id.asString();
699             
700             // use array index 
701             else {
702                 stringstream convert;
703                 convert << filterIndex;
704                 filterName = convert.str();
705             }
706             
707             // create & parse filter 
708             success &= ParseFilterObject(filterName, filter);
709         }
710         
711         // see if user defined a "rule" for these filters
712         // otherwise, use filter engine's default rule behavior
713         string ruleString("");
714         const Json::Value rule = root["rule"];
715         if ( rule.isString() )
716             ruleString = rule.asString();
717         m_filterEngine.setRule(ruleString);
718           
719         // return success/fail
720         return success;
721     } 
722     
723     // otherwise, root is the only filter (just contains properties)
724     // create & parse filter named "ROOT"
725     else success = ParseFilterObject("ROOT", root);
726     
727     // return success/failure
728     return success;
729 }
730
731 bool FilterTool::FilterToolPrivate::Run(void) {
732     
733     // set to default input if none provided
734     if ( !m_settings->HasInputBamFilename ) 
735         m_settings->InputFiles.push_back(Options::StandardIn());
736
737     // initialize defined properties & user-specified filters
738     // quit if failed
739     if ( !SetupFilters() ) return 1;
740
741     // open reader without index
742     BamMultiReader reader;
743     if ( !reader.Open(m_settings->InputFiles, false, true) ) {
744         cerr << "Could not open input files for reading." << endl;
745         return false;
746     }
747     const string headerText = reader.GetHeaderText();
748     filterToolReferences = reader.GetReferenceData();
749     
750     // open writer
751     BamWriter writer;
752     bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression );
753     if ( !writer.Open(m_settings->OutputFilename, headerText, filterToolReferences, writeUncompressed) ) {
754         cerr << "Could not open " << m_settings->OutputFilename << " for writing." << endl;
755         return false;
756     }
757     
758     BamAlignment al;
759     
760     // if no region specified, filter entire file 
761     if ( !m_settings->HasRegion ) {
762         while ( reader.GetNextAlignment(al) ) {
763             if ( CheckAlignment(al) ) 
764                 writer.SaveAlignment(al);
765         }
766     }
767     
768     // otherwise attempt to use region as constraint
769     else {
770         
771         // if region string parses OK
772         BamRegion region;
773         if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
774
775             // attempt to re-open reader with index files
776             reader.Close();
777             bool openedOK = reader.Open(m_settings->InputFiles, true, true );
778             
779             // if error
780             if ( !openedOK ) {
781                 cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl;
782                 return 1;
783             }
784             
785             // if index data available, we can use SetRegion
786             if ( reader.IsIndexLoaded() ) {
787               
788                 // attempt to use SetRegion(), if failed report error
789                 if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) {
790                     cerr << "ERROR: Region requested, but could not set BamReader region to REGION: " << m_settings->Region << " Aborting." << endl;
791                     reader.Close();
792                     return 1;
793                 } 
794               
795                 // everything checks out, just iterate through specified region, filtering alignments
796                 while ( reader.GetNextAlignmentCore(al) )
797                     if ( CheckAlignment(al) ) 
798                         writer.SaveAlignment(al);
799             } 
800             
801             // no index data available, we have to iterate through until we
802             // find overlapping alignments
803             else {
804                 while ( reader.GetNextAlignmentCore(al) ) {
805                     if ( (al.RefID >= region.LeftRefID)  && ((al.Position + al.Length) >= region.LeftPosition) &&
806                          (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) 
807                     {
808                         if ( CheckAlignment(al) ) 
809                             writer.SaveAlignment(al);
810                     }
811                 }
812             }
813         } 
814         
815         // error parsing REGION string
816         else {
817             cerr << "ERROR: Could not parse REGION - " << m_settings->Region << endl;
818             cerr << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl;
819             reader.Close();
820             return 1;
821         }
822     }
823
824     // clean up & exit
825     reader.Close();
826     writer.Close();
827     return 0;
828 }
829
830 bool FilterTool::FilterToolPrivate::SetupFilters(void) {
831   
832     // add known properties to FilterEngine<BamAlignmentChecker>
833     InitProperties();
834     
835     // parse script for filter rules, if given
836     if ( m_settings->HasScriptFilename ) return ParseScript();
837     
838     // otherwise check command line for filters
839     else return ParseCommandLine();
840 }