]> git.donarmstrong.com Git - bamtools.git/blob - src/toolkit/bamtools_filter.cpp
Implemented 'compound rule' logic in the FilterTool's script support.
[bamtools.git] / src / toolkit / bamtools_filter.cpp
1 // ***************************************************************************
2 // bamtools_filter.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 21 September 2010
7 // ---------------------------------------------------------------------------
8 // Filters BAM file(s) according to some user-specified criteria.
9 // ***************************************************************************
10
11 #include <cstdio>
12 #include <iostream>
13 #include <sstream>
14 #include <string>
15 #include <vector>
16 #include "bamtools_filter.h"
17 #include "bamtools_filter_engine.h"
18 #include "bamtools_options.h"
19 #include "bamtools_utilities.h"
20 #include "BamReader.h"
21 #include "BamMultiReader.h"
22 #include "BamWriter.h"
23 #include "jsoncpp/json.h"
24 using namespace std;
25 using namespace BamTools; 
26 using namespace Json;
27   
28 namespace BamTools {
29   
30 // -------------------------------  
31 // string literal constants  
32
33 // property names
34 const string ALIGNMENTFLAG_PROPERTY       = "alignmentFlag";
35 const string INSERTSIZE_PROPERTY          = "insertSize";
36 const string ISDUPLICATE_PROPERTY         = "isDuplicate";
37 const string ISFAILEDQC_PROPERTY          = "isFailedQC";
38 const string ISFIRSTMATE_PROPERTY         = "isFirstMate";
39 const string ISMAPPED_PROPERTY            = "isMapped";
40 const string ISMATEMAPPED_PROPERTY        = "isMateMapped";
41 const string ISMATEREVERSESTRAND_PROPERTY = "isMateReverseStrand";
42 const string ISPAIRED_PROPERTY            = "isPaired";
43 const string ISPRIMARYALIGNMENT_PROPERTY  = "isPrimaryAlignment";
44 const string ISPROPERPAIR_PROPERTY        = "isProperPair";
45 const string ISREVERSESTRAND_PROPERTY     = "isReverseStrand";
46 const string ISSECONDMATE_PROPERTY        = "isSecondMate";
47 const string MAPQUALITY_PROPERTY          = "mapQuality";
48 const string MATEPOSITION_PROPERTY        = "matePosition";
49 const string MATEREFERENCE_PROPERTY       = "mateReference";
50 const string NAME_PROPERTY                = "name";
51 const string POSITION_PROPERTY            = "position";
52 const string QUERYBASES_PROPERTY          = "queryBases";
53 const string REFERENCE_PROPERTY           = "reference";
54
55 // boolalpha
56 const string TRUE_STR  = "true";
57 const string FALSE_STR = "false";
58     
59 RefVector filterToolReferences;    
60     
61 struct BamAlignmentChecker {
62     bool check(const PropertyFilter& filter, const BamAlignment& al) {
63       
64         bool keepAlignment = true;
65         const PropertyMap& properties = filter.Properties;
66         PropertyMap::const_iterator propertyIter = properties.begin();
67         PropertyMap::const_iterator propertyEnd  = properties.end();
68         for ( ; propertyIter != propertyEnd; ++propertyIter ) {
69           
70             // check alignment data field depending on propertyName
71             const string& propertyName = (*propertyIter).first;
72             const PropertyFilterValue& valueFilter = (*propertyIter).second;
73             
74             if      ( propertyName == ALIGNMENTFLAG_PROPERTY )        keepAlignment &= valueFilter.check(al.AlignmentFlag);
75             else if ( propertyName == INSERTSIZE_PROPERTY )           keepAlignment &= valueFilter.check(al.InsertSize);
76             else if ( propertyName == ISDUPLICATE_PROPERTY )          keepAlignment &= valueFilter.check(al.IsDuplicate());
77             else if ( propertyName == ISFAILEDQC_PROPERTY )           keepAlignment &= valueFilter.check(al.IsFailedQC());
78             else if ( propertyName == ISFIRSTMATE_PROPERTY )          keepAlignment &= valueFilter.check(al.IsFirstMate());
79             else if ( propertyName == ISMAPPED_PROPERTY )             keepAlignment &= valueFilter.check(al.IsMapped());
80             else if ( propertyName == ISMATEMAPPED_PROPERTY )         keepAlignment &= valueFilter.check(al.IsMateMapped());
81             else if ( propertyName == ISMATEREVERSESTRAND_PROPERTY )  keepAlignment &= valueFilter.check(al.IsMateReverseStrand());
82             else if ( propertyName == ISPAIRED_PROPERTY )             keepAlignment &= valueFilter.check(al.IsPaired());
83             else if ( propertyName == ISPRIMARYALIGNMENT_PROPERTY )   keepAlignment &= valueFilter.check(al.IsPrimaryAlignment());
84             else if ( propertyName == ISPROPERPAIR_PROPERTY )         keepAlignment &= valueFilter.check(al.IsProperPair());
85             else if ( propertyName == ISREVERSESTRAND_PROPERTY )      keepAlignment &= valueFilter.check(al.IsReverseStrand());
86             else if ( propertyName == ISSECONDMATE_PROPERTY )         keepAlignment &= valueFilter.check(al.IsSecondMate());
87             else if ( propertyName == MAPQUALITY_PROPERTY )           keepAlignment &= valueFilter.check(al.MapQuality);
88             else if ( propertyName == MATEPOSITION_PROPERTY )         keepAlignment &= ( al.IsPaired() && al.IsMateMapped() && valueFilter.check(al.MateRefID) );
89             else if ( propertyName == MATEREFERENCE_PROPERTY ) {
90                 if ( !al.IsPaired() || !al.IsMateMapped() ) return false;
91                 BAMTOOLS_ASSERT_MESSAGE( (al.MateRefID>=0 && (al.MateRefID<(int)filterToolReferences.size())), "Invalid MateRefID");
92                 const string& refName = filterToolReferences.at(al.MateRefID).RefName;
93                 keepAlignment &= valueFilter.check(refName);
94             }
95             else if ( propertyName == NAME_PROPERTY )                 keepAlignment &= valueFilter.check(al.Name);
96             else if ( propertyName == POSITION_PROPERTY )             keepAlignment &= valueFilter.check(al.Position);
97             else if ( propertyName == QUERYBASES_PROPERTY )           keepAlignment &= valueFilter.check(al.QueryBases);
98             else if ( propertyName == REFERENCE_PROPERTY ) {
99                 BAMTOOLS_ASSERT_MESSAGE( (al.RefID>=0 && (al.RefID<(int)filterToolReferences.size())), "Invalid RefID");
100                 const string& refName = filterToolReferences.at(al.RefID).RefName;
101                 keepAlignment &= valueFilter.check(refName);
102             }
103             else BAMTOOLS_ASSERT_UNREACHABLE;
104             
105             // if alignment fails at ANY point, just quit and return false
106             if ( !keepAlignment ) 
107                 return false;
108         }
109       
110         BAMTOOLS_ASSERT_MESSAGE( keepAlignment, "Error in BamAlignmentChecker... keepAlignment should be true here");
111         return keepAlignment;
112     }
113 };
114     
115 } // namespace BamTools
116   
117 // ---------------------------------------------
118 // FilterToolPrivate declaration
119
120 class FilterTool::FilterToolPrivate {
121       
122     // ctor & dtor
123     public:
124         FilterToolPrivate(FilterTool::FilterSettings* settings);
125         ~FilterToolPrivate(void);  
126         
127     // 'public' interface
128     public:
129         bool Run(void);
130         
131     // internal methods
132     private:
133         bool AddPropertyTokensToFilter(const string& filterName, const map<string, string>& propertyTokens);
134         bool CheckAlignment(const BamAlignment& al);
135         const string GetScriptContents(void);
136         void InitProperties(void);
137         bool ParseCommandLine(void);
138         bool ParseFilterObject(const string& filterName, const Json::Value& filterObject);
139         bool ParseScript(void);
140         bool SetupFilters(void);
141         
142     // data members
143     private:
144         vector<string> m_propertyNames;
145         FilterTool::FilterSettings* m_settings;
146         FilterEngine<BamAlignmentChecker> m_filterEngine;
147 };
148   
149 // ---------------------------------------------
150 // FilterSettings implementation
151
152 struct FilterTool::FilterSettings {
153
154     // ----------------------------------
155     // IO opts
156   
157     // flags
158     bool HasInputBamFilename;
159     bool HasOutputBamFilename;
160     bool HasRegion;
161     bool HasScriptFilename;
162     bool IsForceCompression;
163     
164     // filenames
165     vector<string> InputFiles;
166     string OutputFilename;
167     string Region;
168     string ScriptFilename;
169     
170     // -----------------------------------
171     // General filter opts
172     
173     // flags
174     bool HasAlignmentFlagFilter;
175     bool HasInsertSizeFilter;
176     bool HasMapQualityFilter;
177     bool HasNameFilter;
178     bool HasQueryBasesFilter;
179     
180 //     bool HasTagFilters;
181
182     // filters
183     string AlignmentFlagFilter;
184     string InsertSizeFilter;
185     string NameFilter;
186     string MapQualityFilter;
187     string QueryBasesFilter;
188     
189 //     vector<string> TagFilters;
190
191     // -----------------------------------
192     // AlignmentFlag filter opts
193     
194     // flags
195     bool HasIsDuplicateFilter;
196     bool HasIsFailedQCFilter;
197     bool HasIsFirstMateFilter;
198     bool HasIsMappedFilter;
199     bool HasIsMateMappedFilter;
200     bool HasIsMateReverseStrandFilter;
201     bool HasIsPairedFilter;
202     bool HasIsPrimaryAlignmentFilter;
203     bool HasIsProperPairFilter;
204     bool HasIsReverseStrandFilter;
205     bool HasIsSecondMateFilter;
206     
207     // filters
208     string IsDuplicateFilter;
209     string IsFailedQCFilter;
210     string IsFirstMateFilter;
211     string IsMappedFilter;
212     string IsMateMappedFilter;
213     string IsMateReverseStrandFilter;
214     string IsPairedFilter;
215     string IsPrimaryAlignmentFilter;
216     string IsProperPairFilter;
217     string IsReverseStrandFilter;
218     string IsSecondMateFilter;
219     
220     // ---------------------------------
221     // constructor
222     
223     FilterSettings(void)
224         : HasInputBamFilename(false)
225         , HasOutputBamFilename(false)
226         , HasRegion(false)
227         , HasScriptFilename(false)
228         , IsForceCompression(false)
229         , OutputFilename(Options::StandardOut())
230         , HasAlignmentFlagFilter(false)
231         , HasInsertSizeFilter(false)
232         , HasMapQualityFilter(false)
233         , HasNameFilter(false)
234         , HasQueryBasesFilter(false)
235 //         , HasTagFilters(false)
236         , HasIsDuplicateFilter(false)
237         , HasIsFailedQCFilter(false)
238         , HasIsFirstMateFilter(false)
239         , HasIsMappedFilter(false)
240         , HasIsMateMappedFilter(false)
241         , HasIsMateReverseStrandFilter(false)
242         , HasIsPairedFilter(false)
243         , HasIsPrimaryAlignmentFilter(false)
244         , HasIsProperPairFilter(false)
245         , HasIsReverseStrandFilter(false)
246         , HasIsSecondMateFilter(false)
247         , IsDuplicateFilter(TRUE_STR)
248         , IsFailedQCFilter(TRUE_STR)
249         , IsFirstMateFilter(TRUE_STR)
250         , IsMappedFilter(TRUE_STR)
251         , IsMateMappedFilter(TRUE_STR)
252         , IsMateReverseStrandFilter(TRUE_STR)
253         , IsPairedFilter(TRUE_STR)
254         , IsPrimaryAlignmentFilter(TRUE_STR)
255         , IsProperPairFilter(TRUE_STR)
256         , IsReverseStrandFilter(TRUE_STR)
257         , IsSecondMateFilter(TRUE_STR)
258     { }
259 };  
260
261 // ---------------------------------------------
262 // FilterTool implementation
263
264 FilterTool::FilterTool(void)
265     : AbstractTool()
266     , m_settings(new FilterSettings)
267     , m_impl(0)
268 {
269     // set program details
270     Options::SetProgramInfo("bamtools filter", "filters BAM file(s)", "-in <filename> [-in <filename> ... ] -out <filename> -region <REGION> [ [-script <filename] | [filterOptions] ]");
271     
272     OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
273     Options::AddValueOption("-in",     "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename,  m_settings->InputFiles,     IO_Opts, Options::StandardIn());
274     Options::AddValueOption("-out",    "BAM filename", "the output BAM file",   "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
275     Options::AddValueOption("-region", "REGION",       "only read data from this genomic region (see README for more details)", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
276     Options::AddValueOption("-script", "filename",     "the filter script file (see README for more details)", "", m_settings->HasScriptFilename, m_settings->ScriptFilename, IO_Opts);
277     Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts);
278     
279     OptionGroup* FilterOpts = Options::CreateOptionGroup("General Filters");
280     Options::AddValueOption("-alignmentFlag", "int",     "keep reads with this *exact* alignment flag (for more detailed queries, see below)", "", m_settings->HasAlignmentFlagFilter, m_settings->AlignmentFlagFilter, FilterOpts);
281     Options::AddValueOption("-insertSize",    "int",     "keep reads with insert size that mathces pattern",             "", m_settings->HasInsertSizeFilter, m_settings->InsertSizeFilter, FilterOpts);
282     Options::AddValueOption("-mapQuality",    "[0-255]", "keep reads with map quality that matches pattern",             "", m_settings->HasMapQualityFilter, m_settings->MapQualityFilter, FilterOpts);
283     Options::AddValueOption("-name",          "string",  "keep reads with name that matches pattern",                    "", m_settings->HasNameFilter,       m_settings->NameFilter,       FilterOpts);
284     Options::AddValueOption("-queryBases",    "string",  "keep reads with motif that mathces pattern",                   "", m_settings->HasQueryBasesFilter, m_settings->QueryBasesFilter, FilterOpts);
285     
286 //     Options::AddValueOption("-tag", "TAG:VALUE", "keep reads with this key=>value pair. If multiple tags are given, reads must match all", "", m_settings->HasTagFilters, m_settings->TagFilters, FilterOpts);
287     
288     OptionGroup* AlignmentFlagOpts = Options::CreateOptionGroup("Alignment Flag Filters");
289     Options::AddValueOption("-isDuplicate",         "true/false", "keep only alignments that are marked as duplicate?", "", m_settings->HasIsDuplicateFilter,         m_settings->IsDuplicateFilter,         AlignmentFlagOpts, TRUE_STR);
290     Options::AddValueOption("-isFailedQC",          "true/false", "keep only alignments that failed QC?",               "", m_settings->HasIsFailedQCFilter,          m_settings->IsFailedQCFilter,          AlignmentFlagOpts, TRUE_STR);
291     Options::AddValueOption("-isFirstMate",         "true/false", "keep only alignments marked as first mate?",         "", m_settings->HasIsFirstMateFilter,         m_settings->IsFirstMateFilter,         AlignmentFlagOpts, TRUE_STR);
292     Options::AddValueOption("-isMapped",            "true/false", "keep only alignments that were mapped?",             "", m_settings->HasIsMappedFilter,            m_settings->IsMappedFilter,            AlignmentFlagOpts, TRUE_STR);
293     Options::AddValueOption("-isMateMapped",        "true/false", "keep only alignments with mates that mapped",        "", m_settings->HasIsMateMappedFilter,        m_settings->IsMateMappedFilter,        AlignmentFlagOpts, TRUE_STR);
294     Options::AddValueOption("-isMateReverseStrand", "true/false", "keep only alignments with mate on reverese strand?", "", m_settings->HasIsMateReverseStrandFilter, m_settings->IsMateReverseStrandFilter, AlignmentFlagOpts, TRUE_STR);
295     Options::AddValueOption("-isPaired",            "true/false", "keep only alignments that were sequenced as paired?","", m_settings->HasIsPairedFilter,            m_settings->IsPairedFilter,            AlignmentFlagOpts, TRUE_STR);
296     Options::AddValueOption("-isPrimaryAlignment",  "true/false", "keep only alignments marked as primary?",            "", m_settings->HasIsPrimaryAlignmentFilter,  m_settings->IsPrimaryAlignmentFilter,  AlignmentFlagOpts, TRUE_STR);
297     Options::AddValueOption("-isProperPair",        "true/false", "keep only alignments that passed PE resolution?",    "", m_settings->HasIsProperPairFilter,        m_settings->IsProperPairFilter,        AlignmentFlagOpts, TRUE_STR);
298     Options::AddValueOption("-isReverseStrand",     "true/false", "keep only alignments on reverse strand?",            "", m_settings->HasIsReverseStrandFilter,     m_settings->IsReverseStrandFilter,     AlignmentFlagOpts, TRUE_STR);
299     Options::AddValueOption("-isSecondMate",        "true/false", "keep only alignments marked as second mate?",        "", m_settings->HasIsSecondMateFilter,        m_settings->IsSecondMateFilter,        AlignmentFlagOpts, TRUE_STR);
300 }
301
302 FilterTool::~FilterTool(void) {
303     delete m_settings;
304     m_settings = 0;
305     
306     delete m_impl;
307     m_impl = 0;
308 }
309
310 int FilterTool::Help(void) {
311     Options::DisplayHelp();
312     return 0;
313 }
314
315 int FilterTool::Run(int argc, char* argv[]) {
316   
317     // parse command line arguments
318     Options::Parse(argc, argv, 1);
319     
320     // run internal FilterTool implementation, return success/fail
321     m_impl = new FilterToolPrivate(m_settings);
322     
323     if ( m_impl->Run() ) return 0;
324     else return 1;
325 }
326  
327 // ---------------------------------------------
328 // FilterToolPrivate implementation
329   
330 // constructor  
331 FilterTool::FilterToolPrivate::FilterToolPrivate(FilterTool::FilterSettings* settings)
332     : m_settings(settings)
333 { }  
334   
335 // destructor
336 FilterTool::FilterToolPrivate::~FilterToolPrivate(void) { }
337
338 bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filterName, const map<string, string>& propertyTokens) {
339
340     // dummy temp values for token parsing
341     bool boolValue;
342     int32_t int32Value;
343     uint16_t uint16Value;
344     uint32_t uint32Value;
345     string stringValue;
346     PropertyFilterValue::ValueCompareType type;
347   
348     // iterate over property token map
349     map<string, string>::const_iterator mapIter = propertyTokens.begin();
350     map<string, string>::const_iterator mapEnd  = propertyTokens.end();
351     for ( ; mapIter != mapEnd; ++mapIter ) {
352       
353         const string& propertyName = (*mapIter).first;
354         const string& token        = (*mapIter).second;
355         
356         // ------------------------------
357         // convert token to value & compare type 
358         // then add to filter engine
359         
360         // bool conversion
361         if ( propertyName == ISDUPLICATE_PROPERTY ||
362              propertyName == ISFAILEDQC_PROPERTY ||
363              propertyName == ISFIRSTMATE_PROPERTY ||
364              propertyName == ISMAPPED_PROPERTY ||
365              propertyName == ISMATEMAPPED_PROPERTY ||
366              propertyName == ISMATEREVERSESTRAND_PROPERTY ||
367              propertyName == ISPAIRED_PROPERTY ||
368              propertyName == ISPRIMARYALIGNMENT_PROPERTY ||
369              propertyName == ISPROPERPAIR_PROPERTY ||
370              propertyName == ISREVERSESTRAND_PROPERTY ||
371              propertyName == ISSECONDMATE_PROPERTY
372            ) 
373         {
374             m_filterEngine.parseToken(token, boolValue, type);
375             m_filterEngine.setProperty(filterName, propertyName, boolValue, type);
376         }
377         
378         // int32_t conversion
379         else if ( propertyName == INSERTSIZE_PROPERTY ||
380                   propertyName == MATEPOSITION_PROPERTY ||
381                   propertyName == POSITION_PROPERTY 
382                 ) 
383         {
384             m_filterEngine.parseToken(token, int32Value, type);
385             m_filterEngine.setProperty(filterName, propertyName, int32Value, type);
386         }
387         
388         // uint16_t conversion
389         else if ( propertyName == MAPQUALITY_PROPERTY )
390         {
391             m_filterEngine.parseToken(token, uint16Value, type);
392             m_filterEngine.setProperty(filterName, propertyName, uint16Value, type);
393         }
394         
395         // uint32_t conversion
396         else if ( propertyName == ALIGNMENTFLAG_PROPERTY )
397         {
398             m_filterEngine.parseToken(token, uint32Value, type);
399             m_filterEngine.setProperty(filterName, propertyName, uint32Value, type);
400         }
401         
402         // string conversion
403         else if ( propertyName == MATEREFERENCE_PROPERTY ||
404                   propertyName == NAME_PROPERTY ||
405                   propertyName == QUERYBASES_PROPERTY ||
406                   propertyName == REFERENCE_PROPERTY
407                 ) 
408         {
409             m_filterEngine.parseToken(token, stringValue, type);
410             m_filterEngine.setProperty(filterName, propertyName, stringValue, type);
411         }
412       
413         // else unknown property 
414         else {
415             cerr << "Unknown property: " << propertyName << "!" << endl;
416             return false;
417         }
418     }
419     return true;
420 }
421
422 bool FilterTool::FilterToolPrivate::CheckAlignment(const BamAlignment& al) {
423     return m_filterEngine.check(al);
424 }
425
426 const string FilterTool::FilterToolPrivate::GetScriptContents(void) {
427   
428     // open script for reading
429     FILE* inFile = fopen(m_settings->ScriptFilename.c_str(), "rb");
430     if ( !inFile ) {
431         cerr << "FilterTool error: Could not open script: " << m_settings->ScriptFilename << " for reading" << endl;
432         return false;
433     }
434     
435     // read in entire script contents  
436     char buffer[1024];
437     ostringstream docStream("");
438     while ( true ) {
439         
440         // peek ahead, make sure there is data available
441         char ch = fgetc(inFile);
442         ungetc(ch, inFile);
443         if( feof(inFile) ) break;       
444         
445         // read next block of data
446         if ( fgets(buffer, 1024, inFile) == 0 ) {
447             cerr << "FilterTool error : could not read from script" << endl;
448             return false;
449         }
450         
451         docStream << buffer;
452     }
453     
454     // close script file
455     fclose(inFile);
456     
457     // import buffer contents to document, return
458     string document = docStream.str();
459     return document;
460 }
461
462 void FilterTool::FilterToolPrivate::InitProperties(void) {
463   
464     // store property names in vector 
465     m_propertyNames.push_back(ALIGNMENTFLAG_PROPERTY);
466     m_propertyNames.push_back(INSERTSIZE_PROPERTY);
467     m_propertyNames.push_back(ISDUPLICATE_PROPERTY);
468     m_propertyNames.push_back(ISFAILEDQC_PROPERTY);
469     m_propertyNames.push_back(ISFIRSTMATE_PROPERTY);
470     m_propertyNames.push_back(ISMAPPED_PROPERTY);
471     m_propertyNames.push_back(ISMATEMAPPED_PROPERTY);
472     m_propertyNames.push_back(ISMATEREVERSESTRAND_PROPERTY);
473     m_propertyNames.push_back(ISPAIRED_PROPERTY);
474     m_propertyNames.push_back(ISPRIMARYALIGNMENT_PROPERTY);
475     m_propertyNames.push_back(ISPROPERPAIR_PROPERTY);
476     m_propertyNames.push_back(ISREVERSESTRAND_PROPERTY);
477     m_propertyNames.push_back(ISSECONDMATE_PROPERTY);
478     m_propertyNames.push_back(MAPQUALITY_PROPERTY);
479     m_propertyNames.push_back(MATEPOSITION_PROPERTY);
480     m_propertyNames.push_back(MATEREFERENCE_PROPERTY);
481     m_propertyNames.push_back(NAME_PROPERTY);
482     m_propertyNames.push_back(POSITION_PROPERTY);
483     m_propertyNames.push_back(QUERYBASES_PROPERTY);
484     m_propertyNames.push_back(REFERENCE_PROPERTY);
485     
486     // add vector contents to FilterEngine<BamAlignmentChecker>
487     vector<string>::const_iterator propertyNameIter = m_propertyNames.begin();
488     vector<string>::const_iterator propertyNameEnd  = m_propertyNames.end();
489     for ( ; propertyNameIter != propertyNameEnd; ++propertyNameIter )
490         m_filterEngine.addProperty((*propertyNameIter));
491 }
492
493 bool FilterTool::FilterToolPrivate::ParseCommandLine(void) {
494   
495     // add a rule set to filter engine
496     const string CMD = "COMMAND_LINE";
497     m_filterEngine.addFilter(CMD);
498
499     // map property names to command line args
500     map<string, string> propertyTokens;
501     if ( m_settings->HasAlignmentFlagFilter )       propertyTokens.insert( make_pair(ALIGNMENTFLAG_PROPERTY,       m_settings->AlignmentFlagFilter) );
502     if ( m_settings->HasInsertSizeFilter )          propertyTokens.insert( make_pair(INSERTSIZE_PROPERTY,          m_settings->InsertSizeFilter) );
503     if ( m_settings->HasIsDuplicateFilter )         propertyTokens.insert( make_pair(ISDUPLICATE_PROPERTY,         m_settings->IsDuplicateFilter) );
504     if ( m_settings->HasIsFailedQCFilter )          propertyTokens.insert( make_pair(ISFAILEDQC_PROPERTY,          m_settings->IsFailedQCFilter) );
505     if ( m_settings->HasIsFirstMateFilter )         propertyTokens.insert( make_pair(ISFIRSTMATE_PROPERTY,         m_settings->IsFirstMateFilter) );
506     if ( m_settings->HasIsMappedFilter )            propertyTokens.insert( make_pair(ISMAPPED_PROPERTY,            m_settings->IsMappedFilter) );
507     if ( m_settings->HasIsMateMappedFilter )        propertyTokens.insert( make_pair(ISMATEMAPPED_PROPERTY,        m_settings->IsMateMappedFilter) );
508     if ( m_settings->HasIsMateReverseStrandFilter ) propertyTokens.insert( make_pair(ISMATEREVERSESTRAND_PROPERTY, m_settings->IsMateReverseStrandFilter) );
509     if ( m_settings->HasIsPairedFilter )            propertyTokens.insert( make_pair(ISPAIRED_PROPERTY,            m_settings->IsPairedFilter) );
510     if ( m_settings->HasIsPrimaryAlignmentFilter )  propertyTokens.insert( make_pair(ISPRIMARYALIGNMENT_PROPERTY,  m_settings->IsPrimaryAlignmentFilter) );
511     if ( m_settings->HasIsProperPairFilter )        propertyTokens.insert( make_pair(ISPROPERPAIR_PROPERTY,        m_settings->IsProperPairFilter) );
512     if ( m_settings->HasIsReverseStrandFilter )     propertyTokens.insert( make_pair(ISREVERSESTRAND_PROPERTY,     m_settings->IsReverseStrandFilter) );
513     if ( m_settings->HasIsSecondMateFilter )        propertyTokens.insert( make_pair(ISSECONDMATE_PROPERTY,        m_settings->IsSecondMateFilter) );
514     if ( m_settings->HasMapQualityFilter )          propertyTokens.insert( make_pair(MAPQUALITY_PROPERTY,          m_settings->MapQualityFilter) );
515     if ( m_settings->HasNameFilter )                propertyTokens.insert( make_pair(NAME_PROPERTY,                m_settings->NameFilter) );
516     if ( m_settings->HasQueryBasesFilter )          propertyTokens.insert( make_pair(QUERYBASES_PROPERTY,          m_settings->QueryBasesFilter) );
517
518     // send add these properties to filter set "COMMAND_LINE"
519     return AddPropertyTokensToFilter(CMD, propertyTokens);
520 }
521
522 bool FilterTool::FilterToolPrivate::ParseFilterObject(const string& filterName, const Json::Value& filterObject) {
523   
524     // filter object parsing variables
525     Json::Value null(Json::nullValue);
526     Json::Value propertyValue;
527     
528     // store results
529     map<string, string> propertyTokens;
530     
531     // iterate over known properties
532     vector<string>::const_iterator propertyNameIter = m_propertyNames.begin();
533     vector<string>::const_iterator propertyNameEnd  = m_propertyNames.end();
534     for ( ; propertyNameIter != propertyNameEnd; ++propertyNameIter ) {
535         const string& propertyName = (*propertyNameIter);
536         
537         // if property defined in filter, add to token list
538         propertyValue = filterObject.get(propertyName, null);
539         if ( propertyValue != null ) 
540             propertyTokens.insert( make_pair(propertyName, propertyValue.asString()) );
541     }
542   
543     // add this filter to engin
544     m_filterEngine.addFilter(filterName);
545   
546     // add token list to this filter
547     return AddPropertyTokensToFilter(filterName, propertyTokens);
548 }
549
550 bool FilterTool::FilterToolPrivate::ParseScript(void) {
551   
552     // read in script contents from file
553     const string document = GetScriptContents();
554     
555     // set up JsonCPP reader and attempt to parse script
556     Json::Value root;
557     Json::Reader reader;
558     if ( !reader.parse(document, root) ) {
559         // use built-in error reporting mechanism to alert user what was wrong with the script
560         cerr  << "Failed to parse configuration\n" << reader.getFormatedErrorMessages();
561         return false;     
562     }
563
564     // initialize return status
565     bool success = true;
566     
567     // see if root object contains multiple filters
568     const Json::Value filters = root["filters"];
569     if ( !filters.isNull() ) {
570       
571         // iterate over any filters found
572         int filterIndex = 0;
573         Json::Value::const_iterator filtersIter = filters.begin();
574         Json::Value::const_iterator filtersEnd  = filters.end();
575         for ( ; filtersIter != filtersEnd; ++filtersIter, ++filterIndex ) {
576             Json::Value filter = (*filtersIter);
577             
578             // convert filter index to string
579             string filterName;
580             
581             // if id tag supplied
582             const Json::Value id = filter["id"];
583             if ( !id.isNull() ) {
584                 filterName = id.asString();
585             } 
586             
587             // use array index 
588             else {
589                 stringstream convert;
590                 convert << filterIndex;
591                 filterName = convert.str();
592             }
593             
594             // create & parse filter 
595             success &= ParseFilterObject(filterName, filter);
596         }
597         
598         // see if user defined a "rule" for these filters
599         // otherwise, use filter engine's default rule behavior
600         string ruleString("");
601         const Json::Value rule = root["rule"];
602         if ( rule.isString() )
603             ruleString = rule.asString();
604         m_filterEngine.setRule(ruleString);
605           
606         // return success/fail
607         return success;
608     } 
609     
610     // otherwise, root is the only filter (just contains properties)
611     // create & parse filter named "ROOT"
612     else success = ParseFilterObject("ROOT", root);
613     
614     // return success/failure
615     return success;
616 }
617
618
619 bool FilterTool::FilterToolPrivate::Run(void) {
620     
621     // set to default input if none provided
622     if ( !m_settings->HasInputBamFilename ) 
623         m_settings->InputFiles.push_back(Options::StandardIn());
624     
625     // initialize defined properties & user-specified filters
626     // quit if failed
627     if ( !SetupFilters() ) return 1;
628
629     // open reader without index
630     BamMultiReader reader;
631     reader.Open(m_settings->InputFiles, false, true);
632     const string headerText = reader.GetHeaderText();
633     filterToolReferences = reader.GetReferenceData();
634     
635     // open writer
636     BamWriter writer;
637     bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression );
638     writer.Open(m_settings->OutputFilename, headerText, filterToolReferences, writeUncompressed);
639     
640     BamAlignment al;
641     
642     // if no region specified, filter entire file 
643     if ( !m_settings->HasRegion ) {
644         while ( reader.GetNextAlignment(al) ) {
645             if ( CheckAlignment(al) ) 
646                 writer.SaveAlignment(al);
647         }
648     }
649     
650     // otherwise attempt to use region as constraint
651     else {
652         
653         // if region string parses OK
654         BamRegion region;
655         if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
656
657             // attempt to re-open reader with index files
658             reader.Close();
659             bool openedOK = reader.Open(m_settings->InputFiles, true, true );
660             
661             // if error
662             if ( !openedOK ) {
663                 cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl;
664                 return 1;
665             }
666             
667             // if index data available, we can use SetRegion
668             if ( reader.IsIndexLoaded() ) {
669               
670                 // attempt to use SetRegion(), if failed report error
671                 if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) {
672                     cerr << "ERROR: Region requested, but could not set BamReader region to REGION: " << m_settings->Region << " Aborting." << endl;
673                     reader.Close();
674                     return 1;
675                 } 
676               
677                 // everything checks out, just iterate through specified region, filtering alignments
678                 while ( reader.GetNextAlignmentCore(al) )
679                     if ( CheckAlignment(al) ) 
680                         writer.SaveAlignment(al);
681             } 
682             
683             // no index data available, we have to iterate through until we
684             // find overlapping alignments
685             else {
686                 while( reader.GetNextAlignmentCore(al) ) {
687                     if ( (al.RefID >= region.LeftRefID)  && ( (al.Position + al.Length) >= region.LeftPosition ) &&
688                           (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) 
689                     {
690                         if ( CheckAlignment(al) ) 
691                             writer.SaveAlignment(al);
692                     }
693                 }
694             }
695         } 
696         
697         // error parsing REGION string
698         else {
699             cerr << "ERROR: Could not parse REGION - " << m_settings->Region << endl;
700             cerr << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl;
701             reader.Close();
702             return 1;
703         }
704     }
705
706     // clean up & exit
707     reader.Close();
708     writer.Close();
709     return 0;
710 }
711
712 bool FilterTool::FilterToolPrivate::SetupFilters(void) {
713   
714     // add known properties to FilterEngine<BamAlignmentChecker>
715     InitProperties();
716     
717     // parse script for filter rules, if given
718     if ( m_settings->HasScriptFilename ) return ParseScript();
719     
720     // otherwise check command line for filters
721     else return ParseCommandLine();
722 }