]> git.donarmstrong.com Git - bamtools.git/blob - src/toolkit/bamtools_filter.cpp
Reviewed BAM to YAML conversion contribution (thank you Chase Miller). Reorganized...
[bamtools.git] / src / toolkit / bamtools_filter.cpp
1 // ***************************************************************************
2 // bamtools_filter.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 21 September 2010
7 // ---------------------------------------------------------------------------
8 // Filters BAM file(s) according to some user-specified criteria.
9 // ***************************************************************************
10
11 #include <cstdio>
12 #include <iostream>
13 #include <sstream>
14 #include <string>
15 #include <vector>
16 #include "bamtools_filter.h"
17 #include "bamtools_filter_engine.h"
18 #include "bamtools_options.h"
19 #include "bamtools_utilities.h"
20 #include "BamReader.h"
21 #include "BamMultiReader.h"
22 #include "BamWriter.h"
23 #include "jsoncpp/json.h"
24 using namespace std;
25 using namespace BamTools; 
26 using namespace Json;
27   
28 namespace BamTools {
29   
30 // -------------------------------  
31 // string literal constants  
32
33 // property names
34 const string ALIGNMENTFLAG_PROPERTY       = "alignmentFlag";
35 const string INSERTSIZE_PROPERTY          = "insertSize";
36 const string ISDUPLICATE_PROPERTY         = "isDuplicate";
37 const string ISFAILEDQC_PROPERTY          = "isFailedQC";
38 const string ISFIRSTMATE_PROPERTY         = "isFirstMate";
39 const string ISMAPPED_PROPERTY            = "isMapped";
40 const string ISMATEMAPPED_PROPERTY        = "isMateMapped";
41 const string ISMATEREVERSESTRAND_PROPERTY = "isMateReverseStrand";
42 const string ISPAIRED_PROPERTY            = "isPaired";
43 const string ISPRIMARYALIGNMENT_PROPERTY  = "isPrimaryAlignment";
44 const string ISPROPERPAIR_PROPERTY        = "isProperPair";
45 const string ISREVERSESTRAND_PROPERTY     = "isReverseStrand";
46 const string ISSECONDMATE_PROPERTY        = "isSecondMate";
47 const string MAPQUALITY_PROPERTY          = "mapQuality";
48 const string MATEPOSITION_PROPERTY        = "matePosition";
49 const string MATEREFERENCE_PROPERTY       = "mateReference";
50 const string NAME_PROPERTY                = "name";
51 const string POSITION_PROPERTY            = "position";
52 const string QUERYBASES_PROPERTY          = "queryBases";
53 const string REFERENCE_PROPERTY           = "reference";
54 const string CIGAR_PROPERTY               = "cigar";
55
56 // boolalpha
57 const string TRUE_STR  = "true";
58 const string FALSE_STR = "false";
59     
60 RefVector filterToolReferences;    
61     
62 struct BamAlignmentChecker {
63     bool check(const PropertyFilter& filter, const BamAlignment& al) {
64       
65         bool keepAlignment = true;
66         const PropertyMap& properties = filter.Properties;
67         PropertyMap::const_iterator propertyIter = properties.begin();
68         PropertyMap::const_iterator propertyEnd  = properties.end();
69         for ( ; propertyIter != propertyEnd; ++propertyIter ) {
70           
71             // check alignment data field depending on propertyName
72             const string& propertyName = (*propertyIter).first;
73             const PropertyFilterValue& valueFilter = (*propertyIter).second;
74             
75             if      ( propertyName == ALIGNMENTFLAG_PROPERTY )        keepAlignment &= valueFilter.check(al.AlignmentFlag);
76             else if ( propertyName == INSERTSIZE_PROPERTY )           keepAlignment &= valueFilter.check(al.InsertSize);
77             else if ( propertyName == ISDUPLICATE_PROPERTY )          keepAlignment &= valueFilter.check(al.IsDuplicate());
78             else if ( propertyName == ISFAILEDQC_PROPERTY )           keepAlignment &= valueFilter.check(al.IsFailedQC());
79             else if ( propertyName == ISFIRSTMATE_PROPERTY )          keepAlignment &= valueFilter.check(al.IsFirstMate());
80             else if ( propertyName == ISMAPPED_PROPERTY )             keepAlignment &= valueFilter.check(al.IsMapped());
81             else if ( propertyName == ISMATEMAPPED_PROPERTY )         keepAlignment &= valueFilter.check(al.IsMateMapped());
82             else if ( propertyName == ISMATEREVERSESTRAND_PROPERTY )  keepAlignment &= valueFilter.check(al.IsMateReverseStrand());
83             else if ( propertyName == ISPAIRED_PROPERTY )             keepAlignment &= valueFilter.check(al.IsPaired());
84             else if ( propertyName == ISPRIMARYALIGNMENT_PROPERTY )   keepAlignment &= valueFilter.check(al.IsPrimaryAlignment());
85             else if ( propertyName == ISPROPERPAIR_PROPERTY )         keepAlignment &= valueFilter.check(al.IsProperPair());
86             else if ( propertyName == ISREVERSESTRAND_PROPERTY )      keepAlignment &= valueFilter.check(al.IsReverseStrand());
87             else if ( propertyName == ISSECONDMATE_PROPERTY )         keepAlignment &= valueFilter.check(al.IsSecondMate());
88             else if ( propertyName == MAPQUALITY_PROPERTY )           keepAlignment &= valueFilter.check(al.MapQuality);
89             else if ( propertyName == MATEPOSITION_PROPERTY )         keepAlignment &= ( al.IsPaired() && al.IsMateMapped() && valueFilter.check(al.MateRefID) );
90             else if ( propertyName == MATEREFERENCE_PROPERTY ) {
91                 if ( !al.IsPaired() || !al.IsMateMapped() ) return false;
92                 BAMTOOLS_ASSERT_MESSAGE( (al.MateRefID>=0 && (al.MateRefID<(int)filterToolReferences.size())), "Invalid MateRefID");
93                 const string& refName = filterToolReferences.at(al.MateRefID).RefName;
94                 keepAlignment &= valueFilter.check(refName);
95             }
96             else if ( propertyName == NAME_PROPERTY )                 keepAlignment &= valueFilter.check(al.Name);
97             else if ( propertyName == POSITION_PROPERTY )             keepAlignment &= valueFilter.check(al.Position);
98             else if ( propertyName == QUERYBASES_PROPERTY )           keepAlignment &= valueFilter.check(al.QueryBases);
99             else if ( propertyName == REFERENCE_PROPERTY ) {
100                 BAMTOOLS_ASSERT_MESSAGE( (al.RefID>=0 && (al.RefID<(int)filterToolReferences.size())), "Invalid RefID");
101                 const string& refName = filterToolReferences.at(al.RefID).RefName;
102                 keepAlignment &= valueFilter.check(refName);
103             }
104             else if ( propertyName == CIGAR_PROPERTY ) {
105                 stringstream cigarSs;
106                 const vector<CigarOp>& cigarData = al.CigarData;
107                 if ( !cigarData.empty() ) {
108                     vector<CigarOp>::const_iterator cigarBegin = cigarData.begin();
109                     vector<CigarOp>::const_iterator cigarIter = cigarBegin;
110                     vector<CigarOp>::const_iterator cigarEnd  = cigarData.end();
111                     for ( ; cigarIter != cigarEnd; ++cigarIter ) {
112                         const CigarOp& op = (*cigarIter);
113                         cigarSs << op.Length << op.Type;
114                     }
115                     keepAlignment &= valueFilter.check(cigarSs.str());
116                 }
117             }
118             else BAMTOOLS_ASSERT_UNREACHABLE;
119             
120             // if alignment fails at ANY point, just quit and return false
121             if ( !keepAlignment ) 
122                 return false;
123         }
124       
125         BAMTOOLS_ASSERT_MESSAGE( keepAlignment, "Error in BamAlignmentChecker... keepAlignment should be true here");
126         return keepAlignment;
127     }
128 };
129     
130 } // namespace BamTools
131   
132 // ---------------------------------------------
133 // FilterToolPrivate declaration
134
135 class FilterTool::FilterToolPrivate {
136       
137     // ctor & dtor
138     public:
139         FilterToolPrivate(FilterTool::FilterSettings* settings);
140         ~FilterToolPrivate(void);  
141         
142     // 'public' interface
143     public:
144         bool Run(void);
145         
146     // internal methods
147     private:
148         bool AddPropertyTokensToFilter(const string& filterName, const map<string, string>& propertyTokens);
149         bool CheckAlignment(const BamAlignment& al);
150         const string GetScriptContents(void);
151         void InitProperties(void);
152         bool ParseCommandLine(void);
153         bool ParseFilterObject(const string& filterName, const Json::Value& filterObject);
154         bool ParseScript(void);
155         bool SetupFilters(void);
156         
157     // data members
158     private:
159         vector<string> m_propertyNames;
160         FilterTool::FilterSettings* m_settings;
161         FilterEngine<BamAlignmentChecker> m_filterEngine;
162 };
163   
164 // ---------------------------------------------
165 // FilterSettings implementation
166
167 struct FilterTool::FilterSettings {
168
169     // ----------------------------------
170     // IO opts
171   
172     // flags
173     bool HasInputBamFilename;
174     bool HasOutputBamFilename;
175     bool HasRegion;
176     bool HasScriptFilename;
177     bool IsForceCompression;
178     
179     // filenames
180     vector<string> InputFiles;
181     string OutputFilename;
182     string Region;
183     string ScriptFilename;
184     
185     // -----------------------------------
186     // General filter opts
187     
188     // flags
189     bool HasAlignmentFlagFilter;
190     bool HasInsertSizeFilter;
191     bool HasMapQualityFilter;
192     bool HasNameFilter;
193     bool HasQueryBasesFilter;
194     
195 //     bool HasTagFilters;
196
197     // filters
198     string AlignmentFlagFilter;
199     string InsertSizeFilter;
200     string NameFilter;
201     string MapQualityFilter;
202     string QueryBasesFilter;
203     
204 //     vector<string> TagFilters;
205
206     // -----------------------------------
207     // AlignmentFlag filter opts
208     
209     // flags
210     bool HasIsDuplicateFilter;
211     bool HasIsFailedQCFilter;
212     bool HasIsFirstMateFilter;
213     bool HasIsMappedFilter;
214     bool HasIsMateMappedFilter;
215     bool HasIsMateReverseStrandFilter;
216     bool HasIsPairedFilter;
217     bool HasIsPrimaryAlignmentFilter;
218     bool HasIsProperPairFilter;
219     bool HasIsReverseStrandFilter;
220     bool HasIsSecondMateFilter;
221     
222     // filters
223     string IsDuplicateFilter;
224     string IsFailedQCFilter;
225     string IsFirstMateFilter;
226     string IsMappedFilter;
227     string IsMateMappedFilter;
228     string IsMateReverseStrandFilter;
229     string IsPairedFilter;
230     string IsPrimaryAlignmentFilter;
231     string IsProperPairFilter;
232     string IsReverseStrandFilter;
233     string IsSecondMateFilter;
234     
235     // ---------------------------------
236     // constructor
237     
238     FilterSettings(void)
239         : HasInputBamFilename(false)
240         , HasOutputBamFilename(false)
241         , HasRegion(false)
242         , HasScriptFilename(false)
243         , IsForceCompression(false)
244         , OutputFilename(Options::StandardOut())
245         , HasAlignmentFlagFilter(false)
246         , HasInsertSizeFilter(false)
247         , HasMapQualityFilter(false)
248         , HasNameFilter(false)
249         , HasQueryBasesFilter(false)
250 //         , HasTagFilters(false)
251         , HasIsDuplicateFilter(false)
252         , HasIsFailedQCFilter(false)
253         , HasIsFirstMateFilter(false)
254         , HasIsMappedFilter(false)
255         , HasIsMateMappedFilter(false)
256         , HasIsMateReverseStrandFilter(false)
257         , HasIsPairedFilter(false)
258         , HasIsPrimaryAlignmentFilter(false)
259         , HasIsProperPairFilter(false)
260         , HasIsReverseStrandFilter(false)
261         , HasIsSecondMateFilter(false)
262         , IsDuplicateFilter(TRUE_STR)
263         , IsFailedQCFilter(TRUE_STR)
264         , IsFirstMateFilter(TRUE_STR)
265         , IsMappedFilter(TRUE_STR)
266         , IsMateMappedFilter(TRUE_STR)
267         , IsMateReverseStrandFilter(TRUE_STR)
268         , IsPairedFilter(TRUE_STR)
269         , IsPrimaryAlignmentFilter(TRUE_STR)
270         , IsProperPairFilter(TRUE_STR)
271         , IsReverseStrandFilter(TRUE_STR)
272         , IsSecondMateFilter(TRUE_STR)
273     { }
274 };  
275
276 // ---------------------------------------------
277 // FilterTool implementation
278
279 FilterTool::FilterTool(void)
280     : AbstractTool()
281     , m_settings(new FilterSettings)
282     , m_impl(0)
283 {
284     // set program details
285     Options::SetProgramInfo("bamtools filter", "filters BAM file(s)", "-in <filename> [-in <filename> ... ] -out <filename> -region <REGION> [ [-script <filename] | [filterOptions] ]");
286     
287     OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
288     Options::AddValueOption("-in",     "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename,  m_settings->InputFiles,     IO_Opts, Options::StandardIn());
289     Options::AddValueOption("-out",    "BAM filename", "the output BAM file",   "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
290     Options::AddValueOption("-region", "REGION",       "only read data from this genomic region (see README for more details)", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
291     Options::AddValueOption("-script", "filename",     "the filter script file (see README for more details)", "", m_settings->HasScriptFilename, m_settings->ScriptFilename, IO_Opts);
292     Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts);
293     
294     OptionGroup* FilterOpts = Options::CreateOptionGroup("General Filters");
295     Options::AddValueOption("-alignmentFlag", "int",     "keep reads with this *exact* alignment flag (for more detailed queries, see below)", "", m_settings->HasAlignmentFlagFilter, m_settings->AlignmentFlagFilter, FilterOpts);
296     Options::AddValueOption("-insertSize",    "int",     "keep reads with insert size that mathces pattern",             "", m_settings->HasInsertSizeFilter, m_settings->InsertSizeFilter, FilterOpts);
297     Options::AddValueOption("-mapQuality",    "[0-255]", "keep reads with map quality that matches pattern",             "", m_settings->HasMapQualityFilter, m_settings->MapQualityFilter, FilterOpts);
298     Options::AddValueOption("-name",          "string",  "keep reads with name that matches pattern",                    "", m_settings->HasNameFilter,       m_settings->NameFilter,       FilterOpts);
299     Options::AddValueOption("-queryBases",    "string",  "keep reads with motif that mathces pattern",                   "", m_settings->HasQueryBasesFilter, m_settings->QueryBasesFilter, FilterOpts);
300     
301 //     Options::AddValueOption("-tag", "TAG:VALUE", "keep reads with this key=>value pair. If multiple tags are given, reads must match all", "", m_settings->HasTagFilters, m_settings->TagFilters, FilterOpts);
302     
303     OptionGroup* AlignmentFlagOpts = Options::CreateOptionGroup("Alignment Flag Filters");
304     Options::AddValueOption("-isDuplicate",         "true/false", "keep only alignments that are marked as duplicate?", "", m_settings->HasIsDuplicateFilter,         m_settings->IsDuplicateFilter,         AlignmentFlagOpts, TRUE_STR);
305     Options::AddValueOption("-isFailedQC",          "true/false", "keep only alignments that failed QC?",               "", m_settings->HasIsFailedQCFilter,          m_settings->IsFailedQCFilter,          AlignmentFlagOpts, TRUE_STR);
306     Options::AddValueOption("-isFirstMate",         "true/false", "keep only alignments marked as first mate?",         "", m_settings->HasIsFirstMateFilter,         m_settings->IsFirstMateFilter,         AlignmentFlagOpts, TRUE_STR);
307     Options::AddValueOption("-isMapped",            "true/false", "keep only alignments that were mapped?",             "", m_settings->HasIsMappedFilter,            m_settings->IsMappedFilter,            AlignmentFlagOpts, TRUE_STR);
308     Options::AddValueOption("-isMateMapped",        "true/false", "keep only alignments with mates that mapped",        "", m_settings->HasIsMateMappedFilter,        m_settings->IsMateMappedFilter,        AlignmentFlagOpts, TRUE_STR);
309     Options::AddValueOption("-isMateReverseStrand", "true/false", "keep only alignments with mate on reverese strand?", "", m_settings->HasIsMateReverseStrandFilter, m_settings->IsMateReverseStrandFilter, AlignmentFlagOpts, TRUE_STR);
310     Options::AddValueOption("-isPaired",            "true/false", "keep only alignments that were sequenced as paired?","", m_settings->HasIsPairedFilter,            m_settings->IsPairedFilter,            AlignmentFlagOpts, TRUE_STR);
311     Options::AddValueOption("-isPrimaryAlignment",  "true/false", "keep only alignments marked as primary?",            "", m_settings->HasIsPrimaryAlignmentFilter,  m_settings->IsPrimaryAlignmentFilter,  AlignmentFlagOpts, TRUE_STR);
312     Options::AddValueOption("-isProperPair",        "true/false", "keep only alignments that passed PE resolution?",    "", m_settings->HasIsProperPairFilter,        m_settings->IsProperPairFilter,        AlignmentFlagOpts, TRUE_STR);
313     Options::AddValueOption("-isReverseStrand",     "true/false", "keep only alignments on reverse strand?",            "", m_settings->HasIsReverseStrandFilter,     m_settings->IsReverseStrandFilter,     AlignmentFlagOpts, TRUE_STR);
314     Options::AddValueOption("-isSecondMate",        "true/false", "keep only alignments marked as second mate?",        "", m_settings->HasIsSecondMateFilter,        m_settings->IsSecondMateFilter,        AlignmentFlagOpts, TRUE_STR);
315 }
316
317 FilterTool::~FilterTool(void) {
318     delete m_settings;
319     m_settings = 0;
320     
321     delete m_impl;
322     m_impl = 0;
323 }
324
325 int FilterTool::Help(void) {
326     Options::DisplayHelp();
327     return 0;
328 }
329
330 int FilterTool::Run(int argc, char* argv[]) {
331   
332     // parse command line arguments
333     Options::Parse(argc, argv, 1);
334     
335     // run internal FilterTool implementation, return success/fail
336     m_impl = new FilterToolPrivate(m_settings);
337     
338     if ( m_impl->Run() ) return 0;
339     else return 1;
340 }
341  
342 // ---------------------------------------------
343 // FilterToolPrivate implementation
344   
345 // constructor  
346 FilterTool::FilterToolPrivate::FilterToolPrivate(FilterTool::FilterSettings* settings)
347     : m_settings(settings)
348 { }  
349   
350 // destructor
351 FilterTool::FilterToolPrivate::~FilterToolPrivate(void) { }
352
353 bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filterName, const map<string, string>& propertyTokens) {
354
355     // dummy temp values for token parsing
356     bool boolValue;
357     int32_t int32Value;
358     uint16_t uint16Value;
359     uint32_t uint32Value;
360     string stringValue;
361     PropertyFilterValue::ValueCompareType type;
362   
363     // iterate over property token map
364     map<string, string>::const_iterator mapIter = propertyTokens.begin();
365     map<string, string>::const_iterator mapEnd  = propertyTokens.end();
366     for ( ; mapIter != mapEnd; ++mapIter ) {
367       
368         const string& propertyName = (*mapIter).first;
369         const string& token        = (*mapIter).second;
370         
371         // ------------------------------
372         // convert token to value & compare type 
373         // then add to filter engine
374         
375         // bool conversion
376         if ( propertyName == ISDUPLICATE_PROPERTY ||
377              propertyName == ISFAILEDQC_PROPERTY ||
378              propertyName == ISFIRSTMATE_PROPERTY ||
379              propertyName == ISMAPPED_PROPERTY ||
380              propertyName == ISMATEMAPPED_PROPERTY ||
381              propertyName == ISMATEREVERSESTRAND_PROPERTY ||
382              propertyName == ISPAIRED_PROPERTY ||
383              propertyName == ISPRIMARYALIGNMENT_PROPERTY ||
384              propertyName == ISPROPERPAIR_PROPERTY ||
385              propertyName == ISREVERSESTRAND_PROPERTY ||
386              propertyName == ISSECONDMATE_PROPERTY
387            ) 
388         {
389             m_filterEngine.parseToken(token, boolValue, type);
390             m_filterEngine.setProperty(filterName, propertyName, boolValue, type);
391         }
392         
393         // int32_t conversion
394         else if ( propertyName == INSERTSIZE_PROPERTY ||
395                   propertyName == MATEPOSITION_PROPERTY ||
396                   propertyName == POSITION_PROPERTY 
397                 ) 
398         {
399             m_filterEngine.parseToken(token, int32Value, type);
400             m_filterEngine.setProperty(filterName, propertyName, int32Value, type);
401         }
402         
403         // uint16_t conversion
404         else if ( propertyName == MAPQUALITY_PROPERTY )
405         {
406             m_filterEngine.parseToken(token, uint16Value, type);
407             m_filterEngine.setProperty(filterName, propertyName, uint16Value, type);
408         }
409         
410         // uint32_t conversion
411         else if ( propertyName == ALIGNMENTFLAG_PROPERTY )
412         {
413             m_filterEngine.parseToken(token, uint32Value, type);
414             m_filterEngine.setProperty(filterName, propertyName, uint32Value, type);
415         }
416         
417         // string conversion
418         else if ( propertyName == MATEREFERENCE_PROPERTY ||
419                   propertyName == NAME_PROPERTY ||
420                   propertyName == QUERYBASES_PROPERTY ||
421                   propertyName == REFERENCE_PROPERTY ||
422                   propertyName == CIGAR_PROPERTY
423                 ) 
424         {
425             m_filterEngine.parseToken(token, stringValue, type);
426             m_filterEngine.setProperty(filterName, propertyName, stringValue, type);
427         }
428       
429         // else unknown property 
430         else {
431             cerr << "Unknown property: " << propertyName << "!" << endl;
432             return false;
433         }
434     }
435     return true;
436 }
437
438 bool FilterTool::FilterToolPrivate::CheckAlignment(const BamAlignment& al) {
439     return m_filterEngine.check(al);
440 }
441
442 const string FilterTool::FilterToolPrivate::GetScriptContents(void) {
443   
444     // open script for reading
445     FILE* inFile = fopen(m_settings->ScriptFilename.c_str(), "rb");
446     if ( !inFile ) {
447         cerr << "FilterTool error: Could not open script: " << m_settings->ScriptFilename << " for reading" << endl;
448         return false;
449     }
450     
451     // read in entire script contents  
452     char buffer[1024];
453     ostringstream docStream("");
454     while ( true ) {
455         
456         // peek ahead, make sure there is data available
457         char ch = fgetc(inFile);
458         ungetc(ch, inFile);
459         if( feof(inFile) ) break;       
460         
461         // read next block of data
462         if ( fgets(buffer, 1024, inFile) == 0 ) {
463             cerr << "FilterTool error : could not read from script" << endl;
464             return false;
465         }
466         
467         docStream << buffer;
468     }
469     
470     // close script file
471     fclose(inFile);
472     
473     // import buffer contents to document, return
474     string document = docStream.str();
475     return document;
476 }
477
478 void FilterTool::FilterToolPrivate::InitProperties(void) {
479   
480     // store property names in vector 
481     m_propertyNames.push_back(ALIGNMENTFLAG_PROPERTY);
482     m_propertyNames.push_back(INSERTSIZE_PROPERTY);
483     m_propertyNames.push_back(ISDUPLICATE_PROPERTY);
484     m_propertyNames.push_back(ISFAILEDQC_PROPERTY);
485     m_propertyNames.push_back(ISFIRSTMATE_PROPERTY);
486     m_propertyNames.push_back(ISMAPPED_PROPERTY);
487     m_propertyNames.push_back(ISMATEMAPPED_PROPERTY);
488     m_propertyNames.push_back(ISMATEREVERSESTRAND_PROPERTY);
489     m_propertyNames.push_back(ISPAIRED_PROPERTY);
490     m_propertyNames.push_back(ISPRIMARYALIGNMENT_PROPERTY);
491     m_propertyNames.push_back(ISPROPERPAIR_PROPERTY);
492     m_propertyNames.push_back(ISREVERSESTRAND_PROPERTY);
493     m_propertyNames.push_back(ISSECONDMATE_PROPERTY);
494     m_propertyNames.push_back(MAPQUALITY_PROPERTY);
495     m_propertyNames.push_back(MATEPOSITION_PROPERTY);
496     m_propertyNames.push_back(MATEREFERENCE_PROPERTY);
497     m_propertyNames.push_back(NAME_PROPERTY);
498     m_propertyNames.push_back(POSITION_PROPERTY);
499     m_propertyNames.push_back(QUERYBASES_PROPERTY);
500     m_propertyNames.push_back(REFERENCE_PROPERTY);
501     m_propertyNames.push_back(CIGAR_PROPERTY);
502     
503     // add vector contents to FilterEngine<BamAlignmentChecker>
504     vector<string>::const_iterator propertyNameIter = m_propertyNames.begin();
505     vector<string>::const_iterator propertyNameEnd  = m_propertyNames.end();
506     for ( ; propertyNameIter != propertyNameEnd; ++propertyNameIter )
507         m_filterEngine.addProperty((*propertyNameIter));
508 }
509
510 bool FilterTool::FilterToolPrivate::ParseCommandLine(void) {
511   
512     // add a rule set to filter engine
513     const string CMD = "COMMAND_LINE";
514     m_filterEngine.addFilter(CMD);
515
516     // map property names to command line args
517     map<string, string> propertyTokens;
518     if ( m_settings->HasAlignmentFlagFilter )       propertyTokens.insert( make_pair(ALIGNMENTFLAG_PROPERTY,       m_settings->AlignmentFlagFilter) );
519     if ( m_settings->HasInsertSizeFilter )          propertyTokens.insert( make_pair(INSERTSIZE_PROPERTY,          m_settings->InsertSizeFilter) );
520     if ( m_settings->HasIsDuplicateFilter )         propertyTokens.insert( make_pair(ISDUPLICATE_PROPERTY,         m_settings->IsDuplicateFilter) );
521     if ( m_settings->HasIsFailedQCFilter )          propertyTokens.insert( make_pair(ISFAILEDQC_PROPERTY,          m_settings->IsFailedQCFilter) );
522     if ( m_settings->HasIsFirstMateFilter )         propertyTokens.insert( make_pair(ISFIRSTMATE_PROPERTY,         m_settings->IsFirstMateFilter) );
523     if ( m_settings->HasIsMappedFilter )            propertyTokens.insert( make_pair(ISMAPPED_PROPERTY,            m_settings->IsMappedFilter) );
524     if ( m_settings->HasIsMateMappedFilter )        propertyTokens.insert( make_pair(ISMATEMAPPED_PROPERTY,        m_settings->IsMateMappedFilter) );
525     if ( m_settings->HasIsMateReverseStrandFilter ) propertyTokens.insert( make_pair(ISMATEREVERSESTRAND_PROPERTY, m_settings->IsMateReverseStrandFilter) );
526     if ( m_settings->HasIsPairedFilter )            propertyTokens.insert( make_pair(ISPAIRED_PROPERTY,            m_settings->IsPairedFilter) );
527     if ( m_settings->HasIsPrimaryAlignmentFilter )  propertyTokens.insert( make_pair(ISPRIMARYALIGNMENT_PROPERTY,  m_settings->IsPrimaryAlignmentFilter) );
528     if ( m_settings->HasIsProperPairFilter )        propertyTokens.insert( make_pair(ISPROPERPAIR_PROPERTY,        m_settings->IsProperPairFilter) );
529     if ( m_settings->HasIsReverseStrandFilter )     propertyTokens.insert( make_pair(ISREVERSESTRAND_PROPERTY,     m_settings->IsReverseStrandFilter) );
530     if ( m_settings->HasIsSecondMateFilter )        propertyTokens.insert( make_pair(ISSECONDMATE_PROPERTY,        m_settings->IsSecondMateFilter) );
531     if ( m_settings->HasMapQualityFilter )          propertyTokens.insert( make_pair(MAPQUALITY_PROPERTY,          m_settings->MapQualityFilter) );
532     if ( m_settings->HasNameFilter )                propertyTokens.insert( make_pair(NAME_PROPERTY,                m_settings->NameFilter) );
533     if ( m_settings->HasQueryBasesFilter )          propertyTokens.insert( make_pair(QUERYBASES_PROPERTY,          m_settings->QueryBasesFilter) );
534
535     // send add these properties to filter set "COMMAND_LINE"
536     return AddPropertyTokensToFilter(CMD, propertyTokens);
537 }
538
539 bool FilterTool::FilterToolPrivate::ParseFilterObject(const string& filterName, const Json::Value& filterObject) {
540   
541     // filter object parsing variables
542     Json::Value null(Json::nullValue);
543     Json::Value propertyValue;
544     
545     // store results
546     map<string, string> propertyTokens;
547     
548     // iterate over known properties
549     vector<string>::const_iterator propertyNameIter = m_propertyNames.begin();
550     vector<string>::const_iterator propertyNameEnd  = m_propertyNames.end();
551     for ( ; propertyNameIter != propertyNameEnd; ++propertyNameIter ) {
552         const string& propertyName = (*propertyNameIter);
553         
554         // if property defined in filter, add to token list
555         propertyValue = filterObject.get(propertyName, null);
556         if ( propertyValue != null ) 
557             propertyTokens.insert( make_pair(propertyName, propertyValue.asString()) );
558     }
559   
560     // add this filter to engin
561     m_filterEngine.addFilter(filterName);
562   
563     // add token list to this filter
564     return AddPropertyTokensToFilter(filterName, propertyTokens);
565 }
566
567 bool FilterTool::FilterToolPrivate::ParseScript(void) {
568   
569     // read in script contents from file
570     const string document = GetScriptContents();
571     
572     // set up JsonCPP reader and attempt to parse script
573     Json::Value root;
574     Json::Reader reader;
575     if ( !reader.parse(document, root) ) {
576         // use built-in error reporting mechanism to alert user what was wrong with the script
577         cerr  << "Failed to parse configuration\n" << reader.getFormatedErrorMessages();
578         return false;     
579     }
580
581     // initialize return status
582     bool success = true;
583     
584     // see if root object contains multiple filters
585     const Json::Value filters = root["filters"];
586     if ( !filters.isNull() ) {
587       
588         // iterate over any filters found
589         int filterIndex = 0;
590         Json::Value::const_iterator filtersIter = filters.begin();
591         Json::Value::const_iterator filtersEnd  = filters.end();
592         for ( ; filtersIter != filtersEnd; ++filtersIter, ++filterIndex ) {
593             Json::Value filter = (*filtersIter);
594             
595             // convert filter index to string
596             string filterName;
597             
598             // if id tag supplied
599             const Json::Value id = filter["id"];
600             if ( !id.isNull() ) {
601                 filterName = id.asString();
602             } 
603             
604             // use array index 
605             else {
606                 stringstream convert;
607                 convert << filterIndex;
608                 filterName = convert.str();
609             }
610             
611             // create & parse filter 
612             success &= ParseFilterObject(filterName, filter);
613         }
614         
615         // see if user defined a "rule" for these filters
616         // otherwise, use filter engine's default rule behavior
617         string ruleString("");
618         const Json::Value rule = root["rule"];
619         if ( rule.isString() )
620             ruleString = rule.asString();
621         m_filterEngine.setRule(ruleString);
622           
623         // return success/fail
624         return success;
625     } 
626     
627     // otherwise, root is the only filter (just contains properties)
628     // create & parse filter named "ROOT"
629     else success = ParseFilterObject("ROOT", root);
630     
631     // return success/failure
632     return success;
633 }
634
635
636 bool FilterTool::FilterToolPrivate::Run(void) {
637     
638     // set to default input if none provided
639     if ( !m_settings->HasInputBamFilename ) 
640         m_settings->InputFiles.push_back(Options::StandardIn());
641     
642     // initialize defined properties & user-specified filters
643     // quit if failed
644     if ( !SetupFilters() ) return 1;
645
646     // open reader without index
647     BamMultiReader reader;
648     if (!reader.Open(m_settings->InputFiles, false, true)) {
649         cerr << "Could not open input files for reading." << endl;
650         return false;
651     }
652     const string headerText = reader.GetHeaderText();
653     filterToolReferences = reader.GetReferenceData();
654     
655     // open writer
656     BamWriter writer;
657     bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression );
658     if (!writer.Open(m_settings->OutputFilename, headerText, filterToolReferences, writeUncompressed)) {
659         cerr << "Could not open " << m_settings->OutputFilename << " for writing." << endl;
660         return false;
661     }
662     
663     BamAlignment al;
664     
665     // if no region specified, filter entire file 
666     if ( !m_settings->HasRegion ) {
667         while ( reader.GetNextAlignment(al) ) {
668             if ( CheckAlignment(al) ) 
669                 writer.SaveAlignment(al);
670         }
671     }
672     
673     // otherwise attempt to use region as constraint
674     else {
675         
676         // if region string parses OK
677         BamRegion region;
678         if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
679
680             // attempt to re-open reader with index files
681             reader.Close();
682             bool openedOK = reader.Open(m_settings->InputFiles, true, true );
683             
684             // if error
685             if ( !openedOK ) {
686                 cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl;
687                 return 1;
688             }
689             
690             // if index data available, we can use SetRegion
691             if ( reader.IsIndexLoaded() ) {
692               
693                 // attempt to use SetRegion(), if failed report error
694                 if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) {
695                     cerr << "ERROR: Region requested, but could not set BamReader region to REGION: " << m_settings->Region << " Aborting." << endl;
696                     reader.Close();
697                     return 1;
698                 } 
699               
700                 // everything checks out, just iterate through specified region, filtering alignments
701                 while ( reader.GetNextAlignmentCore(al) )
702                     if ( CheckAlignment(al) ) 
703                         writer.SaveAlignment(al);
704             } 
705             
706             // no index data available, we have to iterate through until we
707             // find overlapping alignments
708             else {
709                 while( reader.GetNextAlignmentCore(al) ) {
710                     if ( (al.RefID >= region.LeftRefID)  && ( (al.Position + al.Length) >= region.LeftPosition ) &&
711                           (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) 
712                     {
713                         if ( CheckAlignment(al) ) 
714                             writer.SaveAlignment(al);
715                     }
716                 }
717             }
718         } 
719         
720         // error parsing REGION string
721         else {
722             cerr << "ERROR: Could not parse REGION - " << m_settings->Region << endl;
723             cerr << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl;
724             reader.Close();
725             return 1;
726         }
727     }
728
729     // clean up & exit
730     reader.Close();
731     writer.Close();
732     return 0;
733 }
734
735 bool FilterTool::FilterToolPrivate::SetupFilters(void) {
736   
737     // add known properties to FilterEngine<BamAlignmentChecker>
738     InitProperties();
739     
740     // parse script for filter rules, if given
741     if ( m_settings->HasScriptFilename ) return ParseScript();
742     
743     // otherwise check command line for filters
744     else return ParseCommandLine();
745 }