]> git.donarmstrong.com Git - bamtools.git/blob - src/toolkit/bamtools_filter.cpp
Merge branch 'master' of git://github.com/pezmaster31/bamtools
[bamtools.git] / src / toolkit / bamtools_filter.cpp
1 // ***************************************************************************
2 // bamtools_filter.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 31 August 2010
7 // ---------------------------------------------------------------------------
8 // Filters a single BAM file (or filters multiple BAM files and merges) 
9 // according to some user-specified criteria.
10 // ***************************************************************************
11
12 // std includes
13 #include <cstdio>
14 #include <iostream>
15 #include <sstream>
16 #include <string>
17 #include <vector>
18
19 // BamTools includes
20 #include "bamtools_filter.h"
21 #include "bamtools_filter_engine.h"
22 #include "bamtools_options.h"
23 #include "bamtools_utilities.h"
24 #include "BamReader.h"
25 #include "BamMultiReader.h"
26 #include "BamWriter.h"
27
28 //JsonCPP includes
29 #include "jsoncpp/json.h"
30
31 using namespace std;
32 using namespace BamTools; 
33 using namespace Json;
34   
35 namespace BamTools {
36   
37 // -------------------------------  
38 // string literal constants  
39
40 // property names
41 const string ALIGNMENTFLAG_PROPERTY       = "alignmentFlag";
42 const string INSERTSIZE_PROPERTY          = "insertSize";
43 const string ISDUPLICATE_PROPERTY         = "isDuplicate";
44 const string ISFAILEDQC_PROPERTY          = "isFailedQC";
45 const string ISFIRSTMATE_PROPERTY         = "isFirstMate";
46 const string ISMAPPED_PROPERTY            = "isMapped";
47 const string ISMATEMAPPED_PROPERTY        = "isMateMapped";
48 const string ISMATEREVERSESTRAND_PROPERTY = "isMateReverseStrand";
49 const string ISPAIRED_PROPERTY            = "isPaired";
50 const string ISPRIMARYALIGNMENT_PROPERTY  = "isPrimaryAlignment";
51 const string ISPROPERPAIR_PROPERTY        = "isProperPair";
52 const string ISREVERSESTRAND_PROPERTY     = "isReverseStrand";
53 const string ISSECONDMATE_PROPERTY        = "isSecondMate";
54 const string MAPQUALITY_PROPERTY          = "mapQuality";
55 const string MATEPOSITION_PROPERTY        = "matePosition";
56 const string MATEREFERENCE_PROPERTY       = "mateReference";
57 const string NAME_PROPERTY                = "name";
58 const string POSITION_PROPERTY            = "position";
59 const string QUERYBASES_PROPERTY          = "queryBases";
60 const string REFERENCE_PROPERTY           = "reference";
61
62 // boolalpha
63 const string TRUE_STR  = "true";
64 const string FALSE_STR = "false";
65     
66 } // namespace BamTools
67   
68 // ---------------------------------------------
69 // FilterToolPrivate declaration
70
71 class FilterTool::FilterToolPrivate {
72       
73     // ctor & dtor
74     public:
75         FilterToolPrivate(FilterTool::FilterSettings* settings);
76         ~FilterToolPrivate(void);  
77         
78     // 'public' interface
79     public:
80         bool Run(void);
81         
82     // internal methods
83     private:
84         bool AddPropertyTokensToFilter(const string& filterName, const map<string, string>& propertyTokens);
85         bool CheckAlignment(const BamAlignment& al);
86         const string GetScriptContents(void);
87         void InitProperties(void);
88         bool ParseCommandLine(void);
89         bool ParseFilterObject(const string& filterName, const Json::Value& filterObject);
90         bool ParseScript(void);
91         bool SetupFilters(void);
92         
93     // data members
94     private:
95         vector<string> m_propertyNames;
96         FilterTool::FilterSettings* m_settings;
97         RefVector m_references;
98 };
99   
100 // ---------------------------------------------
101 // FilterSettings implementation
102
103 struct FilterTool::FilterSettings {
104
105     // ----------------------------------
106     // IO opts
107   
108     // flags
109     bool HasInputBamFilename;
110     bool HasOutputBamFilename;
111     bool HasRegion;
112     bool HasScriptFilename;
113     bool IsForceCompression;
114     
115     // filenames
116     vector<string> InputFiles;
117     string OutputFilename;
118     string Region;
119     string ScriptFilename;
120     
121     // -----------------------------------
122     // General filter opts
123     
124     // flags
125     bool HasAlignmentFlagFilter;
126     bool HasInsertSizeFilter;
127     bool HasMapQualityFilter;
128     bool HasNameFilter;
129     bool HasQueryBasesFilter;
130     
131 //     bool HasTagFilters;
132
133     // filters
134     string AlignmentFlagFilter;
135     string InsertSizeFilter;
136     string NameFilter;
137     string MapQualityFilter;
138     string QueryBasesFilter;
139     
140 //     vector<string> TagFilters;
141
142     // -----------------------------------
143     // AlignmentFlag filter opts
144     
145     // flags
146     bool HasIsDuplicateFilter;
147     bool HasIsFailedQCFilter;
148     bool HasIsFirstMateFilter;
149     bool HasIsMappedFilter;
150     bool HasIsMateMappedFilter;
151     bool HasIsMateReverseStrandFilter;
152     bool HasIsPairedFilter;
153     bool HasIsPrimaryAlignmentFilter;
154     bool HasIsProperPairFilter;
155     bool HasIsReverseStrandFilter;
156     bool HasIsSecondMateFilter;
157     
158     // filters
159     string IsDuplicateFilter;
160     string IsFailedQCFilter;
161     string IsFirstMateFilter;
162     string IsMappedFilter;
163     string IsMateMappedFilter;
164     string IsMateReverseStrandFilter;
165     string IsPairedFilter;
166     string IsPrimaryAlignmentFilter;
167     string IsProperPairFilter;
168     string IsReverseStrandFilter;
169     string IsSecondMateFilter;
170     
171     // ---------------------------------
172     // constructor
173     
174     FilterSettings(void)
175         : HasInputBamFilename(false)
176         , HasOutputBamFilename(false)
177         , HasRegion(false)
178         , HasScriptFilename(false)
179         , IsForceCompression(false)
180         , OutputFilename(Options::StandardOut())
181         , HasAlignmentFlagFilter(false)
182         , HasInsertSizeFilter(false)
183         , HasMapQualityFilter(false)
184         , HasNameFilter(false)
185         , HasQueryBasesFilter(false)
186 //         , HasTagFilters(false)
187         , HasIsDuplicateFilter(false)
188         , HasIsFailedQCFilter(false)
189         , HasIsFirstMateFilter(false)
190         , HasIsMappedFilter(false)
191         , HasIsMateMappedFilter(false)
192         , HasIsMateReverseStrandFilter(false)
193         , HasIsPairedFilter(false)
194         , HasIsPrimaryAlignmentFilter(false)
195         , HasIsProperPairFilter(false)
196         , HasIsReverseStrandFilter(false)
197         , HasIsSecondMateFilter(false)
198         , IsDuplicateFilter(TRUE_STR)
199         , IsFailedQCFilter(TRUE_STR)
200         , IsFirstMateFilter(TRUE_STR)
201         , IsMappedFilter(TRUE_STR)
202         , IsMateMappedFilter(TRUE_STR)
203         , IsMateReverseStrandFilter(TRUE_STR)
204         , IsPairedFilter(TRUE_STR)
205         , IsPrimaryAlignmentFilter(TRUE_STR)
206         , IsProperPairFilter(TRUE_STR)
207         , IsReverseStrandFilter(TRUE_STR)
208         , IsSecondMateFilter(TRUE_STR)
209     { }
210 };  
211
212 // ---------------------------------------------
213 // FilterTool implementation
214
215 FilterTool::FilterTool(void)
216     : AbstractTool()
217     , m_settings(new FilterSettings)
218     , m_impl(0)
219 {
220     // set program details
221     Options::SetProgramInfo("bamtools filter", "filters BAM file(s)", "-in <filename> [-in <filename> ... ] -out <filename> -region <REGION> [ [-script <filename] | [filterOptions] ]");
222     
223     OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
224     Options::AddValueOption("-in",     "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename,  m_settings->InputFiles,     IO_Opts, Options::StandardIn());
225     Options::AddValueOption("-out",    "BAM filename", "the output BAM file",   "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
226     Options::AddValueOption("-region", "REGION",       "only read data from this genomic region (see README for more details)", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
227     Options::AddValueOption("-script", "filename",     "the filter script file (see README for more details)", "", m_settings->HasScriptFilename, m_settings->ScriptFilename, IO_Opts);
228     Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts);
229     
230     OptionGroup* FilterOpts = Options::CreateOptionGroup("General Filters");
231     Options::AddValueOption("-alignmentFlag", "int",     "keep reads with this *exact* alignment flag (for more detailed queries, see below)", "", m_settings->HasAlignmentFlagFilter, m_settings->AlignmentFlagFilter, FilterOpts);
232     Options::AddValueOption("-insertSize",    "int",     "keep reads with insert size that mathces pattern",             "", m_settings->HasInsertSizeFilter, m_settings->InsertSizeFilter, FilterOpts);
233     Options::AddValueOption("-mapQuality",    "[0-255]", "keep reads with map quality that matches pattern",             "", m_settings->HasMapQualityFilter, m_settings->MapQualityFilter, FilterOpts);
234     Options::AddValueOption("-name",          "string",  "keep reads with name that matches pattern",                    "", m_settings->HasNameFilter,       m_settings->NameFilter,       FilterOpts);
235     Options::AddValueOption("-queryBases",    "string",  "keep reads with motif that mathces pattern",                   "", m_settings->HasQueryBasesFilter, m_settings->QueryBasesFilter, FilterOpts);
236     
237 //     Options::AddValueOption("-tag", "TAG:VALUE", "keep reads with this key=>value pair. If multiple tags are given, reads must match all", "", m_settings->HasTagFilters, m_settings->TagFilters, FilterOpts);
238     
239     OptionGroup* AlignmentFlagOpts = Options::CreateOptionGroup("Alignment Flag Filters");
240     Options::AddValueOption("-isDuplicate",         "true/false", "keep only alignments that are marked as duplicate?", "", m_settings->HasIsDuplicateFilter,         m_settings->IsDuplicateFilter,         AlignmentFlagOpts, TRUE_STR);
241     Options::AddValueOption("-isFailedQC",          "true/false", "keep only alignments that failed QC?",               "", m_settings->HasIsFailedQCFilter,          m_settings->IsFailedQCFilter,          AlignmentFlagOpts, TRUE_STR);
242     Options::AddValueOption("-isFirstMate",         "true/false", "keep only alignments marked as first mate?",         "", m_settings->HasIsFirstMateFilter,         m_settings->IsFirstMateFilter,         AlignmentFlagOpts, TRUE_STR);
243     Options::AddValueOption("-isMapped",            "true/false", "keep only alignments that were mapped?",             "", m_settings->HasIsMappedFilter,            m_settings->IsMappedFilter,            AlignmentFlagOpts, TRUE_STR);
244     Options::AddValueOption("-isMateMapped",        "true/false", "keep only alignments with mates that mapped",        "", m_settings->HasIsMateMappedFilter,        m_settings->IsMateMappedFilter,        AlignmentFlagOpts, TRUE_STR);
245     Options::AddValueOption("-isMateReverseStrand", "true/false", "keep only alignments with mate on reverese strand?", "", m_settings->HasIsMateReverseStrandFilter, m_settings->IsMateReverseStrandFilter, AlignmentFlagOpts, TRUE_STR);
246     Options::AddValueOption("-isPaired",            "true/false", "keep only alignments that were sequenced as paired?","", m_settings->HasIsPairedFilter,            m_settings->IsPairedFilter,            AlignmentFlagOpts, TRUE_STR);
247     Options::AddValueOption("-isPrimaryAlignment",  "true/false", "keep only alignments marked as primary?",            "", m_settings->HasIsPrimaryAlignmentFilter,  m_settings->IsPrimaryAlignmentFilter,  AlignmentFlagOpts, TRUE_STR);
248     Options::AddValueOption("-isProperPair",        "true/false", "keep only alignments that passed PE resolution?",    "", m_settings->HasIsProperPairFilter,        m_settings->IsProperPairFilter,        AlignmentFlagOpts, TRUE_STR);
249     Options::AddValueOption("-isReverseStrand",     "true/false", "keep only alignments on reverse strand?",            "", m_settings->HasIsReverseStrandFilter,     m_settings->IsReverseStrandFilter,     AlignmentFlagOpts, TRUE_STR);
250     Options::AddValueOption("-isSecondMate",        "true/false", "keep only alignments marked as second mate?",        "", m_settings->HasIsSecondMateFilter,        m_settings->IsSecondMateFilter,        AlignmentFlagOpts, TRUE_STR);
251 }
252
253 FilterTool::~FilterTool(void) {
254     delete m_settings;
255     m_settings = 0;
256     
257     delete m_impl;
258     m_impl = 0;
259 }
260
261 int FilterTool::Help(void) {
262     Options::DisplayHelp();
263     return 0;
264 }
265
266 int FilterTool::Run(int argc, char* argv[]) {
267   
268     // parse command line arguments
269     Options::Parse(argc, argv, 1);
270     
271     // run internal FilterTool implementation, return success/fail
272     m_impl = new FilterToolPrivate(m_settings);
273     
274     if ( m_impl->Run() ) return 0;
275     else return 1;
276 }
277  
278 // ---------------------------------------------
279 // FilterToolPrivate implementation
280   
281 // constructor  
282 FilterTool::FilterToolPrivate::FilterToolPrivate(FilterTool::FilterSettings* settings)
283     : m_settings(settings)
284 { }  
285   
286 // destructor
287 FilterTool::FilterToolPrivate::~FilterToolPrivate(void) { }
288
289 bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filterName, const map<string, string>& propertyTokens) {
290
291   
292     // dummy temp values for token parsing
293     bool boolValue;
294     int32_t int32Value;
295     uint16_t uint16Value;
296     uint32_t uint32Value;
297     string stringValue;
298     PropertyFilterValue::ValueCompareType type;
299   
300     // iterate over property token map
301     map<string, string>::const_iterator mapIter = propertyTokens.begin();
302     map<string, string>::const_iterator mapEnd  = propertyTokens.end();
303     for ( ; mapIter != mapEnd; ++mapIter ) {
304       
305         const string& propertyName = (*mapIter).first;
306         const string& token        = (*mapIter).second;
307         
308         // ------------------------------
309         // convert token to value & compare type 
310         // then add to filter engine
311         
312         // bool conversion
313         if ( propertyName == ISDUPLICATE_PROPERTY ||
314              propertyName == ISFAILEDQC_PROPERTY ||
315              propertyName == ISFIRSTMATE_PROPERTY ||
316              propertyName == ISMAPPED_PROPERTY ||
317              propertyName == ISMATEMAPPED_PROPERTY ||
318              propertyName == ISMATEREVERSESTRAND_PROPERTY ||
319              propertyName == ISPAIRED_PROPERTY ||
320              propertyName == ISPRIMARYALIGNMENT_PROPERTY ||
321              propertyName == ISPROPERPAIR_PROPERTY ||
322              propertyName == ISREVERSESTRAND_PROPERTY ||
323              propertyName == ISSECONDMATE_PROPERTY
324            ) 
325         {
326             FilterEngine::parseToken(token, boolValue, type);
327             FilterEngine::setProperty(filterName, propertyName, boolValue, type);
328         }
329         
330         // int32_t conversion
331         else if ( propertyName == INSERTSIZE_PROPERTY ||
332                   propertyName == MATEPOSITION_PROPERTY ||
333                   propertyName == POSITION_PROPERTY 
334                 ) 
335         {
336             FilterEngine::parseToken(token, int32Value, type);
337             FilterEngine::setProperty(filterName, propertyName, int32Value, type);
338         }
339         
340         // uint16_t conversion
341         else if ( propertyName == MAPQUALITY_PROPERTY )
342         {
343             FilterEngine::parseToken(token, uint16Value, type);
344             FilterEngine::setProperty(filterName, propertyName, uint16Value, type);
345         }
346         
347         // uint32_t conversion
348         else if ( propertyName == ALIGNMENTFLAG_PROPERTY )
349         {
350             FilterEngine::parseToken(token, uint32Value, type);
351             FilterEngine::setProperty(filterName, propertyName, uint32Value, type);
352         }
353         
354         // string conversion
355         else if ( propertyName == MATEREFERENCE_PROPERTY ||
356                   propertyName == NAME_PROPERTY ||
357                   propertyName == QUERYBASES_PROPERTY ||
358                   propertyName == REFERENCE_PROPERTY
359                 ) 
360         {
361             FilterEngine::parseToken(token, stringValue, type);
362             FilterEngine::setProperty(filterName, propertyName, stringValue, type);
363         }
364       
365         // else unknown property 
366         else {
367             cerr << "Unknown property: " << propertyName << "!" << endl;
368             return false;
369         }
370     }
371     return true;
372 }
373
374 bool FilterTool::FilterToolPrivate::CheckAlignment(const BamAlignment& al) {
375
376     bool keepAlignment = true;
377   
378     // only consider properties that are actually enabled
379     // iterate over these enabled properties
380     const vector<string> enabledProperties = FilterEngine::enabledPropertyNames();
381     vector<string>::const_iterator propIter = enabledProperties.begin();
382     vector<string>::const_iterator propEnd  = enabledProperties.end();
383     for ( ; propIter != propEnd; ++propIter ) {
384      
385         // check alignment data field depending on propertyName
386         const string& propertyName = (*propIter);
387         if      ( propertyName == ALIGNMENTFLAG_PROPERTY )        keepAlignment &= FilterEngine::check(ALIGNMENTFLAG_PROPERTY,       al.AlignmentFlag);
388         else if ( propertyName == INSERTSIZE_PROPERTY )           keepAlignment &= FilterEngine::check(INSERTSIZE_PROPERTY,          al.InsertSize);
389         else if ( propertyName == ISDUPLICATE_PROPERTY )          keepAlignment &= FilterEngine::check(ISDUPLICATE_PROPERTY,         al.IsDuplicate());
390         else if ( propertyName == ISFAILEDQC_PROPERTY )           keepAlignment &= FilterEngine::check(ISFAILEDQC_PROPERTY,          al.IsFailedQC());
391         else if ( propertyName == ISFIRSTMATE_PROPERTY )          keepAlignment &= FilterEngine::check(ISFIRSTMATE_PROPERTY,         al.IsFirstMate());
392         else if ( propertyName == ISMAPPED_PROPERTY )             keepAlignment &= FilterEngine::check(ISMAPPED_PROPERTY,            al.IsMapped());
393         else if ( propertyName == ISMATEMAPPED_PROPERTY )         keepAlignment &= FilterEngine::check(ISMATEMAPPED_PROPERTY,        al.IsMateMapped());
394         else if ( propertyName == ISMATEREVERSESTRAND_PROPERTY )  keepAlignment &= FilterEngine::check(ISMATEREVERSESTRAND_PROPERTY, al.IsMateReverseStrand());
395         else if ( propertyName == ISPAIRED_PROPERTY )             keepAlignment &= FilterEngine::check(ISPAIRED_PROPERTY,            al.IsPaired());
396         else if ( propertyName == ISPRIMARYALIGNMENT_PROPERTY )   keepAlignment &= FilterEngine::check(ISPRIMARYALIGNMENT_PROPERTY,  al.IsPrimaryAlignment());
397         else if ( propertyName == ISPROPERPAIR_PROPERTY )         keepAlignment &= FilterEngine::check(ISPROPERPAIR_PROPERTY,        al.IsProperPair());
398         else if ( propertyName == ISREVERSESTRAND_PROPERTY )      keepAlignment &= FilterEngine::check(ISREVERSESTRAND_PROPERTY,     al.IsReverseStrand());
399         else if ( propertyName == ISSECONDMATE_PROPERTY )         keepAlignment &= FilterEngine::check(ISSECONDMATE_PROPERTY,        al.IsSecondMate());
400         else if ( propertyName == MAPQUALITY_PROPERTY )           keepAlignment &= FilterEngine::check(MAPQUALITY_PROPERTY,          al.MapQuality);
401         else if ( propertyName == MATEPOSITION_PROPERTY )         keepAlignment &= ( al.IsPaired() && al.IsMateMapped() && FilterEngine::check(MATEPOSITION_PROPERTY, al.MateRefID) );
402         else if ( propertyName == MATEREFERENCE_PROPERTY ) {
403             if ( !al.IsPaired() || !al.IsMateMapped() ) return false;
404             BAMTOOLS_ASSERT_MESSAGE( (al.MateRefID>=0 && (al.MateRefID<(int)m_references.size())), "Invalid MateRefID");
405             const string& refName = m_references.at(al.MateRefID).RefName;
406             keepAlignment &= FilterEngine::check(MATEREFERENCE_PROPERTY, refName);
407         }
408         else if ( propertyName == NAME_PROPERTY )                 keepAlignment &= FilterEngine::check(NAME_PROPERTY,                al.Name);
409         else if ( propertyName == POSITION_PROPERTY )             keepAlignment &= FilterEngine::check(POSITION_PROPERTY,            al.Position);
410         else if ( propertyName == QUERYBASES_PROPERTY )           keepAlignment &= FilterEngine::check(QUERYBASES_PROPERTY,          al.QueryBases);
411         else if ( propertyName == REFERENCE_PROPERTY ) {
412             BAMTOOLS_ASSERT_MESSAGE( (al.RefID>=0 && (al.RefID<(int)m_references.size())), "Invalid RefID");
413             const string& refName = m_references.at(al.RefID).RefName;
414             keepAlignment &= FilterEngine::check(REFERENCE_PROPERTY, refName);
415         }
416         else BAMTOOLS_ASSERT_MESSAGE( false, "Unknown property");
417         
418         // if alignment fails at ANY point, just quit and return false
419         if ( !keepAlignment ) return false;
420     }
421   
422     // return success (should still be true at this point)
423     return keepAlignment;
424 }
425
426 const string FilterTool::FilterToolPrivate::GetScriptContents(void) {
427   
428     // open script for reading
429     FILE* inFile = fopen(m_settings->ScriptFilename.c_str(), "rb");
430     if ( !inFile ) {
431         cerr << "FilterTool error: Could not open script: " << m_settings->ScriptFilename << " for reading" << endl;
432         return false;
433     }
434     
435     // read in entire script contents  
436     char buffer[1024];
437     ostringstream docStream("");
438     while ( true ) {
439         
440         // peek ahead, make sure there is data available
441         char ch = fgetc(inFile);
442         ungetc(ch, inFile);
443         if( feof(inFile) ) break;       
444         
445         // read next block of data
446         if ( fgets(buffer, 1024, inFile) == 0 ) {
447             cerr << "FilterTool error : could not read from script" << endl;
448             return false;
449         }
450         
451         docStream << buffer;
452     }
453     
454     // close script file
455     fclose(inFile);
456     
457     // import buffer contents to document, return
458     string document = docStream.str();
459     return document;
460 }
461
462 void FilterTool::FilterToolPrivate::InitProperties(void) {
463   
464     // store property names in vector 
465     m_propertyNames.push_back(ALIGNMENTFLAG_PROPERTY);
466     m_propertyNames.push_back(INSERTSIZE_PROPERTY);
467     m_propertyNames.push_back(ISDUPLICATE_PROPERTY);
468     m_propertyNames.push_back(ISFAILEDQC_PROPERTY);
469     m_propertyNames.push_back(ISFIRSTMATE_PROPERTY);
470     m_propertyNames.push_back(ISMAPPED_PROPERTY);
471     m_propertyNames.push_back(ISMATEMAPPED_PROPERTY);
472     m_propertyNames.push_back(ISMATEREVERSESTRAND_PROPERTY);
473     m_propertyNames.push_back(ISPAIRED_PROPERTY);
474     m_propertyNames.push_back(ISPRIMARYALIGNMENT_PROPERTY);
475     m_propertyNames.push_back(ISPROPERPAIR_PROPERTY);
476     m_propertyNames.push_back(ISREVERSESTRAND_PROPERTY);
477     m_propertyNames.push_back(ISSECONDMATE_PROPERTY);
478     m_propertyNames.push_back(MAPQUALITY_PROPERTY);
479     m_propertyNames.push_back(MATEPOSITION_PROPERTY);
480     m_propertyNames.push_back(MATEREFERENCE_PROPERTY);
481     m_propertyNames.push_back(NAME_PROPERTY);
482     m_propertyNames.push_back(POSITION_PROPERTY);
483     m_propertyNames.push_back(QUERYBASES_PROPERTY);
484     m_propertyNames.push_back(REFERENCE_PROPERTY);
485     
486     // add vector contents to FilterEngine
487     vector<string>::const_iterator propertyNameIter = m_propertyNames.begin();
488     vector<string>::const_iterator propertyNameEnd  = m_propertyNames.end();
489     for ( ; propertyNameIter != propertyNameEnd; ++propertyNameIter )
490         FilterEngine::addProperty((*propertyNameIter));
491 }
492
493 bool FilterTool::FilterToolPrivate::ParseCommandLine(void) {
494   
495     // add a rule set to filter engine
496     const string CMD = "COMMAND_LINE";
497     FilterEngine::addFilter(CMD);
498
499     // map property names to command line args
500     map<string, string> propertyTokens;
501     if ( m_settings->HasAlignmentFlagFilter )       propertyTokens.insert( make_pair(ALIGNMENTFLAG_PROPERTY,       m_settings->AlignmentFlagFilter) );
502     if ( m_settings->HasInsertSizeFilter )          propertyTokens.insert( make_pair(INSERTSIZE_PROPERTY,          m_settings->InsertSizeFilter) );
503     if ( m_settings->HasIsDuplicateFilter )         propertyTokens.insert( make_pair(ISDUPLICATE_PROPERTY,         m_settings->IsDuplicateFilter) );
504     if ( m_settings->HasIsFailedQCFilter )          propertyTokens.insert( make_pair(ISFAILEDQC_PROPERTY,          m_settings->IsFailedQCFilter) );
505     if ( m_settings->HasIsFirstMateFilter )         propertyTokens.insert( make_pair(ISFIRSTMATE_PROPERTY,         m_settings->IsFirstMateFilter) );
506     if ( m_settings->HasIsMappedFilter )            propertyTokens.insert( make_pair(ISMAPPED_PROPERTY,            m_settings->IsMappedFilter) );
507     if ( m_settings->HasIsMateMappedFilter )        propertyTokens.insert( make_pair(ISMATEMAPPED_PROPERTY,        m_settings->IsMateMappedFilter) );
508     if ( m_settings->HasIsMateReverseStrandFilter ) propertyTokens.insert( make_pair(ISMATEREVERSESTRAND_PROPERTY, m_settings->IsMateReverseStrandFilter) );
509     if ( m_settings->HasIsPairedFilter )            propertyTokens.insert( make_pair(ISPAIRED_PROPERTY,            m_settings->IsPairedFilter) );
510     if ( m_settings->HasIsPrimaryAlignmentFilter )  propertyTokens.insert( make_pair(ISPRIMARYALIGNMENT_PROPERTY,  m_settings->IsPrimaryAlignmentFilter) );
511     if ( m_settings->HasIsProperPairFilter )        propertyTokens.insert( make_pair(ISPROPERPAIR_PROPERTY,        m_settings->IsProperPairFilter) );
512     if ( m_settings->HasIsReverseStrandFilter )     propertyTokens.insert( make_pair(ISREVERSESTRAND_PROPERTY,     m_settings->IsReverseStrandFilter) );
513     if ( m_settings->HasIsSecondMateFilter )        propertyTokens.insert( make_pair(ISSECONDMATE_PROPERTY,        m_settings->IsSecondMateFilter) );
514     if ( m_settings->HasMapQualityFilter )          propertyTokens.insert( make_pair(MAPQUALITY_PROPERTY,          m_settings->MapQualityFilter) );
515     if ( m_settings->HasNameFilter )                propertyTokens.insert( make_pair(NAME_PROPERTY,                m_settings->NameFilter) );
516     if ( m_settings->HasQueryBasesFilter )          propertyTokens.insert( make_pair(QUERYBASES_PROPERTY,          m_settings->QueryBasesFilter) );
517
518     // send add these properties to filter set "COMMAND_LINE"
519     return AddPropertyTokensToFilter(CMD, propertyTokens);
520 }
521
522 bool FilterTool::FilterToolPrivate::ParseFilterObject(const string& filterName, const Json::Value& filterObject) {
523   
524     // filter object parsing variables
525     Json::Value null(Json::nullValue);
526     Json::Value propertyValue;
527     
528     // store results
529     map<string, string> propertyTokens;
530     
531     // iterate over known properties
532     vector<string>::const_iterator propertyNameIter = m_propertyNames.begin();
533     vector<string>::const_iterator propertyNameEnd  = m_propertyNames.end();
534     for ( ; propertyNameIter != propertyNameEnd; ++propertyNameIter ) {
535         const string& propertyName = (*propertyNameIter);
536         
537         // if property defined in filter, add to token list
538         propertyValue = filterObject.get(propertyName, null);
539         if ( propertyValue != null ) 
540             propertyTokens.insert( make_pair(propertyName, propertyValue.asString()) );
541     }
542   
543     // add this filter to engin
544     FilterEngine::addFilter(filterName);
545   
546     // add token list to this filter
547     return AddPropertyTokensToFilter(filterName, propertyTokens);
548 }
549
550 bool FilterTool::FilterToolPrivate::ParseScript(void) {
551   
552     // read in script contents from file
553     const string document = GetScriptContents();
554     
555     // set up JsonCPP reader and attempt to parse script
556     Json::Value root;
557     Json::Reader reader;
558     if ( !reader.parse(document, root) ) {
559         // use built-in error reporting mechanism to alert user what was wrong with the script
560         cerr  << "Failed to parse configuration\n" << reader.getFormatedErrorMessages();
561         return false;     
562     }
563
564     // initialize return status
565     bool success = true;
566     
567     // see if root object contains multiple filters
568     const Json::Value filters = root["filters"];
569     if ( !filters.isNull() ) {
570       
571         // iterate over any filters found
572         int filterIndex = 0;
573         Json::Value::const_iterator filtersIter = filters.begin();
574         Json::Value::const_iterator filtersEnd  = filters.end();
575         for ( ; filtersIter != filtersEnd; ++filtersIter, ++filterIndex ) {
576             Json::Value filter = (*filtersIter);
577             
578             // convert filter index to string
579             string filterName;
580             
581             // if id tag supplied
582             const Json::Value id = filter["id"];
583             if ( !id.isNull() ) {
584                 filterName = id.asString();
585             } 
586             
587             // use array index 
588             else {
589                 stringstream convert;
590                 convert << filterIndex;
591                 filterName = convert.str();
592             }
593             
594             // create & parse filter 
595             success &= ParseFilterObject(filterName, filter);
596         }
597         
598         // see if user defined "rule" for these filters
599         const Json::Value rule = root["rule"];
600         if ( !rule.isNull() ) {
601             cout << "found rule: " << rule.asString() << endl;
602         } else {
603             cout << "no rule found!" << endl;
604         }
605           
606         return success;
607     } 
608     
609     // otherwise, root is the only filter (just contains properties)
610     // create & parse filter named "ROOT"
611     else success = ParseFilterObject("ROOT", root);
612     
613     // return success/failure
614     return success;
615 }
616
617
618 bool FilterTool::FilterToolPrivate::Run(void) {
619     
620     // set to default input if none provided
621     if ( !m_settings->HasInputBamFilename ) 
622         m_settings->InputFiles.push_back(Options::StandardIn());
623     
624     // initialize defined properties & user-specified filters
625     // quit if failed
626     if ( !SetupFilters() ) return 1;
627
628     // open reader without index
629     BamMultiReader reader;
630     reader.Open(m_settings->InputFiles, false, true);
631     const string headerText = reader.GetHeaderText();
632     m_references = reader.GetReferenceData();
633     
634     // open writer
635     BamWriter writer;
636     bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression );
637     writer.Open(m_settings->OutputFilename, headerText, m_references, writeUncompressed);
638     
639     BamAlignment al;
640     
641     // if no region specified, filter entire file 
642     if ( !m_settings->HasRegion ) {
643         while ( reader.GetNextAlignment(al) ) {
644             if ( CheckAlignment(al) ) 
645                 writer.SaveAlignment(al);
646         }
647     }
648     
649     // otherwise attempt to use region as constraint
650     else {
651         
652         // if region string parses OK
653         BamRegion region;
654         if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
655
656             // attempt to re-open reader with index files
657             reader.Close();
658             bool openedOK = reader.Open(m_settings->InputFiles, true, true );
659             
660             // if error
661             if ( !openedOK ) {
662                 cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl;
663                 return 1;
664             }
665             
666             // if index data available, we can use SetRegion
667             if ( reader.IsIndexLoaded() ) {
668               
669                 // attempt to use SetRegion(), if failed report error
670                 if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) {
671                     cerr << "ERROR: Region requested, but could not set BamReader region to REGION: " << m_settings->Region << " Aborting." << endl;
672                     reader.Close();
673                     return 1;
674                 } 
675               
676                 // everything checks out, just iterate through specified region, filtering alignments
677                 while ( reader.GetNextAlignmentCore(al) )
678                     if ( CheckAlignment(al) ) 
679                         writer.SaveAlignment(al);
680             } 
681             
682             // no index data available, we have to iterate through until we
683             // find overlapping alignments
684             else {
685                 while( reader.GetNextAlignmentCore(al) ) {
686                     if ( (al.RefID >= region.LeftRefID)  && ( (al.Position + al.Length) >= region.LeftPosition ) &&
687                           (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) 
688                     {
689                         if ( CheckAlignment(al) ) 
690                             writer.SaveAlignment(al);
691                     }
692                 }
693             }
694         } 
695         
696         // error parsing REGION string
697         else {
698             cerr << "ERROR: Could not parse REGION - " << m_settings->Region << endl;
699             cerr << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl;
700             reader.Close();
701             return 1;
702         }
703     }
704
705     // clean up & exit
706     reader.Close();
707     writer.Close();
708     return 0;
709 }
710
711 bool FilterTool::FilterToolPrivate::SetupFilters(void) {
712   
713     // add known properties to FilterEngine
714     InitProperties();
715     
716     // parse script for filter rules, if given
717     if ( m_settings->HasScriptFilename ) return ParseScript();
718     
719     // otherwise check command line for filters
720     else return ParseCommandLine();
721 }