1 // ***************************************************************************
2 // bamtools_filter.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 21 September 2010
7 // ---------------------------------------------------------------------------
8 // Filters BAM file(s) according to some user-specified criteria.
9 // ***************************************************************************
16 #include "bamtools_filter.h"
17 #include "bamtools_filter_engine.h"
18 #include "bamtools_options.h"
19 #include "bamtools_utilities.h"
20 #include "BamReader.h"
21 #include "BamMultiReader.h"
22 #include "BamWriter.h"
23 #include "jsoncpp/json.h"
25 using namespace BamTools;
30 // -------------------------------
31 // string literal constants
34 const string ALIGNMENTFLAG_PROPERTY = "alignmentFlag";
35 const string INSERTSIZE_PROPERTY = "insertSize";
36 const string ISDUPLICATE_PROPERTY = "isDuplicate";
37 const string ISFAILEDQC_PROPERTY = "isFailedQC";
38 const string ISFIRSTMATE_PROPERTY = "isFirstMate";
39 const string ISMAPPED_PROPERTY = "isMapped";
40 const string ISMATEMAPPED_PROPERTY = "isMateMapped";
41 const string ISMATEREVERSESTRAND_PROPERTY = "isMateReverseStrand";
42 const string ISPAIRED_PROPERTY = "isPaired";
43 const string ISPRIMARYALIGNMENT_PROPERTY = "isPrimaryAlignment";
44 const string ISPROPERPAIR_PROPERTY = "isProperPair";
45 const string ISREVERSESTRAND_PROPERTY = "isReverseStrand";
46 const string ISSECONDMATE_PROPERTY = "isSecondMate";
47 const string MAPQUALITY_PROPERTY = "mapQuality";
48 const string MATEPOSITION_PROPERTY = "matePosition";
49 const string MATEREFERENCE_PROPERTY = "mateReference";
50 const string NAME_PROPERTY = "name";
51 const string POSITION_PROPERTY = "position";
52 const string QUERYBASES_PROPERTY = "queryBases";
53 const string REFERENCE_PROPERTY = "reference";
54 const string CIGAR_PROPERTY = "cigar";
57 const string TRUE_STR = "true";
58 const string FALSE_STR = "false";
60 RefVector filterToolReferences;
62 struct BamAlignmentChecker {
63 bool check(const PropertyFilter& filter, const BamAlignment& al) {
65 bool keepAlignment = true;
66 const PropertyMap& properties = filter.Properties;
67 PropertyMap::const_iterator propertyIter = properties.begin();
68 PropertyMap::const_iterator propertyEnd = properties.end();
69 for ( ; propertyIter != propertyEnd; ++propertyIter ) {
71 // check alignment data field depending on propertyName
72 const string& propertyName = (*propertyIter).first;
73 const PropertyFilterValue& valueFilter = (*propertyIter).second;
75 if ( propertyName == ALIGNMENTFLAG_PROPERTY ) keepAlignment &= valueFilter.check(al.AlignmentFlag);
76 else if ( propertyName == INSERTSIZE_PROPERTY ) keepAlignment &= valueFilter.check(al.InsertSize);
77 else if ( propertyName == ISDUPLICATE_PROPERTY ) keepAlignment &= valueFilter.check(al.IsDuplicate());
78 else if ( propertyName == ISFAILEDQC_PROPERTY ) keepAlignment &= valueFilter.check(al.IsFailedQC());
79 else if ( propertyName == ISFIRSTMATE_PROPERTY ) keepAlignment &= valueFilter.check(al.IsFirstMate());
80 else if ( propertyName == ISMAPPED_PROPERTY ) keepAlignment &= valueFilter.check(al.IsMapped());
81 else if ( propertyName == ISMATEMAPPED_PROPERTY ) keepAlignment &= valueFilter.check(al.IsMateMapped());
82 else if ( propertyName == ISMATEREVERSESTRAND_PROPERTY ) keepAlignment &= valueFilter.check(al.IsMateReverseStrand());
83 else if ( propertyName == ISPAIRED_PROPERTY ) keepAlignment &= valueFilter.check(al.IsPaired());
84 else if ( propertyName == ISPRIMARYALIGNMENT_PROPERTY ) keepAlignment &= valueFilter.check(al.IsPrimaryAlignment());
85 else if ( propertyName == ISPROPERPAIR_PROPERTY ) keepAlignment &= valueFilter.check(al.IsProperPair());
86 else if ( propertyName == ISREVERSESTRAND_PROPERTY ) keepAlignment &= valueFilter.check(al.IsReverseStrand());
87 else if ( propertyName == ISSECONDMATE_PROPERTY ) keepAlignment &= valueFilter.check(al.IsSecondMate());
88 else if ( propertyName == MAPQUALITY_PROPERTY ) keepAlignment &= valueFilter.check(al.MapQuality);
89 else if ( propertyName == MATEPOSITION_PROPERTY ) keepAlignment &= ( al.IsPaired() && al.IsMateMapped() && valueFilter.check(al.MateRefID) );
90 else if ( propertyName == MATEREFERENCE_PROPERTY ) {
91 if ( !al.IsPaired() || !al.IsMateMapped() ) return false;
92 BAMTOOLS_ASSERT_MESSAGE( (al.MateRefID>=0 && (al.MateRefID<(int)filterToolReferences.size())), "Invalid MateRefID");
93 const string& refName = filterToolReferences.at(al.MateRefID).RefName;
94 keepAlignment &= valueFilter.check(refName);
96 else if ( propertyName == NAME_PROPERTY ) keepAlignment &= valueFilter.check(al.Name);
97 else if ( propertyName == POSITION_PROPERTY ) keepAlignment &= valueFilter.check(al.Position);
98 else if ( propertyName == QUERYBASES_PROPERTY ) keepAlignment &= valueFilter.check(al.QueryBases);
99 else if ( propertyName == REFERENCE_PROPERTY ) {
100 BAMTOOLS_ASSERT_MESSAGE( (al.RefID>=0 && (al.RefID<(int)filterToolReferences.size())), "Invalid RefID");
101 const string& refName = filterToolReferences.at(al.RefID).RefName;
102 keepAlignment &= valueFilter.check(refName);
104 else if ( propertyName == CIGAR_PROPERTY ) {
105 stringstream cigarSs;
106 const vector<CigarOp>& cigarData = al.CigarData;
107 if ( !cigarData.empty() ) {
108 vector<CigarOp>::const_iterator cigarBegin = cigarData.begin();
109 vector<CigarOp>::const_iterator cigarIter = cigarBegin;
110 vector<CigarOp>::const_iterator cigarEnd = cigarData.end();
111 for ( ; cigarIter != cigarEnd; ++cigarIter ) {
112 const CigarOp& op = (*cigarIter);
113 cigarSs << op.Length << op.Type;
115 keepAlignment &= valueFilter.check(cigarSs.str());
118 else BAMTOOLS_ASSERT_UNREACHABLE;
120 // if alignment fails at ANY point, just quit and return false
121 if ( !keepAlignment )
125 BAMTOOLS_ASSERT_MESSAGE( keepAlignment, "Error in BamAlignmentChecker... keepAlignment should be true here");
126 return keepAlignment;
130 } // namespace BamTools
132 // ---------------------------------------------
133 // FilterToolPrivate declaration
135 class FilterTool::FilterToolPrivate {
139 FilterToolPrivate(FilterTool::FilterSettings* settings);
140 ~FilterToolPrivate(void);
142 // 'public' interface
148 bool AddPropertyTokensToFilter(const string& filterName, const map<string, string>& propertyTokens);
149 bool CheckAlignment(const BamAlignment& al);
150 const string GetScriptContents(void);
151 void InitProperties(void);
152 bool ParseCommandLine(void);
153 bool ParseFilterObject(const string& filterName, const Json::Value& filterObject);
154 bool ParseScript(void);
155 bool SetupFilters(void);
159 vector<string> m_propertyNames;
160 FilterTool::FilterSettings* m_settings;
161 FilterEngine<BamAlignmentChecker> m_filterEngine;
164 // ---------------------------------------------
165 // FilterSettings implementation
167 struct FilterTool::FilterSettings {
169 // ----------------------------------
173 bool HasInputBamFilename;
174 bool HasOutputBamFilename;
176 bool HasScriptFilename;
177 bool IsForceCompression;
180 vector<string> InputFiles;
181 string OutputFilename;
183 string ScriptFilename;
185 // -----------------------------------
186 // General filter opts
189 bool HasAlignmentFlagFilter;
190 bool HasInsertSizeFilter;
191 bool HasMapQualityFilter;
193 bool HasQueryBasesFilter;
195 // bool HasTagFilters;
198 string AlignmentFlagFilter;
199 string InsertSizeFilter;
201 string MapQualityFilter;
202 string QueryBasesFilter;
204 // vector<string> TagFilters;
206 // -----------------------------------
207 // AlignmentFlag filter opts
210 bool HasIsDuplicateFilter;
211 bool HasIsFailedQCFilter;
212 bool HasIsFirstMateFilter;
213 bool HasIsMappedFilter;
214 bool HasIsMateMappedFilter;
215 bool HasIsMateReverseStrandFilter;
216 bool HasIsPairedFilter;
217 bool HasIsPrimaryAlignmentFilter;
218 bool HasIsProperPairFilter;
219 bool HasIsReverseStrandFilter;
220 bool HasIsSecondMateFilter;
223 string IsDuplicateFilter;
224 string IsFailedQCFilter;
225 string IsFirstMateFilter;
226 string IsMappedFilter;
227 string IsMateMappedFilter;
228 string IsMateReverseStrandFilter;
229 string IsPairedFilter;
230 string IsPrimaryAlignmentFilter;
231 string IsProperPairFilter;
232 string IsReverseStrandFilter;
233 string IsSecondMateFilter;
235 // ---------------------------------
239 : HasInputBamFilename(false)
240 , HasOutputBamFilename(false)
242 , HasScriptFilename(false)
243 , IsForceCompression(false)
244 , OutputFilename(Options::StandardOut())
245 , HasAlignmentFlagFilter(false)
246 , HasInsertSizeFilter(false)
247 , HasMapQualityFilter(false)
248 , HasNameFilter(false)
249 , HasQueryBasesFilter(false)
250 // , HasTagFilters(false)
251 , HasIsDuplicateFilter(false)
252 , HasIsFailedQCFilter(false)
253 , HasIsFirstMateFilter(false)
254 , HasIsMappedFilter(false)
255 , HasIsMateMappedFilter(false)
256 , HasIsMateReverseStrandFilter(false)
257 , HasIsPairedFilter(false)
258 , HasIsPrimaryAlignmentFilter(false)
259 , HasIsProperPairFilter(false)
260 , HasIsReverseStrandFilter(false)
261 , HasIsSecondMateFilter(false)
262 , IsDuplicateFilter(TRUE_STR)
263 , IsFailedQCFilter(TRUE_STR)
264 , IsFirstMateFilter(TRUE_STR)
265 , IsMappedFilter(TRUE_STR)
266 , IsMateMappedFilter(TRUE_STR)
267 , IsMateReverseStrandFilter(TRUE_STR)
268 , IsPairedFilter(TRUE_STR)
269 , IsPrimaryAlignmentFilter(TRUE_STR)
270 , IsProperPairFilter(TRUE_STR)
271 , IsReverseStrandFilter(TRUE_STR)
272 , IsSecondMateFilter(TRUE_STR)
276 // ---------------------------------------------
277 // FilterTool implementation
279 FilterTool::FilterTool(void)
281 , m_settings(new FilterSettings)
284 // set program details
285 Options::SetProgramInfo("bamtools filter", "filters BAM file(s)", "-in <filename> [-in <filename> ... ] -out <filename> -region <REGION> [ [-script <filename] | [filterOptions] ]");
287 OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
288 Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename, m_settings->InputFiles, IO_Opts, Options::StandardIn());
289 Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
290 Options::AddValueOption("-region", "REGION", "only read data from this genomic region (see README for more details)", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
291 Options::AddValueOption("-script", "filename", "the filter script file (see README for more details)", "", m_settings->HasScriptFilename, m_settings->ScriptFilename, IO_Opts);
292 Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts);
294 OptionGroup* FilterOpts = Options::CreateOptionGroup("General Filters");
295 Options::AddValueOption("-alignmentFlag", "int", "keep reads with this *exact* alignment flag (for more detailed queries, see below)", "", m_settings->HasAlignmentFlagFilter, m_settings->AlignmentFlagFilter, FilterOpts);
296 Options::AddValueOption("-insertSize", "int", "keep reads with insert size that mathces pattern", "", m_settings->HasInsertSizeFilter, m_settings->InsertSizeFilter, FilterOpts);
297 Options::AddValueOption("-mapQuality", "[0-255]", "keep reads with map quality that matches pattern", "", m_settings->HasMapQualityFilter, m_settings->MapQualityFilter, FilterOpts);
298 Options::AddValueOption("-name", "string", "keep reads with name that matches pattern", "", m_settings->HasNameFilter, m_settings->NameFilter, FilterOpts);
299 Options::AddValueOption("-queryBases", "string", "keep reads with motif that mathces pattern", "", m_settings->HasQueryBasesFilter, m_settings->QueryBasesFilter, FilterOpts);
301 // Options::AddValueOption("-tag", "TAG:VALUE", "keep reads with this key=>value pair. If multiple tags are given, reads must match all", "", m_settings->HasTagFilters, m_settings->TagFilters, FilterOpts);
303 OptionGroup* AlignmentFlagOpts = Options::CreateOptionGroup("Alignment Flag Filters");
304 Options::AddValueOption("-isDuplicate", "true/false", "keep only alignments that are marked as duplicate?", "", m_settings->HasIsDuplicateFilter, m_settings->IsDuplicateFilter, AlignmentFlagOpts, TRUE_STR);
305 Options::AddValueOption("-isFailedQC", "true/false", "keep only alignments that failed QC?", "", m_settings->HasIsFailedQCFilter, m_settings->IsFailedQCFilter, AlignmentFlagOpts, TRUE_STR);
306 Options::AddValueOption("-isFirstMate", "true/false", "keep only alignments marked as first mate?", "", m_settings->HasIsFirstMateFilter, m_settings->IsFirstMateFilter, AlignmentFlagOpts, TRUE_STR);
307 Options::AddValueOption("-isMapped", "true/false", "keep only alignments that were mapped?", "", m_settings->HasIsMappedFilter, m_settings->IsMappedFilter, AlignmentFlagOpts, TRUE_STR);
308 Options::AddValueOption("-isMateMapped", "true/false", "keep only alignments with mates that mapped", "", m_settings->HasIsMateMappedFilter, m_settings->IsMateMappedFilter, AlignmentFlagOpts, TRUE_STR);
309 Options::AddValueOption("-isMateReverseStrand", "true/false", "keep only alignments with mate on reverese strand?", "", m_settings->HasIsMateReverseStrandFilter, m_settings->IsMateReverseStrandFilter, AlignmentFlagOpts, TRUE_STR);
310 Options::AddValueOption("-isPaired", "true/false", "keep only alignments that were sequenced as paired?","", m_settings->HasIsPairedFilter, m_settings->IsPairedFilter, AlignmentFlagOpts, TRUE_STR);
311 Options::AddValueOption("-isPrimaryAlignment", "true/false", "keep only alignments marked as primary?", "", m_settings->HasIsPrimaryAlignmentFilter, m_settings->IsPrimaryAlignmentFilter, AlignmentFlagOpts, TRUE_STR);
312 Options::AddValueOption("-isProperPair", "true/false", "keep only alignments that passed PE resolution?", "", m_settings->HasIsProperPairFilter, m_settings->IsProperPairFilter, AlignmentFlagOpts, TRUE_STR);
313 Options::AddValueOption("-isReverseStrand", "true/false", "keep only alignments on reverse strand?", "", m_settings->HasIsReverseStrandFilter, m_settings->IsReverseStrandFilter, AlignmentFlagOpts, TRUE_STR);
314 Options::AddValueOption("-isSecondMate", "true/false", "keep only alignments marked as second mate?", "", m_settings->HasIsSecondMateFilter, m_settings->IsSecondMateFilter, AlignmentFlagOpts, TRUE_STR);
317 FilterTool::~FilterTool(void) {
325 int FilterTool::Help(void) {
326 Options::DisplayHelp();
330 int FilterTool::Run(int argc, char* argv[]) {
332 // parse command line arguments
333 Options::Parse(argc, argv, 1);
335 // run internal FilterTool implementation, return success/fail
336 m_impl = new FilterToolPrivate(m_settings);
338 if ( m_impl->Run() ) return 0;
342 // ---------------------------------------------
343 // FilterToolPrivate implementation
346 FilterTool::FilterToolPrivate::FilterToolPrivate(FilterTool::FilterSettings* settings)
347 : m_settings(settings)
351 FilterTool::FilterToolPrivate::~FilterToolPrivate(void) { }
353 bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filterName, const map<string, string>& propertyTokens) {
355 // dummy temp values for token parsing
358 uint16_t uint16Value;
359 uint32_t uint32Value;
361 PropertyFilterValue::ValueCompareType type;
363 // iterate over property token map
364 map<string, string>::const_iterator mapIter = propertyTokens.begin();
365 map<string, string>::const_iterator mapEnd = propertyTokens.end();
366 for ( ; mapIter != mapEnd; ++mapIter ) {
368 const string& propertyName = (*mapIter).first;
369 const string& token = (*mapIter).second;
371 // ------------------------------
372 // convert token to value & compare type
373 // then add to filter engine
376 if ( propertyName == ISDUPLICATE_PROPERTY ||
377 propertyName == ISFAILEDQC_PROPERTY ||
378 propertyName == ISFIRSTMATE_PROPERTY ||
379 propertyName == ISMAPPED_PROPERTY ||
380 propertyName == ISMATEMAPPED_PROPERTY ||
381 propertyName == ISMATEREVERSESTRAND_PROPERTY ||
382 propertyName == ISPAIRED_PROPERTY ||
383 propertyName == ISPRIMARYALIGNMENT_PROPERTY ||
384 propertyName == ISPROPERPAIR_PROPERTY ||
385 propertyName == ISREVERSESTRAND_PROPERTY ||
386 propertyName == ISSECONDMATE_PROPERTY
389 m_filterEngine.parseToken(token, boolValue, type);
390 m_filterEngine.setProperty(filterName, propertyName, boolValue, type);
393 // int32_t conversion
394 else if ( propertyName == INSERTSIZE_PROPERTY ||
395 propertyName == MATEPOSITION_PROPERTY ||
396 propertyName == POSITION_PROPERTY
399 m_filterEngine.parseToken(token, int32Value, type);
400 m_filterEngine.setProperty(filterName, propertyName, int32Value, type);
403 // uint16_t conversion
404 else if ( propertyName == MAPQUALITY_PROPERTY )
406 m_filterEngine.parseToken(token, uint16Value, type);
407 m_filterEngine.setProperty(filterName, propertyName, uint16Value, type);
410 // uint32_t conversion
411 else if ( propertyName == ALIGNMENTFLAG_PROPERTY )
413 m_filterEngine.parseToken(token, uint32Value, type);
414 m_filterEngine.setProperty(filterName, propertyName, uint32Value, type);
418 else if ( propertyName == MATEREFERENCE_PROPERTY ||
419 propertyName == NAME_PROPERTY ||
420 propertyName == QUERYBASES_PROPERTY ||
421 propertyName == REFERENCE_PROPERTY ||
422 propertyName == CIGAR_PROPERTY
425 m_filterEngine.parseToken(token, stringValue, type);
426 m_filterEngine.setProperty(filterName, propertyName, stringValue, type);
429 // else unknown property
431 cerr << "Unknown property: " << propertyName << "!" << endl;
438 bool FilterTool::FilterToolPrivate::CheckAlignment(const BamAlignment& al) {
439 return m_filterEngine.check(al);
442 const string FilterTool::FilterToolPrivate::GetScriptContents(void) {
444 // open script for reading
445 FILE* inFile = fopen(m_settings->ScriptFilename.c_str(), "rb");
447 cerr << "FilterTool error: Could not open script: " << m_settings->ScriptFilename << " for reading" << endl;
451 // read in entire script contents
453 ostringstream docStream("");
456 // peek ahead, make sure there is data available
457 char ch = fgetc(inFile);
459 if( feof(inFile) ) break;
461 // read next block of data
462 if ( fgets(buffer, 1024, inFile) == 0 ) {
463 cerr << "FilterTool error : could not read from script" << endl;
473 // import buffer contents to document, return
474 string document = docStream.str();
478 void FilterTool::FilterToolPrivate::InitProperties(void) {
480 // store property names in vector
481 m_propertyNames.push_back(ALIGNMENTFLAG_PROPERTY);
482 m_propertyNames.push_back(INSERTSIZE_PROPERTY);
483 m_propertyNames.push_back(ISDUPLICATE_PROPERTY);
484 m_propertyNames.push_back(ISFAILEDQC_PROPERTY);
485 m_propertyNames.push_back(ISFIRSTMATE_PROPERTY);
486 m_propertyNames.push_back(ISMAPPED_PROPERTY);
487 m_propertyNames.push_back(ISMATEMAPPED_PROPERTY);
488 m_propertyNames.push_back(ISMATEREVERSESTRAND_PROPERTY);
489 m_propertyNames.push_back(ISPAIRED_PROPERTY);
490 m_propertyNames.push_back(ISPRIMARYALIGNMENT_PROPERTY);
491 m_propertyNames.push_back(ISPROPERPAIR_PROPERTY);
492 m_propertyNames.push_back(ISREVERSESTRAND_PROPERTY);
493 m_propertyNames.push_back(ISSECONDMATE_PROPERTY);
494 m_propertyNames.push_back(MAPQUALITY_PROPERTY);
495 m_propertyNames.push_back(MATEPOSITION_PROPERTY);
496 m_propertyNames.push_back(MATEREFERENCE_PROPERTY);
497 m_propertyNames.push_back(NAME_PROPERTY);
498 m_propertyNames.push_back(POSITION_PROPERTY);
499 m_propertyNames.push_back(QUERYBASES_PROPERTY);
500 m_propertyNames.push_back(REFERENCE_PROPERTY);
501 m_propertyNames.push_back(CIGAR_PROPERTY);
503 // add vector contents to FilterEngine<BamAlignmentChecker>
504 vector<string>::const_iterator propertyNameIter = m_propertyNames.begin();
505 vector<string>::const_iterator propertyNameEnd = m_propertyNames.end();
506 for ( ; propertyNameIter != propertyNameEnd; ++propertyNameIter )
507 m_filterEngine.addProperty((*propertyNameIter));
510 bool FilterTool::FilterToolPrivate::ParseCommandLine(void) {
512 // add a rule set to filter engine
513 const string CMD = "COMMAND_LINE";
514 m_filterEngine.addFilter(CMD);
516 // map property names to command line args
517 map<string, string> propertyTokens;
518 if ( m_settings->HasAlignmentFlagFilter ) propertyTokens.insert( make_pair(ALIGNMENTFLAG_PROPERTY, m_settings->AlignmentFlagFilter) );
519 if ( m_settings->HasInsertSizeFilter ) propertyTokens.insert( make_pair(INSERTSIZE_PROPERTY, m_settings->InsertSizeFilter) );
520 if ( m_settings->HasIsDuplicateFilter ) propertyTokens.insert( make_pair(ISDUPLICATE_PROPERTY, m_settings->IsDuplicateFilter) );
521 if ( m_settings->HasIsFailedQCFilter ) propertyTokens.insert( make_pair(ISFAILEDQC_PROPERTY, m_settings->IsFailedQCFilter) );
522 if ( m_settings->HasIsFirstMateFilter ) propertyTokens.insert( make_pair(ISFIRSTMATE_PROPERTY, m_settings->IsFirstMateFilter) );
523 if ( m_settings->HasIsMappedFilter ) propertyTokens.insert( make_pair(ISMAPPED_PROPERTY, m_settings->IsMappedFilter) );
524 if ( m_settings->HasIsMateMappedFilter ) propertyTokens.insert( make_pair(ISMATEMAPPED_PROPERTY, m_settings->IsMateMappedFilter) );
525 if ( m_settings->HasIsMateReverseStrandFilter ) propertyTokens.insert( make_pair(ISMATEREVERSESTRAND_PROPERTY, m_settings->IsMateReverseStrandFilter) );
526 if ( m_settings->HasIsPairedFilter ) propertyTokens.insert( make_pair(ISPAIRED_PROPERTY, m_settings->IsPairedFilter) );
527 if ( m_settings->HasIsPrimaryAlignmentFilter ) propertyTokens.insert( make_pair(ISPRIMARYALIGNMENT_PROPERTY, m_settings->IsPrimaryAlignmentFilter) );
528 if ( m_settings->HasIsProperPairFilter ) propertyTokens.insert( make_pair(ISPROPERPAIR_PROPERTY, m_settings->IsProperPairFilter) );
529 if ( m_settings->HasIsReverseStrandFilter ) propertyTokens.insert( make_pair(ISREVERSESTRAND_PROPERTY, m_settings->IsReverseStrandFilter) );
530 if ( m_settings->HasIsSecondMateFilter ) propertyTokens.insert( make_pair(ISSECONDMATE_PROPERTY, m_settings->IsSecondMateFilter) );
531 if ( m_settings->HasMapQualityFilter ) propertyTokens.insert( make_pair(MAPQUALITY_PROPERTY, m_settings->MapQualityFilter) );
532 if ( m_settings->HasNameFilter ) propertyTokens.insert( make_pair(NAME_PROPERTY, m_settings->NameFilter) );
533 if ( m_settings->HasQueryBasesFilter ) propertyTokens.insert( make_pair(QUERYBASES_PROPERTY, m_settings->QueryBasesFilter) );
535 // send add these properties to filter set "COMMAND_LINE"
536 return AddPropertyTokensToFilter(CMD, propertyTokens);
539 bool FilterTool::FilterToolPrivate::ParseFilterObject(const string& filterName, const Json::Value& filterObject) {
541 // filter object parsing variables
542 Json::Value null(Json::nullValue);
543 Json::Value propertyValue;
546 map<string, string> propertyTokens;
548 // iterate over known properties
549 vector<string>::const_iterator propertyNameIter = m_propertyNames.begin();
550 vector<string>::const_iterator propertyNameEnd = m_propertyNames.end();
551 for ( ; propertyNameIter != propertyNameEnd; ++propertyNameIter ) {
552 const string& propertyName = (*propertyNameIter);
554 // if property defined in filter, add to token list
555 propertyValue = filterObject.get(propertyName, null);
556 if ( propertyValue != null )
557 propertyTokens.insert( make_pair(propertyName, propertyValue.asString()) );
560 // add this filter to engin
561 m_filterEngine.addFilter(filterName);
563 // add token list to this filter
564 return AddPropertyTokensToFilter(filterName, propertyTokens);
567 bool FilterTool::FilterToolPrivate::ParseScript(void) {
569 // read in script contents from file
570 const string document = GetScriptContents();
572 // set up JsonCPP reader and attempt to parse script
575 if ( !reader.parse(document, root) ) {
576 // use built-in error reporting mechanism to alert user what was wrong with the script
577 cerr << "Failed to parse configuration\n" << reader.getFormatedErrorMessages();
581 // initialize return status
584 // see if root object contains multiple filters
585 const Json::Value filters = root["filters"];
586 if ( !filters.isNull() ) {
588 // iterate over any filters found
590 Json::Value::const_iterator filtersIter = filters.begin();
591 Json::Value::const_iterator filtersEnd = filters.end();
592 for ( ; filtersIter != filtersEnd; ++filtersIter, ++filterIndex ) {
593 Json::Value filter = (*filtersIter);
595 // convert filter index to string
598 // if id tag supplied
599 const Json::Value id = filter["id"];
600 if ( !id.isNull() ) {
601 filterName = id.asString();
606 stringstream convert;
607 convert << filterIndex;
608 filterName = convert.str();
611 // create & parse filter
612 success &= ParseFilterObject(filterName, filter);
615 // see if user defined a "rule" for these filters
616 // otherwise, use filter engine's default rule behavior
617 string ruleString("");
618 const Json::Value rule = root["rule"];
619 if ( rule.isString() )
620 ruleString = rule.asString();
621 m_filterEngine.setRule(ruleString);
623 // return success/fail
627 // otherwise, root is the only filter (just contains properties)
628 // create & parse filter named "ROOT"
629 else success = ParseFilterObject("ROOT", root);
631 // return success/failure
636 bool FilterTool::FilterToolPrivate::Run(void) {
638 // set to default input if none provided
639 if ( !m_settings->HasInputBamFilename )
640 m_settings->InputFiles.push_back(Options::StandardIn());
642 // initialize defined properties & user-specified filters
644 if ( !SetupFilters() ) return 1;
646 // open reader without index
647 BamMultiReader reader;
648 if (!reader.Open(m_settings->InputFiles, false, true)) {
649 cerr << "Could not open input files for reading." << endl;
652 const string headerText = reader.GetHeaderText();
653 filterToolReferences = reader.GetReferenceData();
657 bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression );
658 if (!writer.Open(m_settings->OutputFilename, headerText, filterToolReferences, writeUncompressed)) {
659 cerr << "Could not open " << m_settings->OutputFilename << " for writing." << endl;
665 // if no region specified, filter entire file
666 if ( !m_settings->HasRegion ) {
667 while ( reader.GetNextAlignment(al) ) {
668 if ( CheckAlignment(al) )
669 writer.SaveAlignment(al);
673 // otherwise attempt to use region as constraint
676 // if region string parses OK
678 if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
680 // attempt to re-open reader with index files
682 bool openedOK = reader.Open(m_settings->InputFiles, true, true );
686 cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl;
690 // if index data available, we can use SetRegion
691 if ( reader.IsIndexLoaded() ) {
693 // attempt to use SetRegion(), if failed report error
694 if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) {
695 cerr << "ERROR: Region requested, but could not set BamReader region to REGION: " << m_settings->Region << " Aborting." << endl;
700 // everything checks out, just iterate through specified region, filtering alignments
701 while ( reader.GetNextAlignmentCore(al) )
702 if ( CheckAlignment(al) )
703 writer.SaveAlignment(al);
706 // no index data available, we have to iterate through until we
707 // find overlapping alignments
709 while( reader.GetNextAlignmentCore(al) ) {
710 if ( (al.RefID >= region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) &&
711 (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) )
713 if ( CheckAlignment(al) )
714 writer.SaveAlignment(al);
720 // error parsing REGION string
722 cerr << "ERROR: Could not parse REGION - " << m_settings->Region << endl;
723 cerr << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl;
735 bool FilterTool::FilterToolPrivate::SetupFilters(void) {
737 // add known properties to FilterEngine<BamAlignmentChecker>
740 // parse script for filter rules, if given
741 if ( m_settings->HasScriptFilename ) return ParseScript();
743 // otherwise check command line for filters
744 else return ParseCommandLine();