1 // ***************************************************************************
2 // bamtools_filter.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 4 October 2010
7 // ---------------------------------------------------------------------------
8 // Filters BAM file(s) according to some user-specified criteria.
9 // ***************************************************************************
16 #include "bamtools_filter.h"
17 #include "bamtools_filter_engine.h"
18 #include "bamtools_options.h"
19 #include "bamtools_utilities.h"
20 #include "BamReader.h"
21 #include "BamMultiReader.h"
22 #include "BamWriter.h"
23 #include "jsoncpp/json.h"
25 using namespace BamTools;
30 // -------------------------------
31 // string literal constants
34 const string ALIGNMENTFLAG_PROPERTY = "alignmentFlag";
35 const string CIGAR_PROPERTY = "cigar";
36 const string INSERTSIZE_PROPERTY = "insertSize";
37 const string ISDUPLICATE_PROPERTY = "isDuplicate";
38 const string ISFAILEDQC_PROPERTY = "isFailedQC";
39 const string ISFIRSTMATE_PROPERTY = "isFirstMate";
40 const string ISMAPPED_PROPERTY = "isMapped";
41 const string ISMATEMAPPED_PROPERTY = "isMateMapped";
42 const string ISMATEREVERSESTRAND_PROPERTY = "isMateReverseStrand";
43 const string ISPAIRED_PROPERTY = "isPaired";
44 const string ISPRIMARYALIGNMENT_PROPERTY = "isPrimaryAlignment";
45 const string ISPROPERPAIR_PROPERTY = "isProperPair";
46 const string ISREVERSESTRAND_PROPERTY = "isReverseStrand";
47 const string ISSECONDMATE_PROPERTY = "isSecondMate";
48 const string MAPQUALITY_PROPERTY = "mapQuality";
49 const string MATEPOSITION_PROPERTY = "matePosition";
50 const string MATEREFERENCE_PROPERTY = "mateReference";
51 const string NAME_PROPERTY = "name";
52 const string POSITION_PROPERTY = "position";
53 const string QUERYBASES_PROPERTY = "queryBases";
54 const string REFERENCE_PROPERTY = "reference";
55 const string TAG_PROPERTY = "tag";
58 const string TRUE_STR = "true";
59 const string FALSE_STR = "false";
61 RefVector filterToolReferences;
63 struct BamAlignmentChecker {
64 bool check(const PropertyFilter& filter, const BamAlignment& al) {
66 bool keepAlignment = true;
67 const PropertyMap& properties = filter.Properties;
68 PropertyMap::const_iterator propertyIter = properties.begin();
69 PropertyMap::const_iterator propertyEnd = properties.end();
70 for ( ; propertyIter != propertyEnd; ++propertyIter ) {
72 // check alignment data field depending on propertyName
73 const string& propertyName = (*propertyIter).first;
74 const PropertyFilterValue& valueFilter = (*propertyIter).second;
76 if ( propertyName == ALIGNMENTFLAG_PROPERTY ) keepAlignment &= valueFilter.check(al.AlignmentFlag);
77 else if ( propertyName == CIGAR_PROPERTY ) {
79 const vector<CigarOp>& cigarData = al.CigarData;
80 if ( !cigarData.empty() ) {
81 vector<CigarOp>::const_iterator cigarBegin = cigarData.begin();
82 vector<CigarOp>::const_iterator cigarIter = cigarBegin;
83 vector<CigarOp>::const_iterator cigarEnd = cigarData.end();
84 for ( ; cigarIter != cigarEnd; ++cigarIter ) {
85 const CigarOp& op = (*cigarIter);
86 cigarSs << op.Length << op.Type;
88 keepAlignment &= valueFilter.check(cigarSs.str());
91 else if ( propertyName == INSERTSIZE_PROPERTY ) keepAlignment &= valueFilter.check(al.InsertSize);
92 else if ( propertyName == ISDUPLICATE_PROPERTY ) keepAlignment &= valueFilter.check(al.IsDuplicate());
93 else if ( propertyName == ISFAILEDQC_PROPERTY ) keepAlignment &= valueFilter.check(al.IsFailedQC());
94 else if ( propertyName == ISFIRSTMATE_PROPERTY ) keepAlignment &= valueFilter.check(al.IsFirstMate());
95 else if ( propertyName == ISMAPPED_PROPERTY ) keepAlignment &= valueFilter.check(al.IsMapped());
96 else if ( propertyName == ISMATEMAPPED_PROPERTY ) keepAlignment &= valueFilter.check(al.IsMateMapped());
97 else if ( propertyName == ISMATEREVERSESTRAND_PROPERTY ) keepAlignment &= valueFilter.check(al.IsMateReverseStrand());
98 else if ( propertyName == ISPAIRED_PROPERTY ) keepAlignment &= valueFilter.check(al.IsPaired());
99 else if ( propertyName == ISPRIMARYALIGNMENT_PROPERTY ) keepAlignment &= valueFilter.check(al.IsPrimaryAlignment());
100 else if ( propertyName == ISPROPERPAIR_PROPERTY ) keepAlignment &= valueFilter.check(al.IsProperPair());
101 else if ( propertyName == ISREVERSESTRAND_PROPERTY ) keepAlignment &= valueFilter.check(al.IsReverseStrand());
102 else if ( propertyName == ISSECONDMATE_PROPERTY ) keepAlignment &= valueFilter.check(al.IsSecondMate());
103 else if ( propertyName == MAPQUALITY_PROPERTY ) keepAlignment &= valueFilter.check(al.MapQuality);
104 else if ( propertyName == MATEPOSITION_PROPERTY ) keepAlignment &= ( al.IsPaired() && al.IsMateMapped() && valueFilter.check(al.MateRefID) );
105 else if ( propertyName == MATEREFERENCE_PROPERTY ) {
106 if ( !al.IsPaired() || !al.IsMateMapped() ) return false;
107 BAMTOOLS_ASSERT_MESSAGE( (al.MateRefID>=0 && (al.MateRefID<(int)filterToolReferences.size())), "Invalid MateRefID");
108 const string& refName = filterToolReferences.at(al.MateRefID).RefName;
109 keepAlignment &= valueFilter.check(refName);
111 else if ( propertyName == NAME_PROPERTY ) keepAlignment &= valueFilter.check(al.Name);
112 else if ( propertyName == POSITION_PROPERTY ) keepAlignment &= valueFilter.check(al.Position);
113 else if ( propertyName == QUERYBASES_PROPERTY ) keepAlignment &= valueFilter.check(al.QueryBases);
114 else if ( propertyName == REFERENCE_PROPERTY ) {
115 BAMTOOLS_ASSERT_MESSAGE( (al.RefID>=0 && (al.RefID<(int)filterToolReferences.size())), "Invalid RefID");
116 const string& refName = filterToolReferences.at(al.RefID).RefName;
117 keepAlignment &= valueFilter.check(refName);
119 else if ( propertyName == TAG_PROPERTY ) keepAlignment &= checkAlignmentTag(valueFilter, al);
120 else BAMTOOLS_ASSERT_UNREACHABLE;
122 // if alignment fails at ANY point, just quit and return false
123 if ( !keepAlignment ) return false;
126 BAMTOOLS_ASSERT_MESSAGE( keepAlignment, "Error in BamAlignmentChecker... keepAlignment should be true here");
127 return keepAlignment;
130 bool checkAlignmentTag(const PropertyFilterValue& valueFilter, const BamAlignment& al) {
132 // ensure filter contains string data
133 Variant entireTagFilter = valueFilter.Value;
134 if ( !entireTagFilter.is_type<string>() ) return false;
136 // localize string from variant
137 const string& entireTagFilterString = entireTagFilter.get<string>();
139 // ensure we have at least "XX:x"
140 if ( entireTagFilterString.length() < 4 ) return false;
142 // get tagName & lookup in alignment
143 // if found, set tagType to tag type character
144 // if not found, return false
145 const string& tagName = entireTagFilterString.substr(0,2);
147 if ( !al.GetTagType(tagName, tagType) ) return false;
149 // remove tagName & ":" from beginning tagFilter
150 string tagFilterString = entireTagFilterString.substr(3);
152 // switch on tag type to set tag query value & parse filter token
153 int32_t intFilterValue, intQueryValue;
154 uint32_t uintFilterValue, uintQueryValue;
155 float realFilterValue, realQueryValue;
156 string stringFilterValue, stringQueryValue;
158 PropertyFilterValue tagFilter;
159 PropertyFilterValue::ValueCompareType compareType;
160 bool keepAlignment = false;
163 // signed int tag type
167 if ( al.GetTag(tagName, intQueryValue) ) {
168 if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, intFilterValue, compareType) ) {
169 tagFilter.Value = intFilterValue;
170 tagFilter.Type = compareType;
171 keepAlignment = tagFilter.check(intQueryValue);
176 // unsigned int tag type
180 if ( al.GetTag(tagName, uintQueryValue) ) {
181 if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, uintFilterValue, compareType) ) {
182 tagFilter.Value = uintFilterValue;
183 tagFilter.Type = compareType;
184 keepAlignment = tagFilter.check(uintQueryValue);
191 if ( al.GetTag(tagName, realQueryValue) ) {
192 if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, realFilterValue, compareType) ) {
193 tagFilter.Value = realFilterValue;
194 tagFilter.Type = compareType;
195 keepAlignment = tagFilter.check(realQueryValue);
204 if ( al.GetTag(tagName, stringQueryValue) ) {
205 if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, stringFilterValue, compareType) ) {
206 tagFilter.Value = stringFilterValue;
207 tagFilter.Type = compareType;
208 keepAlignment = tagFilter.check(stringQueryValue);
215 keepAlignment = false;
218 return keepAlignment;
222 } // namespace BamTools
224 // ---------------------------------------------
225 // FilterToolPrivate declaration
227 class FilterTool::FilterToolPrivate {
231 FilterToolPrivate(FilterTool::FilterSettings* settings);
232 ~FilterToolPrivate(void);
234 // 'public' interface
240 bool AddPropertyTokensToFilter(const string& filterName, const map<string, string>& propertyTokens);
241 bool CheckAlignment(const BamAlignment& al);
242 const string GetScriptContents(void);
243 void InitProperties(void);
244 bool ParseCommandLine(void);
245 bool ParseFilterObject(const string& filterName, const Json::Value& filterObject);
246 bool ParseScript(void);
247 bool SetupFilters(void);
251 vector<string> m_propertyNames;
252 FilterTool::FilterSettings* m_settings;
253 FilterEngine<BamAlignmentChecker> m_filterEngine;
256 // ---------------------------------------------
257 // FilterSettings implementation
259 struct FilterTool::FilterSettings {
261 // ----------------------------------
265 bool HasInputBamFilename;
266 bool HasOutputBamFilename;
268 bool HasScriptFilename;
269 bool IsForceCompression;
272 vector<string> InputFiles;
273 string OutputFilename;
275 string ScriptFilename;
277 // -----------------------------------
278 // General filter opts
281 bool HasAlignmentFlagFilter;
282 bool HasInsertSizeFilter;
283 bool HasMapQualityFilter;
285 bool HasQueryBasesFilter;
286 bool HasTagFilter; //(s)
289 string AlignmentFlagFilter;
290 string InsertSizeFilter;
292 string MapQualityFilter;
293 string QueryBasesFilter;
294 string TagFilter; // support multiple ?
296 // -----------------------------------
297 // AlignmentFlag filter opts
300 bool HasIsDuplicateFilter;
301 bool HasIsFailedQCFilter;
302 bool HasIsFirstMateFilter;
303 bool HasIsMappedFilter;
304 bool HasIsMateMappedFilter;
305 bool HasIsMateReverseStrandFilter;
306 bool HasIsPairedFilter;
307 bool HasIsPrimaryAlignmentFilter;
308 bool HasIsProperPairFilter;
309 bool HasIsReverseStrandFilter;
310 bool HasIsSecondMateFilter;
313 string IsDuplicateFilter;
314 string IsFailedQCFilter;
315 string IsFirstMateFilter;
316 string IsMappedFilter;
317 string IsMateMappedFilter;
318 string IsMateReverseStrandFilter;
319 string IsPairedFilter;
320 string IsPrimaryAlignmentFilter;
321 string IsProperPairFilter;
322 string IsReverseStrandFilter;
323 string IsSecondMateFilter;
325 // ---------------------------------
329 : HasInputBamFilename(false)
330 , HasOutputBamFilename(false)
332 , HasScriptFilename(false)
333 , IsForceCompression(false)
334 , OutputFilename(Options::StandardOut())
335 , HasAlignmentFlagFilter(false)
336 , HasInsertSizeFilter(false)
337 , HasMapQualityFilter(false)
338 , HasNameFilter(false)
339 , HasQueryBasesFilter(false)
340 , HasTagFilter(false)
341 , HasIsDuplicateFilter(false)
342 , HasIsFailedQCFilter(false)
343 , HasIsFirstMateFilter(false)
344 , HasIsMappedFilter(false)
345 , HasIsMateMappedFilter(false)
346 , HasIsMateReverseStrandFilter(false)
347 , HasIsPairedFilter(false)
348 , HasIsPrimaryAlignmentFilter(false)
349 , HasIsProperPairFilter(false)
350 , HasIsReverseStrandFilter(false)
351 , HasIsSecondMateFilter(false)
352 , IsDuplicateFilter(TRUE_STR)
353 , IsFailedQCFilter(TRUE_STR)
354 , IsFirstMateFilter(TRUE_STR)
355 , IsMappedFilter(TRUE_STR)
356 , IsMateMappedFilter(TRUE_STR)
357 , IsMateReverseStrandFilter(TRUE_STR)
358 , IsPairedFilter(TRUE_STR)
359 , IsPrimaryAlignmentFilter(TRUE_STR)
360 , IsProperPairFilter(TRUE_STR)
361 , IsReverseStrandFilter(TRUE_STR)
362 , IsSecondMateFilter(TRUE_STR)
366 // ---------------------------------------------
367 // FilterTool implementation
369 FilterTool::FilterTool(void)
371 , m_settings(new FilterSettings)
374 // set program details
375 Options::SetProgramInfo("bamtools filter", "filters BAM file(s)", "[-in <filename> -in <filename> ...] [-out <filename> | [-forceCompression]] [-region <REGION>] [ [-script <filename] | [filterOptions] ]");
377 OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
378 Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename, m_settings->InputFiles, IO_Opts, Options::StandardIn());
379 Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
380 Options::AddValueOption("-region", "REGION", "only read data from this genomic region (see README for more details)", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
381 Options::AddValueOption("-script", "filename", "the filter script file (see README for more details)", "", m_settings->HasScriptFilename, m_settings->ScriptFilename, IO_Opts);
382 Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts);
384 OptionGroup* FilterOpts = Options::CreateOptionGroup("General Filters");
385 Options::AddValueOption("-alignmentFlag", "int", "keep reads with this *exact* alignment flag (for more detailed queries, see below)", "", m_settings->HasAlignmentFlagFilter, m_settings->AlignmentFlagFilter, FilterOpts);
386 Options::AddValueOption("-insertSize", "int", "keep reads with insert size that mathces pattern", "", m_settings->HasInsertSizeFilter, m_settings->InsertSizeFilter, FilterOpts);
387 Options::AddValueOption("-mapQuality", "[0-255]", "keep reads with map quality that matches pattern", "", m_settings->HasMapQualityFilter, m_settings->MapQualityFilter, FilterOpts);
388 Options::AddValueOption("-name", "string", "keep reads with name that matches pattern", "", m_settings->HasNameFilter, m_settings->NameFilter, FilterOpts);
389 Options::AddValueOption("-queryBases", "string", "keep reads with motif that mathces pattern", "", m_settings->HasQueryBasesFilter, m_settings->QueryBasesFilter, FilterOpts);
390 Options::AddValueOption("-tag", "TAG:VALUE", "keep reads with this key=>value pair", "", m_settings->HasTagFilter, m_settings->TagFilter, FilterOpts);
392 OptionGroup* AlignmentFlagOpts = Options::CreateOptionGroup("Alignment Flag Filters");
393 Options::AddValueOption("-isDuplicate", "true/false", "keep only alignments that are marked as duplicate?", "", m_settings->HasIsDuplicateFilter, m_settings->IsDuplicateFilter, AlignmentFlagOpts, TRUE_STR);
394 Options::AddValueOption("-isFailedQC", "true/false", "keep only alignments that failed QC?", "", m_settings->HasIsFailedQCFilter, m_settings->IsFailedQCFilter, AlignmentFlagOpts, TRUE_STR);
395 Options::AddValueOption("-isFirstMate", "true/false", "keep only alignments marked as first mate?", "", m_settings->HasIsFirstMateFilter, m_settings->IsFirstMateFilter, AlignmentFlagOpts, TRUE_STR);
396 Options::AddValueOption("-isMapped", "true/false", "keep only alignments that were mapped?", "", m_settings->HasIsMappedFilter, m_settings->IsMappedFilter, AlignmentFlagOpts, TRUE_STR);
397 Options::AddValueOption("-isMateMapped", "true/false", "keep only alignments with mates that mapped", "", m_settings->HasIsMateMappedFilter, m_settings->IsMateMappedFilter, AlignmentFlagOpts, TRUE_STR);
398 Options::AddValueOption("-isMateReverseStrand", "true/false", "keep only alignments with mate on reverese strand?", "", m_settings->HasIsMateReverseStrandFilter, m_settings->IsMateReverseStrandFilter, AlignmentFlagOpts, TRUE_STR);
399 Options::AddValueOption("-isPaired", "true/false", "keep only alignments that were sequenced as paired?","", m_settings->HasIsPairedFilter, m_settings->IsPairedFilter, AlignmentFlagOpts, TRUE_STR);
400 Options::AddValueOption("-isPrimaryAlignment", "true/false", "keep only alignments marked as primary?", "", m_settings->HasIsPrimaryAlignmentFilter, m_settings->IsPrimaryAlignmentFilter, AlignmentFlagOpts, TRUE_STR);
401 Options::AddValueOption("-isProperPair", "true/false", "keep only alignments that passed PE resolution?", "", m_settings->HasIsProperPairFilter, m_settings->IsProperPairFilter, AlignmentFlagOpts, TRUE_STR);
402 Options::AddValueOption("-isReverseStrand", "true/false", "keep only alignments on reverse strand?", "", m_settings->HasIsReverseStrandFilter, m_settings->IsReverseStrandFilter, AlignmentFlagOpts, TRUE_STR);
403 Options::AddValueOption("-isSecondMate", "true/false", "keep only alignments marked as second mate?", "", m_settings->HasIsSecondMateFilter, m_settings->IsSecondMateFilter, AlignmentFlagOpts, TRUE_STR);
406 FilterTool::~FilterTool(void) {
414 int FilterTool::Help(void) {
415 Options::DisplayHelp();
419 int FilterTool::Run(int argc, char* argv[]) {
421 // parse command line arguments
422 Options::Parse(argc, argv, 1);
424 // run internal FilterTool implementation, return success/fail
425 m_impl = new FilterToolPrivate(m_settings);
427 if ( m_impl->Run() ) return 0;
431 // ---------------------------------------------
432 // FilterToolPrivate implementation
435 FilterTool::FilterToolPrivate::FilterToolPrivate(FilterTool::FilterSettings* settings)
436 : m_settings(settings)
440 FilterTool::FilterToolPrivate::~FilterToolPrivate(void) { }
442 bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filterName, const map<string, string>& propertyTokens) {
444 // dummy temp values for token parsing
447 uint16_t uint16Value;
448 uint32_t uint32Value;
450 PropertyFilterValue::ValueCompareType type;
452 // iterate over property token map
453 map<string, string>::const_iterator mapIter = propertyTokens.begin();
454 map<string, string>::const_iterator mapEnd = propertyTokens.end();
455 for ( ; mapIter != mapEnd; ++mapIter ) {
457 const string& propertyName = (*mapIter).first;
458 const string& token = (*mapIter).second;
460 // ------------------------------
461 // convert token to value & compare type
462 // then add to filter engine
465 if ( propertyName == ISDUPLICATE_PROPERTY ||
466 propertyName == ISFAILEDQC_PROPERTY ||
467 propertyName == ISFIRSTMATE_PROPERTY ||
468 propertyName == ISMAPPED_PROPERTY ||
469 propertyName == ISMATEMAPPED_PROPERTY ||
470 propertyName == ISMATEREVERSESTRAND_PROPERTY ||
471 propertyName == ISPAIRED_PROPERTY ||
472 propertyName == ISPRIMARYALIGNMENT_PROPERTY ||
473 propertyName == ISPROPERPAIR_PROPERTY ||
474 propertyName == ISREVERSESTRAND_PROPERTY ||
475 propertyName == ISSECONDMATE_PROPERTY
478 FilterEngine<BamAlignmentChecker>::parseToken(token, boolValue, type);
479 m_filterEngine.setProperty(filterName, propertyName, boolValue, type);
482 // int32_t conversion
483 else if ( propertyName == INSERTSIZE_PROPERTY ||
484 propertyName == MATEPOSITION_PROPERTY ||
485 propertyName == POSITION_PROPERTY
488 FilterEngine<BamAlignmentChecker>::parseToken(token, int32Value, type);
489 m_filterEngine.setProperty(filterName, propertyName, int32Value, type);
492 // uint16_t conversion
493 else if ( propertyName == MAPQUALITY_PROPERTY )
495 FilterEngine<BamAlignmentChecker>::parseToken(token, uint16Value, type);
496 m_filterEngine.setProperty(filterName, propertyName, uint16Value, type);
499 // uint32_t conversion
500 else if ( propertyName == ALIGNMENTFLAG_PROPERTY )
502 FilterEngine<BamAlignmentChecker>::parseToken(token, uint32Value, type);
503 m_filterEngine.setProperty(filterName, propertyName, uint32Value, type);
507 else if ( propertyName == CIGAR_PROPERTY ||
508 propertyName == MATEREFERENCE_PROPERTY ||
509 propertyName == NAME_PROPERTY ||
510 propertyName == QUERYBASES_PROPERTY ||
511 propertyName == REFERENCE_PROPERTY
514 FilterEngine<BamAlignmentChecker>::parseToken(token, stringValue, type);
515 m_filterEngine.setProperty(filterName, propertyName, stringValue, type);
518 else if (propertyName == TAG_PROPERTY ) {
519 // this will be stored directly as the TAG:VALUE token
520 // (VALUE may contain compare ops, will be parsed out later)
521 m_filterEngine.setProperty(filterName, propertyName, token, PropertyFilterValue::EXACT);
524 // else unknown property
526 cerr << "Unknown property: " << propertyName << "!" << endl;
533 bool FilterTool::FilterToolPrivate::CheckAlignment(const BamAlignment& al) {
534 return m_filterEngine.check(al);
537 const string FilterTool::FilterToolPrivate::GetScriptContents(void) {
539 // open script for reading
540 FILE* inFile = fopen(m_settings->ScriptFilename.c_str(), "rb");
542 cerr << "FilterTool error: Could not open script: " << m_settings->ScriptFilename << " for reading" << endl;
546 // read in entire script contents
548 ostringstream docStream("");
551 // peek ahead, make sure there is data available
552 char ch = fgetc(inFile);
554 if( feof(inFile) ) break;
556 // read next block of data
557 if ( fgets(buffer, 1024, inFile) == 0 ) {
558 cerr << "FilterTool error : could not read from script" << endl;
568 // import buffer contents to document, return
569 string document = docStream.str();
573 void FilterTool::FilterToolPrivate::InitProperties(void) {
575 // store property names in vector
576 m_propertyNames.push_back(ALIGNMENTFLAG_PROPERTY);
577 m_propertyNames.push_back(CIGAR_PROPERTY);
578 m_propertyNames.push_back(INSERTSIZE_PROPERTY);
579 m_propertyNames.push_back(ISDUPLICATE_PROPERTY);
580 m_propertyNames.push_back(ISFAILEDQC_PROPERTY);
581 m_propertyNames.push_back(ISFIRSTMATE_PROPERTY);
582 m_propertyNames.push_back(ISMAPPED_PROPERTY);
583 m_propertyNames.push_back(ISMATEMAPPED_PROPERTY);
584 m_propertyNames.push_back(ISMATEREVERSESTRAND_PROPERTY);
585 m_propertyNames.push_back(ISPAIRED_PROPERTY);
586 m_propertyNames.push_back(ISPRIMARYALIGNMENT_PROPERTY);
587 m_propertyNames.push_back(ISPROPERPAIR_PROPERTY);
588 m_propertyNames.push_back(ISREVERSESTRAND_PROPERTY);
589 m_propertyNames.push_back(ISSECONDMATE_PROPERTY);
590 m_propertyNames.push_back(MAPQUALITY_PROPERTY);
591 m_propertyNames.push_back(MATEPOSITION_PROPERTY);
592 m_propertyNames.push_back(MATEREFERENCE_PROPERTY);
593 m_propertyNames.push_back(NAME_PROPERTY);
594 m_propertyNames.push_back(POSITION_PROPERTY);
595 m_propertyNames.push_back(QUERYBASES_PROPERTY);
596 m_propertyNames.push_back(REFERENCE_PROPERTY);
597 m_propertyNames.push_back(TAG_PROPERTY);
599 // add vector contents to FilterEngine<BamAlignmentChecker>
600 vector<string>::const_iterator propertyNameIter = m_propertyNames.begin();
601 vector<string>::const_iterator propertyNameEnd = m_propertyNames.end();
602 for ( ; propertyNameIter != propertyNameEnd; ++propertyNameIter )
603 m_filterEngine.addProperty((*propertyNameIter));
606 bool FilterTool::FilterToolPrivate::ParseCommandLine(void) {
608 // add a rule set to filter engine
609 const string CMD = "COMMAND_LINE";
610 m_filterEngine.addFilter(CMD);
612 // map property names to command line args
613 map<string, string> propertyTokens;
614 if ( m_settings->HasAlignmentFlagFilter ) propertyTokens.insert( make_pair(ALIGNMENTFLAG_PROPERTY, m_settings->AlignmentFlagFilter) );
615 if ( m_settings->HasInsertSizeFilter ) propertyTokens.insert( make_pair(INSERTSIZE_PROPERTY, m_settings->InsertSizeFilter) );
616 if ( m_settings->HasIsDuplicateFilter ) propertyTokens.insert( make_pair(ISDUPLICATE_PROPERTY, m_settings->IsDuplicateFilter) );
617 if ( m_settings->HasIsFailedQCFilter ) propertyTokens.insert( make_pair(ISFAILEDQC_PROPERTY, m_settings->IsFailedQCFilter) );
618 if ( m_settings->HasIsFirstMateFilter ) propertyTokens.insert( make_pair(ISFIRSTMATE_PROPERTY, m_settings->IsFirstMateFilter) );
619 if ( m_settings->HasIsMappedFilter ) propertyTokens.insert( make_pair(ISMAPPED_PROPERTY, m_settings->IsMappedFilter) );
620 if ( m_settings->HasIsMateMappedFilter ) propertyTokens.insert( make_pair(ISMATEMAPPED_PROPERTY, m_settings->IsMateMappedFilter) );
621 if ( m_settings->HasIsMateReverseStrandFilter ) propertyTokens.insert( make_pair(ISMATEREVERSESTRAND_PROPERTY, m_settings->IsMateReverseStrandFilter) );
622 if ( m_settings->HasIsPairedFilter ) propertyTokens.insert( make_pair(ISPAIRED_PROPERTY, m_settings->IsPairedFilter) );
623 if ( m_settings->HasIsPrimaryAlignmentFilter ) propertyTokens.insert( make_pair(ISPRIMARYALIGNMENT_PROPERTY, m_settings->IsPrimaryAlignmentFilter) );
624 if ( m_settings->HasIsProperPairFilter ) propertyTokens.insert( make_pair(ISPROPERPAIR_PROPERTY, m_settings->IsProperPairFilter) );
625 if ( m_settings->HasIsReverseStrandFilter ) propertyTokens.insert( make_pair(ISREVERSESTRAND_PROPERTY, m_settings->IsReverseStrandFilter) );
626 if ( m_settings->HasIsSecondMateFilter ) propertyTokens.insert( make_pair(ISSECONDMATE_PROPERTY, m_settings->IsSecondMateFilter) );
627 if ( m_settings->HasMapQualityFilter ) propertyTokens.insert( make_pair(MAPQUALITY_PROPERTY, m_settings->MapQualityFilter) );
628 if ( m_settings->HasNameFilter ) propertyTokens.insert( make_pair(NAME_PROPERTY, m_settings->NameFilter) );
629 if ( m_settings->HasQueryBasesFilter ) propertyTokens.insert( make_pair(QUERYBASES_PROPERTY, m_settings->QueryBasesFilter) );
630 if ( m_settings->HasTagFilter ) propertyTokens.insert( make_pair(TAG_PROPERTY, m_settings->TagFilter) );
632 // send add these properties to filter set "COMMAND_LINE"
633 return AddPropertyTokensToFilter(CMD, propertyTokens);
636 bool FilterTool::FilterToolPrivate::ParseFilterObject(const string& filterName, const Json::Value& filterObject) {
638 // filter object parsing variables
639 Json::Value null(Json::nullValue);
640 Json::Value propertyValue;
643 map<string, string> propertyTokens;
645 // iterate over known properties
646 vector<string>::const_iterator propertyNameIter = m_propertyNames.begin();
647 vector<string>::const_iterator propertyNameEnd = m_propertyNames.end();
648 for ( ; propertyNameIter != propertyNameEnd; ++propertyNameIter ) {
649 const string& propertyName = (*propertyNameIter);
651 // if property defined in filter, add to token list
652 propertyValue = filterObject.get(propertyName, null);
653 if ( propertyValue != null )
654 propertyTokens.insert( make_pair(propertyName, propertyValue.asString()) );
657 // add this filter to engin
658 m_filterEngine.addFilter(filterName);
660 // add token list to this filter
661 return AddPropertyTokensToFilter(filterName, propertyTokens);
664 bool FilterTool::FilterToolPrivate::ParseScript(void) {
666 // read in script contents from file
667 const string document = GetScriptContents();
669 // set up JsonCPP reader and attempt to parse script
672 if ( !reader.parse(document, root) ) {
673 // use built-in error reporting mechanism to alert user what was wrong with the script
674 cerr << "Failed to parse configuration\n" << reader.getFormatedErrorMessages();
678 // initialize return status
681 // see if root object contains multiple filters
682 const Json::Value filters = root["filters"];
683 if ( !filters.isNull() ) {
685 // iterate over any filters found
687 Json::Value::const_iterator filtersIter = filters.begin();
688 Json::Value::const_iterator filtersEnd = filters.end();
689 for ( ; filtersIter != filtersEnd; ++filtersIter, ++filterIndex ) {
690 Json::Value filter = (*filtersIter);
692 // convert filter index to string
695 // if id tag supplied
696 const Json::Value id = filter["id"];
698 filterName = id.asString();
702 stringstream convert;
703 convert << filterIndex;
704 filterName = convert.str();
707 // create & parse filter
708 success &= ParseFilterObject(filterName, filter);
711 // see if user defined a "rule" for these filters
712 // otherwise, use filter engine's default rule behavior
713 string ruleString("");
714 const Json::Value rule = root["rule"];
715 if ( rule.isString() )
716 ruleString = rule.asString();
717 m_filterEngine.setRule(ruleString);
719 // return success/fail
723 // otherwise, root is the only filter (just contains properties)
724 // create & parse filter named "ROOT"
725 else success = ParseFilterObject("ROOT", root);
727 // return success/failure
731 bool FilterTool::FilterToolPrivate::Run(void) {
733 // set to default input if none provided
734 if ( !m_settings->HasInputBamFilename )
735 m_settings->InputFiles.push_back(Options::StandardIn());
737 // initialize defined properties & user-specified filters
739 if ( !SetupFilters() ) return 1;
741 // open reader without index
742 BamMultiReader reader;
743 if ( !reader.Open(m_settings->InputFiles, false, true) ) {
744 cerr << "Could not open input files for reading." << endl;
747 const string headerText = reader.GetHeaderText();
748 filterToolReferences = reader.GetReferenceData();
752 bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression );
753 if ( !writer.Open(m_settings->OutputFilename, headerText, filterToolReferences, writeUncompressed) ) {
754 cerr << "Could not open " << m_settings->OutputFilename << " for writing." << endl;
760 // if no region specified, filter entire file
761 if ( !m_settings->HasRegion ) {
762 while ( reader.GetNextAlignment(al) ) {
763 if ( CheckAlignment(al) )
764 writer.SaveAlignment(al);
768 // otherwise attempt to use region as constraint
771 // if region string parses OK
773 if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
775 // attempt to re-open reader with index files
777 bool openedOK = reader.Open(m_settings->InputFiles, true, true );
781 cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl;
785 // if index data available, we can use SetRegion
786 if ( reader.IsIndexLoaded() ) {
788 // attempt to use SetRegion(), if failed report error
789 if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) {
790 cerr << "ERROR: Region requested, but could not set BamReader region to REGION: " << m_settings->Region << " Aborting." << endl;
795 // everything checks out, just iterate through specified region, filtering alignments
796 while ( reader.GetNextAlignmentCore(al) )
797 if ( CheckAlignment(al) )
798 writer.SaveAlignment(al);
801 // no index data available, we have to iterate through until we
802 // find overlapping alignments
804 while ( reader.GetNextAlignmentCore(al) ) {
805 if ( (al.RefID >= region.LeftRefID) && ((al.Position + al.Length) >= region.LeftPosition) &&
806 (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) )
808 if ( CheckAlignment(al) )
809 writer.SaveAlignment(al);
815 // error parsing REGION string
817 cerr << "ERROR: Could not parse REGION - " << m_settings->Region << endl;
818 cerr << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl;
830 bool FilterTool::FilterToolPrivate::SetupFilters(void) {
832 // add known properties to FilterEngine<BamAlignmentChecker>
835 // parse script for filter rules, if given
836 if ( m_settings->HasScriptFilename ) return ParseScript();
838 // otherwise check command line for filters
839 else return ParseCommandLine();