1 // ***************************************************************************
2 // bamtools_filter.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 7 April 2011
7 // ---------------------------------------------------------------------------
8 // Filters BAM file(s) according to some user-specified criteria
9 // ***************************************************************************
11 #include "bamtools_filter.h"
13 #include <api/BamMultiReader.h>
14 #include <api/BamWriter.h>
15 #include <utils/bamtools_filter_engine.h>
16 #include <utils/bamtools_options.h>
17 #include <utils/bamtools_utilities.h>
18 using namespace BamTools;
20 #include <jsoncpp/json.h>
32 // -------------------------------
33 // string literal constants
36 const string ALIGNMENTFLAG_PROPERTY = "alignmentFlag";
37 const string CIGAR_PROPERTY = "cigar";
38 const string INSERTSIZE_PROPERTY = "insertSize";
39 const string ISDUPLICATE_PROPERTY = "isDuplicate";
40 const string ISFAILEDQC_PROPERTY = "isFailedQC";
41 const string ISFIRSTMATE_PROPERTY = "isFirstMate";
42 const string ISMAPPED_PROPERTY = "isMapped";
43 const string ISMATEMAPPED_PROPERTY = "isMateMapped";
44 const string ISMATEREVERSESTRAND_PROPERTY = "isMateReverseStrand";
45 const string ISPAIRED_PROPERTY = "isPaired";
46 const string ISPRIMARYALIGNMENT_PROPERTY = "isPrimaryAlignment";
47 const string ISPROPERPAIR_PROPERTY = "isProperPair";
48 const string ISREVERSESTRAND_PROPERTY = "isReverseStrand";
49 const string ISSECONDMATE_PROPERTY = "isSecondMate";
50 const string MAPQUALITY_PROPERTY = "mapQuality";
51 const string MATEPOSITION_PROPERTY = "matePosition";
52 const string MATEREFERENCE_PROPERTY = "mateReference";
53 const string NAME_PROPERTY = "name";
54 const string POSITION_PROPERTY = "position";
55 const string QUERYBASES_PROPERTY = "queryBases";
56 const string REFERENCE_PROPERTY = "reference";
57 const string TAG_PROPERTY = "tag";
60 const string TRUE_STR = "true";
61 const string FALSE_STR = "false";
63 RefVector filterToolReferences;
65 struct BamAlignmentChecker {
66 bool check(const PropertyFilter& filter, const BamAlignment& al) {
68 bool keepAlignment = true;
69 const PropertyMap& properties = filter.Properties;
70 PropertyMap::const_iterator propertyIter = properties.begin();
71 PropertyMap::const_iterator propertyEnd = properties.end();
72 for ( ; propertyIter != propertyEnd; ++propertyIter ) {
74 // check alignment data field depending on propertyName
75 const string& propertyName = (*propertyIter).first;
76 const PropertyFilterValue& valueFilter = (*propertyIter).second;
78 if ( propertyName == ALIGNMENTFLAG_PROPERTY ) keepAlignment &= valueFilter.check(al.AlignmentFlag);
79 else if ( propertyName == CIGAR_PROPERTY ) {
81 const vector<CigarOp>& cigarData = al.CigarData;
82 if ( !cigarData.empty() ) {
83 vector<CigarOp>::const_iterator cigarBegin = cigarData.begin();
84 vector<CigarOp>::const_iterator cigarIter = cigarBegin;
85 vector<CigarOp>::const_iterator cigarEnd = cigarData.end();
86 for ( ; cigarIter != cigarEnd; ++cigarIter ) {
87 const CigarOp& op = (*cigarIter);
88 cigarSs << op.Length << op.Type;
90 keepAlignment &= valueFilter.check(cigarSs.str());
93 else if ( propertyName == INSERTSIZE_PROPERTY ) keepAlignment &= valueFilter.check(al.InsertSize);
94 else if ( propertyName == ISDUPLICATE_PROPERTY ) keepAlignment &= valueFilter.check(al.IsDuplicate());
95 else if ( propertyName == ISFAILEDQC_PROPERTY ) keepAlignment &= valueFilter.check(al.IsFailedQC());
96 else if ( propertyName == ISFIRSTMATE_PROPERTY ) keepAlignment &= valueFilter.check(al.IsFirstMate());
97 else if ( propertyName == ISMAPPED_PROPERTY ) keepAlignment &= valueFilter.check(al.IsMapped());
98 else if ( propertyName == ISMATEMAPPED_PROPERTY ) keepAlignment &= valueFilter.check(al.IsMateMapped());
99 else if ( propertyName == ISMATEREVERSESTRAND_PROPERTY ) keepAlignment &= valueFilter.check(al.IsMateReverseStrand());
100 else if ( propertyName == ISPAIRED_PROPERTY ) keepAlignment &= valueFilter.check(al.IsPaired());
101 else if ( propertyName == ISPRIMARYALIGNMENT_PROPERTY ) keepAlignment &= valueFilter.check(al.IsPrimaryAlignment());
102 else if ( propertyName == ISPROPERPAIR_PROPERTY ) keepAlignment &= valueFilter.check(al.IsProperPair());
103 else if ( propertyName == ISREVERSESTRAND_PROPERTY ) keepAlignment &= valueFilter.check(al.IsReverseStrand());
104 else if ( propertyName == ISSECONDMATE_PROPERTY ) keepAlignment &= valueFilter.check(al.IsSecondMate());
105 else if ( propertyName == MAPQUALITY_PROPERTY ) keepAlignment &= valueFilter.check(al.MapQuality);
106 else if ( propertyName == MATEPOSITION_PROPERTY ) keepAlignment &= ( al.IsPaired() && al.IsMateMapped() && valueFilter.check(al.MateRefID) );
107 else if ( propertyName == MATEREFERENCE_PROPERTY ) {
108 if ( !al.IsPaired() || !al.IsMateMapped() ) return false;
109 BAMTOOLS_ASSERT_MESSAGE( (al.MateRefID>=0 && (al.MateRefID<(int)filterToolReferences.size())), "Invalid MateRefID");
110 const string& refName = filterToolReferences.at(al.MateRefID).RefName;
111 keepAlignment &= valueFilter.check(refName);
113 else if ( propertyName == NAME_PROPERTY ) keepAlignment &= valueFilter.check(al.Name);
114 else if ( propertyName == POSITION_PROPERTY ) keepAlignment &= valueFilter.check(al.Position);
115 else if ( propertyName == QUERYBASES_PROPERTY ) keepAlignment &= valueFilter.check(al.QueryBases);
116 else if ( propertyName == REFERENCE_PROPERTY ) {
117 BAMTOOLS_ASSERT_MESSAGE( (al.RefID>=0 && (al.RefID<(int)filterToolReferences.size())), "Invalid RefID");
118 const string& refName = filterToolReferences.at(al.RefID).RefName;
119 keepAlignment &= valueFilter.check(refName);
121 else if ( propertyName == TAG_PROPERTY ) keepAlignment &= checkAlignmentTag(valueFilter, al);
122 else BAMTOOLS_ASSERT_UNREACHABLE;
124 // if alignment fails at ANY point, just quit and return false
125 if ( !keepAlignment ) return false;
128 BAMTOOLS_ASSERT_MESSAGE( keepAlignment, "Error in BamAlignmentChecker... keepAlignment should be true here");
129 return keepAlignment;
132 bool checkAlignmentTag(const PropertyFilterValue& valueFilter, const BamAlignment& al) {
134 // ensure filter contains string data
135 Variant entireTagFilter = valueFilter.Value;
136 if ( !entireTagFilter.is_type<string>() ) return false;
138 // localize string from variant
139 const string& entireTagFilterString = entireTagFilter.get<string>();
141 // ensure we have at least "XX:x"
142 if ( entireTagFilterString.length() < 4 ) return false;
144 // get tagName & lookup in alignment
145 // if found, set tagType to tag type character
146 // if not found, return false
147 const string& tagName = entireTagFilterString.substr(0,2);
149 if ( !al.GetTagType(tagName, tagType) ) return false;
151 // remove tagName & ":" from beginning tagFilter
152 string tagFilterString = entireTagFilterString.substr(3);
154 // switch on tag type to set tag query value & parse filter token
155 int32_t intFilterValue, intQueryValue;
156 uint32_t uintFilterValue, uintQueryValue;
157 float realFilterValue, realQueryValue;
158 string stringFilterValue, stringQueryValue;
160 PropertyFilterValue tagFilter;
161 PropertyFilterValue::ValueCompareType compareType;
162 bool keepAlignment = false;
165 // signed int tag type
169 if ( al.GetTag(tagName, intQueryValue) ) {
170 if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, intFilterValue, compareType) ) {
171 tagFilter.Value = intFilterValue;
172 tagFilter.Type = compareType;
173 keepAlignment = tagFilter.check(intQueryValue);
178 // unsigned int tag type
182 if ( al.GetTag(tagName, uintQueryValue) ) {
183 if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, uintFilterValue, compareType) ) {
184 tagFilter.Value = uintFilterValue;
185 tagFilter.Type = compareType;
186 keepAlignment = tagFilter.check(uintQueryValue);
193 if ( al.GetTag(tagName, realQueryValue) ) {
194 if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, realFilterValue, compareType) ) {
195 tagFilter.Value = realFilterValue;
196 tagFilter.Type = compareType;
197 keepAlignment = tagFilter.check(realQueryValue);
206 if ( al.GetTag(tagName, stringQueryValue) ) {
207 if ( FilterEngine<BamAlignmentChecker>::parseToken(tagFilterString, stringFilterValue, compareType) ) {
208 tagFilter.Value = stringFilterValue;
209 tagFilter.Type = compareType;
210 keepAlignment = tagFilter.check(stringQueryValue);
217 keepAlignment = false;
220 return keepAlignment;
224 } // namespace BamTools
226 // ---------------------------------------------
227 // FilterSettings implementation
229 struct FilterTool::FilterSettings {
231 // ----------------------------------
235 bool HasInputBamFilename;
236 bool HasOutputBamFilename;
238 bool HasScriptFilename;
239 bool IsForceCompression;
242 vector<string> InputFiles;
243 string OutputFilename;
245 string ScriptFilename;
247 // -----------------------------------
248 // General filter opts
251 bool HasAlignmentFlagFilter;
252 bool HasInsertSizeFilter;
253 bool HasMapQualityFilter;
255 bool HasQueryBasesFilter;
256 bool HasTagFilter; //(s)
259 string AlignmentFlagFilter;
260 string InsertSizeFilter;
262 string MapQualityFilter;
263 string QueryBasesFilter;
264 string TagFilter; // support multiple ?
266 // -----------------------------------
267 // AlignmentFlag filter opts
270 bool HasIsDuplicateFilter;
271 bool HasIsFailedQCFilter;
272 bool HasIsFirstMateFilter;
273 bool HasIsMappedFilter;
274 bool HasIsMateMappedFilter;
275 bool HasIsMateReverseStrandFilter;
276 bool HasIsPairedFilter;
277 bool HasIsPrimaryAlignmentFilter;
278 bool HasIsProperPairFilter;
279 bool HasIsReverseStrandFilter;
280 bool HasIsSecondMateFilter;
283 string IsDuplicateFilter;
284 string IsFailedQCFilter;
285 string IsFirstMateFilter;
286 string IsMappedFilter;
287 string IsMateMappedFilter;
288 string IsMateReverseStrandFilter;
289 string IsPairedFilter;
290 string IsPrimaryAlignmentFilter;
291 string IsProperPairFilter;
292 string IsReverseStrandFilter;
293 string IsSecondMateFilter;
295 // ---------------------------------
299 : HasInputBamFilename(false)
300 , HasOutputBamFilename(false)
302 , HasScriptFilename(false)
303 , IsForceCompression(false)
304 , OutputFilename(Options::StandardOut())
305 , HasAlignmentFlagFilter(false)
306 , HasInsertSizeFilter(false)
307 , HasMapQualityFilter(false)
308 , HasNameFilter(false)
309 , HasQueryBasesFilter(false)
310 , HasTagFilter(false)
311 , HasIsDuplicateFilter(false)
312 , HasIsFailedQCFilter(false)
313 , HasIsFirstMateFilter(false)
314 , HasIsMappedFilter(false)
315 , HasIsMateMappedFilter(false)
316 , HasIsMateReverseStrandFilter(false)
317 , HasIsPairedFilter(false)
318 , HasIsPrimaryAlignmentFilter(false)
319 , HasIsProperPairFilter(false)
320 , HasIsReverseStrandFilter(false)
321 , HasIsSecondMateFilter(false)
322 , IsDuplicateFilter(TRUE_STR)
323 , IsFailedQCFilter(TRUE_STR)
324 , IsFirstMateFilter(TRUE_STR)
325 , IsMappedFilter(TRUE_STR)
326 , IsMateMappedFilter(TRUE_STR)
327 , IsMateReverseStrandFilter(TRUE_STR)
328 , IsPairedFilter(TRUE_STR)
329 , IsPrimaryAlignmentFilter(TRUE_STR)
330 , IsProperPairFilter(TRUE_STR)
331 , IsReverseStrandFilter(TRUE_STR)
332 , IsSecondMateFilter(TRUE_STR)
336 // ---------------------------------------------
337 // FilterToolPrivate declaration
339 class FilterTool::FilterToolPrivate {
343 FilterToolPrivate(FilterTool::FilterSettings* settings);
344 ~FilterToolPrivate(void);
346 // 'public' interface
352 bool AddPropertyTokensToFilter(const string& filterName, const map<string, string>& propertyTokens);
353 bool CheckAlignment(const BamAlignment& al);
354 const string GetScriptContents(void);
355 void InitProperties(void);
356 bool ParseCommandLine(void);
357 bool ParseFilterObject(const string& filterName, const Json::Value& filterObject);
358 bool ParseScript(void);
359 bool SetupFilters(void);
363 vector<string> m_propertyNames;
364 FilterTool::FilterSettings* m_settings;
365 FilterEngine<BamAlignmentChecker> m_filterEngine;
368 // ---------------------------------------------
369 // FilterToolPrivate implementation
372 FilterTool::FilterToolPrivate::FilterToolPrivate(FilterTool::FilterSettings* settings)
373 : m_settings(settings)
377 FilterTool::FilterToolPrivate::~FilterToolPrivate(void) { }
379 bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filterName,
381 string>& propertyTokens)
383 // dummy temp values for token parsing
386 uint16_t uint16Value;
387 uint32_t uint32Value;
389 PropertyFilterValue::ValueCompareType type;
391 // iterate over property token map
392 map<string, string>::const_iterator mapIter = propertyTokens.begin();
393 map<string, string>::const_iterator mapEnd = propertyTokens.end();
394 for ( ; mapIter != mapEnd; ++mapIter ) {
396 const string& propertyName = (*mapIter).first;
397 const string& token = (*mapIter).second;
399 // ------------------------------
400 // convert token to value & compare type
401 // then add to filter engine
404 if ( propertyName == ISDUPLICATE_PROPERTY ||
405 propertyName == ISFAILEDQC_PROPERTY ||
406 propertyName == ISFIRSTMATE_PROPERTY ||
407 propertyName == ISMAPPED_PROPERTY ||
408 propertyName == ISMATEMAPPED_PROPERTY ||
409 propertyName == ISMATEREVERSESTRAND_PROPERTY ||
410 propertyName == ISPAIRED_PROPERTY ||
411 propertyName == ISPRIMARYALIGNMENT_PROPERTY ||
412 propertyName == ISPROPERPAIR_PROPERTY ||
413 propertyName == ISREVERSESTRAND_PROPERTY ||
414 propertyName == ISSECONDMATE_PROPERTY
417 FilterEngine<BamAlignmentChecker>::parseToken(token, boolValue, type);
418 m_filterEngine.setProperty(filterName, propertyName, boolValue, type);
421 // int32_t conversion
422 else if ( propertyName == INSERTSIZE_PROPERTY ||
423 propertyName == MATEPOSITION_PROPERTY ||
424 propertyName == POSITION_PROPERTY
427 FilterEngine<BamAlignmentChecker>::parseToken(token, int32Value, type);
428 m_filterEngine.setProperty(filterName, propertyName, int32Value, type);
431 // uint16_t conversion
432 else if ( propertyName == MAPQUALITY_PROPERTY )
434 FilterEngine<BamAlignmentChecker>::parseToken(token, uint16Value, type);
435 m_filterEngine.setProperty(filterName, propertyName, uint16Value, type);
438 // uint32_t conversion
439 else if ( propertyName == ALIGNMENTFLAG_PROPERTY )
441 FilterEngine<BamAlignmentChecker>::parseToken(token, uint32Value, type);
442 m_filterEngine.setProperty(filterName, propertyName, uint32Value, type);
446 else if ( propertyName == CIGAR_PROPERTY ||
447 propertyName == MATEREFERENCE_PROPERTY ||
448 propertyName == NAME_PROPERTY ||
449 propertyName == QUERYBASES_PROPERTY ||
450 propertyName == REFERENCE_PROPERTY
453 FilterEngine<BamAlignmentChecker>::parseToken(token, stringValue, type);
454 m_filterEngine.setProperty(filterName, propertyName, stringValue, type);
457 else if ( propertyName == TAG_PROPERTY ) {
458 // this will be stored directly as the TAG:VALUE token
459 // (VALUE may contain compare ops, will be parsed out later)
460 m_filterEngine.setProperty(filterName, propertyName, token, PropertyFilterValue::EXACT);
463 // else unknown property
465 cerr << "bamtools filter ERROR: unknown property - " << propertyName << endl;
472 bool FilterTool::FilterToolPrivate::CheckAlignment(const BamAlignment& al) {
473 return m_filterEngine.check(al);
476 const string FilterTool::FilterToolPrivate::GetScriptContents(void) {
478 // open script for reading
479 FILE* inFile = fopen(m_settings->ScriptFilename.c_str(), "rb");
481 cerr << "bamtools filter ERROR: could not open script: "
482 << m_settings->ScriptFilename << " for reading" << endl;
486 // read in entire script contents
488 ostringstream docStream("");
491 // peek ahead, make sure there is data available
492 char ch = fgetc(inFile);
494 if( feof(inFile) ) break;
496 // read next block of data
497 if ( fgets(buffer, 1024, inFile) == 0 ) {
498 cerr << "bamtools filter ERROR: could not read script contents" << endl;
508 // import buffer contents to document, return
509 string document = docStream.str();
513 void FilterTool::FilterToolPrivate::InitProperties(void) {
515 // store property names in vector
516 m_propertyNames.push_back(ALIGNMENTFLAG_PROPERTY);
517 m_propertyNames.push_back(CIGAR_PROPERTY);
518 m_propertyNames.push_back(INSERTSIZE_PROPERTY);
519 m_propertyNames.push_back(ISDUPLICATE_PROPERTY);
520 m_propertyNames.push_back(ISFAILEDQC_PROPERTY);
521 m_propertyNames.push_back(ISFIRSTMATE_PROPERTY);
522 m_propertyNames.push_back(ISMAPPED_PROPERTY);
523 m_propertyNames.push_back(ISMATEMAPPED_PROPERTY);
524 m_propertyNames.push_back(ISMATEREVERSESTRAND_PROPERTY);
525 m_propertyNames.push_back(ISPAIRED_PROPERTY);
526 m_propertyNames.push_back(ISPRIMARYALIGNMENT_PROPERTY);
527 m_propertyNames.push_back(ISPROPERPAIR_PROPERTY);
528 m_propertyNames.push_back(ISREVERSESTRAND_PROPERTY);
529 m_propertyNames.push_back(ISSECONDMATE_PROPERTY);
530 m_propertyNames.push_back(MAPQUALITY_PROPERTY);
531 m_propertyNames.push_back(MATEPOSITION_PROPERTY);
532 m_propertyNames.push_back(MATEREFERENCE_PROPERTY);
533 m_propertyNames.push_back(NAME_PROPERTY);
534 m_propertyNames.push_back(POSITION_PROPERTY);
535 m_propertyNames.push_back(QUERYBASES_PROPERTY);
536 m_propertyNames.push_back(REFERENCE_PROPERTY);
537 m_propertyNames.push_back(TAG_PROPERTY);
539 // add vector contents to FilterEngine<BamAlignmentChecker>
540 vector<string>::const_iterator propertyNameIter = m_propertyNames.begin();
541 vector<string>::const_iterator propertyNameEnd = m_propertyNames.end();
542 for ( ; propertyNameIter != propertyNameEnd; ++propertyNameIter )
543 m_filterEngine.addProperty((*propertyNameIter));
546 bool FilterTool::FilterToolPrivate::ParseCommandLine(void) {
548 // add a rule set to filter engine
549 const string CMD = "COMMAND_LINE";
550 m_filterEngine.addFilter(CMD);
552 // map property names to command line args
553 map<string, string> propertyTokens;
554 if ( m_settings->HasAlignmentFlagFilter ) propertyTokens.insert( make_pair(ALIGNMENTFLAG_PROPERTY, m_settings->AlignmentFlagFilter) );
555 if ( m_settings->HasInsertSizeFilter ) propertyTokens.insert( make_pair(INSERTSIZE_PROPERTY, m_settings->InsertSizeFilter) );
556 if ( m_settings->HasIsDuplicateFilter ) propertyTokens.insert( make_pair(ISDUPLICATE_PROPERTY, m_settings->IsDuplicateFilter) );
557 if ( m_settings->HasIsFailedQCFilter ) propertyTokens.insert( make_pair(ISFAILEDQC_PROPERTY, m_settings->IsFailedQCFilter) );
558 if ( m_settings->HasIsFirstMateFilter ) propertyTokens.insert( make_pair(ISFIRSTMATE_PROPERTY, m_settings->IsFirstMateFilter) );
559 if ( m_settings->HasIsMappedFilter ) propertyTokens.insert( make_pair(ISMAPPED_PROPERTY, m_settings->IsMappedFilter) );
560 if ( m_settings->HasIsMateMappedFilter ) propertyTokens.insert( make_pair(ISMATEMAPPED_PROPERTY, m_settings->IsMateMappedFilter) );
561 if ( m_settings->HasIsMateReverseStrandFilter ) propertyTokens.insert( make_pair(ISMATEREVERSESTRAND_PROPERTY, m_settings->IsMateReverseStrandFilter) );
562 if ( m_settings->HasIsPairedFilter ) propertyTokens.insert( make_pair(ISPAIRED_PROPERTY, m_settings->IsPairedFilter) );
563 if ( m_settings->HasIsPrimaryAlignmentFilter ) propertyTokens.insert( make_pair(ISPRIMARYALIGNMENT_PROPERTY, m_settings->IsPrimaryAlignmentFilter) );
564 if ( m_settings->HasIsProperPairFilter ) propertyTokens.insert( make_pair(ISPROPERPAIR_PROPERTY, m_settings->IsProperPairFilter) );
565 if ( m_settings->HasIsReverseStrandFilter ) propertyTokens.insert( make_pair(ISREVERSESTRAND_PROPERTY, m_settings->IsReverseStrandFilter) );
566 if ( m_settings->HasIsSecondMateFilter ) propertyTokens.insert( make_pair(ISSECONDMATE_PROPERTY, m_settings->IsSecondMateFilter) );
567 if ( m_settings->HasMapQualityFilter ) propertyTokens.insert( make_pair(MAPQUALITY_PROPERTY, m_settings->MapQualityFilter) );
568 if ( m_settings->HasNameFilter ) propertyTokens.insert( make_pair(NAME_PROPERTY, m_settings->NameFilter) );
569 if ( m_settings->HasQueryBasesFilter ) propertyTokens.insert( make_pair(QUERYBASES_PROPERTY, m_settings->QueryBasesFilter) );
570 if ( m_settings->HasTagFilter ) propertyTokens.insert( make_pair(TAG_PROPERTY, m_settings->TagFilter) );
572 // send add these properties to filter set "COMMAND_LINE"
573 return AddPropertyTokensToFilter(CMD, propertyTokens);
576 bool FilterTool::FilterToolPrivate::ParseFilterObject(const string& filterName, const Json::Value& filterObject) {
578 // filter object parsing variables
579 Json::Value null(Json::nullValue);
580 Json::Value propertyValue;
583 map<string, string> propertyTokens;
585 // iterate over known properties
586 vector<string>::const_iterator propertyNameIter = m_propertyNames.begin();
587 vector<string>::const_iterator propertyNameEnd = m_propertyNames.end();
588 for ( ; propertyNameIter != propertyNameEnd; ++propertyNameIter ) {
589 const string& propertyName = (*propertyNameIter);
591 // if property defined in filter, add to token list
592 propertyValue = filterObject.get(propertyName, null);
593 if ( propertyValue != null )
594 propertyTokens.insert( make_pair(propertyName, propertyValue.asString()) );
597 // add this filter to engin
598 m_filterEngine.addFilter(filterName);
600 // add token list to this filter
601 return AddPropertyTokensToFilter(filterName, propertyTokens);
604 bool FilterTool::FilterToolPrivate::ParseScript(void) {
606 // read in script contents from file
607 const string document = GetScriptContents();
609 // set up JsonCPP reader and attempt to parse script
612 if ( !reader.parse(document, root) ) {
613 // use built-in error reporting mechanism to alert user what was wrong with the script
614 cerr << "bamtools filter ERROR: failed to parse script - see error message(s) below" << endl
615 << reader.getFormatedErrorMessages();
619 // initialize return status
622 // see if root object contains multiple filters
623 const Json::Value filters = root["filters"];
624 if ( !filters.isNull() ) {
626 // iterate over any filters found
628 Json::Value::const_iterator filtersIter = filters.begin();
629 Json::Value::const_iterator filtersEnd = filters.end();
630 for ( ; filtersIter != filtersEnd; ++filtersIter, ++filterIndex ) {
631 Json::Value filter = (*filtersIter);
633 // convert filter index to string
636 // if id tag supplied
637 const Json::Value id = filter["id"];
639 filterName = id.asString();
643 stringstream convert;
644 convert << filterIndex;
645 filterName = convert.str();
648 // create & parse filter
649 success &= ParseFilterObject(filterName, filter);
652 // see if user defined a "rule" for these filters
653 // otherwise, use filter engine's default rule behavior
654 string ruleString("");
655 const Json::Value rule = root["rule"];
656 if ( rule.isString() )
657 ruleString = rule.asString();
658 m_filterEngine.setRule(ruleString);
660 // return success/fail
664 // otherwise, root is the only filter (just contains properties)
665 // create & parse filter named "ROOT"
666 else success = ParseFilterObject("ROOT", root);
668 // return success/failure
672 bool FilterTool::FilterToolPrivate::Run(void) {
674 // set to default input if none provided
675 if ( !m_settings->HasInputBamFilename )
676 m_settings->InputFiles.push_back(Options::StandardIn());
678 // initialize defined properties & user-specified filters
680 if ( !SetupFilters() ) return false;
682 // open reader without index
683 BamMultiReader reader;
684 if ( !reader.Open(m_settings->InputFiles) ) {
685 cerr << "bamtools filter ERROR: could not open input files for reading." << endl;
689 // retrieve reader header & reference data
690 const string headerText = reader.GetHeaderText();
691 filterToolReferences = reader.GetReferenceData();
693 // determine compression mode for BamWriter
694 bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() &&
695 !m_settings->IsForceCompression );
696 BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
697 if ( writeUncompressed ) compressionMode = BamWriter::Uncompressed;
701 writer.SetCompressionMode(compressionMode);
702 if ( !writer.Open(m_settings->OutputFilename, headerText, filterToolReferences) ) {
703 cerr << "bamtools filter ERROR: could not open " << m_settings->OutputFilename << " for writing." << endl;
708 // if no region specified, filter entire file
710 if ( !m_settings->HasRegion ) {
711 while ( reader.GetNextAlignment(al) ) {
712 if ( CheckAlignment(al) )
713 writer.SaveAlignment(al);
717 // otherwise attempt to use region as constraint
720 // if region string parses OK
722 if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
724 // attempt to find index files
725 reader.LocateIndexes();
727 // if index data available for all BAM files, we can use SetRegion
728 if ( reader.HasIndexes() ) {
730 // attempt to use SetRegion(), if failed report error
731 if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) {
732 cerr << "bamtools filter ERROR: set region failed. Check that REGION describes a valid range" << endl;
737 // everything checks out, just iterate through specified region, filtering alignments
738 while ( reader.GetNextAlignment(al) )
739 if ( CheckAlignment(al) )
740 writer.SaveAlignment(al);
743 // no index data available, we have to iterate through until we
744 // find overlapping alignments
746 while ( reader.GetNextAlignment(al) ) {
747 if ( (al.RefID >= region.LeftRefID) && ((al.Position + al.Length) >= region.LeftPosition) &&
748 (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) )
750 if ( CheckAlignment(al) )
751 writer.SaveAlignment(al);
757 // error parsing REGION string
759 cerr << "bamtools filter ERROR: could not parse REGION: " << m_settings->Region << endl;
760 cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid"
773 bool FilterTool::FilterToolPrivate::SetupFilters(void) {
775 // set up filter engine with supported properties
778 // parse script for filter rules, if given
779 if ( m_settings->HasScriptFilename )
780 return ParseScript();
782 // otherwise check command line for filters
783 else return ParseCommandLine();
786 // ---------------------------------------------
787 // FilterTool implementation
789 FilterTool::FilterTool(void)
791 , m_settings(new FilterSettings)
794 // set program details
795 Options::SetProgramInfo("bamtools filter", "filters BAM file(s)", "[-in <filename> -in <filename> ...] [-out <filename> | [-forceCompression]] [-region <REGION>] [ [-script <filename] | [filterOptions] ]");
797 OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
798 Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename, m_settings->InputFiles, IO_Opts, Options::StandardIn());
799 Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
800 Options::AddValueOption("-region", "REGION", "only read data from this genomic region (see documentation for more details)", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
801 Options::AddValueOption("-script", "filename", "the filter script file (see documentation for more details)", "", m_settings->HasScriptFilename, m_settings->ScriptFilename, IO_Opts);
802 Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts);
804 OptionGroup* FilterOpts = Options::CreateOptionGroup("General Filters");
805 Options::AddValueOption("-alignmentFlag", "int", "keep reads with this *exact* alignment flag (for more detailed queries, see below)", "", m_settings->HasAlignmentFlagFilter, m_settings->AlignmentFlagFilter, FilterOpts);
806 Options::AddValueOption("-insertSize", "int", "keep reads with insert size that mathces pattern", "", m_settings->HasInsertSizeFilter, m_settings->InsertSizeFilter, FilterOpts);
807 Options::AddValueOption("-mapQuality", "[0-255]", "keep reads with map quality that matches pattern", "", m_settings->HasMapQualityFilter, m_settings->MapQualityFilter, FilterOpts);
808 Options::AddValueOption("-name", "string", "keep reads with name that matches pattern", "", m_settings->HasNameFilter, m_settings->NameFilter, FilterOpts);
809 Options::AddValueOption("-queryBases", "string", "keep reads with motif that mathces pattern", "", m_settings->HasQueryBasesFilter, m_settings->QueryBasesFilter, FilterOpts);
810 Options::AddValueOption("-tag", "TAG:VALUE", "keep reads with this key=>value pair", "", m_settings->HasTagFilter, m_settings->TagFilter, FilterOpts);
812 OptionGroup* AlignmentFlagOpts = Options::CreateOptionGroup("Alignment Flag Filters");
813 Options::AddValueOption("-isDuplicate", "true/false", "keep only alignments that are marked as duplicate?", "", m_settings->HasIsDuplicateFilter, m_settings->IsDuplicateFilter, AlignmentFlagOpts, TRUE_STR);
814 Options::AddValueOption("-isFailedQC", "true/false", "keep only alignments that failed QC?", "", m_settings->HasIsFailedQCFilter, m_settings->IsFailedQCFilter, AlignmentFlagOpts, TRUE_STR);
815 Options::AddValueOption("-isFirstMate", "true/false", "keep only alignments marked as first mate?", "", m_settings->HasIsFirstMateFilter, m_settings->IsFirstMateFilter, AlignmentFlagOpts, TRUE_STR);
816 Options::AddValueOption("-isMapped", "true/false", "keep only alignments that were mapped?", "", m_settings->HasIsMappedFilter, m_settings->IsMappedFilter, AlignmentFlagOpts, TRUE_STR);
817 Options::AddValueOption("-isMateMapped", "true/false", "keep only alignments with mates that mapped", "", m_settings->HasIsMateMappedFilter, m_settings->IsMateMappedFilter, AlignmentFlagOpts, TRUE_STR);
818 Options::AddValueOption("-isMateReverseStrand", "true/false", "keep only alignments with mate on reverese strand?", "", m_settings->HasIsMateReverseStrandFilter, m_settings->IsMateReverseStrandFilter, AlignmentFlagOpts, TRUE_STR);
819 Options::AddValueOption("-isPaired", "true/false", "keep only alignments that were sequenced as paired?","", m_settings->HasIsPairedFilter, m_settings->IsPairedFilter, AlignmentFlagOpts, TRUE_STR);
820 Options::AddValueOption("-isPrimaryAlignment", "true/false", "keep only alignments marked as primary?", "", m_settings->HasIsPrimaryAlignmentFilter, m_settings->IsPrimaryAlignmentFilter, AlignmentFlagOpts, TRUE_STR);
821 Options::AddValueOption("-isProperPair", "true/false", "keep only alignments that passed PE resolution?", "", m_settings->HasIsProperPairFilter, m_settings->IsProperPairFilter, AlignmentFlagOpts, TRUE_STR);
822 Options::AddValueOption("-isReverseStrand", "true/false", "keep only alignments on reverse strand?", "", m_settings->HasIsReverseStrandFilter, m_settings->IsReverseStrandFilter, AlignmentFlagOpts, TRUE_STR);
823 Options::AddValueOption("-isSecondMate", "true/false", "keep only alignments marked as second mate?", "", m_settings->HasIsSecondMateFilter, m_settings->IsSecondMateFilter, AlignmentFlagOpts, TRUE_STR);
826 FilterTool::~FilterTool(void) {
835 int FilterTool::Help(void) {
836 Options::DisplayHelp();
840 int FilterTool::Run(int argc, char* argv[]) {
842 // parse command line arguments
843 Options::Parse(argc, argv, 1);
845 // initialize FilterTool with settings
846 m_impl = new FilterToolPrivate(m_settings);
848 // run FilterTool, return success/fail