1 // ***************************************************************************
2 // bamtools_utilities.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 2 June 2010
7 // ---------------------------------------------------------------------------
8 // Provides general utilities used by BamTools sub-tools.
9 // ***************************************************************************
12 #include "bamtools_utilities.h"
13 #include "BamReader.h"
16 using namespace BamTools;
18 // Parses a region string, does validation (valid ID's, positions), stores in Region struct
19 bool Utilities::ParseRegionString(const std::string& regionString, const BamReader& reader, Region& region) {
21 // -------------------------------
22 // parse region string
24 // check first for empty string
25 if ( regionString.empty() )
28 // non-empty string, look for a colom
29 size_t foundFirstColon = regionString.find(':');
31 // store chrom strings, and numeric positions
38 // going to use entire contents of requested chromosome
39 // just store entire region string as startChrom name
40 // use BamReader methods to check if its valid for current BAM file
41 if ( foundFirstColon == string::npos ) {
42 startChrom = regionString;
44 stopChrom = regionString;
48 // colon found, so we at least have some sort of startPos requested
51 // store start chrom from beginning to first colon
52 startChrom = regionString.substr(0,foundFirstColon);
54 // look for ".." after the colon
55 size_t foundRangeDots = regionString.find("..", foundFirstColon+1);
58 // so we have a startPos but no range
59 // store contents before colon as startChrom, after as startPos
60 if ( foundRangeDots == string::npos ) {
61 startPos = atoi( regionString.substr(foundFirstColon+1).c_str() );
62 stopChrom = startChrom;
66 // ".." found, so we have some sort of range selected
69 // store startPos between first colon and range dots ".."
70 startPos = atoi( regionString.substr(foundFirstColon+1, foundRangeDots-foundFirstColon-1).c_str() );
72 // look for second colon
73 size_t foundSecondColon = regionString.find(':', foundRangeDots+1);
75 // no second colon found
76 // so we have a "standard" chrom:start..stop input format (on single chrom)
77 if ( foundSecondColon == string::npos ) {
78 stopChrom = startChrom;
79 stopPos = atoi( regionString.substr(foundRangeDots+2).c_str() );
83 // so we have a range requested across 2 chrom's
85 stopChrom = regionString.substr(foundRangeDots+2, foundSecondColon-(foundRangeDots+2));
86 stopPos = atoi( regionString.substr(foundSecondColon+1).c_str() );
91 // -------------------------------
92 // validate reference IDs & genomic positions
94 const RefVector references = reader.GetReferenceData();
96 // if startRefID not found, return false
97 int startRefID = reader.GetReferenceID(startChrom);
98 if ( startRefID == (int)references.size() ) return false;
100 // if startPos is larger than reference, return false
101 const RefData& startReference = references.at(startRefID);
102 if ( startPos > startReference.RefLength ) return false;
104 // if stopRefID not found, return false
105 int stopRefID = reader.GetReferenceID(stopChrom);
106 if ( stopRefID == (int)references.size() ) return false;
108 // if stopPosition larger than reference, return false
109 const RefData& stopReference = references.at(stopRefID);
110 if ( stopPos > stopReference.RefLength ) return false;
112 // if no stopPosition specified, set to reference end
113 if ( stopPos == -1 )stopPos = stopReference.RefLength;
115 // -------------------------------
116 // set up Region struct & return
118 region.StartChromID = startRefID;
119 region.StopChromID = stopRefID;
120 region.StartPosition = startPos;
121 region.StopPosition = stopPos;