1 // ***************************************************************************
2 // bamtools_utilities.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 2 June 2010
7 // ---------------------------------------------------------------------------
8 // Provides general utilities used by BamTools sub-tools.
9 // ***************************************************************************
12 #include "bamtools_utilities.h"
13 #include "BamReader.h"
14 #include "BamMultiReader.h"
17 using namespace BamTools;
19 // Parses a region string, does validation (valid ID's, positions), stores in Region struct
20 // Returns success (true/false)
21 bool Utilities::ParseRegionString(const std::string& regionString, const BamReader& reader, Region& region) {
23 // -------------------------------
24 // parse region string
26 // check first for empty string
27 if ( regionString.empty() )
30 // non-empty string, look for a colom
31 size_t foundFirstColon = regionString.find(':');
33 // store chrom strings, and numeric positions
40 // going to use entire contents of requested chromosome
41 // just store entire region string as startChrom name
42 // use BamReader methods to check if its valid for current BAM file
43 if ( foundFirstColon == string::npos ) {
44 startChrom = regionString;
46 stopChrom = regionString;
50 // colon found, so we at least have some sort of startPos requested
53 // store start chrom from beginning to first colon
54 startChrom = regionString.substr(0,foundFirstColon);
56 // look for ".." after the colon
57 size_t foundRangeDots = regionString.find("..", foundFirstColon+1);
60 // so we have a startPos but no range
61 // store contents before colon as startChrom, after as startPos
62 if ( foundRangeDots == string::npos ) {
63 startPos = atoi( regionString.substr(foundFirstColon+1).c_str() );
64 stopChrom = startChrom;
68 // ".." found, so we have some sort of range selected
71 // store startPos between first colon and range dots ".."
72 startPos = atoi( regionString.substr(foundFirstColon+1, foundRangeDots-foundFirstColon-1).c_str() );
74 // look for second colon
75 size_t foundSecondColon = regionString.find(':', foundRangeDots+1);
77 // no second colon found
78 // so we have a "standard" chrom:start..stop input format (on single chrom)
79 if ( foundSecondColon == string::npos ) {
80 stopChrom = startChrom;
81 stopPos = atoi( regionString.substr(foundRangeDots+2).c_str() );
85 // so we have a range requested across 2 chrom's
87 stopChrom = regionString.substr(foundRangeDots+2, foundSecondColon-(foundRangeDots+2));
88 stopPos = atoi( regionString.substr(foundSecondColon+1).c_str() );
93 // -------------------------------
94 // validate reference IDs & genomic positions
96 const RefVector references = reader.GetReferenceData();
98 // if startRefID not found, return false
99 int startRefID = reader.GetReferenceID(startChrom);
100 if ( startRefID == (int)references.size() ) return false;
102 // if startPos is larger than reference, return false
103 const RefData& startReference = references.at(startRefID);
104 if ( startPos > startReference.RefLength ) return false;
106 // if stopRefID not found, return false
107 int stopRefID = reader.GetReferenceID(stopChrom);
108 if ( stopRefID == (int)references.size() ) return false;
110 // if stopPosition larger than reference, return false
111 const RefData& stopReference = references.at(stopRefID);
112 if ( stopPos > stopReference.RefLength ) return false;
114 // if no stopPosition specified, set to reference end
115 if ( stopPos == -1 ) stopPos = stopReference.RefLength;
117 // -------------------------------
118 // set up Region struct & return
120 region.StartChromID = startRefID;
121 region.StopChromID = stopRefID;
122 region.StartPosition = startPos;
123 region.StopPosition = stopPos;
127 // Same as ParseRegionString() above, but accepts a BamMultiReader
128 bool Utilities::ParseRegionString(const std::string& regionString, const BamMultiReader& reader, Region& region) {
130 // -------------------------------
131 // parse region string
133 // check first for empty string
134 if ( regionString.empty() )
137 // non-empty string, look for a colom
138 size_t foundFirstColon = regionString.find(':');
140 // store chrom strings, and numeric positions
147 // going to use entire contents of requested chromosome
148 // just store entire region string as startChrom name
149 // use BamReader methods to check if its valid for current BAM file
150 if ( foundFirstColon == string::npos ) {
151 startChrom = regionString;
153 stopChrom = regionString;
157 // colon found, so we at least have some sort of startPos requested
160 // store start chrom from beginning to first colon
161 startChrom = regionString.substr(0,foundFirstColon);
163 // look for ".." after the colon
164 size_t foundRangeDots = regionString.find("..", foundFirstColon+1);
167 // so we have a startPos but no range
168 // store contents before colon as startChrom, after as startPos
169 if ( foundRangeDots == string::npos ) {
170 startPos = atoi( regionString.substr(foundFirstColon+1).c_str() );
171 stopChrom = startChrom;
175 // ".." found, so we have some sort of range selected
178 // store startPos between first colon and range dots ".."
179 startPos = atoi( regionString.substr(foundFirstColon+1, foundRangeDots-foundFirstColon-1).c_str() );
181 // look for second colon
182 size_t foundSecondColon = regionString.find(':', foundRangeDots+1);
184 // no second colon found
185 // so we have a "standard" chrom:start..stop input format (on single chrom)
186 if ( foundSecondColon == string::npos ) {
187 stopChrom = startChrom;
188 stopPos = atoi( regionString.substr(foundRangeDots+2).c_str() );
191 // second colon found
192 // so we have a range requested across 2 chrom's
194 stopChrom = regionString.substr(foundRangeDots+2, foundSecondColon-(foundRangeDots+2));
195 stopPos = atoi( regionString.substr(foundSecondColon+1).c_str() );
200 // -------------------------------
201 // validate reference IDs & genomic positions
203 const RefVector references = reader.GetReferenceData();
205 // if startRefID not found, return false
206 int startRefID = reader.GetReferenceID(startChrom);
207 if ( startRefID == (int)references.size() ) return false;
209 // if startPos is larger than reference, return false
210 const RefData& startReference = references.at(startRefID);
211 if ( startPos > startReference.RefLength ) return false;
213 // if stopRefID not found, return false
214 int stopRefID = reader.GetReferenceID(stopChrom);
215 if ( stopRefID == (int)references.size() ) return false;
217 // if stopPosition larger than reference, return false
218 const RefData& stopReference = references.at(stopRefID);
219 if ( stopPos > stopReference.RefLength ) return false;
221 // if no stopPosition specified, set to reference end
222 if ( stopPos == -1 ) stopPos = stopReference.RefLength;
224 // -------------------------------
225 // set up Region struct & return
227 region.StartChromID = startRefID;
228 region.StopChromID = stopRefID;
229 region.StartPosition = startPos;
230 region.StopPosition = stopPos;