1 // ***************************************************************************
2 // bamtools_utilities.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 30 August 2010
7 // ---------------------------------------------------------------------------
8 // Provides general utilities used by BamTools sub-tools.
9 // ***************************************************************************
13 #include "bamtools_utilities.h"
14 #include "BamReader.h"
15 #include "BamMultiReader.h"
17 using namespace BamTools;
19 // check if a file exists
20 bool Utilities::FileExists(const std::string& filename) {
22 return stat(filename.c_str(), &fileInfo) == 0;
25 // Parses a region string, does validation (valid ID's, positions), stores in Region struct
26 // Returns success (true/false)
27 bool Utilities::ParseRegionString(const std::string& regionString, const BamReader& reader, BamRegion& region) {
29 // -------------------------------
30 // parse region string
32 // check first for empty string
33 if ( regionString.empty() )
36 // non-empty string, look for a colom
37 size_t foundFirstColon = regionString.find(':');
39 // store chrom strings, and numeric positions
46 // going to use entire contents of requested chromosome
47 // just store entire region string as startChrom name
48 // use BamReader methods to check if its valid for current BAM file
49 if ( foundFirstColon == string::npos ) {
50 startChrom = regionString;
52 stopChrom = regionString;
56 // colon found, so we at least have some sort of startPos requested
59 // store start chrom from beginning to first colon
60 startChrom = regionString.substr(0,foundFirstColon);
62 // look for ".." after the colon
63 size_t foundRangeDots = regionString.find("..", foundFirstColon+1);
66 // so we have a startPos but no range
67 // store contents before colon as startChrom, after as startPos
68 if ( foundRangeDots == string::npos ) {
69 startPos = atoi( regionString.substr(foundFirstColon+1).c_str() );
70 stopChrom = startChrom;
74 // ".." found, so we have some sort of range selected
77 // store startPos between first colon and range dots ".."
78 startPos = atoi( regionString.substr(foundFirstColon+1, foundRangeDots-foundFirstColon-1).c_str() );
80 // look for second colon
81 size_t foundSecondColon = regionString.find(':', foundRangeDots+1);
83 // no second colon found
84 // so we have a "standard" chrom:start..stop input format (on single chrom)
85 if ( foundSecondColon == string::npos ) {
86 stopChrom = startChrom;
87 stopPos = atoi( regionString.substr(foundRangeDots+2).c_str() );
91 // so we have a range requested across 2 chrom's
93 stopChrom = regionString.substr(foundRangeDots+2, foundSecondColon-(foundRangeDots+2));
94 stopPos = atoi( regionString.substr(foundSecondColon+1).c_str() );
99 // -------------------------------
100 // validate reference IDs & genomic positions
102 const RefVector references = reader.GetReferenceData();
104 // if startRefID not found, return false
105 int startRefID = reader.GetReferenceID(startChrom);
106 if ( startRefID == (int)references.size() ) return false;
108 // if startPos is larger than reference, return false
109 const RefData& startReference = references.at(startRefID);
110 if ( startPos > startReference.RefLength ) return false;
112 // if stopRefID not found, return false
113 int stopRefID = reader.GetReferenceID(stopChrom);
114 if ( stopRefID == (int)references.size() ) return false;
116 // if stopPosition larger than reference, return false
117 const RefData& stopReference = references.at(stopRefID);
118 if ( stopPos > stopReference.RefLength ) return false;
120 // if no stopPosition specified, set to reference end
121 if ( stopPos == -1 ) stopPos = stopReference.RefLength;
123 // -------------------------------
124 // set up Region struct & return
126 region.LeftRefID = startRefID;
127 region.LeftPosition = startPos;
128 region.RightRefID = stopRefID;;
129 region.RightPosition = stopPos;
133 // Same as ParseRegionString() above, but accepts a BamMultiReader
134 bool Utilities::ParseRegionString(const std::string& regionString, const BamMultiReader& reader, BamRegion& region) {
136 // -------------------------------
137 // parse region string
139 // check first for empty string
140 if ( regionString.empty() )
143 // non-empty string, look for a colom
144 size_t foundFirstColon = regionString.find(':');
146 // store chrom strings, and numeric positions
153 // going to use entire contents of requested chromosome
154 // just store entire region string as startChrom name
155 // use BamReader methods to check if its valid for current BAM file
156 if ( foundFirstColon == string::npos ) {
157 startChrom = regionString;
159 stopChrom = regionString;
163 // colon found, so we at least have some sort of startPos requested
166 // store start chrom from beginning to first colon
167 startChrom = regionString.substr(0,foundFirstColon);
169 // look for ".." after the colon
170 size_t foundRangeDots = regionString.find("..", foundFirstColon+1);
173 // so we have a startPos but no range
174 // store contents before colon as startChrom, after as startPos
175 if ( foundRangeDots == string::npos ) {
176 startPos = atoi( regionString.substr(foundFirstColon+1).c_str() );
177 stopChrom = startChrom;
181 // ".." found, so we have some sort of range selected
184 // store startPos between first colon and range dots ".."
185 startPos = atoi( regionString.substr(foundFirstColon+1, foundRangeDots-foundFirstColon-1).c_str() );
187 // look for second colon
188 size_t foundSecondColon = regionString.find(':', foundRangeDots+1);
190 // no second colon found
191 // so we have a "standard" chrom:start..stop input format (on single chrom)
192 if ( foundSecondColon == string::npos ) {
193 stopChrom = startChrom;
194 stopPos = atoi( regionString.substr(foundRangeDots+2).c_str() );
197 // second colon found
198 // so we have a range requested across 2 chrom's
200 stopChrom = regionString.substr(foundRangeDots+2, foundSecondColon-(foundRangeDots+2));
201 stopPos = atoi( regionString.substr(foundSecondColon+1).c_str() );
206 // -------------------------------
207 // validate reference IDs & genomic positions
209 const RefVector references = reader.GetReferenceData();
211 // if startRefID not found, return false
212 int startRefID = reader.GetReferenceID(startChrom);
213 if ( startRefID == (int)references.size() ) return false;
215 // if startPos is larger than reference, return false
216 const RefData& startReference = references.at(startRefID);
217 if ( startPos > startReference.RefLength ) return false;
219 // if stopRefID not found, return false
220 int stopRefID = reader.GetReferenceID(stopChrom);
221 if ( stopRefID == (int)references.size() ) return false;
223 // if stopPosition larger than reference, return false
224 const RefData& stopReference = references.at(stopRefID);
225 if ( stopPos > stopReference.RefLength ) return false;
227 // if no stopPosition specified, set to reference end
228 if ( stopPos == -1 ) stopPos = stopReference.RefLength;
230 // -------------------------------
231 // set up Region struct & return
233 region.LeftRefID = startRefID;
234 region.LeftPosition = startPos;
235 region.RightRefID = stopRefID;;
236 region.RightPosition = stopPos;