1 // ***************************************************************************
2 // bamtools_utilities.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 3 September 2010
7 // ---------------------------------------------------------------------------
8 // Provides general utilities used by BamTools sub-tools.
9 // ***************************************************************************
14 #include "bamtools_utilities.h"
15 #include "BamReader.h"
16 #include "BamMultiReader.h"
18 using namespace BamTools;
20 // check if a file exists
21 bool Utilities::FileExists(const std::string& filename) {
22 ifstream f(filename.c_str(), ifstream::in);
26 // Parses a region string, does validation (valid ID's, positions), stores in Region struct
27 // Returns success (true/false)
28 bool Utilities::ParseRegionString(const std::string& regionString, const BamReader& reader, BamRegion& region) {
30 // -------------------------------
31 // parse region string
33 // check first for empty string
34 if ( regionString.empty() )
37 // non-empty string, look for a colom
38 size_t foundFirstColon = regionString.find(':');
40 // store chrom strings, and numeric positions
47 // going to use entire contents of requested chromosome
48 // just store entire region string as startChrom name
49 // use BamReader methods to check if its valid for current BAM file
50 if ( foundFirstColon == string::npos ) {
51 startChrom = regionString;
53 stopChrom = regionString;
57 // colon found, so we at least have some sort of startPos requested
60 // store start chrom from beginning to first colon
61 startChrom = regionString.substr(0,foundFirstColon);
63 // look for ".." after the colon
64 size_t foundRangeDots = regionString.find("..", foundFirstColon+1);
67 // so we have a startPos but no range
68 // store contents before colon as startChrom, after as startPos
69 if ( foundRangeDots == string::npos ) {
70 startPos = atoi( regionString.substr(foundFirstColon+1).c_str() );
71 stopChrom = startChrom;
75 // ".." found, so we have some sort of range selected
78 // store startPos between first colon and range dots ".."
79 startPos = atoi( regionString.substr(foundFirstColon+1, foundRangeDots-foundFirstColon-1).c_str() );
81 // look for second colon
82 size_t foundSecondColon = regionString.find(':', foundRangeDots+1);
84 // no second colon found
85 // so we have a "standard" chrom:start..stop input format (on single chrom)
86 if ( foundSecondColon == string::npos ) {
87 stopChrom = startChrom;
88 stopPos = atoi( regionString.substr(foundRangeDots+2).c_str() );
92 // so we have a range requested across 2 chrom's
94 stopChrom = regionString.substr(foundRangeDots+2, foundSecondColon-(foundRangeDots+2));
95 stopPos = atoi( regionString.substr(foundSecondColon+1).c_str() );
100 // -------------------------------
101 // validate reference IDs & genomic positions
103 const RefVector references = reader.GetReferenceData();
105 // if startRefID not found, return false
106 int startRefID = reader.GetReferenceID(startChrom);
107 if ( startRefID == (int)references.size() ) return false;
109 // if startPos is larger than reference, return false
110 const RefData& startReference = references.at(startRefID);
111 if ( startPos > startReference.RefLength ) return false;
113 // if stopRefID not found, return false
114 int stopRefID = reader.GetReferenceID(stopChrom);
115 if ( stopRefID == (int)references.size() ) return false;
117 // if stopPosition larger than reference, return false
118 const RefData& stopReference = references.at(stopRefID);
119 if ( stopPos > stopReference.RefLength ) return false;
121 // if no stopPosition specified, set to reference end
122 if ( stopPos == -1 ) stopPos = stopReference.RefLength;
124 // -------------------------------
125 // set up Region struct & return
127 region.LeftRefID = startRefID;
128 region.LeftPosition = startPos;
129 region.RightRefID = stopRefID;;
130 region.RightPosition = stopPos;
134 // Same as ParseRegionString() above, but accepts a BamMultiReader
135 bool Utilities::ParseRegionString(const std::string& regionString, const BamMultiReader& reader, BamRegion& region) {
137 // -------------------------------
138 // parse region string
140 // check first for empty string
141 if ( regionString.empty() )
144 // non-empty string, look for a colom
145 size_t foundFirstColon = regionString.find(':');
147 // store chrom strings, and numeric positions
154 // going to use entire contents of requested chromosome
155 // just store entire region string as startChrom name
156 // use BamReader methods to check if its valid for current BAM file
157 if ( foundFirstColon == string::npos ) {
158 startChrom = regionString;
160 stopChrom = regionString;
164 // colon found, so we at least have some sort of startPos requested
167 // store start chrom from beginning to first colon
168 startChrom = regionString.substr(0,foundFirstColon);
170 // look for ".." after the colon
171 size_t foundRangeDots = regionString.find("..", foundFirstColon+1);
174 // so we have a startPos but no range
175 // store contents before colon as startChrom, after as startPos
176 if ( foundRangeDots == string::npos ) {
177 startPos = atoi( regionString.substr(foundFirstColon+1).c_str() );
178 stopChrom = startChrom;
182 // ".." found, so we have some sort of range selected
185 // store startPos between first colon and range dots ".."
186 startPos = atoi( regionString.substr(foundFirstColon+1, foundRangeDots-foundFirstColon-1).c_str() );
188 // look for second colon
189 size_t foundSecondColon = regionString.find(':', foundRangeDots+1);
191 // no second colon found
192 // so we have a "standard" chrom:start..stop input format (on single chrom)
193 if ( foundSecondColon == string::npos ) {
194 stopChrom = startChrom;
195 stopPos = atoi( regionString.substr(foundRangeDots+2).c_str() );
198 // second colon found
199 // so we have a range requested across 2 chrom's
201 stopChrom = regionString.substr(foundRangeDots+2, foundSecondColon-(foundRangeDots+2));
202 stopPos = atoi( regionString.substr(foundSecondColon+1).c_str() );
207 // -------------------------------
208 // validate reference IDs & genomic positions
210 const RefVector references = reader.GetReferenceData();
212 // if startRefID not found, return false
213 int startRefID = reader.GetReferenceID(startChrom);
214 if ( startRefID == (int)references.size() ) return false;
216 // if startPos is larger than reference, return false
217 const RefData& startReference = references.at(startRefID);
218 if ( startPos > startReference.RefLength ) return false;
220 // if stopRefID not found, return false
221 int stopRefID = reader.GetReferenceID(stopChrom);
222 if ( stopRefID == (int)references.size() ) return false;
224 // if stopPosition larger than reference, return false
225 const RefData& stopReference = references.at(stopRefID);
226 if ( stopPos > stopReference.RefLength ) return false;
228 // if no stopPosition specified, set to reference end
229 if ( stopPos == -1 ) stopPos = stopReference.RefLength;
231 // -------------------------------
232 // set up Region struct & return
234 region.LeftRefID = startRefID;
235 region.LeftPosition = startPos;
236 region.RightRefID = stopRefID;;
237 region.RightPosition = stopPos;