]> git.donarmstrong.com Git - bamtools.git/blob - BamReaderMain.cpp
5426763a76c20b9fb03ba95a36f135f35284a62a
[bamtools.git] / BamReaderMain.cpp
1 // BamReaderMain.cpp\r
2 \r
3 // Derek Barnett\r
4 // Marth Lab, Boston College\r
5 // Last modified: 6 April 2009\r
6 \r
7 #include "BamReader.h"\r
8 \r
9 // C++ includes\r
10 #include <iostream>\r
11 using std::cerr;\r
12 using std::cout;\r
13 using std::endl;\r
14 \r
15 #include <vector>\r
16 using std::vector;\r
17 \r
18 #include <string>\r
19 using std::string;\r
20 \r
21 int main(int argc, char* argv[]) {\r
22 \r
23         // check command line parameters\r
24         if (argc != 3) {\r
25                 cerr << endl;\r
26                 cerr << "Usage: BamReaderTest <bamFile> <bamIndexFile>" << endl;\r
27                 cerr << endl;\r
28                 exit(1);\r
29         }\r
30 \r
31         // get filenames from command line\r
32         const char* bamFilename = argv[1];\r
33         const char* bamIndexFilename = argv[2];\r
34 \r
35         string refName;\r
36         int refID;\r
37 \r
38         int alignmentCount;\r
39 \r
40         vector<string> refNames;\r
41 \r
42         BamAlignment bAlignment;\r
43         BamAlignmentVector alignments;\r
44 \r
45         RefVector references;\r
46 \r
47         // ------------------------------------------------------------------------------------------------------ //\r
48         // Declare BamReader & open BAM file - automatically loads up header and index file (.bai) information\r
49         // ------------------------------------------------------------------------------------------------------ //\r
50         \r
51         BamReader bReader(bamFilename, bamIndexFilename);\r
52 \r
53         cerr << endl;\r
54         cerr << "Opening BamReader for BAM file: " << bamFilename << " ..... ";\r
55         \r
56         if ( bReader.Open() ) { \r
57                 cerr << "ok" << endl; \r
58         }\r
59         else {\r
60                 cerr << "error" << endl;\r
61                 exit(1);\r
62         }\r
63         \r
64         // ------------------------------------------------------------ //\r
65         // Find out how many reference sequences are in BAM file\r
66         // ------------------------------------------------------------ //\r
67         \r
68         references = bReader.GetReferenceData();\r
69 \r
70         cerr << endl;\r
71         cerr << "Total number of ref seqs: " << references.size() << endl;\r
72         \r
73         // ---------------------------------------------------------------------------- //\r
74         // Get the names/lengths of all the reference sequences that have alignments\r
75         // ---------------------------------------------------------------------------- //\r
76         \r
77         cerr << endl;\r
78         cerr << "All ref sequences with alignments:" << endl;\r
79         cerr << endl;\r
80 \r
81 \r
82         if ( !references.empty() ) {\r
83                 \r
84                 RefVector::iterator refIter = references.begin();\r
85                 RefVector::iterator refEnd  = references.end();\r
86                 \r
87                 refID = 0;\r
88                 // iterate over reference names, print to STDERR\r
89                 for( ; refIter != refEnd; ++refIter) {\r
90 \r
91                         if ( (*refIter).RefHasAlignments ) {\r
92                                 cerr << "ID:     " << refID << endl;\r
93                                 cerr << "Name:   " << (*refIter).RefName << endl;\r
94                                 cerr << "Length: " << (*refIter).RefLength << endl;\r
95                                 cerr << endl;\r
96                         }\r
97 \r
98                         ++refID;\r
99                 }\r
100         }\r
101 \r
102         // --------------------------------------------------------------------------------------------- //\r
103         // Get the SAM-style header text, if available (not available in the example file shown here)\r
104         // --------------------------------------------------------------------------------------------- //     \r
105         \r
106         cerr << "------------------------------------" << endl;\r
107         cerr << "SAM header text: " << endl;\r
108         \r
109         // get (SAM) header text\r
110         string header = bReader.GetHeaderText();\r
111         \r
112         cerr << ( (header.empty()) ? "no header data" : header ) << endl;\r
113         cerr << "------------------------------------" << endl;\r
114 \r
115         // --------------------------------------------------------------------------------------------- //\r
116         // Here we start accessing alignments\r
117         // The first method shows how to iterate over all reads in a BamFile\r
118         // This method will work on any BAM file (sorted/non-sorted, with/without an index)\r
119         // --------------------------------------------------------------------------------------------- //\r
120 \r
121         // Call Rewind() to make sure you're at the 1st alignment in the file\r
122         // Please note, however, it's not necessary in this case, since file pointer initially set to 1st alignment\r
123         // but this is probably a good habit to develop to ensure correctness and make your intent obvious in the code\r
124 \r
125         cerr << "Getting (up to) first 1000 alignments" << endl;\r
126 \r
127         // start at 1st alignment\r
128         if ( bReader.Rewind() ) {\r
129                 \r
130                 alignmentCount = 0;\r
131                 while ( bReader.GetNextAlignment(bAlignment) && (alignmentCount < 1000) ) {\r
132                 \r
133                         // disregard unmapped alignments\r
134                         if ( bAlignment.IsMapped() ) {\r
135 \r
136                                 ++alignmentCount;\r
137                                 \r
138                                 cout << "----------------------------" << endl;\r
139                                 cout << "Alignment " << alignmentCount << endl;\r
140                                 cout << bAlignment.Name << endl;\r
141                                 cout << bAlignment.AlignedBases << endl;\r
142                                 cout << "Aligned to " << references.at(bAlignment.RefID).RefName << ":" << bAlignment.Position << endl;\r
143                                 \r
144                                 cout << "Cigar Data: " << endl;\r
145 \r
146                                 vector<CigarOp>::const_iterator cigarIter = bAlignment.CigarData.begin();\r
147                                 vector<CigarOp>::const_iterator cigarEnd  = bAlignment.CigarData.end();\r
148                                 for ( ; cigarIter != cigarEnd; ++cigarIter ) {\r
149                                         cout << "Type: " << (*cigarIter).Type << "\tLength: " << (*cigarIter).Length << endl;\r
150                                 }\r
151 \r
152                                 if(!bAlignment.TagData.empty()) {\r
153                                   cout << "Tag data is present." << endl;\r
154                                   string readGroup;\r
155                                   if(bAlignment.GetReadGroup(readGroup)) {\r
156                                     cout << "- read group: " << readGroup << endl;\r
157                                   }\r
158                                 }\r
159                         }\r
160                 }\r
161 \r
162                 cerr << "Found " << alignmentCount << " alignments." << endl;\r
163         } else { cerr << "Could not rewind" << endl; }\r
164 \r
165         // ---------------------------------------------------------------------------------------------------------- //\r
166         // You can iterate over each individual alignment that overlaps a specified region\r
167         // Set the refID & left boundary parameters using Jump(refID, left)\r
168         // Jump() actually positions the file pointer actually before the left boundary\r
169         // This ensures that reads beginning before, but overlapping 'left' are included as well \r
170         // Client is responsible for setting/checking right boundary - see below\r
171         // ---------------------------------------------------------------------------------------------------------- //\r
172 /*\r
173         cerr << "Jumping to region" << endl;\r
174 \r
175         // get refID using a known refName\r
176         refName = "seq1";\r
177         refID = bReader.GetRefID(refName);\r
178         if (refID == (int)references.size()) { \r
179                 cerr << "Reference " << refName << " not found." << endl;\r
180                 exit(1);\r
181         }\r
182 \r
183         // set left boundary\r
184         unsigned int leftBound = 500;\r
185 \r
186         // set right boundary - either user-specified number to set a discrete region\r
187         //                      OR you can query the BamReader for the end of the reference\r
188         unsigned int rightBound = references.at(refID).RefLength;\r
189 \r
190         cerr << endl;\r
191         cerr << "Iterating over alignments on reference: " << refName << " from " << leftBound << " to ref end (" << rightBound << ")" << endl;\r
192 \r
193         // set region - specific region on reference\r
194         if ( bReader.Jump(refID, leftBound) ) { \r
195 \r
196                 alignmentCount = 0;\r
197                 while ( bReader.GetNextAlignment(bAlignment) && (bAlignment.Position <= rightBound) ) {\r
198                 \r
199                         if ( bAlignment.IsMapped() ) {\r
200 \r
201                                 ++alignmentCount;\r
202                         \r
203                                 if ( (alignmentCount % 100000) == 0) { cerr << "Retrieved " << alignmentCount << " so far..." << endl; }\r
204                         \r
205                                 cout << "----------------------------" << endl;\r
206                                 cout << "Alignment " << alignmentCount << endl;\r
207                                 cout << bAlignment.Name << endl;\r
208                                 cout << bAlignment.AlignedBases << endl;\r
209                                 cout << "Aligned to " << references.at(bAlignment.RefID).RefName << ":" << bAlignment.Position << endl;\r
210                         }\r
211                 }\r
212 \r
213                 cerr << "Found " << alignmentCount << " alignments." << endl; \r
214         } else { cerr << "Could not jump to region specified" << endl; }\r
215 \r
216         // ----------------------------------------------------------------------------------------------------- //\r
217         // You can 'rewind' back to beginning of BAM file at any time \r
218         // ----------------------------------------------------------------------------------------------------- //\r
219 \r
220         cerr << endl;\r
221         cerr << "Rewinding BAM file... then getting first 1000 alignments" << endl;\r
222 \r
223         alignmentCount = 0;\r
224         if (bReader.Rewind() ) {\r
225                 while ( bReader.GetNextAlignment(bAlignment) && (alignmentCount < 1000) ) {\r
226                 \r
227                         // disregard unmapped alignments\r
228                         if ( bAlignment.IsMapped() ) {\r
229 \r
230                                 ++alignmentCount;\r
231                         \r
232                                 cout << "----------------------------" << endl;\r
233                                 cout << "Alignment " << alignmentCount << endl;\r
234                                 cout << bAlignment.Name << endl;\r
235                                 cout << bAlignment.AlignedBases << endl;\r
236                                 cout << "Aligned to " << references.at(bAlignment.RefID).RefName << ":" << bAlignment.Position << endl;\r
237                         }\r
238                 }\r
239 \r
240                 cerr << "Found " << alignmentCount << " alignments." << endl;\r
241         } else { cerr << "Could not rewind" << endl; }\r
242 */\r
243         // ---------------------------------------------------------------------- //\r
244         // Close BamReader object (releases internal header/index data) and exit\r
245         // ---------------------------------------------------------------------- //\r
246         \r
247         cerr << endl;\r
248         cerr << "Closing BAM file: " << bamFilename << endl;\r
249         \r
250         bReader.Close();\r
251 \r
252         cerr << "Exiting..." << endl << endl;\r
253         return 0;\r
254 }\r