]> git.donarmstrong.com Git - bamtools.git/blob - src/toolkit/bamtools_count.cpp
7244f241361cbba1603f3f1d93b0b1732dc1e9d4
[bamtools.git] / src / toolkit / bamtools_count.cpp
1 // ***************************************************************************
2 // bamtools_count.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 7 April 2011
6 // ---------------------------------------------------------------------------
7 // Prints alignment count for BAM file(s)
8 // ***************************************************************************
9
10 #include "bamtools_count.h"
11
12 #include <api/BamAlgorithms.h>
13 #include <api/BamMultiReader.h>
14 #include <utils/bamtools_options.h>
15 #include <utils/bamtools_utilities.h>
16 using namespace BamTools;
17
18 #include <iostream>
19 #include <string>
20 #include <vector>
21 using namespace std;
22
23 // ---------------------------------------------  
24 // CountSettings implementation
25
26 struct CountTool::CountSettings {
27
28     // flags
29     bool HasInput;
30     bool HasRegion;
31
32     // filenames
33     vector<string> InputFiles;
34     string Region;
35     
36     // constructor
37     CountSettings(void)
38         : HasInput(false)
39         , HasRegion(false)
40     { }  
41 }; 
42   
43 // ---------------------------------------------
44 // CountToolPrivate implementation
45
46 struct CountTool::CountToolPrivate {
47
48     // ctor & dtro
49     public:
50         CountToolPrivate(CountTool::CountSettings* settings)
51             : m_settings(settings)
52         { }
53
54         ~CountToolPrivate(void) { }
55
56     // interface
57     public:
58         bool Run(void);
59
60     // data members
61     private:
62         CountTool::CountSettings* m_settings;
63 };
64
65 static
66 void printAlignments(const vector<BamAlignment>& alignments) {
67
68     vector<BamAlignment>::const_iterator alIter = alignments.begin();
69     vector<BamAlignment>::const_iterator alEnd  = alignments.end();
70     for ( ; alIter != alEnd; ++alIter ) {
71         const BamAlignment& a = (*alIter);
72
73         cerr << a.Name
74              << "\t" << a.RefID << ":" << a.Position;
75
76         int aqValue;
77         bool hasTag = a.GetTag("Aq", aqValue);
78         cerr << "\tAq=";
79         if ( hasTag ) cerr << aqValue;
80         else cerr << "?";
81         cerr << endl;
82     }
83 }
84
85 bool CountTool::CountToolPrivate::Run(void) {
86
87     // if no '-in' args supplied, default to stdin
88     if ( !m_settings->HasInput )
89         m_settings->InputFiles.push_back(Options::StandardIn());
90
91     // open reader without index
92     BamMultiReader reader;
93     if ( !reader.Open(m_settings->InputFiles) ) {
94         cerr << "bamtools count ERROR: could not open input BAM file(s)... Aborting." << endl;
95         return false;
96     }
97
98     // alignment counter
99     BamAlignment al;
100     int alignmentCount(0);
101
102     // if no region specified, count entire file
103     if ( !m_settings->HasRegion ) {
104
105
106         vector<BamAlignment> alignments;
107         while ( reader.GetNextAlignment(al) ) {
108             ++alignmentCount;
109
110             if ( alignments.size() < 100 )
111                 alignments.push_back(al);
112         }
113
114         using namespace BamTools::Algorithms;
115
116         cerr << endl
117              << "------------------------------" << endl
118              << "Unsorted Alignments" << endl
119              << "------------------------------" << endl
120              << endl;
121         std::stable_sort(alignments.begin(), alignments.end(), Sort::Unsorted());
122         printAlignments(alignments);
123         cerr << "------------------------------" << endl
124              << endl;
125
126         cerr << endl
127              << "------------------------------" << endl
128              << "Sorted Alignments (by name)" << endl
129              << "------------------------------" << endl
130              << endl;
131 //        std::sort(alignments.begin(), alignments.end(), Sort::ByName());
132         Algorithms::SortAlignments(alignments, Sort::ByName());
133         printAlignments(alignments);
134         cerr << endl
135              << "------------------------------" << endl
136              << endl;
137
138         cerr << endl
139              << "------------------------------" << endl
140              << "Sorted Alignments (by tag Aq)" << endl
141              << "------------------------------" << endl
142              << endl;
143 //        std::sort(alignments.begin(), alignments.end(), Sort::ByTag<int>("Aq"));
144         Algorithms::SortAlignments(alignments, Sort::ByTag<int>("Aq"));
145         printAlignments(alignments);
146         cerr << endl
147              << "------------------------------" << endl
148              << endl;
149
150         cerr << endl
151              << "------------------------------" << endl
152              << "Sorted Alignments (by tag Aq) desc" << endl
153              << "------------------------------" << endl
154              << endl;
155         std::sort(alignments.begin(), alignments.end(), Sort::ByTag<int>("Aq", Sort::DescendingOrder));
156         printAlignments(alignments);
157         cerr << endl
158              << "------------------------------" << endl
159              << endl;
160
161
162
163
164 //        // ########################################
165 //        // original
166 //        // ########################################
167 //
168 //        while ( reader.GetNextAlignmentCore(al) )
169 //            ++alignmentCount;
170 //
171 //        //#########################################
172     }
173
174     // otherwise attempt to use region as constraint
175     else {
176
177         // if region string parses OK
178         BamRegion region;
179         if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
180
181             // attempt to find index files
182             reader.LocateIndexes();
183
184             // if index data available for all BAM files, we can use SetRegion
185             if ( reader.HasIndexes() ) {
186
187                 vector<BamAlignment> alignments;
188                 using namespace BamTools::Algorithms;
189
190                 alignments = GetSortedRegion(reader, region, Sort::ByName() );
191                 printAlignments(alignments);
192
193                 cerr << "################################" << endl;
194
195                 alignments = GetSortedRegion(reader, region, Sort::ByTag<int>("Aq"));
196                 printAlignments(alignments);
197
198 //                // attempt to set region on reader
199 //                if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) {
200 //                    cerr << "bamtools count ERROR: set region failed. Check that REGION describes a valid range" << endl;
201 //                    reader.Close();
202 //                    return false;
203 //                }
204
205 //                // everything checks out, just iterate through specified region, counting alignments
206 //                while ( reader.GetNextAlignmentCore(al) )
207 //                    ++alignmentCount;
208             }
209
210             // no index data available, we have to iterate through until we
211             // find overlapping alignments
212             else {
213                 while ( reader.GetNextAlignmentCore(al) ) {
214                     if ( (al.RefID >= region.LeftRefID)  && ( (al.Position + al.Length) >= region.LeftPosition ) &&
215                           (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) )
216                     {
217                         ++alignmentCount;
218                     }
219                 }
220             }
221         }
222
223         // error parsing REGION string
224         else {
225             cerr << "bamtools count ERROR: could not parse REGION - " << m_settings->Region << endl;
226             cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid"
227                  << endl;
228             reader.Close();
229             return false;
230         }
231     }
232
233     // print results
234     cout << alignmentCount << endl;
235
236     // clean up & exit
237     reader.Close();
238     return true;
239 }
240
241 // ---------------------------------------------
242 // CountTool implementation
243
244 CountTool::CountTool(void) 
245     : AbstractTool()
246     , m_settings(new CountSettings)
247     , m_impl(0)
248
249     // set program details
250     Options::SetProgramInfo("bamtools count", "prints number of alignments in BAM file(s)", "[-in <filename> -in <filename> ...] [-region <REGION>]");
251     
252     // set up options 
253     OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
254     Options::AddValueOption("-in",     "BAM filename", "the input BAM file(s)", "", m_settings->HasInput,  m_settings->InputFiles, IO_Opts, Options::StandardIn());
255     Options::AddValueOption("-region", "REGION",       "genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
256 }
257
258 CountTool::~CountTool(void) { 
259
260     delete m_settings;
261     m_settings = 0;
262
263     delete m_impl;
264     m_impl = 0;
265 }
266
267 int CountTool::Help(void) { 
268     Options::DisplayHelp();
269     return 0;
270
271
272 int CountTool::Run(int argc, char* argv[]) { 
273
274     // parse command line arguments
275     Options::Parse(argc, argv, 1);
276
277     // initialize CountTool with settings
278     m_impl = new CountToolPrivate(m_settings);
279
280     // run CountTool, return success/fail
281     if ( m_impl->Run() )
282         return 0;
283     else
284         return 1;
285 }