]> git.donarmstrong.com Git - bamtools.git/blob - src/toolkit/bamtools_count.cpp
dfd1fa6bcfa7c42ff84702e224396296cbee76d8
[bamtools.git] / src / toolkit / bamtools_count.cpp
1 // ***************************************************************************
2 // bamtools_count.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 7 April 2011
7 // ---------------------------------------------------------------------------
8 // Prints alignment count for BAM file(s)
9 // ***************************************************************************
10
11 #include "bamtools_count.h"
12
13 #include <api/BamAlgorithms.h>
14 #include <api/BamMultiReader.h>
15 #include <utils/bamtools_options.h>
16 #include <utils/bamtools_utilities.h>
17 using namespace BamTools;
18
19 #include <iostream>
20 #include <string>
21 #include <vector>
22 using namespace std;
23
24 // ---------------------------------------------  
25 // CountSettings implementation
26
27 struct CountTool::CountSettings {
28
29     // flags
30     bool HasInput;
31     bool HasRegion;
32
33     // filenames
34     vector<string> InputFiles;
35     string Region;
36     
37     // constructor
38     CountSettings(void)
39         : HasInput(false)
40         , HasRegion(false)
41     { }  
42 }; 
43   
44 // ---------------------------------------------
45 // CountToolPrivate implementation
46
47 struct CountTool::CountToolPrivate {
48
49     // ctor & dtro
50     public:
51         CountToolPrivate(CountTool::CountSettings* settings)
52             : m_settings(settings)
53         { }
54
55         ~CountToolPrivate(void) { }
56
57     // interface
58     public:
59         bool Run(void);
60
61     // data members
62     private:
63         CountTool::CountSettings* m_settings;
64 };
65
66 static
67 void printAlignments(const vector<BamAlignment>& alignments) {
68
69     vector<BamAlignment>::const_iterator alIter = alignments.begin();
70     vector<BamAlignment>::const_iterator alEnd  = alignments.end();
71     for ( ; alIter != alEnd; ++alIter ) {
72         const BamAlignment& a = (*alIter);
73
74         cerr << a.Name
75              << "\t" << a.RefID << ":" << a.Position;
76
77         int aqValue;
78         bool hasTag = a.GetTag("Aq", aqValue);
79         cerr << "\tAq=";
80         if ( hasTag ) cerr << aqValue;
81         else cerr << "?";
82         cerr << endl;
83     }
84 }
85
86 bool CountTool::CountToolPrivate::Run(void) {
87
88     // if no '-in' args supplied, default to stdin
89     if ( !m_settings->HasInput )
90         m_settings->InputFiles.push_back(Options::StandardIn());
91
92     // open reader without index
93     BamMultiReader reader;
94     if ( !reader.Open(m_settings->InputFiles) ) {
95         cerr << "bamtools count ERROR: could not open input BAM file(s)... Aborting." << endl;
96         return false;
97     }
98
99     // alignment counter
100     BamAlignment al;
101     int alignmentCount(0);
102
103     // if no region specified, count entire file
104     if ( !m_settings->HasRegion ) {
105
106
107         vector<BamAlignment> alignments;
108         while ( reader.GetNextAlignment(al) ) {
109             ++alignmentCount;
110
111             if ( alignments.size() < 100 )
112                 alignments.push_back(al);
113         }
114
115         cerr << endl
116              << "------------------------------" << endl
117              << "Unsorted Alignments" << endl
118              << "------------------------------" << endl
119              << endl;
120         std::stable_sort(alignments.begin(), alignments.end(), Algorithms::Unsorted());
121         printAlignments(alignments);
122         cerr << "------------------------------" << endl
123              << endl;
124
125         cerr << endl
126              << "------------------------------" << endl
127              << "Sorted Alignments (by name)" << endl
128              << "------------------------------" << endl
129              << endl;
130         std::sort(alignments.begin(), alignments.end(), Algorithms::SortByName<>());
131         printAlignments(alignments);
132         cerr << endl
133              << "------------------------------" << endl
134              << endl;
135
136         cerr << endl
137              << "------------------------------" << endl
138              << "Sorted Alignments (by tag Aq)" << endl
139              << "------------------------------" << endl
140              << endl;
141         std::sort(alignments.begin(), alignments.end(), Algorithms::SortByTag<int>("Aq"));
142         printAlignments(alignments);
143         cerr << endl
144              << "------------------------------" << endl
145              << endl;
146
147         cerr << endl
148              << "------------------------------" << endl
149              << "Sorted Alignments (by tag Aq) desc" << endl
150              << "------------------------------" << endl
151              << endl;
152         std::sort(alignments.begin(), alignments.end(), Algorithms::SortByTag<int, std::greater>("Aq"));
153         printAlignments(alignments);
154         cerr << endl
155              << "------------------------------" << endl
156              << endl;
157
158
159
160
161 //        // ########################################
162 //        // original
163 //        // ########################################
164 //
165 //        while ( reader.GetNextAlignmentCore(al) )
166 //            ++alignmentCount;
167 //
168 //        //#########################################
169     }
170
171     // otherwise attempt to use region as constraint
172     else {
173
174         // if region string parses OK
175         BamRegion region;
176         if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
177
178             // attempt to find index files
179             reader.LocateIndexes();
180
181             // if index data available for all BAM files, we can use SetRegion
182             if ( reader.IsIndexLoaded() ) {
183
184                 vector<BamAlignment> alignments;
185
186                 alignments = Algorithms::SortReaderRegion(reader, region, Algorithms::SortByName<>() );
187                 printAlignments(alignments);
188
189                 cerr << "################################" << endl;
190
191                 alignments = Algorithms::SortReaderRegion(reader, region, Algorithms::SortByTag<int>("Aq"));
192                 printAlignments(alignments);
193
194 //                // attempt to set region on reader
195 //                if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) {
196 //                    cerr << "bamtools count ERROR: set region failed. Check that REGION describes a valid range" << endl;
197 //                    reader.Close();
198 //                    return false;
199 //                }
200
201 //                // everything checks out, just iterate through specified region, counting alignments
202 //                while ( reader.GetNextAlignmentCore(al) )
203 //                    ++alignmentCount;
204             }
205
206             // no index data available, we have to iterate through until we
207             // find overlapping alignments
208             else {
209                 while ( reader.GetNextAlignmentCore(al) ) {
210                     if ( (al.RefID >= region.LeftRefID)  && ( (al.Position + al.Length) >= region.LeftPosition ) &&
211                           (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) )
212                     {
213                         ++alignmentCount;
214                     }
215                 }
216             }
217         }
218
219         // error parsing REGION string
220         else {
221             cerr << "bamtools count ERROR: could not parse REGION - " << m_settings->Region << endl;
222             cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid"
223                  << endl;
224             reader.Close();
225             return false;
226         }
227     }
228
229     // print results
230     cout << alignmentCount << endl;
231
232     // clean up & exit
233     reader.Close();
234     return true;
235 }
236
237 // ---------------------------------------------
238 // CountTool implementation
239
240 CountTool::CountTool(void) 
241     : AbstractTool()
242     , m_settings(new CountSettings)
243     , m_impl(0)
244
245     // set program details
246     Options::SetProgramInfo("bamtools count", "prints number of alignments in BAM file(s)", "[-in <filename> -in <filename> ...] [-region <REGION>]");
247     
248     // set up options 
249     OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
250     Options::AddValueOption("-in",     "BAM filename", "the input BAM file(s)", "", m_settings->HasInput,  m_settings->InputFiles, IO_Opts, Options::StandardIn());
251     Options::AddValueOption("-region", "REGION",       "genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
252 }
253
254 CountTool::~CountTool(void) { 
255
256     delete m_settings;
257     m_settings = 0;
258
259     delete m_impl;
260     m_impl = 0;
261 }
262
263 int CountTool::Help(void) { 
264     Options::DisplayHelp();
265     return 0;
266
267
268 int CountTool::Run(int argc, char* argv[]) { 
269
270     // parse command line arguments
271     Options::Parse(argc, argv, 1);
272
273     // initialize CountTool with settings
274     m_impl = new CountToolPrivate(m_settings);
275
276     // run CountTool, return success/fail
277     if ( m_impl->Run() )
278         return 0;
279     else
280         return 1;
281 }