]> git.donarmstrong.com Git - bamtools.git/blob - src/toolkit/bamtools_count.cpp
MultiReader (&MultiMerger) now using Algorithms::Sort objects
[bamtools.git] / src / toolkit / bamtools_count.cpp
1 // ***************************************************************************
2 // bamtools_count.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 7 April 2011
7 // ---------------------------------------------------------------------------
8 // Prints alignment count for BAM file(s)
9 // ***************************************************************************
10
11 #include "bamtools_count.h"
12
13 #include <api/BamAlgorithms.h>
14 #include <api/BamMultiReader.h>
15 #include <utils/bamtools_options.h>
16 #include <utils/bamtools_utilities.h>
17 using namespace BamTools;
18
19 #include <iostream>
20 #include <string>
21 #include <vector>
22 using namespace std;
23
24 // ---------------------------------------------  
25 // CountSettings implementation
26
27 struct CountTool::CountSettings {
28
29     // flags
30     bool HasInput;
31     bool HasRegion;
32
33     // filenames
34     vector<string> InputFiles;
35     string Region;
36     
37     // constructor
38     CountSettings(void)
39         : HasInput(false)
40         , HasRegion(false)
41     { }  
42 }; 
43   
44 // ---------------------------------------------
45 // CountToolPrivate implementation
46
47 struct CountTool::CountToolPrivate {
48
49     // ctor & dtro
50     public:
51         CountToolPrivate(CountTool::CountSettings* settings)
52             : m_settings(settings)
53         { }
54
55         ~CountToolPrivate(void) { }
56
57     // interface
58     public:
59         bool Run(void);
60
61     // data members
62     private:
63         CountTool::CountSettings* m_settings;
64 };
65
66 static
67 void printAlignments(const vector<BamAlignment>& alignments) {
68
69     vector<BamAlignment>::const_iterator alIter = alignments.begin();
70     vector<BamAlignment>::const_iterator alEnd  = alignments.end();
71     for ( ; alIter != alEnd; ++alIter ) {
72         const BamAlignment& a = (*alIter);
73
74         cerr << a.Name
75              << "\t" << a.RefID << ":" << a.Position;
76
77         int aqValue;
78         bool hasTag = a.GetTag("Aq", aqValue);
79         cerr << "\tAq=";
80         if ( hasTag ) cerr << aqValue;
81         else cerr << "?";
82         cerr << endl;
83     }
84 }
85
86 bool CountTool::CountToolPrivate::Run(void) {
87
88     // if no '-in' args supplied, default to stdin
89     if ( !m_settings->HasInput )
90         m_settings->InputFiles.push_back(Options::StandardIn());
91
92     // open reader without index
93     BamMultiReader reader;
94     if ( !reader.Open(m_settings->InputFiles) ) {
95         cerr << "bamtools count ERROR: could not open input BAM file(s)... Aborting." << endl;
96         return false;
97     }
98
99     // alignment counter
100     BamAlignment al;
101     int alignmentCount(0);
102
103     // if no region specified, count entire file
104     if ( !m_settings->HasRegion ) {
105
106
107         vector<BamAlignment> alignments;
108         while ( reader.GetNextAlignment(al) ) {
109             ++alignmentCount;
110
111             if ( alignments.size() < 100 )
112                 alignments.push_back(al);
113         }
114
115         using namespace BamTools::Algorithms;
116
117         cerr << endl
118              << "------------------------------" << endl
119              << "Unsorted Alignments" << endl
120              << "------------------------------" << endl
121              << endl;
122         std::stable_sort(alignments.begin(), alignments.end(), Sort::Unsorted());
123         printAlignments(alignments);
124         cerr << "------------------------------" << endl
125              << endl;
126
127         cerr << endl
128              << "------------------------------" << endl
129              << "Sorted Alignments (by name)" << endl
130              << "------------------------------" << endl
131              << endl;
132 //        std::sort(alignments.begin(), alignments.end(), Sort::ByName());
133         Algorithms::SortAlignments(alignments, Sort::ByName());
134         printAlignments(alignments);
135         cerr << endl
136              << "------------------------------" << endl
137              << endl;
138
139         cerr << endl
140              << "------------------------------" << endl
141              << "Sorted Alignments (by tag Aq)" << endl
142              << "------------------------------" << endl
143              << endl;
144 //        std::sort(alignments.begin(), alignments.end(), Sort::ByTag<int>("Aq"));
145         Algorithms::SortAlignments(alignments, Sort::ByTag<int>("Aq"));
146         printAlignments(alignments);
147         cerr << endl
148              << "------------------------------" << endl
149              << endl;
150
151         cerr << endl
152              << "------------------------------" << endl
153              << "Sorted Alignments (by tag Aq) desc" << endl
154              << "------------------------------" << endl
155              << endl;
156         std::sort(alignments.begin(), alignments.end(), Sort::ByTag<int>("Aq", Sort::DescendingOrder));
157         printAlignments(alignments);
158         cerr << endl
159              << "------------------------------" << endl
160              << endl;
161
162
163
164
165 //        // ########################################
166 //        // original
167 //        // ########################################
168 //
169 //        while ( reader.GetNextAlignmentCore(al) )
170 //            ++alignmentCount;
171 //
172 //        //#########################################
173     }
174
175     // otherwise attempt to use region as constraint
176     else {
177
178         // if region string parses OK
179         BamRegion region;
180         if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
181
182             // attempt to find index files
183             reader.LocateIndexes();
184
185             // if index data available for all BAM files, we can use SetRegion
186             if ( reader.HasIndexes() ) {
187
188                 vector<BamAlignment> alignments;
189                 using namespace BamTools::Algorithms;
190
191                 alignments = GetSortedRegion(reader, region, Sort::ByName() );
192                 printAlignments(alignments);
193
194                 cerr << "################################" << endl;
195
196                 alignments = GetSortedRegion(reader, region, Sort::ByTag<int>("Aq"));
197                 printAlignments(alignments);
198
199 //                // attempt to set region on reader
200 //                if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) {
201 //                    cerr << "bamtools count ERROR: set region failed. Check that REGION describes a valid range" << endl;
202 //                    reader.Close();
203 //                    return false;
204 //                }
205
206 //                // everything checks out, just iterate through specified region, counting alignments
207 //                while ( reader.GetNextAlignmentCore(al) )
208 //                    ++alignmentCount;
209             }
210
211             // no index data available, we have to iterate through until we
212             // find overlapping alignments
213             else {
214                 while ( reader.GetNextAlignmentCore(al) ) {
215                     if ( (al.RefID >= region.LeftRefID)  && ( (al.Position + al.Length) >= region.LeftPosition ) &&
216                           (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) )
217                     {
218                         ++alignmentCount;
219                     }
220                 }
221             }
222         }
223
224         // error parsing REGION string
225         else {
226             cerr << "bamtools count ERROR: could not parse REGION - " << m_settings->Region << endl;
227             cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid"
228                  << endl;
229             reader.Close();
230             return false;
231         }
232     }
233
234     // print results
235     cout << alignmentCount << endl;
236
237     // clean up & exit
238     reader.Close();
239     return true;
240 }
241
242 // ---------------------------------------------
243 // CountTool implementation
244
245 CountTool::CountTool(void) 
246     : AbstractTool()
247     , m_settings(new CountSettings)
248     , m_impl(0)
249
250     // set program details
251     Options::SetProgramInfo("bamtools count", "prints number of alignments in BAM file(s)", "[-in <filename> -in <filename> ...] [-region <REGION>]");
252     
253     // set up options 
254     OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
255     Options::AddValueOption("-in",     "BAM filename", "the input BAM file(s)", "", m_settings->HasInput,  m_settings->InputFiles, IO_Opts, Options::StandardIn());
256     Options::AddValueOption("-region", "REGION",       "genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
257 }
258
259 CountTool::~CountTool(void) { 
260
261     delete m_settings;
262     m_settings = 0;
263
264     delete m_impl;
265     m_impl = 0;
266 }
267
268 int CountTool::Help(void) { 
269     Options::DisplayHelp();
270     return 0;
271
272
273 int CountTool::Run(int argc, char* argv[]) { 
274
275     // parse command line arguments
276     Options::Parse(argc, argv, 1);
277
278     // initialize CountTool with settings
279     m_impl = new CountToolPrivate(m_settings);
280
281     // run CountTool, return success/fail
282     if ( m_impl->Run() )
283         return 0;
284     else
285         return 1;
286 }