1 // ***************************************************************************
2 // bamtools_count.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 7 April 2011
7 // ---------------------------------------------------------------------------
8 // Prints alignment count for BAM file(s)
9 // ***************************************************************************
11 #include "bamtools_count.h"
13 #include <api/BamAlgorithms.h>
14 #include <api/BamMultiReader.h>
15 #include <utils/bamtools_options.h>
16 #include <utils/bamtools_utilities.h>
17 using namespace BamTools;
24 // ---------------------------------------------
25 // CountSettings implementation
27 struct CountTool::CountSettings {
34 vector<string> InputFiles;
44 // ---------------------------------------------
45 // CountToolPrivate implementation
47 struct CountTool::CountToolPrivate {
51 CountToolPrivate(CountTool::CountSettings* settings)
52 : m_settings(settings)
55 ~CountToolPrivate(void) { }
63 CountTool::CountSettings* m_settings;
67 void printAlignments(const vector<BamAlignment>& alignments) {
69 vector<BamAlignment>::const_iterator alIter = alignments.begin();
70 vector<BamAlignment>::const_iterator alEnd = alignments.end();
71 for ( ; alIter != alEnd; ++alIter ) {
72 const BamAlignment& a = (*alIter);
75 << "\t" << a.RefID << ":" << a.Position;
78 bool hasTag = a.GetTag("Aq", aqValue);
80 if ( hasTag ) cerr << aqValue;
86 bool CountTool::CountToolPrivate::Run(void) {
88 // if no '-in' args supplied, default to stdin
89 if ( !m_settings->HasInput )
90 m_settings->InputFiles.push_back(Options::StandardIn());
92 // open reader without index
93 BamMultiReader reader;
94 if ( !reader.Open(m_settings->InputFiles) ) {
95 cerr << "bamtools count ERROR: could not open input BAM file(s)... Aborting." << endl;
101 int alignmentCount(0);
103 // if no region specified, count entire file
104 if ( !m_settings->HasRegion ) {
107 vector<BamAlignment> alignments;
108 while ( reader.GetNextAlignment(al) ) {
111 if ( alignments.size() < 100 )
112 alignments.push_back(al);
116 << "------------------------------" << endl
117 << "Unsorted Alignments" << endl
118 << "------------------------------" << endl
120 std::stable_sort(alignments.begin(), alignments.end(), Algorithms::Unsorted());
121 printAlignments(alignments);
122 cerr << "------------------------------" << endl
126 << "------------------------------" << endl
127 << "Sorted Alignments (by name)" << endl
128 << "------------------------------" << endl
130 std::sort(alignments.begin(), alignments.end(), Algorithms::SortByName<>());
131 printAlignments(alignments);
133 << "------------------------------" << endl
137 << "------------------------------" << endl
138 << "Sorted Alignments (by tag Aq)" << endl
139 << "------------------------------" << endl
141 std::sort(alignments.begin(), alignments.end(), Algorithms::SortByTag<int>("Aq"));
142 printAlignments(alignments);
144 << "------------------------------" << endl
148 << "------------------------------" << endl
149 << "Sorted Alignments (by tag Aq) desc" << endl
150 << "------------------------------" << endl
152 std::sort(alignments.begin(), alignments.end(), Algorithms::SortByTag<int, std::greater>("Aq"));
153 printAlignments(alignments);
155 << "------------------------------" << endl
161 // // ########################################
163 // // ########################################
165 // while ( reader.GetNextAlignmentCore(al) )
168 // //#########################################
171 // otherwise attempt to use region as constraint
174 // if region string parses OK
176 if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
178 // attempt to find index files
179 reader.LocateIndexes();
181 // if index data available for all BAM files, we can use SetRegion
182 if ( reader.IsIndexLoaded() ) {
184 vector<BamAlignment> alignments;
186 alignments = Algorithms::SortReaderRegion(reader, region, Algorithms::SortByName<>() );
187 printAlignments(alignments);
189 cerr << "################################" << endl;
191 alignments = Algorithms::SortReaderRegion(reader, region, Algorithms::SortByTag<int>("Aq"));
192 printAlignments(alignments);
194 // // attempt to set region on reader
195 // if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) {
196 // cerr << "bamtools count ERROR: set region failed. Check that REGION describes a valid range" << endl;
201 // // everything checks out, just iterate through specified region, counting alignments
202 // while ( reader.GetNextAlignmentCore(al) )
206 // no index data available, we have to iterate through until we
207 // find overlapping alignments
209 while ( reader.GetNextAlignmentCore(al) ) {
210 if ( (al.RefID >= region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) &&
211 (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) )
219 // error parsing REGION string
221 cerr << "bamtools count ERROR: could not parse REGION - " << m_settings->Region << endl;
222 cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid"
230 cout << alignmentCount << endl;
237 // ---------------------------------------------
238 // CountTool implementation
240 CountTool::CountTool(void)
242 , m_settings(new CountSettings)
245 // set program details
246 Options::SetProgramInfo("bamtools count", "prints number of alignments in BAM file(s)", "[-in <filename> -in <filename> ...] [-region <REGION>]");
249 OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
250 Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn());
251 Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
254 CountTool::~CountTool(void) {
263 int CountTool::Help(void) {
264 Options::DisplayHelp();
268 int CountTool::Run(int argc, char* argv[]) {
270 // parse command line arguments
271 Options::Parse(argc, argv, 1);
273 // initialize CountTool with settings
274 m_impl = new CountToolPrivate(m_settings);
276 // run CountTool, return success/fail