1 // ***************************************************************************
2 // bamtools_count.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 7 April 2011
7 // ---------------------------------------------------------------------------
8 // Prints alignment count for BAM file(s)
9 // ***************************************************************************
11 #include "bamtools_count.h"
13 #include <api/BamAlgorithms.h>
14 #include <api/BamMultiReader.h>
15 #include <utils/bamtools_options.h>
16 #include <utils/bamtools_utilities.h>
17 using namespace BamTools;
24 // ---------------------------------------------
25 // CountSettings implementation
27 struct CountTool::CountSettings {
34 vector<string> InputFiles;
44 // ---------------------------------------------
45 // CountToolPrivate implementation
47 struct CountTool::CountToolPrivate {
51 CountToolPrivate(CountTool::CountSettings* settings)
52 : m_settings(settings)
55 ~CountToolPrivate(void) { }
63 CountTool::CountSettings* m_settings;
67 void printAlignments(const vector<BamAlignment>& alignments) {
69 vector<BamAlignment>::const_iterator alIter = alignments.begin();
70 vector<BamAlignment>::const_iterator alEnd = alignments.end();
71 for ( ; alIter != alEnd; ++alIter ) {
72 const BamAlignment& a = (*alIter);
75 << "\t" << a.RefID << ":" << a.Position;
78 bool hasTag = a.GetTag("Aq", aqValue);
80 if ( hasTag ) cerr << aqValue;
86 bool CountTool::CountToolPrivate::Run(void) {
88 // if no '-in' args supplied, default to stdin
89 if ( !m_settings->HasInput )
90 m_settings->InputFiles.push_back(Options::StandardIn());
92 // open reader without index
93 BamMultiReader reader;
94 if ( !reader.Open(m_settings->InputFiles) ) {
95 cerr << "bamtools count ERROR: could not open input BAM file(s)... Aborting." << endl;
101 int alignmentCount(0);
103 // if no region specified, count entire file
104 if ( !m_settings->HasRegion ) {
107 vector<BamAlignment> alignments;
108 while ( reader.GetNextAlignment(al) ) {
111 if ( alignments.size() < 100 )
112 alignments.push_back(al);
115 using namespace BamTools::Algorithms;
118 << "------------------------------" << endl
119 << "Unsorted Alignments" << endl
120 << "------------------------------" << endl
122 std::stable_sort(alignments.begin(), alignments.end(), Sort::Unsorted());
123 printAlignments(alignments);
124 cerr << "------------------------------" << endl
128 << "------------------------------" << endl
129 << "Sorted Alignments (by name)" << endl
130 << "------------------------------" << endl
132 // std::sort(alignments.begin(), alignments.end(), Sort::ByName());
133 Algorithms::SortAlignments(alignments, Sort::ByName());
134 printAlignments(alignments);
136 << "------------------------------" << endl
140 << "------------------------------" << endl
141 << "Sorted Alignments (by tag Aq)" << endl
142 << "------------------------------" << endl
144 // std::sort(alignments.begin(), alignments.end(), Sort::ByTag<int>("Aq"));
145 Algorithms::SortAlignments(alignments, Sort::ByTag<int>("Aq"));
146 printAlignments(alignments);
148 << "------------------------------" << endl
152 << "------------------------------" << endl
153 << "Sorted Alignments (by tag Aq) desc" << endl
154 << "------------------------------" << endl
156 std::sort(alignments.begin(), alignments.end(), Sort::ByTag<int>("Aq", Sort::DescendingOrder));
157 printAlignments(alignments);
159 << "------------------------------" << endl
165 // // ########################################
167 // // ########################################
169 // while ( reader.GetNextAlignmentCore(al) )
172 // //#########################################
175 // otherwise attempt to use region as constraint
178 // if region string parses OK
180 if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
182 // attempt to find index files
183 reader.LocateIndexes();
185 // if index data available for all BAM files, we can use SetRegion
186 if ( reader.HasIndexes() ) {
188 vector<BamAlignment> alignments;
189 using namespace BamTools::Algorithms;
191 alignments = GetSortedRegion(reader, region, Sort::ByName() );
192 printAlignments(alignments);
194 cerr << "################################" << endl;
196 alignments = GetSortedRegion(reader, region, Sort::ByTag<int>("Aq"));
197 printAlignments(alignments);
199 // // attempt to set region on reader
200 // if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) {
201 // cerr << "bamtools count ERROR: set region failed. Check that REGION describes a valid range" << endl;
206 // // everything checks out, just iterate through specified region, counting alignments
207 // while ( reader.GetNextAlignmentCore(al) )
211 // no index data available, we have to iterate through until we
212 // find overlapping alignments
214 while ( reader.GetNextAlignmentCore(al) ) {
215 if ( (al.RefID >= region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) &&
216 (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) )
224 // error parsing REGION string
226 cerr << "bamtools count ERROR: could not parse REGION - " << m_settings->Region << endl;
227 cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid"
235 cout << alignmentCount << endl;
242 // ---------------------------------------------
243 // CountTool implementation
245 CountTool::CountTool(void)
247 , m_settings(new CountSettings)
250 // set program details
251 Options::SetProgramInfo("bamtools count", "prints number of alignments in BAM file(s)", "[-in <filename> -in <filename> ...] [-region <REGION>]");
254 OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
255 Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn());
256 Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
259 CountTool::~CountTool(void) {
268 int CountTool::Help(void) {
269 Options::DisplayHelp();
273 int CountTool::Run(int argc, char* argv[]) {
275 // parse command line arguments
276 Options::Parse(argc, argv, 1);
278 // initialize CountTool with settings
279 m_impl = new CountToolPrivate(m_settings);
281 // run CountTool, return success/fail