1 // ***************************************************************************
2 // bamtools_count.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // ---------------------------------------------------------------------------
5 // Last modified: 7 April 2011
6 // ---------------------------------------------------------------------------
7 // Prints alignment count for BAM file(s)
8 // ***************************************************************************
10 #include "bamtools_count.h"
12 #include <api/BamAlgorithms.h>
13 #include <api/BamMultiReader.h>
14 #include <utils/bamtools_options.h>
15 #include <utils/bamtools_utilities.h>
16 using namespace BamTools;
23 // ---------------------------------------------
24 // CountSettings implementation
26 struct CountTool::CountSettings {
33 vector<string> InputFiles;
43 // ---------------------------------------------
44 // CountToolPrivate implementation
46 struct CountTool::CountToolPrivate {
50 CountToolPrivate(CountTool::CountSettings* settings)
51 : m_settings(settings)
54 ~CountToolPrivate(void) { }
62 CountTool::CountSettings* m_settings;
66 void printAlignments(const vector<BamAlignment>& alignments) {
68 vector<BamAlignment>::const_iterator alIter = alignments.begin();
69 vector<BamAlignment>::const_iterator alEnd = alignments.end();
70 for ( ; alIter != alEnd; ++alIter ) {
71 const BamAlignment& a = (*alIter);
74 << "\t" << a.RefID << ":" << a.Position;
77 bool hasTag = a.GetTag("Aq", aqValue);
79 if ( hasTag ) cerr << aqValue;
85 bool CountTool::CountToolPrivate::Run(void) {
87 // if no '-in' args supplied, default to stdin
88 if ( !m_settings->HasInput )
89 m_settings->InputFiles.push_back(Options::StandardIn());
91 // open reader without index
92 BamMultiReader reader;
93 if ( !reader.Open(m_settings->InputFiles) ) {
94 cerr << "bamtools count ERROR: could not open input BAM file(s)... Aborting." << endl;
100 int alignmentCount(0);
102 // if no region specified, count entire file
103 if ( !m_settings->HasRegion ) {
106 vector<BamAlignment> alignments;
107 while ( reader.GetNextAlignment(al) ) {
110 if ( alignments.size() < 100 )
111 alignments.push_back(al);
114 using namespace BamTools::Algorithms;
117 << "------------------------------" << endl
118 << "Unsorted Alignments" << endl
119 << "------------------------------" << endl
121 std::stable_sort(alignments.begin(), alignments.end(), Sort::Unsorted());
122 printAlignments(alignments);
123 cerr << "------------------------------" << endl
127 << "------------------------------" << endl
128 << "Sorted Alignments (by name)" << endl
129 << "------------------------------" << endl
131 // std::sort(alignments.begin(), alignments.end(), Sort::ByName());
132 Algorithms::SortAlignments(alignments, Sort::ByName());
133 printAlignments(alignments);
135 << "------------------------------" << endl
139 << "------------------------------" << endl
140 << "Sorted Alignments (by tag Aq)" << endl
141 << "------------------------------" << endl
143 // std::sort(alignments.begin(), alignments.end(), Sort::ByTag<int>("Aq"));
144 Algorithms::SortAlignments(alignments, Sort::ByTag<int>("Aq"));
145 printAlignments(alignments);
147 << "------------------------------" << endl
151 << "------------------------------" << endl
152 << "Sorted Alignments (by tag Aq) desc" << endl
153 << "------------------------------" << endl
155 std::sort(alignments.begin(), alignments.end(), Sort::ByTag<int>("Aq", Sort::DescendingOrder));
156 printAlignments(alignments);
158 << "------------------------------" << endl
164 // // ########################################
166 // // ########################################
168 // while ( reader.GetNextAlignmentCore(al) )
171 // //#########################################
174 // otherwise attempt to use region as constraint
177 // if region string parses OK
179 if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
181 // attempt to find index files
182 reader.LocateIndexes();
184 // if index data available for all BAM files, we can use SetRegion
185 if ( reader.HasIndexes() ) {
187 vector<BamAlignment> alignments;
188 using namespace BamTools::Algorithms;
190 alignments = GetSortedRegion(reader, region, Sort::ByName() );
191 printAlignments(alignments);
193 cerr << "################################" << endl;
195 alignments = GetSortedRegion(reader, region, Sort::ByTag<int>("Aq"));
196 printAlignments(alignments);
198 // // attempt to set region on reader
199 // if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) {
200 // cerr << "bamtools count ERROR: set region failed. Check that REGION describes a valid range" << endl;
205 // // everything checks out, just iterate through specified region, counting alignments
206 // while ( reader.GetNextAlignmentCore(al) )
210 // no index data available, we have to iterate through until we
211 // find overlapping alignments
213 while ( reader.GetNextAlignmentCore(al) ) {
214 if ( (al.RefID >= region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) &&
215 (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) )
223 // error parsing REGION string
225 cerr << "bamtools count ERROR: could not parse REGION - " << m_settings->Region << endl;
226 cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid"
234 cout << alignmentCount << endl;
241 // ---------------------------------------------
242 // CountTool implementation
244 CountTool::CountTool(void)
246 , m_settings(new CountSettings)
249 // set program details
250 Options::SetProgramInfo("bamtools count", "prints number of alignments in BAM file(s)", "[-in <filename> -in <filename> ...] [-region <REGION>]");
253 OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
254 Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn());
255 Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
258 CountTool::~CountTool(void) {
267 int CountTool::Help(void) {
268 Options::DisplayHelp();
272 int CountTool::Run(int argc, char* argv[]) {
274 // parse command line arguments
275 Options::Parse(argc, argv, 1);
277 // initialize CountTool with settings
278 m_impl = new CountToolPrivate(m_settings);
280 // run CountTool, return success/fail