+// ---------------------------------------------
+// StatsToolPrivate implementation
+
+struct StatsTool::StatsToolPrivate {
+
+ // ctor & dtor
+ public:
+ StatsToolPrivate(StatsTool::StatsSettings* _settings);
+ ~StatsToolPrivate(void);
+
+ // 'public' interface
+ public:
+ bool Run(void);
+
+ // internal methods
+ private:
+ bool CalculateMedian(vector<int>& data, double& median);
+ void PrintStats(void);
+ void ProcessAlignment(const BamAlignment& al);
+
+ // data members
+ private:
+ StatsTool::StatsSettings* settings;
+ unsigned int numReads;
+ unsigned int numPaired;
+ unsigned int numProperPair;
+ unsigned int numMapped;
+ unsigned int numBothMatesMapped;
+ unsigned int numForwardStrand;
+ unsigned int numReverseStrand;
+ unsigned int numFirstMate;
+ unsigned int numSecondMate;
+ unsigned int numSingletons;
+ unsigned int numFailedQC;
+ unsigned int numDuplicates;
+ vector<int> insertSizes;
+};
+
+StatsTool::StatsToolPrivate::StatsToolPrivate(StatsTool::StatsSettings* _settings)
+ : settings(_settings)
+ , numReads(0)
+ , numPaired(0)
+ , numProperPair(0)
+ , numMapped(0)
+ , numBothMatesMapped(0)
+ , numForwardStrand(0)
+ , numReverseStrand(0)
+ , numFirstMate(0)
+ , numSecondMate(0)
+ , numSingletons(0)
+ , numFailedQC(0)
+ , numDuplicates(0)
+{
+ insertSizes.reserve(100000);
+}
+
+StatsTool::StatsToolPrivate::~StatsToolPrivate(void) { }
+
+bool StatsTool::StatsToolPrivate::CalculateMedian(vector<int>& data, double& median) { // median is double in case of even data size, need to return average of middle 2 elements
+
+ // check that data exists
+ if ( data.empty() ) return false;
+
+ size_t dataSize = data.size();
+ size_t middleIndex = dataSize / 2;
+
+ vector<int>::iterator target = data.begin() + middleIndex;
+ nth_element(data.begin(), target, data.end());
+
+ // odd number of elements
+ if ( (dataSize % 2) != 0) {
+ median = (double)(*target);
+ return true;
+ }
+
+ // even number of elements
+ else {
+ double rightTarget = (double)(*target);
+ vector<int>::iterator leftTarget = target - 1;
+ nth_element(data.begin(), leftTarget, data.end());
+ median = (double)((rightTarget+*leftTarget)/2.0);
+ return true;
+ }
+}
+
+// print BAM file alignment stats
+void StatsTool::StatsToolPrivate::PrintStats(void) {
+
+ cout << endl;
+ cout << "**********************************************" << endl;
+ cout << "Stats for BAM file(s): " << endl;
+ cout << "**********************************************" << endl;
+ cout << endl;
+ cout << "Total reads: " << numReads << endl;
+ cout << "Mapped reads: " << numMapped << "\t(" << ((float)numMapped/numReads)*100 << "%)" << endl;
+ cout << "Forward strand: " << numForwardStrand << "\t(" << ((float)numForwardStrand/numReads)*100 << "%)" << endl;
+ cout << "Reverse strand: " << numReverseStrand << "\t(" << ((float)numReverseStrand/numReads)*100 << "%)" << endl;
+ cout << "Failed QC: " << numFailedQC << "\t(" << ((float)numFailedQC/numReads)*100 << "%)" << endl;
+ cout << "Duplicates: " << numDuplicates << "\t(" << ((float)numDuplicates/numReads)*100 << "%)" << endl;
+ cout << "Paired-end reads: " << numPaired << "\t(" << ((float)numPaired/numReads)*100 << "%)" << endl;
+
+ if ( numPaired != 0 ) {
+ cout << "'Proper-pairs': " << numProperPair << "\t(" << ((float)numProperPair/numPaired)*100 << "%)" << endl;
+ cout << "Both pairs mapped: " << numBothMatesMapped << "\t(" << ((float)numBothMatesMapped/numPaired)*100 << "%)" << endl;
+ cout << "Read 1: " << numFirstMate << endl;
+ cout << "Read 2: " << numSecondMate << endl;
+ cout << "Singletons: " << numSingletons << "\t(" << ((float)numSingletons/numPaired)*100 << "%)" << endl;
+ }
+
+
+ if ( settings->IsShowingInsertSizeSummary ) {
+
+ double avgInsertSize = 0.0;
+ if ( !insertSizes.empty() ) {
+ avgInsertSize = ( accumulate(insertSizes.begin(), insertSizes.end(), 0.0) / (double)insertSizes.size() );
+ cout << "Average insert size (absolute value): " << avgInsertSize << endl;
+ }
+
+ double medianInsertSize = 0.0;
+ if ( CalculateMedian(insertSizes, medianInsertSize) )
+ cout << "Median insert size (absolute value): " << medianInsertSize << endl;
+ }
+ cout << endl;
+}
+
+// use current input alignment to update BAM file alignment stats
+void StatsTool::StatsToolPrivate::ProcessAlignment(const BamAlignment& al) {
+
+ // increment total alignment counter
+ ++numReads;
+
+ // check the paired-independent flags
+ if ( al.IsDuplicate() ) ++numDuplicates;
+ if ( al.IsFailedQC() ) ++numFailedQC;
+ if ( al.IsMapped() ) ++numMapped;
+
+ // check forward/reverse strand
+ if ( al.IsReverseStrand() )
+ ++numReverseStrand;
+ else
+ ++numForwardStrand;
+
+ // if alignment is paired-end
+ if ( al.IsPaired() ) {
+
+ // increment PE counter
+ ++numPaired;
+
+ // increment first mate/second mate counters
+ if ( al.IsFirstMate() ) ++numFirstMate;
+ if ( al.IsSecondMate() ) ++numSecondMate;
+
+ // if alignment is mapped, check mate status
+ if ( al.IsMapped() ) {
+ // if mate mapped
+ if ( al.IsMateMapped() )
+ ++numBothMatesMapped;
+ // else singleton
+ else
+ ++numSingletons;
+ }
+
+ // check for explicit proper pair flag
+ if ( al.IsProperPair() ) ++numProperPair;
+
+ // store insert size for first mate
+ if ( settings->IsShowingInsertSizeSummary && al.IsFirstMate() && (al.InsertSize != 0) ) {
+ int insertSize = abs(al.InsertSize);
+ insertSizes.push_back( insertSize );
+ }
+ }
+}
+
+bool StatsTool::StatsToolPrivate::Run() {
+
+ // opens the BAM files without checking for indexes
+ BamMultiReader reader;
+ if ( !reader.Open(settings->InputFiles, false, true) ) {
+ cerr << "Could not open input BAM file(s)... quitting." << endl;
+ reader.Close();
+ return false;
+ }
+
+ // plow through file, keeping track of stats
+ BamAlignment al;
+ while ( reader.GetNextAlignmentCore(al) ) {
+ ProcessAlignment(al);
+ }
+
+ // print stats
+ PrintStats();
+
+ // clean and exit
+ reader.Close();
+ return true;
+}
+