From 875f0cdc2eeaa1c1d35d1e8bee1e27f8150f1092 Mon Sep 17 00:00:00 2001 From: Erik Garrison Date: Wed, 9 Jun 2010 09:31:01 -0400 Subject: [PATCH] fixed bug with @RG handling Prior to this commit files merged with bamtools merge would have one @RG tag for each file. This is undesirable behavior. This commit fixes the issue by tracking unique @RG tags in our unified header (BamMultiReader::GetHeaderText) and prevents the MultiReader from observing more than one @RG tag in the header. Future merges will have the correct header. --- BamMultiReader.cpp | 21 ++++++++++++++++++--- BamMultiReader.h | 2 ++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/BamMultiReader.cpp b/BamMultiReader.cpp index 2d8580d..58d82a2 100644 --- a/BamMultiReader.cpp +++ b/BamMultiReader.cpp @@ -204,6 +204,7 @@ bool BamMultiReader::CreateIndexes(void) { const string BamMultiReader::GetHeaderText(void) const { string mergedHeader = ""; + map readGroups; // foreach extraction entry (each BAM file) bool isFirstTime = true; @@ -231,10 +232,24 @@ const string BamMultiReader::GetHeaderText(void) const { } } - // (for all files) append RG entries + // (for all files) append RG entries if they are unique if ( headerLine.find("@RG") == 0 ) { - mergedHeader.append(headerLine.c_str() ); - mergedHeader.append(1, '\n'); + stringstream headerLineSs(headerLine); + string part, readGroupPart, readGroup; + while(std::getline(headerLineSs, part, '\t')) { + if (part == "@RG") { + std::getline(headerLineSs, readGroupPart, '\t'); + stringstream readGroupPartSs(readGroupPart); + std::getline(readGroupPartSs, readGroup, ':'); + std::getline(readGroupPartSs, readGroup, ':'); + break; + } + } + if (readGroups.find(readGroup) == readGroups.end()) { // prevents duplicate @RG entries + mergedHeader.append(headerLine.c_str() ); + mergedHeader.append(1, '\n'); + readGroups[readGroup] = true; + } } } diff --git a/BamMultiReader.h b/BamMultiReader.h index f1a2bb2..6d5a805 100644 --- a/BamMultiReader.h +++ b/BamMultiReader.h @@ -15,6 +15,7 @@ #include #include #include // for pair +#include using namespace std; @@ -27,6 +28,7 @@ namespace BamTools { // index mapping reference/position pairings to bamreaders and their alignments typedef multimap, pair > AlignmentIndex; + class BamMultiReader { // constructor / destructor -- 2.39.2