git.donarmstrong.com Git - bamtools.git/blobdiff - BamMultiReader.cpp
further cleanup of duplicate @RG tag warning reporting
[bamtools.git] / BamMultiReader.cpp
index 58d82a2d016b49de532d1ec85be39650a982d915..51372c3feef1d3bf8a73d4c93ade22b227411a3e 100644 (file)
@@ -207,10 +207,11 @@ const string BamMultiReader::GetHeaderText(void) const {
     map<string, bool> readGroups;
 
     // foreach extraction entry (each BAM file)
-    bool isFirstTime = true;
-    for (vector<pair<BamReader*, BamAlignment*> >::const_iterator it = readers.begin(); it != readers.end(); ++it) {
+    for (vector<pair<BamReader*, BamAlignment*> >::const_iterator rs = readers.begin(); rs != readers.end(); ++rs) {
 
-        BamReader* reader = it->first;
+        map<string, bool> currentFileReadGroups;
+
+        BamReader* reader = rs->first;
 
         stringstream header(reader->GetHeaderText());
         vector<string> lines;
@@ -225,7 +226,7 @@ const string BamMultiReader::GetHeaderText(void) const {
             if ( headerLine.empty() ) { continue; }
 
             // if first file, save HD & SQ entries
-            if ( isFirstTime ) {
+            if ( rs == readers.begin() ) {
                 if ( headerLine.find("@HD") == 0 || headerLine.find("@SQ") == 0) {
                     mergedHeader.append(headerLine.c_str());
                     mergedHeader.append(1, '\n');
@@ -237,25 +238,29 @@ const string BamMultiReader::GetHeaderText(void) const {
                 stringstream headerLineSs(headerLine);
                 string part, readGroupPart, readGroup;
                 while(std::getline(headerLineSs, part, '\t')) {
-                    if (part == "@RG") {
-                       std::getline(headerLineSs, readGroupPart, '\t');
-                       stringstream readGroupPartSs(readGroupPart);
-                       std::getline(readGroupPartSs, readGroup, ':');
-                       std::getline(readGroupPartSs, readGroup, ':');
-                       break;
+                    stringstream partSs(part);
+                    string subtag;
+                    std::getline(partSs, subtag, ':');
+                    if (subtag == "ID") {
+                        std::getline(partSs, readGroup, ':');
+                        break;
                     }
                 }
                 if (readGroups.find(readGroup) == readGroups.end()) { // prevents duplicate @RG entries
                     mergedHeader.append(headerLine.c_str() );
                     mergedHeader.append(1, '\n');
                     readGroups[readGroup] = true;
+                    currentFileReadGroups[readGroup] = true;
+                } else {
+                    // warn iff we are reading one file and discover duplicated @RG tags in the header
+                    // otherwise, we emit no warning, as we might be merging multiple BAM files with identical @RG tags
+                    if (currentFileReadGroups.find(readGroup) != currentFileReadGroups.end()) {
+                        cerr << "WARNING: duplicate @RG tag " << readGroup 
+                            << " entry in header of " << reader->GetFilename() << endl;
+                    }
                 }
             }
-
         }
-
-        // set iteration flag
-        isFirstTime = false;
     }
 
     // return merged header text