const string BamMultiReader::GetHeaderText(void) const {
string mergedHeader = "";
+ map<string, bool> readGroups;
// foreach extraction entry (each BAM file)
- bool isFirstTime = true;
- for (vector<pair<BamReader*, BamAlignment*> >::const_iterator it = readers.begin(); it != readers.end(); ++it) {
+ for (vector<pair<BamReader*, BamAlignment*> >::const_iterator rs = readers.begin(); rs != readers.end(); ++rs) {
- BamReader* reader = it->first;
+ map<string, bool> currentFileReadGroups;
+
+ BamReader* reader = rs->first;
stringstream header(reader->GetHeaderText());
vector<string> lines;
if ( headerLine.empty() ) { continue; }
// if first file, save HD & SQ entries
- if ( isFirstTime ) {
+ if ( rs == readers.begin() ) {
if ( headerLine.find("@HD") == 0 || headerLine.find("@SQ") == 0) {
mergedHeader.append(headerLine.c_str());
mergedHeader.append(1, '\n');
}
}
- // (for all files) append RG entries
+ // (for all files) append RG entries if they are unique
if ( headerLine.find("@RG") == 0 ) {
- mergedHeader.append(headerLine.c_str() );
- mergedHeader.append(1, '\n');
+ stringstream headerLineSs(headerLine);
+ string part, readGroupPart, readGroup;
+ while(std::getline(headerLineSs, part, '\t')) {
+ stringstream partSs(part);
+ string subtag;
+ std::getline(partSs, subtag, ':');
+ if (subtag == "ID") {
+ std::getline(partSs, readGroup, ':');
+ break;
+ }
+ }
+ if (readGroups.find(readGroup) == readGroups.end()) { // prevents duplicate @RG entries
+ mergedHeader.append(headerLine.c_str() );
+ mergedHeader.append(1, '\n');
+ readGroups[readGroup] = true;
+ currentFileReadGroups[readGroup] = true;
+ } else {
+ // warn iff we are reading one file and discover duplicated @RG tags in the header
+ // otherwise, we emit no warning, as we might be merging multiple BAM files with identical @RG tags
+ if (currentFileReadGroups.find(readGroup) != currentFileReadGroups.end()) {
+ cerr << "WARNING: duplicate @RG tag " << readGroup
+ << " entry in header of " << reader->GetFilename() << endl;
+ }
+ }
}
-
}
-
- // set iteration flag
- isFirstTime = false;
}
// return merged header text