+// makes a virtual, unified header for all the bam files in the multireader
+const string BamMultiReader::GetHeaderText(void) const {
+
+ string mergedHeader = "";
+ map<string, bool> readGroups;
+
+ // foreach extraction entry (each BAM file)
+ for (vector<pair<BamReader*, BamAlignment*> >::const_iterator rs = readers.begin(); rs != readers.end(); ++rs) {
+
+ map<string, bool> currentFileReadGroups;
+
+ BamReader* reader = rs->first;
+
+ stringstream header(reader->GetHeaderText());
+ vector<string> lines;
+ string item;
+ while (getline(header, item))
+ lines.push_back(item);
+
+ for (vector<string>::const_iterator it = lines.begin(); it != lines.end(); ++it) {
+
+ // get next line from header, skip if empty
+ string headerLine = *it;
+ if ( headerLine.empty() ) { continue; }
+
+ // if first file, save HD & SQ entries
+ if ( rs == readers.begin() ) {
+ if ( headerLine.find("@HD") == 0 || headerLine.find("@SQ") == 0) {
+ mergedHeader.append(headerLine.c_str());
+ mergedHeader.append(1, '\n');
+ }
+ }
+
+ // (for all files) append RG entries if they are unique
+ if ( headerLine.find("@RG") == 0 ) {
+ stringstream headerLineSs(headerLine);
+ string part, readGroupPart, readGroup;
+ while(std::getline(headerLineSs, part, '\t')) {
+ stringstream partSs(part);
+ string subtag;
+ std::getline(partSs, subtag, ':');
+ if (subtag == "ID") {
+ std::getline(partSs, readGroup, ':');
+ break;
+ }
+ }
+ if (readGroups.find(readGroup) == readGroups.end()) { // prevents duplicate @RG entries
+ mergedHeader.append(headerLine.c_str() );
+ mergedHeader.append(1, '\n');
+ readGroups[readGroup] = true;
+ currentFileReadGroups[readGroup] = true;
+ } else {
+ // warn iff we are reading one file and discover duplicated @RG tags in the header
+ // otherwise, we emit no warning, as we might be merging multiple BAM files with identical @RG tags
+ if (currentFileReadGroups.find(readGroup) != currentFileReadGroups.end()) {
+ cerr << "WARNING: duplicate @RG tag " << readGroup
+ << " entry in header of " << reader->GetFilename() << endl;
+ }
+ }
+ }
+ }
+ }
+
+ // return merged header text
+ return mergedHeader;