--- /dev/null
+#include <iostream>\r
+#include <map>\r
+#include "BamReader.h"\r
+#include "BamWriter.h"\r
+\r
+using namespace std;\r
+\r
+int main(int argc, char* argv[]) {\r
+\r
+ for (int a=0; a<argc; a++) {\r
+ cout << argv[a] << " ";\r
+ }\r
+ cout << endl;\r
+ \r
+ if(argc != 4) {\r
+ cout << "USAGE: " << argv[0] << " <input BAM file 1> <input BAM file 1> <output BAM file>" << endl;\r
+ exit(1);\r
+ }\r
+\r
+ // localize our arguments\r
+ const char* inputFilename1 = argv[1];\r
+ const char* inputFilename2 = argv[2];\r
+ const char* outputFilename = argv[3];\r
+\r
+ // open our BAM reader1 for input file 1\r
+ BamReader reader1;\r
+ reader1.SetFilename(inputFilename1);\r
+\r
+ // open our BAM reader2 for input file 2\r
+ BamReader reader2;\r
+ reader2.SetFilename(inputFilename2);\r
+\r
+ // check reader1\r
+ if(!reader1.Open()) {\r
+ cout << "ERROR: Unable to open the BAM file 1 (" << inputFilename1 << ")." << endl;\r
+ exit(1);\r
+ }\r
+\r
+ // check reader2\r
+ if(!reader2.Open()) {\r
+ cout << "ERROR: Unable to open the BAM file 1 (" << inputFilename2 << ")." << endl;\r
+ exit(1);\r
+ }\r
+ \r
+ // retrieve the header text from both files\r
+ string samHeader1 = reader1.GetHeaderText();\r
+ string samHeader2 = reader2.GetHeaderText();\r
+\r
+ // retrieve the reference sequence vectors\r
+ RefVector referenceSequences1 = reader1.GetReferenceData();\r
+ RefVector referenceSequences2 = reader2.GetReferenceData();\r
+\r
+ // process reference sequences from file 1\r
+ map<string, unsigned int, less<string> > refLengthMap;\r
+ map<string, unsigned int, less<string> > refIdMap;\r
+ map<string, RefData, less<string> > refDataMap;\r
+ \r
+ cerr << "Reference list from file 1" << endl;\r
+ unsigned int refCounter = 0;\r
+ for (RefVector::const_iterator refIter = referenceSequences1.begin();\r
+ refIter != referenceSequences1.end(); refIter++) {\r
+\r
+ // retrieve\r
+ RefData rd = *refIter;\r
+\r
+ // get member data\r
+ string refName = rd.RefName;\r
+ unsigned int refLength = rd.RefLength;\r
+ int refId = reader1.GetRefID(refName);\r
+\r
+ // report\r
+ cerr << " refName=" << refName << " refId=" << refId << endl;\r
+\r
+ // store in maps\r
+ refLengthMap[refName] = refLength;\r
+ refIdMap[refName] = refCounter;\r
+ refDataMap[refName] = rd;\r
+ \r
+ // increment ref count\r
+ refCounter++;\r
+ }\r
+\r
+ // process reference sequences from file 1\r
+ map<unsigned int, unsigned int, less<unsigned int> > recodedRefId;\r
+\r
+ cerr << "Reference list from file 2" << endl;\r
+ for (RefVector::const_iterator refIter = referenceSequences2.begin();\r
+ refIter != referenceSequences2.end(); refIter++) {\r
+\r
+ // retrieve\r
+ RefData rd = *refIter;\r
+\r
+ // get member data\r
+ string refName = rd.RefName;\r
+ unsigned int refLength = rd.RefLength;\r
+ int refId = reader2.GetRefID(refName);\r
+\r
+ // report\r
+ cerr << " refName=" << refName << " refId=" << refId << endl;\r
+\r
+ // if refName already in map, check ref length\r
+ if (refLengthMap.count(refName) > 0) {\r
+ \r
+ // check if length is the same\r
+ if (refLengthMap[refName] != refLength) {\r
+ cerr << "Refernce name in two files with inconsistent lengths: " << refName << " ... exiting." << endl;\r
+ exit(1);\r
+ }\r
+\r
+ // make ref id recoding entry\r
+ unsigned int refIdNew = refIdMap[refName];\r
+ recodedRefId[refId] = refIdNew;\r
+ }\r
+ // otherwise make new refId and RefData\r
+ else {\r
+ // store in maps\r
+ refLengthMap[refName] = refLength;\r
+ refIdMap[refName] = refCounter;\r
+ refDataMap[refName] = rd;\r
+ \r
+ // make ref id recoding entry\r
+ recodedRefId[refId] = refCounter;\r
+\r
+ // increment ref count\r
+ refCounter++;\r
+ }\r
+ }\r
+\r
+ // make new referenceSequences vector\r
+ RefVector referenceSequences3;\r
+ unsigned rc = 0;\r
+ for (map<string, RefData, less<string> >::const_iterator rdIter = refDataMap.begin();\r
+ rdIter != refDataMap.end(); rdIter++) {\r
+ RefData rd = rdIter->second;\r
+ referenceSequences3.push_back(rd);\r
+ cerr << "Adding RefData for refName=" << rd.RefName << " refId=" << rc << endl;\r
+ rc++;\r
+ }\r
+ cerr << "Length of referenceSequences vector=" << referenceSequences3.size() << endl;\r
+ \r
+\r
+ // open output bam file and write:\r
+ // empty header\r
+ // new referenceSequences3 vector\r
+ string samHeader3 = "";\r
+\r
+ BamWriter writer;\r
+ writer.Open(outputFilename, samHeader3, referenceSequences3);\r
+\r
+ // iterate through alignments from file 1 and write them unchanged\r
+ BamAlignment ba;\r
+ \r
+ unsigned int ac1 = 0;\r
+ while(reader1.GetNextAlignment(ba) && ac1 < 400) {\r
+ writer.SaveAlignment(ba);\r
+ ac1++;\r
+ }\r
+ \r
+ cerr << "Write " << ac1 << " " << inputFilename1 << " records to " << outputFilename << endl;\r
+\r
+ // close input file 1\r
+ reader1.Close();\r
+\r
+ // iterate through alignments from file 2 and:\r
+ // assign re-coded refId\r
+ // write alignment\r
+ int ac2 = 0;\r
+ while(reader2.GetNextAlignment(ba) && ac2 < 400) {\r
+ ac2++;\r
+\r
+ // retrieve original refId\r
+ unsigned int refId = ba.RefID;\r
+ \r
+ // check if original refId is in recoding table... bomb if not\r
+ if (recodedRefId.count(refId) <= 0) {\r
+ cerr << "Alignment in file 2 has inconsistent reference sequence ID... exiting." << endl;\r
+ exit(1);\r
+ }\r
+\r
+ // assign new refId\r
+ ba.RefID = recodedRefId[refId];\r
+\r
+ // cerr << " recoding refId=" << refId << " to refId=" << recodedRefId[refId] << endl;\r
+\r
+ writer.SaveAlignment(ba);\r
+ }\r
+\r
+ cerr << "Write " << ac2 << " " << inputFilename2 << " records to " << outputFilename << endl;\r
+ \r
+ // close input file 2\r
+ reader2.Close();\r
+\r
+ // close output file\r
+ writer.Close();\r
+\r
+ cerr << "Total " << ac1+ac2 << " records to " << outputFilename << endl;\r
+\r
+ // return\r
+ return 0;\r
+}\r
-CC= gcc
-CXX= g++
-CFLAGS= -Wall -O3
-CXXFLAGS= $(CFLAGS)
-DFLAGS= -D_IOLIB=2 #-D_NDEBUG
-OBJS= BamReader.o bgzf.o
-PROG= BamReaderTest
-INCLUDES=
-ARFLAGS= -crs
-LIBS= -lz
-SUBDIRS= .
-
-.SUFFIXES:.c .cpp .o
-
-.c.o:
- $(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@
-
-.cpp.o:
- $(CXX) -c $(CXXFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@
-
-all: $(PROG) BamConversion
-
-lib:libbambc.a
-
-libbambc.a:$(OBJS)
- $(AR) $(ARFLAGS) $@ $(OBJS)
-
-BamReaderTest:lib BamReaderMain.o
- $(CXX) $(CXXFLAGS) -o $@ BamReaderMain.o $(LIBS) -L. -lbambc
-
-BamConversion: lib BamWriter.o BamConversionMain.o
- $(CXX) $(CXXFLAGS) -o $@ BamWriter.o BamConversionMain.o $(LIBS) -L. -lbambc
-
-clean:
- rm -fr gmon.out *.o *.a a.out $(PROG) BamConversion *~
+CC= gcc\r
+CXX= g++\r
+CFLAGS= -Wall -O3\r
+CXXFLAGS= $(CFLAGS)\r
+DFLAGS= -D_IOLIB=2 #-D_NDEBUG\r
+OBJS= BamReader.o bgzf.o\r
+PROG= BamReaderTest\r
+INCLUDES= \r
+ARFLAGS= -crs\r
+LIBS= -lz\r
+SUBDIRS= .\r
+\r
+.SUFFIXES:.c .cpp .o\r
+\r
+.c.o:\r
+ $(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@\r
+\r
+.cpp.o:\r
+ $(CXX) -c $(CXXFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@\r
+\r
+all: $(PROG) BamConversion\r
+\r
+lib:libbambc.a\r
+\r
+libbambc.a:$(OBJS)\r
+ $(AR) $(ARFLAGS) $@ $(OBJS)\r
+\r
+BamReaderTest:lib BamReaderMain.o\r
+ $(CXX) $(CXXFLAGS) -o $@ BamReaderMain.o $(LIBS) -L. -lbambc\r
+\r
+BamConversion: lib BamWriter.o BamConversionMain.o\r
+ $(CXX) $(CXXFLAGS) -o $@ BamWriter.o BamConversionMain.o $(LIBS) -L. -lbambc\r
+\r
+BamMerge: lib BamMerge.o\r
+ $(CXX) $(CXXFLAGS) -o $@ BamWriter.o BamMerge.o $(LIBS) -L. -lbambc\r
+\r
+\r
+clean:\r
+ rm -fr gmon.out *.o *.a a.out $(PROG) BamConversion *~\r