9 int main(int argc, char* argv[]) {
11 for (int a=0; a<argc; a++) {
12 cout << argv[a] << " ";
17 cout << "USAGE: " << argv[0] << " <input BAM file 1> <input BAM file 1> <output BAM file> [<maxReads>]" << endl;
21 // localize our arguments
22 const char* inputFilename1 = argv[1];
23 const char* inputFilename2 = argv[2];
24 const char* outputFilename = argv[3];
27 const char* cmaxRead = argv[4];
28 Nmax = atoi(cmaxRead);
32 // open our BAM reader1 for input file 1
34 reader1.SetFilename(inputFilename1);
36 // open our BAM reader2 for input file 2
38 reader2.SetFilename(inputFilename2);
42 cout << "ERROR: Unable to open the BAM file 1 (" << inputFilename1 << ")." << endl;
48 cout << "ERROR: Unable to open the BAM file 1 (" << inputFilename2 << ")." << endl;
52 // retrieve the header text from both files
53 string samHeader1 = reader1.GetHeaderText();
54 string samHeader2 = reader2.GetHeaderText();
56 // retrieve the reference sequence vectors
57 RefVector referenceSequences1 = reader1.GetReferenceData();
58 RefVector referenceSequences2 = reader2.GetReferenceData();
60 // merged referenceSequences
61 RefVector referenceSequences12;
63 // process reference sequences from file 1
64 map<string, unsigned int, less<string> > refLengthMap;
65 map<string, unsigned int, less<string> > refIdMap;
66 map<string, RefData, less<string> > refDataMap;
68 cerr << "Reference list from file 1" << endl;
69 unsigned int refCounter = 0;
70 for (RefVector::const_iterator refIter = referenceSequences1.begin(); efIter != referenceSequences1.end(); refIter++) {
73 RefData rd = *refIter;
76 string refName = rd.RefName;
77 unsigned int refLength = rd.RefLength;
78 int refId = reader1.GetRefID(refName);
81 cerr << " refName=" << refName << " refId=" << refId << endl;
84 refLengthMap[refName] = refLength;
85 refIdMap[refName] = refCounter;
86 refDataMap[refName] = rd;
88 // add this to merged refVector
89 referenceSequences12.push_back(rd);
91 // increment ref count
95 // map file 2 refId to merged refID
96 map<unsigned int, unsigned int, less<unsigned int> > mapRefId12;
98 cerr << "Reference list from file 2" << endl;
99 for (RefVector::const_iterator refIter = referenceSequences2.begin(); refIter != referenceSequences2.end(); refIter++) {
102 RefData rd = *refIter;
105 string refName = rd.RefName;
106 unsigned int refLength = rd.RefLength;
107 int refId = reader2.GetRefID(refName);
110 cerr << " refName=" << refName << " refId=" << refId << endl;
112 // if refName already in map, check ref length
113 if (refLengthMap.count(refName) > 0) {
115 // check if length is the same
116 if (refLengthMap[refName] != refLength) {
117 cerr << "Refernce name in two files with inconsistent lengths: " << refName << " ... exiting." << endl;
121 // make ref id recoding entry
122 unsigned int refIdNew = refIdMap[refName];
123 mapRefId12[refId] = refIdNew;
126 // otherwise make new refId and RefData
128 refLengthMap[refName] = refLength;
129 refIdMap[refName] = refCounter;
130 refDataMap[refName] = rd;
132 // make ref id recoding entry
133 mapRefId12[refId] = refCounter;
135 // add this to merged refVector
136 referenceSequences12.push_back(rd);
138 // increment ref count
143 cerr << "Length of referenceSequences vector=" << referenceSequences12.size() << endl;
145 // open output bam file and write:
147 string samHeader3 = "";
150 writer.Open(outputFilename, samHeader3, referenceSequences12);
152 // iterate through alignments from file 1 and write them unchanged
156 unsigned int ac1 = 0;
157 while(reader1.GetNextAlignment(ba) && keepgoing ) {
158 writer.SaveAlignment(ba);
160 keepgoing=(Nmax<1)||(ac1<Nmax);
163 cerr << "Write " << ac1 << " " << inputFilename1 << " records to " << outputFilename << endl;
165 // close input file 1
168 // iterate through alignments from file 2 and:
169 // assign re-coded refId
172 unsigned int ac2 = 0;
173 while(reader2.GetNextAlignment(ba)&&keepgoing ) {
175 keepgoing=(Nmax<1)||(ac2<Nmax);
177 // retrieve original refId
178 unsigned int refId = ba.RefID;
180 // check if original refId is in recoding table... bomb if not
181 if (mapRefId12.count(refId) <= 0) {
182 cerr << "Alignment in file 2 has inconsistent reference sequence ID... exiting." << endl;
187 ba.RefID = mapRefId12[refId];
189 // cerr << " recoding refId=" << refId << " to refId=" << recodedRefId[refId] << endl;
191 writer.SaveAlignment(ba);
194 cerr << "Write " << ac2 << " " << inputFilename2 << " records to " << outputFilename << endl;
196 // close input file 2
202 cerr << "Total " << ac1+ac2 << " records to " << outputFilename << endl;