]> git.donarmstrong.com Git - mothur.git/blob - oligos.cpp
update .gitignore
[mothur.git] / oligos.cpp
1 //
2 //  oligos.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 4/4/14.
6 //  Copyright (c) 2014 Schloss Lab. All rights reserved.
7 //
8
9 #include "oligos.h"
10
11 /**************************************************************************************************/
12
13 Oligos::Oligos(string o){
14         try {
15                 m = MothurOut::getInstance();
16         hasPPrimers = false; hasPBarcodes = false; pairedOligos = false; reversePairs = true;
17         indexBarcode = 0; indexPairedBarcode = 0; indexPrimer = 0; indexPairedPrimer = 0;
18                 oligosfile = o;
19         readOligos();
20                 if (pairedOligos) {
21             numBarcodes = pairedBarcodes.size();
22             numFPrimers = pairedPrimers.size();
23         }else {
24             numBarcodes = barcodes.size();
25             numFPrimers = primers.size();
26         }
27         }
28         catch(exception& e) {
29                 m->errorOut(e, "Oligos", "Oligos");
30                 exit(1);
31         }
32 }
33 /**************************************************************************************************/
34
35 Oligos::Oligos(){
36         try {
37                 m = MothurOut::getInstance();
38         hasPPrimers = false; hasPBarcodes = false; pairedOligos = false; reversePairs = true;
39         indexBarcode = 0; indexPairedBarcode = 0; indexPrimer = 0; indexPairedPrimer = 0;
40         numFPrimers = 0; numBarcodes = 0;
41         }
42         catch(exception& e) {
43                 m->errorOut(e, "Oligos", "Oligos");
44                 exit(1);
45         }
46 }
47 /**************************************************************************************************/
48 int Oligos::read(string o){
49         try {
50                 oligosfile = o;
51         readOligos();
52         if (pairedOligos) {
53             numBarcodes = pairedBarcodes.size();
54             numFPrimers = pairedPrimers.size();
55         }else {
56             numBarcodes = barcodes.size();
57             numFPrimers = primers.size();
58         }
59         return 0;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "Oligos", "read");
63                 exit(1);
64         }
65 }
66 /**************************************************************************************************/
67 int Oligos::read(string o, bool reverse){
68         try {
69                 oligosfile = o;
70         reversePairs = reverse;
71         readOligos();
72         if (pairedOligos) {
73             numBarcodes = pairedBarcodes.size();
74             numFPrimers = pairedPrimers.size();
75         }else {
76             numBarcodes = barcodes.size();
77             numFPrimers = primers.size();
78         }
79         return 0;
80         }
81         catch(exception& e) {
82                 m->errorOut(e, "Oligos", "read");
83                 exit(1);
84         }
85 }
86 //***************************************************************************************************************
87
88 int Oligos::readOligos(){
89         try {
90                 ifstream inOligos;
91                 m->openInputFile(oligosfile, inOligos);
92                 
93                 string type, oligo, roligo, group;
94                 
95                 while(!inOligos.eof()){
96             
97                         inOligos >> type;
98             
99                         if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
100             
101                         if(type[0] == '#'){
102                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
103                                 m->gobble(inOligos);
104                         }
105                         else{
106                                 m->gobble(inOligos);
107                                 //make type case insensitive
108                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
109                                 
110                                 inOligos >> oligo;
111                 
112                 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
113                                 
114                                 for(int i=0;i<oligo.length();i++){
115                                         oligo[i] = toupper(oligo[i]);
116                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
117                                 }
118                                 
119                                 if(type == "FORWARD"){
120                                         group = "";
121                                         
122                                         // get rest of line in case there is a primer name
123                                         while (!inOligos.eof()) {
124                                                 char c = inOligos.get();
125                                                 if (c == 10 || c == 13 || c == -1){     break;  }
126                                                 else if (c == 32 || c == 9){;} //space or tab
127                                                 else {  group += c;  }
128                                         }
129                                         
130                                         //check for repeat barcodes
131                                         map<string, int>::iterator itPrime = primers.find(oligo);
132                                         if (itPrime != primers.end()) { m->mothurOut("[WARNING]: primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
133                                         
134                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); }  }
135                     
136                                         primers[oligo]=indexPrimer; indexPrimer++;
137                                         primerNameVector.push_back(group);
138                                 }
139                 else if (type == "PRIMER"){
140                     m->gobble(inOligos);
141                                         
142                     inOligos >> roligo;
143                     
144                     for(int i=0;i<roligo.length();i++){
145                         roligo[i] = toupper(roligo[i]);
146                         if(roligo[i] == 'U')    {       roligo[i] = 'T';        }
147                     }
148                     if (reversePairs) {  roligo = reverseOligo(roligo); }
149                     group = "";
150                     
151                                         // get rest of line in case there is a primer name
152                                         while (!inOligos.eof()) {
153                                                 char c = inOligos.get();
154                                                 if (c == 10 || c == 13 || c == -1){     break;  }
155                                                 else if (c == 32 || c == 9){;} //space or tab
156                                                 else {  group += c;  }
157                                         }
158                     
159                     oligosPair newPrimer(oligo, roligo);
160                     
161                     if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
162                                         
163                                         //check for repeat barcodes
164                     string tempPair = oligo+roligo;
165                     if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine();  }
166                     else { uniquePrimers.insert(tempPair); }
167                                         
168                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); }  }
169                     
170                                         pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
171                                         primerNameVector.push_back(group);
172                     hasPPrimers = true;
173                 }
174                                 else if(type == "REVERSE"){
175                     string oligoRC = reverseOligo(oligo);
176                                         revPrimer.push_back(oligoRC);
177                                 }
178                                 else if(type == "BARCODE"){
179                                         inOligos >> group;
180                     
181                     //barcode lines can look like   BARCODE   atgcatgc   groupName  - for 454 seqs
182                     //or                            BARCODE   atgcatgc   atgcatgc    groupName  - for illumina data that has forward and reverse info
183                     
184                     string temp = "";
185                     while (!inOligos.eof())     {
186                                                 char c = inOligos.get();
187                                                 if (c == 10 || c == 13 || c == -1){     break;  }
188                                                 else if (c == 32 || c == 9){;} //space or tab
189                                                 else {  temp += c;  }
190                                         }
191                                         
192                     //then this is illumina data with 4 columns
193                     if (temp != "") {
194                         hasPBarcodes = true;
195                         string reverseBarcode = group; //reverseOligo(group); //reverse barcode
196                         group = temp;
197                         
198                         for(int i=0;i<reverseBarcode.length();i++){
199                             reverseBarcode[i] = toupper(reverseBarcode[i]);
200                             if(reverseBarcode[i] == 'U')        {       reverseBarcode[i] = 'T';        }
201                         }
202                         
203                         if (reversePairs) {   reverseBarcode = reverseOligo(reverseBarcode); }
204                         oligosPair newPair(oligo, reverseBarcode);
205                         
206                         if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
207                         
208                         //check for repeat barcodes
209                         string tempPair = oligo+reverseBarcode;
210                         if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse +  " is in your oligos file already, disregarding."); m->mothurOutEndLine();  }
211                         else { uniqueBarcodes.insert(tempPair); }
212                         
213                         pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
214                         barcodeNameVector.push_back(group);
215                     }else {
216                         //check for repeat barcodes
217                         map<string, int>::iterator itBar = barcodes.find(oligo);
218                         if (itBar != barcodes.end()) { m->mothurOut("[WARNING]: barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
219                         
220                         barcodes[oligo]=indexBarcode; indexBarcode++;
221                         barcodeNameVector.push_back(group);
222                     }
223                                 }else if(type == "LINKER"){
224                                         linker.push_back(oligo);
225                                 }else if(type == "SPACER"){
226                                         spacer.push_back(oligo);
227                                 }
228                                 else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
229                         }
230                         m->gobble(inOligos);
231                 }
232                 inOligos.close();
233                 
234         if (hasPBarcodes || hasPPrimers) {
235             pairedOligos = true;
236             if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true;  m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine();  return 0; }
237         }
238         
239         
240                 //add in potential combos
241                 if(barcodeNameVector.size() == 0){
242             if (pairedOligos) {
243                 oligosPair newPair("", "");
244                 pairedBarcodes[0] = newPair;
245             }else {
246                 barcodes[""] = 0;
247             }
248                         barcodeNameVector.push_back("");
249                 }
250                 
251                 if(primerNameVector.size() == 0){
252             if (pairedOligos) {
253                 oligosPair newPair("", "");
254                 pairedPrimers[0] = newPair;
255             }else {
256                 primers[""] = 0;
257             }
258                         primerNameVector.push_back("");
259                 }
260                 
261                 
262         if (pairedOligos) {
263             for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
264                 for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
265                     
266                     string primerName = primerNameVector[itPrimer->first];
267                     string barcodeName = barcodeNameVector[itBar->first];
268                     
269                     if (m->debug) {  m->mothurOut("[DEBUG]: primerName = " + primerName + " barcodeName = " + barcodeName + "\n");  }
270                     
271                     if ((primerName == "ignore") || (barcodeName == "ignore")) { if (m->debug) {  m->mothurOut("[DEBUG]: in ignore. \n");  }  } //do nothing
272                     else if ((primerName == "") && (barcodeName == "")) { if (m->debug) {  m->mothurOut("[DEBUG]: in blank. \n");  }  } //do nothing
273                     else {
274                         string comboGroupName = "";
275                         string fastqFileName = "";
276                         
277                         if(primerName == ""){
278                             comboGroupName = barcodeNameVector[itBar->first];
279                         }
280                         else{
281                             if(barcodeName == ""){
282                                 comboGroupName = primerNameVector[itPrimer->first];
283                             }
284                             else{
285                                 comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
286                             }
287                         }
288                         
289                         if (m->debug) {  m->mothurOut("[DEBUG]: comboGroupName = " + comboGroupName +  "\n");  }
290                         
291                         uniqueNames.insert(comboGroupName);
292                         
293                         map<string, vector<string> >::iterator itGroup2Barcode = Group2Barcode.find(comboGroupName);
294                         if (itGroup2Barcode == Group2Barcode.end()) {
295                             vector<string> tempBarcodes; tempBarcodes.push_back((itBar->second).forward+"."+(itBar->second).reverse);
296                             Group2Barcode[comboGroupName] = tempBarcodes;
297                         }else {
298                             Group2Barcode[comboGroupName].push_back((itBar->second).forward+"."+(itBar->second).reverse);
299                         }
300                         
301                         itGroup2Barcode = Group2Primer.find(comboGroupName);
302                         if (itGroup2Barcode == Group2Primer.end()) {
303                             vector<string> tempPrimers; tempPrimers.push_back((itPrimer->second).forward+"."+(itPrimer->second).reverse);
304                             Group2Primer[comboGroupName] = tempPrimers;
305                         }else {
306                             Group2Primer[comboGroupName].push_back((itPrimer->second).forward+"."+(itPrimer->second).reverse);
307                         }
308                     }
309                 }
310             }
311         }else {
312             for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
313                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
314                     
315                     string primerName = primerNameVector[itPrimer->second];
316                     string barcodeName = barcodeNameVector[itBar->second];
317                     
318                     if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
319                     else if ((primerName == "") && (barcodeName == "")) { } //do nothing
320                     else {
321                         string comboGroupName = "";
322                         string fastqFileName = "";
323                         
324                         if(primerName == ""){
325                             comboGroupName = barcodeNameVector[itBar->second];
326                         }
327                         else{
328                             if(barcodeName == ""){
329                                 comboGroupName = primerNameVector[itPrimer->second];
330                             }
331                             else{
332                                 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
333                             }
334                         }
335                         uniqueNames.insert(comboGroupName);
336                         
337                         map<string, vector<string> >::iterator itGroup2Barcode = Group2Barcode.find(comboGroupName);
338                         if (itGroup2Barcode == Group2Barcode.end()) {
339                             vector<string> tempBarcodes; tempBarcodes.push_back(itBar->first);
340                             Group2Barcode[comboGroupName] = tempBarcodes;
341                         }else {
342                             Group2Barcode[comboGroupName].push_back(itBar->first);
343                         }
344                         
345                         itGroup2Barcode = Group2Primer.find(comboGroupName);
346                         if (itGroup2Barcode == Group2Primer.end()) {
347                             vector<string> tempPrimers; tempPrimers.push_back(itPrimer->first);
348                             Group2Primer[comboGroupName] = tempPrimers;
349                         }else {
350                             Group2Primer[comboGroupName].push_back(itPrimer->first);
351                         }
352                     }
353                 }
354             }
355         }
356         
357         
358         if (m->debug) { int count = 0; for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } }
359                 
360         
361         Groups.clear();
362         for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) {  Groups.push_back(*it); }
363         
364         return 0;
365         }
366         catch(exception& e) {
367                 m->errorOut(e, "Oligos", "readOligos");
368                 exit(1);
369         }
370 }
371 //********************************************************************/
372 vector<string> Oligos::getBarcodes(string groupName){
373         try {
374         vector<string> thisGroupsBarcodes;
375         
376         map<string, vector<string> >::iterator it = Group2Barcode.find(groupName);
377         
378         if (it == Group2Barcode.end()) {  m->mothurOut("[ERROR]: no barcodes found for group " + groupName + ".\n"); m->control_pressed=true;
379         }else { thisGroupsBarcodes = it->second; }
380         
381         return thisGroupsBarcodes;
382     }
383         catch(exception& e) {
384                 m->errorOut(e, "Oligos", "getBarcodes");
385                 exit(1);
386         }
387 }
388 //********************************************************************/
389 vector<string> Oligos::getPrimers(string groupName){
390         try {
391         vector<string> thisGroupsPrimers;
392         
393         map<string, vector<string> >::iterator it = Group2Primer.find(groupName);
394         
395         if (it == Group2Primer.end()) {  m->mothurOut("[ERROR]: no primers found for group " + groupName + ".\n"); m->control_pressed=true;
396         }else { thisGroupsPrimers = it->second; }
397         
398         return thisGroupsPrimers;
399     }
400         catch(exception& e) {
401                 m->errorOut(e, "Oligos", "getPrimers");
402                 exit(1);
403         }
404 }
405 //********************************************************************/
406 //can't have paired and unpaired so this function will either run the paired map or the unpaired
407 map<int, oligosPair> Oligos::getReorientedPairedPrimers(){
408         try {
409         map<int, oligosPair> rpairedPrimers;
410         
411         for (map<int, oligosPair>::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) {
412             string forward = (it->second).reverse;
413             if (reversePairs) { forward = reverseOligo(forward); }
414             string reverse = (it->second).forward;
415             if (reversePairs) { reverse = reverseOligo(reverse); }
416             oligosPair tempPair(forward, reverse); //reversePrimer, rc ForwardPrimer
417             rpairedPrimers[it->first] = tempPair;
418         }
419         
420         
421         for (map<string, int>::iterator it = primers.begin(); it != primers.end(); it++) {
422             oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
423             rpairedPrimers[it->second] = tempPair;
424         }
425         
426         return rpairedPrimers;
427     }
428         catch(exception& e) {
429                 m->errorOut(e, "Oligos", "getReorientedPairedPrimers");
430                 exit(1);
431         }
432 }
433 //********************************************************************/
434 //can't have paired and unpaired so this function will either run the paired map or the unpaired
435 map<int, oligosPair> Oligos::getReorientedPairedBarcodes(){
436         try {
437         map<int, oligosPair> rpairedBarcodes;
438         
439         for (map<int, oligosPair>::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) {
440             string forward = (it->second).reverse;
441             if (reversePairs) { forward = reverseOligo(forward); }
442             string reverse = (it->second).forward;
443             if (reversePairs) { reverse = reverseOligo(reverse); }
444             oligosPair tempPair(forward, reverse); //reversePrimer, rc ForwardPrimer
445             rpairedBarcodes[it->first] = tempPair;
446         }
447         
448         for (map<string, int>::iterator it = barcodes.begin(); it != barcodes.end(); it++) {
449             oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
450             rpairedBarcodes[it->second] = tempPair;
451         }
452
453         return rpairedBarcodes;
454     }
455         catch(exception& e) {
456                 m->errorOut(e, "Oligos", "getReorientedPairedBarcodes");
457                 exit(1);
458         }
459 }
460
461 //********************************************************************/
462 string Oligos::reverseOligo(string oligo){
463         try {
464         string reverse = "";
465         
466         for(int i=oligo.length()-1;i>=0;i--){
467             
468             if(oligo[i] == 'A')         {       reverse += 'T'; }
469             else if(oligo[i] == 'T'){   reverse += 'A'; }
470             else if(oligo[i] == 'U'){   reverse += 'A'; }
471             
472             else if(oligo[i] == 'G'){   reverse += 'C'; }
473             else if(oligo[i] == 'C'){   reverse += 'G'; }
474             
475             else if(oligo[i] == 'R'){   reverse += 'Y'; }
476             else if(oligo[i] == 'Y'){   reverse += 'R'; }
477             
478             else if(oligo[i] == 'M'){   reverse += 'K'; }
479             else if(oligo[i] == 'K'){   reverse += 'M'; }
480             
481             else if(oligo[i] == 'W'){   reverse += 'W'; }
482             else if(oligo[i] == 'S'){   reverse += 'S'; }
483             
484             else if(oligo[i] == 'B'){   reverse += 'V'; }
485             else if(oligo[i] == 'V'){   reverse += 'B'; }
486             
487             else if(oligo[i] == 'D'){   reverse += 'H'; }
488             else if(oligo[i] == 'H'){   reverse += 'D'; }
489             
490             else                                                {       reverse += 'N'; }
491         }
492         
493         
494         return reverse;
495     }
496         catch(exception& e) {
497                 m->errorOut(e, "Oligos", "reverseOligo");
498                 exit(1);
499         }
500 }
501 //********************************************************************/
502 string Oligos::getBarcodeName(int index){
503         try {
504         string name = "";
505         
506         if ((index >= 0) && (index < barcodeNameVector.size())) { name = barcodeNameVector[index]; }
507             
508         return name;
509     }
510         catch(exception& e) {
511                 m->errorOut(e, "Oligos", "getBarcodeName");
512                 exit(1);
513         }
514 }
515 //********************************************************************/
516 string Oligos::getPrimerName(int index){
517     try {
518         string name = "";
519         
520         if ((index >= 0) && (index < primerNameVector.size())) { name = primerNameVector[index]; }
521         
522         return name;
523     }
524     catch(exception& e) {
525         m->errorOut(e, "Oligos", "getPrimerName");
526         exit(1);
527     }
528 }
529 //********************************************************************/
530 string Oligos::getGroupName(int barcodeIndex, int primerIndex){
531     try {
532         
533         string thisGroup = "";
534             if(numBarcodes != 0){
535                 thisGroup = getBarcodeName(barcodeIndex);
536                 if (numFPrimers != 0) {
537                     if (getPrimerName(primerIndex) != "") {
538                         if(thisGroup != "") {
539                             thisGroup += "." + getPrimerName(primerIndex);
540                         }else {
541                             thisGroup = getPrimerName(primerIndex);
542                         }
543                     } 
544                 }
545             }
546         
547         return thisGroup;
548     }
549     catch(exception& e) {
550         m->errorOut(e, "Oligos", "getGroupName");
551         exit(1);
552     }
553 }
554
555 /**************************************************************************************************/
556