]> git.donarmstrong.com Git - mothur.git/blob - consensusseqscommand.cpp
removed read.dist, read.otu, read.tree and globaldata. added current to defaults...
[mothur.git] / consensusseqscommand.cpp
1 /*
2  *  consensusseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/23/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "consensusseqscommand.h"
11 #include "sequence.hpp"
12 #include "inputdata.h"
13
14 //**********************************************************************************************************************
15 vector<string> ConsensusSeqsCommand::setParameters(){   
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
19                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(plist);
20                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
21                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
22                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
23                 
24                 vector<string> myArray;
25                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
26                 return myArray;
27         }
28         catch(exception& e) {
29                 m->errorOut(e, "ConsensusSeqsCommand", "setParameters");
30                 exit(1);
31         }
32 }
33 //**********************************************************************************************************************
34 string ConsensusSeqsCommand::getHelpString(){   
35         try {
36                 string helpString = "";
37                 helpString += "The consensus.seqs command can be used in 2 ways: create a consensus sequence from a fastafile, or with a listfile create a consensus sequence for each otu. Sequences must be aligned.\n";
38                 helpString += "The consensus.seqs command parameters are fasta, list, name and label.\n";
39                 helpString += "The fasta parameter allows you to enter the fasta file containing your sequences, and is required, unless you have a valid current fasta file. \n";
40                 helpString += "The list parameter allows you to enter a your list file. \n";
41                 helpString += "The name parameter allows you to enter a names file associated with the fasta file. \n";
42                 helpString += "The label parameter allows you to select what distance levels you would like output files for, and are separated by dashes.\n";
43                 helpString += "The consensus.seqs command should be in the following format: \n";
44                 helpString += "consensus.seqs(fasta=yourFastaFile, list=yourListFile) \n";      
45                 helpString += "Example: consensus.seqs(fasta=abrecovery.align, list=abrecovery.fn.list) \n";
46                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n";     
47                 return helpString;
48         }
49         catch(exception& e) {
50                 m->errorOut(e, "ConsensusSeqsCommand", "getHelpString");
51                 exit(1);
52         }
53 }
54
55 //**********************************************************************************************************************
56 ConsensusSeqsCommand::ConsensusSeqsCommand(){   
57         try {
58                 abort = true; calledHelp = true; 
59                 setParameters();
60                 vector<string> tempOutNames;
61                 outputTypes["fasta"] = tempOutNames;
62                 outputTypes["name"] = tempOutNames;
63                 outputTypes["summary"] = tempOutNames;
64         }
65         catch(exception& e) {
66                 m->errorOut(e, "ConsensusSeqsCommand", "ConsensusSeqsCommand");
67                 exit(1);
68         }
69 }
70 //***************************************************************************************************************
71 ConsensusSeqsCommand::ConsensusSeqsCommand(string option)  {
72         try {
73                 abort = false; calledHelp = false;   
74                 allLines = 1;
75                 
76                 //allow user to run help
77                 if(option == "help") { help(); abort = true; calledHelp = true; }
78                 
79                 else {
80                         
81                         vector<string> myArray = setParameters();
82                         
83                         OptionParser parser(option);
84                         map<string,string> parameters = parser.getParameters();
85                         
86                         ValidParameters validParameter;
87                         map<string,string>::iterator it;
88                         
89                         //check to make sure all parameters are valid for command
90                         for (it = parameters.begin(); it != parameters.end(); it++) { 
91                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
92                         }
93                         
94                         //initialize outputTypes
95                         vector<string> tempOutNames;
96                         outputTypes["fasta"] = tempOutNames;
97                         outputTypes["name"] = tempOutNames;
98                         outputTypes["summary"] = tempOutNames;
99                         
100                                                 
101                         //if the user changes the input directory command factory will send this info to us in the output parameter 
102                         string inputDir = validParameter.validFile(parameters, "inputdir", false);      
103                         if (inputDir == "not found"){   inputDir = "";          }
104                         else {
105                                 string path;
106                                 it = parameters.find("fasta");
107                                 //user has given a template file
108                                 if(it != parameters.end()){ 
109                                         path = m->hasPath(it->second);
110                                         //if the user has not given a path then, add inputdir. else leave path alone.
111                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
112                                 }
113                                 
114                                 it = parameters.find("name");
115                                 //user has given a template file
116                                 if(it != parameters.end()){ 
117                                         path = m->hasPath(it->second);
118                                         //if the user has not given a path then, add inputdir. else leave path alone.
119                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
120                                 }
121                                 
122                                 it = parameters.find("list");
123                                 //user has given a template file
124                                 if(it != parameters.end()){ 
125                                         path = m->hasPath(it->second);
126                                         //if the user has not given a path then, add inputdir. else leave path alone.
127                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
128                                 }
129                         }
130                         
131                         
132                         //check for parameters
133                         fastafile = validParameter.validFile(parameters, "fasta", true);
134                         if (fastafile == "not open") { abort = true; }
135                         else if (fastafile == "not found") {                    
136                                 fastafile = m->getFastaFile(); 
137                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
138                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
139                         }       
140                         
141                         namefile = validParameter.validFile(parameters, "name", true);
142                         if (namefile == "not open") { abort = true; }
143                         else if (namefile == "not found") { namefile = ""; }    
144                         
145                         listfile = validParameter.validFile(parameters, "list", true);
146                         if (listfile == "not open") { abort = true; }
147                         else if (listfile == "not found") { listfile = "";  }   
148                         
149                         label = validParameter.validFile(parameters, "label", false);                   
150                         if (label == "not found") { label = ""; }
151                         else { 
152                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
153                                 else { allLines = 1;  }
154                         }
155                         
156                         //if the user changes the output directory command factory will send this info to us in the output parameter 
157                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(fastafile);      }
158
159                 }
160         }
161         catch(exception& e) {
162                 m->errorOut(e, "ConsensusSeqsCommand", "ConsensusSeqsCommand");
163                 exit(1);
164         }
165 }
166 //***************************************************************************************************************
167
168 int ConsensusSeqsCommand::execute(){
169         try{
170                 
171                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
172                 
173                 readFasta();
174                 
175                 if (m->control_pressed) { return 0; }
176                 
177                 if (namefile != "") { readNames(); }
178                 
179                 if (m->control_pressed) { return 0; }
180                 
181                                 
182                 if (listfile == "") {
183                         
184                         ofstream outSummary;
185                         string outputSummaryFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "cons.summary";
186                         m->openOutputFile(outputSummaryFile, outSummary);
187                         outSummary.setf(ios::fixed, ios::floatfield); outSummary.setf(ios::showpoint);
188                         outputNames.push_back(outputSummaryFile); outputTypes["summary"].push_back(outputSummaryFile);
189                         
190                         outSummary << "PositioninAlignment\tA\tT\tG\tC\tGap\tNumberofSeqs\tConsensusBase" << endl;
191                         
192                         ofstream outFasta;
193                         string outputFastaFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "cons.fasta";
194                         m->openOutputFile(outputFastaFile, outFasta);
195                         outputNames.push_back(outputFastaFile); outputTypes["fasta"].push_back(outputFastaFile);
196                         
197                         vector<string> seqs;
198                         int seqLength = 0;
199                         for (map<string, string>::iterator it = nameMap.begin(); it != nameMap.end(); it++) {
200                                 
201                                 if (m->control_pressed) { outSummary.close(); outFasta.close(); for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0; }
202                                 
203                                 string seq = fastaMap[it->second];
204                                 seqs.push_back(seq);
205                                 
206                                 if (seqLength == 0) { seqLength = seq.length(); }
207                                 else if (seqLength != seq.length()) { m->mothurOut("[ERROR]: sequence are not the same length, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
208
209                         }
210                         
211                         vector< vector<float> > percentages; percentages.resize(5);
212                         for (int j = 0; j < percentages.size(); j++) { percentages[j].resize(seqLength, 0.0); }
213                         
214                         string consSeq = "";
215                         //get counts
216                         for (int j = 0; j < seqLength; j++) {
217                                 
218                                 if (m->control_pressed) { outSummary.close(); outFasta.close(); for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0; }
219                                 
220                                 vector<int> counts; counts.resize(5, 0); //A,T,G,C,Gap
221                                 int numDots = 0;
222                                 
223                                 for (int i = 0; i < seqs.size(); i++) {
224                                         
225                                         if (seqs[i][j] == '.') { numDots++; }
226                                         
227                                         char base = toupper(seqs[i][j]);
228                                         if (base == 'A') { counts[0]++; }
229                                         else if (base == 'T') { counts[1]++; }
230                                         else if (base == 'G') { counts[2]++; }
231                                         else if (base == 'C') { counts[3]++; }
232                                         else { counts[4]++; }
233                                 }
234                                 
235                                 char conBase = '.';
236                                 if (numDots != seqs.size()) { conBase = getBase(counts); }
237                                 
238                                 consSeq += conBase;
239                                 
240                                 percentages[0][j] = counts[0] / (float) seqs.size();
241                                 percentages[1][j] = counts[1] / (float) seqs.size();
242                                 percentages[2][j] = counts[2] / (float) seqs.size();
243                                 percentages[3][j] = counts[3] / (float) seqs.size();
244                                 percentages[4][j] = counts[4] / (float) seqs.size();
245                                 
246                         }
247                         
248                         for (int j = 0; j < seqLength; j++) { 
249                                 outSummary << (j+1) << '\t' << percentages[0][j] << '\t'<< percentages[1][j] << '\t'<< percentages[2][j] << '\t' << percentages[3][j] << '\t' << percentages[4][j] << '\t' << seqs.size() << '\t' << consSeq[j] << endl;
250                         }
251                         
252                                 
253                         outFasta << ">conseq" << endl << consSeq << endl;
254                         
255                         outSummary.close(); outFasta.close();
256                         
257                 
258                 }else {
259                         
260                                                 
261                         InputData* input = new InputData(listfile, "list");
262                         ListVector* list = input->getListVector();
263                         
264                         string lastLabel = list->getLabel();
265                         set<string> processedLabels;
266                         set<string> userLabels = labels;
267
268                         //as long as you are not at the end of the file or done wih the lines you want
269                         while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
270                                 
271                                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str()); } delete list; delete input;  return 0;  }
272                                 
273                                 if(allLines == 1 || labels.count(list->getLabel()) == 1){                       
274                                         
275                                         m->mothurOut(list->getLabel()); m->mothurOutEndLine();
276                                         
277                                         processList(list);
278                                         
279                                         processedLabels.insert(list->getLabel());
280                                         userLabels.erase(list->getLabel());
281                                 }
282                                 
283                                 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
284                                         string saveLabel = list->getLabel();
285                                         
286                                         delete list; 
287                                         
288                                         list = input->getListVector(lastLabel);
289                                         m->mothurOut(list->getLabel()); m->mothurOutEndLine();
290                                         
291                                         processList(list);
292                                         
293                                         processedLabels.insert(list->getLabel());
294                                         userLabels.erase(list->getLabel());
295                                         
296                                         //restore real lastlabel to save below
297                                         list->setLabel(saveLabel);
298                                 }
299                                 
300                                 lastLabel = list->getLabel();
301                                 
302                                 delete list; list = NULL;
303                                 
304                                 //get next line to process
305                                 list = input->getListVector();                          
306                         }
307                         
308                         
309                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); } if (list != NULL) { delete list; } delete input; return 0;  }
310                         
311                         //output error messages about any remaining user labels
312                         set<string>::iterator it;
313                         bool needToRun = false;
314                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
315                                 m->mothurOut("Your file does not include the label " + *it); 
316                                 if (processedLabels.count(lastLabel) != 1) {
317                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
318                                         needToRun = true;
319                                 }else {
320                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
321                                 }
322                         }
323                         
324                         //run last label if you need to
325                         if (needToRun == true)  {
326                                 if (list != NULL) { delete list; }
327                                 
328                                 list = input->getListVector(lastLabel);
329                                 
330                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
331                                 
332                                 processList(list);
333                                 
334                                 delete list; list = NULL;
335                         }
336                         
337                         if (list != NULL) { delete list; }
338                         delete input;
339                 }
340                 
341                 m->mothurOutEndLine();
342                 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
343                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
344                 m->mothurOutEndLine();
345                 
346                 
347                 return 0;
348                 
349         }
350         catch(exception& e) {
351                 m->errorOut(e, "ConsensusSeqsCommand", "execute");
352                 exit(1);
353         }
354 }
355 //***************************************************************************************************************
356
357 int ConsensusSeqsCommand::processList(ListVector*& list){
358         try{
359                 
360                 ofstream outSummary;
361                 string outputSummaryFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + list->getLabel() + ".cons.summary";
362                 m->openOutputFile(outputSummaryFile, outSummary);
363                 outSummary.setf(ios::fixed, ios::floatfield); outSummary.setf(ios::showpoint);
364                 outputNames.push_back(outputSummaryFile); outputTypes["summary"].push_back(outputSummaryFile);
365                 
366                 ofstream outName;
367                 string outputNameFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + list->getLabel() + ".cons.names";
368                 m->openOutputFile(outputNameFile, outName);
369                 outputNames.push_back(outputNameFile); outputTypes["name"].push_back(outputNameFile);
370                 
371                 ofstream outFasta;
372                 string outputFastaFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + list->getLabel() + ".cons.fasta";
373                 m->openOutputFile(outputFastaFile, outFasta);
374                 outputNames.push_back(outputFastaFile); outputTypes["fasta"].push_back(outputFastaFile);
375                 
376                 outSummary << "OTU#\tPositioninAlignment\tA\tT\tG\tC\tGap\tNumberofSeqs\tConsensusBase" << endl;
377                 
378                 for (int i = 0; i < list->getNumBins(); i++) {
379                         
380                         if (m->control_pressed) { outSummary.close(); outName.close(); outFasta.close(); return 0; }
381                         
382                         string bin = list->get(i);
383                         
384                         string newName = "";
385                         string consSeq = getConsSeq(bin, outSummary, newName, i);
386                         
387                         outFasta << ">seq" << (i+1) << endl << consSeq << endl;
388                         outName << "seq" << (i+1) << '\t' << "seq" << (i+1) << "," << newName << endl;
389                 }
390                 
391                 outSummary.close(); outName.close(); outFasta.close();
392                 
393                 return 0;
394                 
395         }
396         catch(exception& e) {
397                 m->errorOut(e, "ConsensusSeqsCommand", "processList");
398                 exit(1);
399         }
400 }
401
402 //***************************************************************************************************************
403 //made this smart enough to owrk with unique or non unique list file
404 string ConsensusSeqsCommand::getConsSeq(string bin, ofstream& outSummary, string& name, int binNumber){
405         try{
406                 
407                 string consSeq = "";
408                 bool error = false;
409                 
410                 //the whole bin is the second column if no names file, otherwise build it
411                 name = bin;
412                 if (namefile != "") { name = ""; }
413                 
414                 vector<string> binNames;
415                 m->splitAtComma(bin, binNames);
416                 
417                 //get sequence strings for each name in the bin
418                 vector<string> seqs;
419                 
420                 set<string> addedAlready;
421                 int seqLength = 0;
422                 for (int i = 0; i < binNames.size(); i++) {
423                         
424                         map<string, string>::iterator it;
425                         
426                         it = nameMap.find(binNames[i]);
427                         if (it == nameMap.end()) { 
428                                 if (namefile == "") { m->mothurOut("[ERROR]: " + binNames[i] + " is not in your fasta file, please correct."); m->mothurOutEndLine(); error = true; }
429                                 else { m->mothurOut("[ERROR]: " + binNames[i] + " is not in your fasta or name file, please correct."); m->mothurOutEndLine(); error = true; }
430                                 break;
431                         }else {
432                                 
433                                 //add sequence string to seqs vector to process below
434                                 string seq = fastaMap[it->second];
435                                 seqs.push_back(seq);
436                                 
437                                 if (seqLength == 0) { seqLength = seq.length(); }
438                                 else if (seqLength != seq.length()) { m->mothurOut("[ERROR]: sequence are not the same length, please correct."); m->mothurOutEndLine(); error = true; break; }
439                                 
440                                 if (namefile != "") { 
441                                         //did we add this line from name file already?
442                                         if (addedAlready.count(it->second) == 0) {
443                                                 name += "," + nameFileMap[it->second];
444                                                 addedAlready.insert(it->second);
445                                         }
446                                 }
447                                 
448                         }
449                 }
450                 
451                 if (error) { m->control_pressed = true; return consSeq; }
452                 
453                 if (namefile != "") { name = name.substr(1); }
454                 
455                 vector< vector<float> > percentages; percentages.resize(5);
456                 for (int j = 0; j < percentages.size(); j++) { percentages[j].resize(seqLength, 0.0); }
457                 
458                 //get counts
459                 for (int j = 0; j < seqLength; j++) {
460                         
461                         if (m->control_pressed) { return consSeq; }
462                         
463                         vector<int> counts; counts.resize(5, 0); //A,T,G,C,Gap
464                         int numDots = 0;
465                         
466                         for (int i = 0; i < seqs.size(); i++) {
467                                 
468                                 if (seqs[i][j] == '.') { numDots++; }
469                                 
470                                 char base = toupper(seqs[i][j]);
471                                 if (base == 'A') { counts[0]++; }
472                                 else if (base == 'T') { counts[1]++; }
473                                 else if (base == 'G') { counts[2]++; }
474                                 else if (base == 'C') { counts[3]++; }
475                                 else { counts[4]++; }
476                         }
477                         
478                         char conBase = '.';
479                         if (numDots != seqs.size()) { conBase = getBase(counts); }
480                         
481                         consSeq += conBase;
482                         
483                         percentages[0][j] = counts[0] / (float) seqs.size();
484                         percentages[1][j] = counts[1] / (float) seqs.size();
485                         percentages[2][j] = counts[2] / (float) seqs.size();
486                         percentages[3][j] = counts[3] / (float) seqs.size();
487                         percentages[4][j] = counts[4] / (float) seqs.size();
488                         
489                 }
490                 
491                 for (int j = 0; j < seqLength; j++) { 
492                         outSummary << (binNumber + 1) << '\t' << (j+1) << '\t' << percentages[0][j] << '\t'<< percentages[1][j] << '\t'<< percentages[2][j] << '\t' << percentages[3][j] << '\t' << percentages[4][j] << '\t' << seqs.size() << '\t' << consSeq[j] << endl;
493                 }
494                 
495                 return consSeq;
496                 
497         }
498         catch(exception& e) {
499                 m->errorOut(e, "ConsensusSeqsCommand", "getConsSeq");
500                 exit(1);
501         }
502 }
503 //***************************************************************************************************************
504
505 char ConsensusSeqsCommand::getBase(vector<int> counts){  //A,T,G,C,Gap
506         try{
507                 /* A = adenine
508                 * C = cytosine
509                 * G = guanine
510                 * T = thymine
511                 * R = G A (purine)
512                 * Y = T C (pyrimidine)
513                 * K = G T (keto)
514                 * M = A C (amino)
515                 * S = G C (strong bonds)
516                 * W = A T (weak bonds)
517                 * B = G T C (all but A)
518                 * D = G A T (all but C)
519                 * H = A C T (all but G)
520                 * V = G C A (all but T)
521                 * N = A G C T (any) */
522                 
523                 char conBase = 'N';
524                 
525                 //any
526                 if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'n'; }
527                 //any no gap
528                 else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'N'; }
529                 //all but T
530                 else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'v'; }  
531                 //all but T no gap
532                 else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'V'; }  
533                 //all but G
534                 else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'h'; }  
535                 //all but G no gap
536                 else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'H'; }  
537                 //all but C
538                 else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'd'; }  
539                 //all but C no gap
540                 else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'D'; }  
541                 //all but A
542                 else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'b'; }  
543                 //all but A no gap
544                 else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'B'; }  
545                 //W = A T (weak bonds)
546                 else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'w'; }  
547                 //W = A T (weak bonds) no gap
548                 else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'W'; }  
549                 //S = G C (strong bonds)
550                 else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 's'; }  
551                 //S = G C (strong bonds) no gap
552                 else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'S'; }  
553                 //M = A C (amino)
554                 else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'm'; }  
555                 //M = A C (amino) no gap
556                 else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'M'; }  
557                 //K = G T (keto)
558                 else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'k'; }  
559                 //K = G T (keto) no gap
560                 else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'K'; }  
561                 //Y = T C (pyrimidine)
562                 else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'y'; }  
563                 //Y = T C (pyrimidine) no gap
564                 else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'Y'; }  
565                 //R = G A (purine)
566                 else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'r'; }  
567                 //R = G A (purine) no gap
568                 else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'R'; }  
569                 //only A
570                 else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'a'; }  
571                 //only A no gap
572                 else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'A'; }  
573                 //only T
574                 else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 't'; }  
575                 //only T no gap
576                 else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'T'; }  
577                 //only G
578                 else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'g'; }  
579                 //only G no gap
580                 else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'G'; }  
581                 //only C
582                 else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'c'; }  
583                 //only C no gap
584                 else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'C'; }  
585                 //only gap
586                 else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = '-'; }
587                 else{ m->mothurOut("[ERROR]: cannot find consensus base."); m->mothurOutEndLine(); }
588                 
589                 return conBase;
590                 
591         }
592         catch(exception& e) {
593                 m->errorOut(e, "ConsensusSeqsCommand", "getBase");
594                 exit(1);
595         }
596 }
597
598 //***************************************************************************************************************
599
600 int ConsensusSeqsCommand::readFasta(){
601         try{
602                 
603                 ifstream in;
604                 m->openInputFile(fastafile, in);
605                 
606                 while (!in.eof()) {
607                         
608                         if (m->control_pressed) { break; }
609                         
610                         Sequence seq(in); m->gobble(in);
611                         string name = seq.getName();
612                         
613                         if (name != "") {
614                                 fastaMap[name] = seq.getAligned();
615                                 nameMap[name] = name; //set nameMap incase no names file
616                                 nameFileMap[name] = name;
617                         }
618                 }
619                 
620                 in.close();
621                 
622                 return 0;
623                 
624         }
625         catch(exception& e) {
626                 m->errorOut(e, "ConsensusSeqsCommand", "readFasta");
627                 exit(1);
628         }
629 }
630 //***************************************************************************************************************
631
632 int ConsensusSeqsCommand::readNames(){
633          try{
634                  
635                  ifstream in;
636                  m->openInputFile(namefile, in);
637                  
638                  string thisname, repnames;
639                  map<string, string>::iterator it;
640                  
641                  bool error = false;
642                  
643                  while(!in.eof()){
644                          
645                          if (m->control_pressed) { break; }
646                          
647                          in >> thisname;                m->gobble(in);          //read from first column
648                          in >> repnames;                        //read from second column
649                          
650                          it = nameMap.find(thisname);
651                          if (it != nameMap.end()) { //then this sequence was in the fastafile
652                                  
653                                  vector<string> splitRepNames;
654                                  m->splitAtComma(repnames, splitRepNames);
655                                  
656                                  nameFileMap[thisname] = repnames;      //for later when outputting the new namesFile if the list file is unique
657                                  for (int i = 0; i < splitRepNames.size(); i++) { nameMap[splitRepNames[i]] = thisname; }
658                                  
659                          }else{ m->mothurOut("[ERROR]: " + thisname + " is not in the fasta file, please correct."); m->mothurOutEndLine(); error = true; }
660                          
661                          m->gobble(in);
662                  }
663                  
664                  in.close();
665                  
666                  if (error) { m->control_pressed = true; }
667  
668                  return 0;
669  
670         }
671          catch(exception& e) {
672                  m->errorOut(e, "ConsensusSeqsCommand", "readNames");
673                  exit(1);
674          }
675  }
676  
677 //***************************************************************************************************************
678
679