]> git.donarmstrong.com Git - mothur.git/blob - consensusseqscommand.cpp
added citation function to commands
[mothur.git] / consensusseqscommand.cpp
1 /*
2  *  consensusseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/23/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "consensusseqscommand.h"
11 #include "sequence.hpp"
12 #include "inputdata.h"
13
14 //**********************************************************************************************************************
15 vector<string> ConsensusSeqsCommand::setParameters(){   
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
19                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(plist);
20                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
21                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
22                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
23                 
24                 vector<string> myArray;
25                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
26                 return myArray;
27         }
28         catch(exception& e) {
29                 m->errorOut(e, "ConsensusSeqsCommand", "setParameters");
30                 exit(1);
31         }
32 }
33 //**********************************************************************************************************************
34 string ConsensusSeqsCommand::getHelpString(){   
35         try {
36                 string helpString = "";
37                 helpString += "The consensus.seqs command can be used in 2 ways: create a consensus sequence from a fastafile, or with a listfile create a consensus sequence for each otu. Sequences must be aligned.\n";
38                 helpString += "The consensus.seqs command parameters are fasta, list, name and label.\n";
39                 helpString += "The fasta parameter allows you to enter the fasta file containing your sequences, and is required, unless you have a valid current fasta file. \n";
40                 helpString += "The list parameter allows you to enter a your list file. \n";
41                 helpString += "The name parameter allows you to enter a names file associated with the fasta file. \n";
42                 helpString += "The label parameter allows you to select what distance levels you would like output files for, and are separated by dashes.\n";
43                 helpString += "The consensus.seqs command should be in the following format: \n";
44                 helpString += "consensus.seqs(fasta=yourFastaFile, list=yourListFile) \n";      
45                 helpString += "Example: consensus.seqs(fasta=abrecovery.align, list=abrecovery.fn.list) \n";
46                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
47                 return helpString;
48         }
49         catch(exception& e) {
50                 m->errorOut(e, "ConsensusSeqsCommand", "getHelpString");
51                 exit(1);
52         }
53 }
54
55 //**********************************************************************************************************************
56 ConsensusSeqsCommand::ConsensusSeqsCommand(){   
57         try {
58                 abort = true; calledHelp = true; 
59                 setParameters();
60                 vector<string> tempOutNames;
61                 outputTypes["fasta"] = tempOutNames;
62                 outputTypes["name"] = tempOutNames;
63                 outputTypes["summary"] = tempOutNames;
64         }
65         catch(exception& e) {
66                 m->errorOut(e, "ConsensusSeqsCommand", "ConsensusSeqsCommand");
67                 exit(1);
68         }
69 }
70 //***************************************************************************************************************
71 ConsensusSeqsCommand::ConsensusSeqsCommand(string option)  {
72         try {
73                 abort = false; calledHelp = false;   
74                 allLines = 1;
75                 
76                 //allow user to run help
77                 if(option == "help") { help(); abort = true; calledHelp = true; }
78                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
79                 
80                 else {
81                         
82                         vector<string> myArray = setParameters();
83                         
84                         OptionParser parser(option);
85                         map<string,string> parameters = parser.getParameters();
86                         
87                         ValidParameters validParameter;
88                         map<string,string>::iterator it;
89                         
90                         //check to make sure all parameters are valid for command
91                         for (it = parameters.begin(); it != parameters.end(); it++) { 
92                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
93                         }
94                         
95                         //initialize outputTypes
96                         vector<string> tempOutNames;
97                         outputTypes["fasta"] = tempOutNames;
98                         outputTypes["name"] = tempOutNames;
99                         outputTypes["summary"] = tempOutNames;
100                         
101                                                 
102                         //if the user changes the input directory command factory will send this info to us in the output parameter 
103                         string inputDir = validParameter.validFile(parameters, "inputdir", false);      
104                         if (inputDir == "not found"){   inputDir = "";          }
105                         else {
106                                 string path;
107                                 it = parameters.find("fasta");
108                                 //user has given a template file
109                                 if(it != parameters.end()){ 
110                                         path = m->hasPath(it->second);
111                                         //if the user has not given a path then, add inputdir. else leave path alone.
112                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
113                                 }
114                                 
115                                 it = parameters.find("name");
116                                 //user has given a template file
117                                 if(it != parameters.end()){ 
118                                         path = m->hasPath(it->second);
119                                         //if the user has not given a path then, add inputdir. else leave path alone.
120                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
121                                 }
122                                 
123                                 it = parameters.find("list");
124                                 //user has given a template file
125                                 if(it != parameters.end()){ 
126                                         path = m->hasPath(it->second);
127                                         //if the user has not given a path then, add inputdir. else leave path alone.
128                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
129                                 }
130                         }
131                         
132                         
133                         //check for parameters
134                         fastafile = validParameter.validFile(parameters, "fasta", true);
135                         if (fastafile == "not open") { abort = true; }
136                         else if (fastafile == "not found") {                    
137                                 fastafile = m->getFastaFile(); 
138                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
139                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
140                         }       
141                         
142                         namefile = validParameter.validFile(parameters, "name", true);
143                         if (namefile == "not open") { abort = true; }
144                         else if (namefile == "not found") { namefile = ""; }    
145                         
146                         listfile = validParameter.validFile(parameters, "list", true);
147                         if (listfile == "not open") { abort = true; }
148                         else if (listfile == "not found") { listfile = "";  }   
149                         
150                         label = validParameter.validFile(parameters, "label", false);                   
151                         if (label == "not found") { label = ""; }
152                         else { 
153                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
154                                 else { allLines = 1;  }
155                         }
156                         
157                         //if the user changes the output directory command factory will send this info to us in the output parameter 
158                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(fastafile);      }
159
160                 }
161         }
162         catch(exception& e) {
163                 m->errorOut(e, "ConsensusSeqsCommand", "ConsensusSeqsCommand");
164                 exit(1);
165         }
166 }
167 //***************************************************************************************************************
168
169 int ConsensusSeqsCommand::execute(){
170         try{
171                 
172                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
173                 
174                 readFasta();
175                 
176                 if (m->control_pressed) { return 0; }
177                 
178                 if (namefile != "") { readNames(); }
179                 
180                 if (m->control_pressed) { return 0; }
181                 
182                                 
183                 if (listfile == "") {
184                         
185                         ofstream outSummary;
186                         string outputSummaryFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "cons.summary";
187                         m->openOutputFile(outputSummaryFile, outSummary);
188                         outSummary.setf(ios::fixed, ios::floatfield); outSummary.setf(ios::showpoint);
189                         outputNames.push_back(outputSummaryFile); outputTypes["summary"].push_back(outputSummaryFile);
190                         
191                         outSummary << "PositioninAlignment\tA\tT\tG\tC\tGap\tNumberofSeqs\tConsensusBase" << endl;
192                         
193                         ofstream outFasta;
194                         string outputFastaFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "cons.fasta";
195                         m->openOutputFile(outputFastaFile, outFasta);
196                         outputNames.push_back(outputFastaFile); outputTypes["fasta"].push_back(outputFastaFile);
197                         
198                         vector<string> seqs;
199                         int seqLength = 0;
200                         for (map<string, string>::iterator it = nameMap.begin(); it != nameMap.end(); it++) {
201                                 
202                                 if (m->control_pressed) { outSummary.close(); outFasta.close(); for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0; }
203                                 
204                                 string seq = fastaMap[it->second];
205                                 seqs.push_back(seq);
206                                 
207                                 if (seqLength == 0) { seqLength = seq.length(); }
208                                 else if (seqLength != seq.length()) { m->mothurOut("[ERROR]: sequence are not the same length, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
209
210                         }
211                         
212                         vector< vector<float> > percentages; percentages.resize(5);
213                         for (int j = 0; j < percentages.size(); j++) { percentages[j].resize(seqLength, 0.0); }
214                         
215                         string consSeq = "";
216                         //get counts
217                         for (int j = 0; j < seqLength; j++) {
218                                 
219                                 if (m->control_pressed) { outSummary.close(); outFasta.close(); for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0; }
220                                 
221                                 vector<int> counts; counts.resize(5, 0); //A,T,G,C,Gap
222                                 int numDots = 0;
223                                 
224                                 for (int i = 0; i < seqs.size(); i++) {
225                                         
226                                         if (seqs[i][j] == '.') { numDots++; }
227                                         
228                                         char base = toupper(seqs[i][j]);
229                                         if (base == 'A') { counts[0]++; }
230                                         else if (base == 'T') { counts[1]++; }
231                                         else if (base == 'G') { counts[2]++; }
232                                         else if (base == 'C') { counts[3]++; }
233                                         else { counts[4]++; }
234                                 }
235                                 
236                                 char conBase = '.';
237                                 if (numDots != seqs.size()) { conBase = getBase(counts); }
238                                 
239                                 consSeq += conBase;
240                                 
241                                 percentages[0][j] = counts[0] / (float) seqs.size();
242                                 percentages[1][j] = counts[1] / (float) seqs.size();
243                                 percentages[2][j] = counts[2] / (float) seqs.size();
244                                 percentages[3][j] = counts[3] / (float) seqs.size();
245                                 percentages[4][j] = counts[4] / (float) seqs.size();
246                                 
247                         }
248                         
249                         for (int j = 0; j < seqLength; j++) { 
250                                 outSummary << (j+1) << '\t' << percentages[0][j] << '\t'<< percentages[1][j] << '\t'<< percentages[2][j] << '\t' << percentages[3][j] << '\t' << percentages[4][j] << '\t' << seqs.size() << '\t' << consSeq[j] << endl;
251                         }
252                         
253                                 
254                         outFasta << ">conseq" << endl << consSeq << endl;
255                         
256                         outSummary.close(); outFasta.close();
257                         
258                 
259                 }else {
260                         
261                                                 
262                         InputData* input = new InputData(listfile, "list");
263                         ListVector* list = input->getListVector();
264                         
265                         string lastLabel = list->getLabel();
266                         set<string> processedLabels;
267                         set<string> userLabels = labels;
268
269                         //as long as you are not at the end of the file or done wih the lines you want
270                         while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
271                                 
272                                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str()); } delete list; delete input;  return 0;  }
273                                 
274                                 if(allLines == 1 || labels.count(list->getLabel()) == 1){                       
275                                         
276                                         m->mothurOut(list->getLabel()); m->mothurOutEndLine();
277                                         
278                                         processList(list);
279                                         
280                                         processedLabels.insert(list->getLabel());
281                                         userLabels.erase(list->getLabel());
282                                 }
283                                 
284                                 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
285                                         string saveLabel = list->getLabel();
286                                         
287                                         delete list; 
288                                         
289                                         list = input->getListVector(lastLabel);
290                                         m->mothurOut(list->getLabel()); m->mothurOutEndLine();
291                                         
292                                         processList(list);
293                                         
294                                         processedLabels.insert(list->getLabel());
295                                         userLabels.erase(list->getLabel());
296                                         
297                                         //restore real lastlabel to save below
298                                         list->setLabel(saveLabel);
299                                 }
300                                 
301                                 lastLabel = list->getLabel();
302                                 
303                                 delete list; list = NULL;
304                                 
305                                 //get next line to process
306                                 list = input->getListVector();                          
307                         }
308                         
309                         
310                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); } if (list != NULL) { delete list; } delete input; return 0;  }
311                         
312                         //output error messages about any remaining user labels
313                         set<string>::iterator it;
314                         bool needToRun = false;
315                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
316                                 m->mothurOut("Your file does not include the label " + *it); 
317                                 if (processedLabels.count(lastLabel) != 1) {
318                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
319                                         needToRun = true;
320                                 }else {
321                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
322                                 }
323                         }
324                         
325                         //run last label if you need to
326                         if (needToRun == true)  {
327                                 if (list != NULL) { delete list; }
328                                 
329                                 list = input->getListVector(lastLabel);
330                                 
331                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
332                                 
333                                 processList(list);
334                                 
335                                 delete list; list = NULL;
336                         }
337                         
338                         if (list != NULL) { delete list; }
339                         delete input;
340                 }
341                 
342                 m->mothurOutEndLine();
343                 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
344                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
345                 m->mothurOutEndLine();
346                 
347                 
348                 return 0;
349                 
350         }
351         catch(exception& e) {
352                 m->errorOut(e, "ConsensusSeqsCommand", "execute");
353                 exit(1);
354         }
355 }
356 //***************************************************************************************************************
357
358 int ConsensusSeqsCommand::processList(ListVector*& list){
359         try{
360                 
361                 ofstream outSummary;
362                 string outputSummaryFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + list->getLabel() + ".cons.summary";
363                 m->openOutputFile(outputSummaryFile, outSummary);
364                 outSummary.setf(ios::fixed, ios::floatfield); outSummary.setf(ios::showpoint);
365                 outputNames.push_back(outputSummaryFile); outputTypes["summary"].push_back(outputSummaryFile);
366                 
367                 ofstream outName;
368                 string outputNameFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + list->getLabel() + ".cons.names";
369                 m->openOutputFile(outputNameFile, outName);
370                 outputNames.push_back(outputNameFile); outputTypes["name"].push_back(outputNameFile);
371                 
372                 ofstream outFasta;
373                 string outputFastaFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + list->getLabel() + ".cons.fasta";
374                 m->openOutputFile(outputFastaFile, outFasta);
375                 outputNames.push_back(outputFastaFile); outputTypes["fasta"].push_back(outputFastaFile);
376                 
377                 outSummary << "OTU#\tPositioninAlignment\tA\tT\tG\tC\tGap\tNumberofSeqs\tConsensusBase" << endl;
378                 
379                 for (int i = 0; i < list->getNumBins(); i++) {
380                         
381                         if (m->control_pressed) { outSummary.close(); outName.close(); outFasta.close(); return 0; }
382                         
383                         string bin = list->get(i);
384                         
385                         string newName = "";
386                         string consSeq = getConsSeq(bin, outSummary, newName, i);
387                         
388                         outFasta << ">seq" << (i+1) << endl << consSeq << endl;
389                         outName << "seq" << (i+1) << '\t' << "seq" << (i+1) << "," << newName << endl;
390                 }
391                 
392                 outSummary.close(); outName.close(); outFasta.close();
393                 
394                 return 0;
395                 
396         }
397         catch(exception& e) {
398                 m->errorOut(e, "ConsensusSeqsCommand", "processList");
399                 exit(1);
400         }
401 }
402
403 //***************************************************************************************************************
404 //made this smart enough to owrk with unique or non unique list file
405 string ConsensusSeqsCommand::getConsSeq(string bin, ofstream& outSummary, string& name, int binNumber){
406         try{
407                 
408                 string consSeq = "";
409                 bool error = false;
410                 
411                 //the whole bin is the second column if no names file, otherwise build it
412                 name = bin;
413                 if (namefile != "") { name = ""; }
414                 
415                 vector<string> binNames;
416                 m->splitAtComma(bin, binNames);
417                 
418                 //get sequence strings for each name in the bin
419                 vector<string> seqs;
420                 
421                 set<string> addedAlready;
422                 int seqLength = 0;
423                 for (int i = 0; i < binNames.size(); i++) {
424                         
425                         map<string, string>::iterator it;
426                         
427                         it = nameMap.find(binNames[i]);
428                         if (it == nameMap.end()) { 
429                                 if (namefile == "") { m->mothurOut("[ERROR]: " + binNames[i] + " is not in your fasta file, please correct."); m->mothurOutEndLine(); error = true; }
430                                 else { m->mothurOut("[ERROR]: " + binNames[i] + " is not in your fasta or name file, please correct."); m->mothurOutEndLine(); error = true; }
431                                 break;
432                         }else {
433                                 
434                                 //add sequence string to seqs vector to process below
435                                 string seq = fastaMap[it->second];
436                                 seqs.push_back(seq);
437                                 
438                                 if (seqLength == 0) { seqLength = seq.length(); }
439                                 else if (seqLength != seq.length()) { m->mothurOut("[ERROR]: sequence are not the same length, please correct."); m->mothurOutEndLine(); error = true; break; }
440                                 
441                                 if (namefile != "") { 
442                                         //did we add this line from name file already?
443                                         if (addedAlready.count(it->second) == 0) {
444                                                 name += "," + nameFileMap[it->second];
445                                                 addedAlready.insert(it->second);
446                                         }
447                                 }
448                                 
449                         }
450                 }
451                 
452                 if (error) { m->control_pressed = true; return consSeq; }
453                 
454                 if (namefile != "") { name = name.substr(1); }
455                 
456                 vector< vector<float> > percentages; percentages.resize(5);
457                 for (int j = 0; j < percentages.size(); j++) { percentages[j].resize(seqLength, 0.0); }
458                 
459                 //get counts
460                 for (int j = 0; j < seqLength; j++) {
461                         
462                         if (m->control_pressed) { return consSeq; }
463                         
464                         vector<int> counts; counts.resize(5, 0); //A,T,G,C,Gap
465                         int numDots = 0;
466                         
467                         for (int i = 0; i < seqs.size(); i++) {
468                                 
469                                 if (seqs[i][j] == '.') { numDots++; }
470                                 
471                                 char base = toupper(seqs[i][j]);
472                                 if (base == 'A') { counts[0]++; }
473                                 else if (base == 'T') { counts[1]++; }
474                                 else if (base == 'G') { counts[2]++; }
475                                 else if (base == 'C') { counts[3]++; }
476                                 else { counts[4]++; }
477                         }
478                         
479                         char conBase = '.';
480                         if (numDots != seqs.size()) { conBase = getBase(counts); }
481                         
482                         consSeq += conBase;
483                         
484                         percentages[0][j] = counts[0] / (float) seqs.size();
485                         percentages[1][j] = counts[1] / (float) seqs.size();
486                         percentages[2][j] = counts[2] / (float) seqs.size();
487                         percentages[3][j] = counts[3] / (float) seqs.size();
488                         percentages[4][j] = counts[4] / (float) seqs.size();
489                         
490                 }
491                 
492                 for (int j = 0; j < seqLength; j++) { 
493                         outSummary << (binNumber + 1) << '\t' << (j+1) << '\t' << percentages[0][j] << '\t'<< percentages[1][j] << '\t'<< percentages[2][j] << '\t' << percentages[3][j] << '\t' << percentages[4][j] << '\t' << seqs.size() << '\t' << consSeq[j] << endl;
494                 }
495                 
496                 return consSeq;
497                 
498         }
499         catch(exception& e) {
500                 m->errorOut(e, "ConsensusSeqsCommand", "getConsSeq");
501                 exit(1);
502         }
503 }
504 //***************************************************************************************************************
505
506 char ConsensusSeqsCommand::getBase(vector<int> counts){  //A,T,G,C,Gap
507         try{
508                 /* A = adenine
509                 * C = cytosine
510                 * G = guanine
511                 * T = thymine
512                 * R = G A (purine)
513                 * Y = T C (pyrimidine)
514                 * K = G T (keto)
515                 * M = A C (amino)
516                 * S = G C (strong bonds)
517                 * W = A T (weak bonds)
518                 * B = G T C (all but A)
519                 * D = G A T (all but C)
520                 * H = A C T (all but G)
521                 * V = G C A (all but T)
522                 * N = A G C T (any) */
523                 
524                 char conBase = 'N';
525                 
526                 //any
527                 if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'n'; }
528                 //any no gap
529                 else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'N'; }
530                 //all but T
531                 else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'v'; }  
532                 //all but T no gap
533                 else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'V'; }  
534                 //all but G
535                 else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'h'; }  
536                 //all but G no gap
537                 else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'H'; }  
538                 //all but C
539                 else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'd'; }  
540                 //all but C no gap
541                 else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'D'; }  
542                 //all but A
543                 else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'b'; }  
544                 //all but A no gap
545                 else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'B'; }  
546                 //W = A T (weak bonds)
547                 else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'w'; }  
548                 //W = A T (weak bonds) no gap
549                 else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'W'; }  
550                 //S = G C (strong bonds)
551                 else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 's'; }  
552                 //S = G C (strong bonds) no gap
553                 else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'S'; }  
554                 //M = A C (amino)
555                 else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'm'; }  
556                 //M = A C (amino) no gap
557                 else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'M'; }  
558                 //K = G T (keto)
559                 else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'k'; }  
560                 //K = G T (keto) no gap
561                 else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'K'; }  
562                 //Y = T C (pyrimidine)
563                 else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'y'; }  
564                 //Y = T C (pyrimidine) no gap
565                 else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'Y'; }  
566                 //R = G A (purine)
567                 else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'r'; }  
568                 //R = G A (purine) no gap
569                 else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'R'; }  
570                 //only A
571                 else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'a'; }  
572                 //only A no gap
573                 else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'A'; }  
574                 //only T
575                 else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 't'; }  
576                 //only T no gap
577                 else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'T'; }  
578                 //only G
579                 else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'g'; }  
580                 //only G no gap
581                 else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'G'; }  
582                 //only C
583                 else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'c'; }  
584                 //only C no gap
585                 else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'C'; }  
586                 //only gap
587                 else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = '-'; }
588                 else{ m->mothurOut("[ERROR]: cannot find consensus base."); m->mothurOutEndLine(); }
589                 
590                 return conBase;
591                 
592         }
593         catch(exception& e) {
594                 m->errorOut(e, "ConsensusSeqsCommand", "getBase");
595                 exit(1);
596         }
597 }
598
599 //***************************************************************************************************************
600
601 int ConsensusSeqsCommand::readFasta(){
602         try{
603                 
604                 ifstream in;
605                 m->openInputFile(fastafile, in);
606                 
607                 while (!in.eof()) {
608                         
609                         if (m->control_pressed) { break; }
610                         
611                         Sequence seq(in); m->gobble(in);
612                         string name = seq.getName();
613                         
614                         if (name != "") {
615                                 fastaMap[name] = seq.getAligned();
616                                 nameMap[name] = name; //set nameMap incase no names file
617                                 nameFileMap[name] = name;
618                         }
619                 }
620                 
621                 in.close();
622                 
623                 return 0;
624                 
625         }
626         catch(exception& e) {
627                 m->errorOut(e, "ConsensusSeqsCommand", "readFasta");
628                 exit(1);
629         }
630 }
631 //***************************************************************************************************************
632
633 int ConsensusSeqsCommand::readNames(){
634          try{
635                  
636                  ifstream in;
637                  m->openInputFile(namefile, in);
638                  
639                  string thisname, repnames;
640                  map<string, string>::iterator it;
641                  
642                  bool error = false;
643                  
644                  while(!in.eof()){
645                          
646                          if (m->control_pressed) { break; }
647                          
648                          in >> thisname;                m->gobble(in);          //read from first column
649                          in >> repnames;                        //read from second column
650                          
651                          it = nameMap.find(thisname);
652                          if (it != nameMap.end()) { //then this sequence was in the fastafile
653                                  
654                                  vector<string> splitRepNames;
655                                  m->splitAtComma(repnames, splitRepNames);
656                                  
657                                  nameFileMap[thisname] = repnames;      //for later when outputting the new namesFile if the list file is unique
658                                  for (int i = 0; i < splitRepNames.size(); i++) { nameMap[splitRepNames[i]] = thisname; }
659                                  
660                          }else{ m->mothurOut("[ERROR]: " + thisname + " is not in the fasta file, please correct."); m->mothurOutEndLine(); error = true; }
661                          
662                          m->gobble(in);
663                  }
664                  
665                  in.close();
666                  
667                  if (error) { m->control_pressed = true; }
668  
669                  return 0;
670  
671         }
672          catch(exception& e) {
673                  m->errorOut(e, "ConsensusSeqsCommand", "readNames");
674                  exit(1);
675          }
676  }
677  
678 //***************************************************************************************************************
679
680