]> git.donarmstrong.com Git - mothur.git/blob - consensusseqscommand.cpp
consensus.seqs summary file change
[mothur.git] / consensusseqscommand.cpp
1 /*
2  *  consensusseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/23/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "consensusseqscommand.h"
11 #include "sequence.hpp"
12 #include "inputdata.h"
13
14 //**********************************************************************************************************************
15 vector<string> ConsensusSeqsCommand::getValidParameters(){      
16         try {
17                 string Array[] =  {"fasta", "list", "name", "label","outputdir","inputdir"};
18                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
19                 return myArray;
20         }
21         catch(exception& e) {
22                 m->errorOut(e, "ConsensusSeqsCommand", "getValidParameters");
23                 exit(1);
24         }
25 }
26 //**********************************************************************************************************************
27 ConsensusSeqsCommand::ConsensusSeqsCommand(){   
28         try {
29                 abort = true;
30                 //initialize outputTypes
31                 vector<string> tempOutNames;
32                 outputTypes["fasta"] = tempOutNames;
33                 outputTypes["name"] = tempOutNames;
34                 outputTypes["summary"] = tempOutNames;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "ConsensusSeqsCommand", "ConsensusSeqsCommand");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 vector<string> ConsensusSeqsCommand::getRequiredParameters(){   
43         try {
44                 string Array[] =  {"fasta", "list"};
45                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
46                 return myArray;
47         }
48         catch(exception& e) {
49                 m->errorOut(e, "ConsensusSeqsCommand", "getRequiredParameters");
50                 exit(1);
51         }
52 }
53 //**********************************************************************************************************************
54 vector<string> ConsensusSeqsCommand::getRequiredFiles(){        
55         try {
56                 vector<string> myArray;
57                 return myArray;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "DegapSeqsCommand", "getRequiredFiles");
61                 exit(1);
62         }
63 }
64 //***************************************************************************************************************
65 ConsensusSeqsCommand::ConsensusSeqsCommand(string option)  {
66         try {
67                 abort = false;
68                 allLines = 1;
69                 
70                 //allow user to run help
71                 if(option == "help") { help(); abort = true; }
72                 
73                 else {
74                         //valid paramters for this command
75                         string Array[] =  {"fasta","list","name","label", "outputdir","inputdir"};
76                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
77                         
78                         OptionParser parser(option);
79                         map<string,string> parameters = parser.getParameters();
80                         
81                         ValidParameters validParameter;
82                         map<string,string>::iterator it;
83                         
84                         //check to make sure all parameters are valid for command
85                         for (it = parameters.begin(); it != parameters.end(); it++) { 
86                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
87                         }
88                         
89                         //initialize outputTypes
90                         vector<string> tempOutNames;
91                         outputTypes["fasta"] = tempOutNames;
92                         outputTypes["name"] = tempOutNames;
93                         outputTypes["summary"] = tempOutNames;
94                         
95                                                 
96                         //if the user changes the input directory command factory will send this info to us in the output parameter 
97                         string inputDir = validParameter.validFile(parameters, "inputdir", false);      
98                         if (inputDir == "not found"){   inputDir = "";          }
99                         else {
100                                 string path;
101                                 it = parameters.find("fasta");
102                                 //user has given a template file
103                                 if(it != parameters.end()){ 
104                                         path = m->hasPath(it->second);
105                                         //if the user has not given a path then, add inputdir. else leave path alone.
106                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
107                                 }
108                                 
109                                 it = parameters.find("name");
110                                 //user has given a template file
111                                 if(it != parameters.end()){ 
112                                         path = m->hasPath(it->second);
113                                         //if the user has not given a path then, add inputdir. else leave path alone.
114                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
115                                 }
116                                 
117                                 it = parameters.find("list");
118                                 //user has given a template file
119                                 if(it != parameters.end()){ 
120                                         path = m->hasPath(it->second);
121                                         //if the user has not given a path then, add inputdir. else leave path alone.
122                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
123                                 }
124                         }
125                         
126                         
127                         //check for parameters
128                         fastafile = validParameter.validFile(parameters, "fasta", true);
129                         if (fastafile == "not open") { abort = true; }
130                         else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the consensus.seqs command."); m->mothurOutEndLine(); abort = true;  }     
131                         
132                         namefile = validParameter.validFile(parameters, "name", true);
133                         if (namefile == "not open") { abort = true; }
134                         else if (namefile == "not found") { namefile = ""; }    
135                         
136                         listfile = validParameter.validFile(parameters, "list", true);
137                         if (listfile == "not open") { abort = true; }
138                         else if (listfile == "not found") { listfile = "";  }   
139                         
140                         label = validParameter.validFile(parameters, "label", false);                   
141                         if (label == "not found") { label = ""; }
142                         else { 
143                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
144                                 else { allLines = 1;  }
145                         }
146                         
147                         //if the user changes the output directory command factory will send this info to us in the output parameter 
148                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(fastafile);      }
149
150                 }
151         }
152         catch(exception& e) {
153                 m->errorOut(e, "ConsensusSeqsCommand", "ConsensusSeqsCommand");
154                 exit(1);
155         }
156 }
157 //**********************************************************************************************************************
158
159 void ConsensusSeqsCommand::help(){
160         try {
161                 m->mothurOut("The consensus.seqs command can be used in 2 ways: create a consensus sequence from a fastafile, or with a listfile create a consensus sequence for each otu. Sequences must be aligned.\n");
162                 m->mothurOut("The consensus.seqs command parameters are fasta, list, name and label.\n");
163                 m->mothurOut("The fasta parameter allows you to enter the fasta file containing your sequences, and is required. \n");
164                 m->mothurOut("The list parameter allows you to enter a your list file. \n");
165                 m->mothurOut("The name parameter allows you to enter a names file associated with the fasta file. \n");
166                 m->mothurOut("The label parameter allows you to select what distance levels you would like output files for, and are separated by dashes.\n");
167                 m->mothurOut("The consensus.seqs command should be in the following format: \n");
168                 m->mothurOut("consensus.seqs(fasta=yourFastaFile, list=yourListFile) \n");      
169                 m->mothurOut("Example: consensus.seqs(fasta=abrecovery.align, list=abrecovery.fn.list) \n");
170                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");     
171         }
172         catch(exception& e) {
173                 m->errorOut(e, "ConsensusSeqsCommand", "help");
174                 exit(1);
175         }
176 }
177
178 //***************************************************************************************************************
179
180 ConsensusSeqsCommand::~ConsensusSeqsCommand(){  /*      do nothing      */      }
181
182 //***************************************************************************************************************
183
184 int ConsensusSeqsCommand::execute(){
185         try{
186                 
187                 if (abort == true) { return 0; }
188                 
189                 readFasta();
190                 
191                 if (m->control_pressed) { return 0; }
192                 
193                 if (namefile != "") { readNames(); }
194                 
195                 if (m->control_pressed) { return 0; }
196                 
197                                 
198                 if (listfile == "") {
199                         
200                         ofstream outSummary;
201                         string outputSummaryFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "cons.summary";
202                         m->openOutputFile(outputSummaryFile, outSummary);
203                         outSummary.setf(ios::fixed, ios::floatfield); outSummary.setf(ios::showpoint);
204                         outputNames.push_back(outputSummaryFile); outputTypes["summary"].push_back(outputSummaryFile);
205                         
206                         outSummary << "PositioninAlignment\tA\tT\tG\tC\tGap\tNumberofSeqs\tConsensusBase" << endl;
207                         
208                         ofstream outFasta;
209                         string outputFastaFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "cons.fasta";
210                         m->openOutputFile(outputFastaFile, outFasta);
211                         outputNames.push_back(outputFastaFile); outputTypes["fasta"].push_back(outputFastaFile);
212                         
213                         vector<string> seqs;
214                         int seqLength = 0;
215                         for (map<string, string>::iterator it = nameMap.begin(); it != nameMap.end(); it++) {
216                                 
217                                 if (m->control_pressed) { outSummary.close(); outFasta.close(); for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0; }
218                                 
219                                 string seq = fastaMap[it->second];
220                                 seqs.push_back(seq);
221                                 
222                                 if (seqLength == 0) { seqLength = seq.length(); }
223                                 else if (seqLength != seq.length()) { m->mothurOut("[ERROR]: sequence are not the same length, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
224
225                         }
226                         
227                         vector< vector<float> > percentages; percentages.resize(5);
228                         for (int j = 0; j < percentages.size(); j++) { percentages[j].resize(seqLength, 0.0); }
229                         
230                         string consSeq = "";
231                         //get counts
232                         for (int j = 0; j < seqLength; j++) {
233                                 
234                                 if (m->control_pressed) { outSummary.close(); outFasta.close(); for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0; }
235                                 
236                                 vector<int> counts; counts.resize(5, 0); //A,T,G,C,Gap
237                                 int numDots = 0;
238                                 
239                                 for (int i = 0; i < seqs.size(); i++) {
240                                         
241                                         if (seqs[i][j] == '.') { numDots++; }
242                                         
243                                         char base = toupper(seqs[i][j]);
244                                         if (base == 'A') { counts[0]++; }
245                                         else if (base == 'T') { counts[1]++; }
246                                         else if (base == 'G') { counts[2]++; }
247                                         else if (base == 'C') { counts[3]++; }
248                                         else { counts[4]++; }
249                                 }
250                                 
251                                 char conBase = '.';
252                                 if (numDots != seqs.size()) { conBase = getBase(counts); }
253                                 
254                                 consSeq += conBase;
255                                 
256                                 percentages[0][j] = counts[0] / (float) seqs.size();
257                                 percentages[1][j] = counts[1] / (float) seqs.size();
258                                 percentages[2][j] = counts[2] / (float) seqs.size();
259                                 percentages[3][j] = counts[3] / (float) seqs.size();
260                                 percentages[4][j] = counts[4] / (float) seqs.size();
261                                 
262                         }
263                         
264                         for (int j = 0; j < seqLength; j++) { 
265                                 outSummary << (j+1) << '\t' << percentages[0][j] << '\t'<< percentages[1][j] << '\t'<< percentages[2][j] << '\t' << percentages[3][j] << '\t' << percentages[4][j] << '\t' << seqs.size() << '\t' << consSeq[j] << endl;
266                         }
267                         
268                                 
269                         outFasta << ">conseq" << endl << consSeq << endl;
270                         
271                         outSummary.close(); outFasta.close();
272                         
273                 
274                 }else {
275                         
276                                                 
277                         InputData* input = new InputData(listfile, "list");
278                         ListVector* list = input->getListVector();
279                         
280                         string lastLabel = list->getLabel();
281                         set<string> processedLabels;
282                         set<string> userLabels = labels;
283
284                         //as long as you are not at the end of the file or done wih the lines you want
285                         while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
286                                 
287                                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str()); } delete list; delete input;  return 0;  }
288                                 
289                                 if(allLines == 1 || labels.count(list->getLabel()) == 1){                       
290                                         
291                                         m->mothurOut(list->getLabel()); m->mothurOutEndLine();
292                                         
293                                         processList(list);
294                                         
295                                         processedLabels.insert(list->getLabel());
296                                         userLabels.erase(list->getLabel());
297                                 }
298                                 
299                                 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
300                                         string saveLabel = list->getLabel();
301                                         
302                                         delete list; 
303                                         
304                                         list = input->getListVector(lastLabel);
305                                         m->mothurOut(list->getLabel()); m->mothurOutEndLine();
306                                         
307                                         processList(list);
308                                         
309                                         processedLabels.insert(list->getLabel());
310                                         userLabels.erase(list->getLabel());
311                                         
312                                         //restore real lastlabel to save below
313                                         list->setLabel(saveLabel);
314                                 }
315                                 
316                                 lastLabel = list->getLabel();
317                                 
318                                 delete list; list = NULL;
319                                 
320                                 //get next line to process
321                                 list = input->getListVector();                          
322                         }
323                         
324                         
325                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); } if (list != NULL) { delete list; } delete input; return 0;  }
326                         
327                         //output error messages about any remaining user labels
328                         set<string>::iterator it;
329                         bool needToRun = false;
330                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
331                                 m->mothurOut("Your file does not include the label " + *it); 
332                                 if (processedLabels.count(lastLabel) != 1) {
333                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
334                                         needToRun = true;
335                                 }else {
336                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
337                                 }
338                         }
339                         
340                         //run last label if you need to
341                         if (needToRun == true)  {
342                                 if (list != NULL) { delete list; }
343                                 
344                                 list = input->getListVector(lastLabel);
345                                 
346                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
347                                 
348                                 processList(list);
349                                 
350                                 delete list; list = NULL;
351                         }
352                         
353                         if (list != NULL) { delete list; }
354                         delete input;
355                 }
356                 
357                 m->mothurOutEndLine();
358                 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
359                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
360                 m->mothurOutEndLine();
361                 
362                 
363                 return 0;
364                 
365         }
366         catch(exception& e) {
367                 m->errorOut(e, "ConsensusSeqsCommand", "execute");
368                 exit(1);
369         }
370 }
371 //***************************************************************************************************************
372
373 int ConsensusSeqsCommand::processList(ListVector*& list){
374         try{
375                 
376                 ofstream outSummary;
377                 string outputSummaryFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + list->getLabel() + ".cons.summary";
378                 m->openOutputFile(outputSummaryFile, outSummary);
379                 outSummary.setf(ios::fixed, ios::floatfield); outSummary.setf(ios::showpoint);
380                 outputNames.push_back(outputSummaryFile); outputTypes["summary"].push_back(outputSummaryFile);
381                 
382                 ofstream outName;
383                 string outputNameFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + list->getLabel() + ".cons.names";
384                 m->openOutputFile(outputNameFile, outName);
385                 outputNames.push_back(outputNameFile); outputTypes["name"].push_back(outputNameFile);
386                 
387                 ofstream outFasta;
388                 string outputFastaFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + list->getLabel() + ".cons.fasta";
389                 m->openOutputFile(outputFastaFile, outFasta);
390                 outputNames.push_back(outputFastaFile); outputTypes["fasta"].push_back(outputFastaFile);
391                 
392                 outSummary << "OTU#\tPositioninAlignment\tA\tT\tG\tC\tGap\tNumberofSeqs\tConsensusBase" << endl;
393                 
394                 for (int i = 0; i < list->getNumBins(); i++) {
395                         
396                         if (m->control_pressed) { outSummary.close(); outName.close(); outFasta.close(); return 0; }
397                         
398                         string bin = list->get(i);
399                         
400                         string newName = "";
401                         string consSeq = getConsSeq(bin, outSummary, newName, i);
402                         
403                         outFasta << ">seq" << (i+1) << endl << consSeq << endl;
404                         outName << "seq" << (i+1) << '\t' << "seq" << (i+1) << "," << newName << endl;
405                 }
406                 
407                 outSummary.close(); outName.close(); outFasta.close();
408                 
409                 return 0;
410                 
411         }
412         catch(exception& e) {
413                 m->errorOut(e, "ConsensusSeqsCommand", "processList");
414                 exit(1);
415         }
416 }
417
418 //***************************************************************************************************************
419 //made this smart enough to owrk with unique or non unique list file
420 string ConsensusSeqsCommand::getConsSeq(string bin, ofstream& outSummary, string& name, int binNumber){
421         try{
422                 
423                 string consSeq = "";
424                 bool error = false;
425                 
426                 //the whole bin is the second column if no names file, otherwise build it
427                 name = bin;
428                 if (namefile != "") { name = ""; }
429                 
430                 vector<string> binNames;
431                 m->splitAtComma(bin, binNames);
432                 
433                 //get sequence strings for each name in the bin
434                 vector<string> seqs;
435                 
436                 set<string> addedAlready;
437                 int seqLength = 0;
438                 for (int i = 0; i < binNames.size(); i++) {
439                         
440                         map<string, string>::iterator it;
441                         
442                         it = nameMap.find(binNames[i]);
443                         if (it == nameMap.end()) { 
444                                 if (namefile == "") { m->mothurOut("[ERROR]: " + binNames[i] + " is not in your fasta file, please correct."); m->mothurOutEndLine(); error = true; }
445                                 else { m->mothurOut("[ERROR]: " + binNames[i] + " is not in your fasta or name file, please correct."); m->mothurOutEndLine(); error = true; }
446                                 break;
447                         }else {
448                                 
449                                 //add sequence string to seqs vector to process below
450                                 string seq = fastaMap[it->second];
451                                 seqs.push_back(seq);
452                                 
453                                 if (seqLength == 0) { seqLength = seq.length(); }
454                                 else if (seqLength != seq.length()) { m->mothurOut("[ERROR]: sequence are not the same length, please correct."); m->mothurOutEndLine(); error = true; break; }
455                                 
456                                 if (namefile != "") { 
457                                         //did we add this line from name file already?
458                                         if (addedAlready.count(it->second) == 0) {
459                                                 name += "," + nameFileMap[it->second];
460                                                 addedAlready.insert(it->second);
461                                         }
462                                 }
463                                 
464                         }
465                 }
466                 
467                 if (error) { m->control_pressed = true; return consSeq; }
468                 
469                 if (namefile != "") { name = name.substr(1); }
470                 
471                 vector< vector<float> > percentages; percentages.resize(5);
472                 for (int j = 0; j < percentages.size(); j++) { percentages[j].resize(seqLength, 0.0); }
473                 
474                 //get counts
475                 for (int j = 0; j < seqLength; j++) {
476                         
477                         if (m->control_pressed) { return consSeq; }
478                         
479                         vector<int> counts; counts.resize(5, 0); //A,T,G,C,Gap
480                         int numDots = 0;
481                         
482                         for (int i = 0; i < seqs.size(); i++) {
483                                 
484                                 if (seqs[i][j] == '.') { numDots++; }
485                                 
486                                 char base = toupper(seqs[i][j]);
487                                 if (base == 'A') { counts[0]++; }
488                                 else if (base == 'T') { counts[1]++; }
489                                 else if (base == 'G') { counts[2]++; }
490                                 else if (base == 'C') { counts[3]++; }
491                                 else { counts[4]++; }
492                         }
493                         
494                         char conBase = '.';
495                         if (numDots != seqs.size()) { conBase = getBase(counts); }
496                         
497                         consSeq += conBase;
498                         
499                         percentages[0][j] = counts[0] / (float) seqs.size();
500                         percentages[1][j] = counts[1] / (float) seqs.size();
501                         percentages[2][j] = counts[2] / (float) seqs.size();
502                         percentages[3][j] = counts[3] / (float) seqs.size();
503                         percentages[4][j] = counts[4] / (float) seqs.size();
504                         
505                 }
506                 
507                 for (int j = 0; j < seqLength; j++) { 
508                         outSummary << (binNumber + 1) << '\t' << (j+1) << '\t' << percentages[0][j] << '\t'<< percentages[1][j] << '\t'<< percentages[2][j] << '\t' << percentages[3][j] << '\t' << percentages[4][j] << '\t' << seqs.size() << '\t' << consSeq[j] << endl;
509                 }
510                 
511                 return consSeq;
512                 
513         }
514         catch(exception& e) {
515                 m->errorOut(e, "ConsensusSeqsCommand", "getConsSeq");
516                 exit(1);
517         }
518 }
519 //***************************************************************************************************************
520
521 char ConsensusSeqsCommand::getBase(vector<int> counts){  //A,T,G,C,Gap
522         try{
523                 /* A = adenine
524                 * C = cytosine
525                 * G = guanine
526                 * T = thymine
527                 * R = G A (purine)
528                 * Y = T C (pyrimidine)
529                 * K = G T (keto)
530                 * M = A C (amino)
531                 * S = G C (strong bonds)
532                 * W = A T (weak bonds)
533                 * B = G T C (all but A)
534                 * D = G A T (all but C)
535                 * H = A C T (all but G)
536                 * V = G C A (all but T)
537                 * N = A G C T (any) */
538                 
539                 char conBase = 'N';
540                 
541                 //any
542                 if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'n'; }
543                 //any no gap
544                 else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'N'; }
545                 //all but T
546                 else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'v'; }  
547                 //all but T no gap
548                 else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'V'; }  
549                 //all but G
550                 else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'h'; }  
551                 //all but G no gap
552                 else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'H'; }  
553                 //all but C
554                 else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'd'; }  
555                 //all but C no gap
556                 else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'D'; }  
557                 //all but A
558                 else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'b'; }  
559                 //all but A no gap
560                 else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'B'; }  
561                 //W = A T (weak bonds)
562                 else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'w'; }  
563                 //W = A T (weak bonds) no gap
564                 else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'W'; }  
565                 //S = G C (strong bonds)
566                 else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 's'; }  
567                 //S = G C (strong bonds) no gap
568                 else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'S'; }  
569                 //M = A C (amino)
570                 else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'm'; }  
571                 //M = A C (amino) no gap
572                 else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'M'; }  
573                 //K = G T (keto)
574                 else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'k'; }  
575                 //K = G T (keto) no gap
576                 else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'K'; }  
577                 //Y = T C (pyrimidine)
578                 else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'y'; }  
579                 //Y = T C (pyrimidine) no gap
580                 else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'Y'; }  
581                 //R = G A (purine)
582                 else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'r'; }  
583                 //R = G A (purine) no gap
584                 else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'R'; }  
585                 //only A
586                 else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'a'; }  
587                 //only A no gap
588                 else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'A'; }  
589                 //only T
590                 else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 't'; }  
591                 //only T no gap
592                 else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'T'; }  
593                 //only G
594                 else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'g'; }  
595                 //only G no gap
596                 else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'G'; }  
597                 //only C
598                 else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'c'; }  
599                 //only C no gap
600                 else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'C'; }  
601                 //only gap
602                 else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = '-'; }
603                 else{ m->mothurOut("[ERROR]: cannot find consensus base."); m->mothurOutEndLine(); }
604                 
605                 return conBase;
606                 
607         }
608         catch(exception& e) {
609                 m->errorOut(e, "ConsensusSeqsCommand", "getBase");
610                 exit(1);
611         }
612 }
613
614 //***************************************************************************************************************
615
616 int ConsensusSeqsCommand::readFasta(){
617         try{
618                 
619                 ifstream in;
620                 m->openInputFile(fastafile, in);
621                 
622                 while (!in.eof()) {
623                         
624                         if (m->control_pressed) { break; }
625                         
626                         Sequence seq(in); m->gobble(in);
627                         string name = seq.getName();
628                         
629                         if (name != "") {
630                                 fastaMap[name] = seq.getAligned();
631                                 nameMap[name] = name; //set nameMap incase no names file
632                                 nameFileMap[name] = name;
633                         }
634                 }
635                 
636                 in.close();
637                 
638                 return 0;
639                 
640         }
641         catch(exception& e) {
642                 m->errorOut(e, "ConsensusSeqsCommand", "readFasta");
643                 exit(1);
644         }
645 }
646 //***************************************************************************************************************
647
648 int ConsensusSeqsCommand::readNames(){
649          try{
650                  
651                  ifstream in;
652                  m->openInputFile(namefile, in);
653                  
654                  string thisname, repnames;
655                  map<string, string>::iterator it;
656                  
657                  bool error = false;
658                  
659                  while(!in.eof()){
660                          
661                          if (m->control_pressed) { break; }
662                          
663                          in >> thisname;                m->gobble(in);          //read from first column
664                          in >> repnames;                        //read from second column
665                          
666                          it = nameMap.find(thisname);
667                          if (it != nameMap.end()) { //then this sequence was in the fastafile
668                                  
669                                  vector<string> splitRepNames;
670                                  m->splitAtComma(repnames, splitRepNames);
671                                  
672                                  nameFileMap[thisname] = repnames;      //for later when outputting the new namesFile if the list file is unique
673                                  for (int i = 0; i < splitRepNames.size(); i++) { nameMap[splitRepNames[i]] = thisname; }
674                                  
675                          }else{ m->mothurOut("[ERROR]: " + thisname + " is not in the fasta file, please correct."); m->mothurOutEndLine(); error = true; }
676                          
677                          m->gobble(in);
678                  }
679                  
680                  in.close();
681                  
682                  if (error) { m->control_pressed = true; }
683  
684                  return 0;
685  
686         }
687          catch(exception& e) {
688                  m->errorOut(e, "ConsensusSeqsCommand", "readNames");
689                  exit(1);
690          }
691  }
692  
693 //***************************************************************************************************************
694
695