]> git.donarmstrong.com Git - mothur.git/blob - getoturepcommand.cpp
added pipeline commands which involved change to command factory and command class...
[mothur.git] / getoturepcommand.cpp
1 /*
2  *  getoturepcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/6/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "getoturepcommand.h"
11 #include "readphylip.h"
12 #include "readcolumn.h"
13 #include "formatphylip.h"
14 #include "formatcolumn.h"
15 #include "sharedutilities.h"
16
17
18 //********************************************************************************************************************
19 //sorts lowest to highest
20 inline bool compareName(repStruct left, repStruct right){
21         return (left.name < right.name);        
22 }
23 //********************************************************************************************************************
24 //sorts lowest to highest
25 inline bool compareBin(repStruct left, repStruct right){
26         return (left.bin < right.bin);  
27 }
28 //********************************************************************************************************************
29 //sorts lowest to highest
30 inline bool compareSize(repStruct left, repStruct right){
31         return (left.size < right.size);        
32 }
33 //********************************************************************************************************************
34 //sorts lowest to highest
35 inline bool compareGroup(repStruct left, repStruct right){
36         return (left.group < right.group);      
37 }
38 //**********************************************************************************************************************
39 GetOTURepCommand::GetOTURepCommand(){   
40         try {
41                 //initialize outputTypes
42                 vector<string> tempOutNames;
43                 outputTypes["fasta"] = tempOutNames;
44                 outputTypes["name"] = tempOutNames;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "GetOTURepCommand", "GetOTURepCommand");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 vector<string> GetOTURepCommand::getValidParameters(){  
53         try {
54                 string Array[] =  {"fasta","list","label","name", "group", "sorted", "phylip","column","large","cutoff","precision","groups","outputdir","inputdir"};
55                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
56                 return myArray;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "GetOTURepCommand", "getValidParameters");
60                 exit(1);
61         }
62 }
63 //**********************************************************************************************************************
64 vector<string> GetOTURepCommand::getRequiredParameters(){       
65         try {
66                 string Array[] =  {"fasta","list"};
67                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
68                 return myArray;
69         }
70         catch(exception& e) {
71                 m->errorOut(e, "GetOTURepCommand", "getRequiredParameters");
72                 exit(1);
73         }
74 }
75 //**********************************************************************************************************************
76 vector<string> GetOTURepCommand::getRequiredFiles(){    
77         try {
78                 vector<string> myArray;
79                 return myArray;
80         }
81         catch(exception& e) {
82                 m->errorOut(e, "GetOTURepCommand", "getRequiredFiles");
83                 exit(1);
84         }
85 }
86 //**********************************************************************************************************************
87 GetOTURepCommand::GetOTURepCommand(string option)  {
88         try{
89                 globaldata = GlobalData::getInstance();
90                 abort = false;
91                 allLines = 1;
92                 labels.clear();
93                                 
94                 //allow user to run help
95                 if (option == "help") { 
96                         help(); abort = true;
97                 } else {
98                         //valid paramters for this command
99                         string Array[] =  {"fasta","list","label","name", "group", "sorted", "phylip","column","large","cutoff","precision","groups","outputdir","inputdir"};
100                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
101                         
102                         OptionParser parser(option);
103                         map<string, string> parameters = parser.getParameters();
104                         
105                         ValidParameters validParameter;
106                         map<string, string>::iterator it;
107                 
108                         //check to make sure all parameters are valid for command
109                         for (it = parameters.begin(); it != parameters.end(); it++) { 
110                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
111                         }
112                         
113                         //initialize outputTypes
114                         vector<string> tempOutNames;
115                         outputTypes["fasta"] = tempOutNames;
116                         outputTypes["name"] = tempOutNames;
117                         
118                         //if the user changes the input directory command factory will send this info to us in the output parameter 
119                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
120                         if (inputDir == "not found"){   inputDir = "";          }
121                         else {
122                                 string path;
123                                 it = parameters.find("list");
124                                 //user has given a template file
125                                 if(it != parameters.end()){ 
126                                         path = m->hasPath(it->second);
127                                         //if the user has not given a path then, add inputdir. else leave path alone.
128                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
129                                 }
130                                 
131                                 it = parameters.find("fasta");
132                                 //user has given a template file
133                                 if(it != parameters.end()){ 
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
137                                 }
138                                 
139                                 it = parameters.find("phylip");
140                                 //user has given a template file
141                                 if(it != parameters.end()){ 
142                                         path = m->hasPath(it->second);
143                                         //if the user has not given a path then, add inputdir. else leave path alone.
144                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
145                                 }
146                                 
147                                 it = parameters.find("column");
148                                 //user has given a template file
149                                 if(it != parameters.end()){ 
150                                         path = m->hasPath(it->second);
151                                         //if the user has not given a path then, add inputdir. else leave path alone.
152                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
153                                 }
154                                 
155                                 it = parameters.find("name");
156                                 //user has given a template file
157                                 if(it != parameters.end()){ 
158                                         path = m->hasPath(it->second);
159                                         //if the user has not given a path then, add inputdir. else leave path alone.
160                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
161                                 }
162                                 
163                                 it = parameters.find("group");
164                                 //user has given a template file
165                                 if(it != parameters.end()){ 
166                                         path = m->hasPath(it->second);
167                                         //if the user has not given a path then, add inputdir. else leave path alone.
168                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
169                                 }
170                         }
171
172                         
173                         //if the user changes the output directory command factory will send this info to us in the output parameter 
174                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
175                         
176                         //check for required parameters
177                         fastafile = validParameter.validFile(parameters, "fasta", true);
178                         if (fastafile == "not found") { m->mothurOut("fasta is a required parameter for the get.oturep command."); m->mothurOutEndLine(); abort = true; }
179                         else if (fastafile == "not open") { abort = true; }     
180                 
181                         listfile = validParameter.validFile(parameters, "list", true);
182                         if (listfile == "not found") { m->mothurOut("list is a required parameter for the get.oturep command."); m->mothurOutEndLine(); abort = true; }
183                         else if (listfile == "not open") { abort = true; }      
184                         
185                         phylipfile = validParameter.validFile(parameters, "phylip", true);
186                         if (phylipfile == "not found") { phylipfile = "";  }
187                         else if (phylipfile == "not open") { abort = true; }    
188                         else { distFile = phylipfile; format = "phylip";   }
189                         
190                         columnfile = validParameter.validFile(parameters, "column", true);
191                         if (columnfile == "not found") { columnfile = ""; }
192                         else if (columnfile == "not open") { abort = true; }    
193                         else { distFile = columnfile; format = "column";   }
194                         
195                         namefile = validParameter.validFile(parameters, "name", true);
196                         if (namefile == "not open") { abort = true; }   
197                         else if (namefile == "not found") { namefile = ""; }
198                         
199                         if ((phylipfile == "") && (columnfile == "")) { m->mothurOut("When executing a get.oturep command you must enter a phylip or a column."); m->mothurOutEndLine(); abort = true; }
200                         else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a get.oturep command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
201                 
202                         if (columnfile != "") {  if (namefile == "") {  cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; }  }
203
204                         //check for optional parameter and set defaults
205                         // ...at some point should added some additional type checking...
206                         label = validParameter.validFile(parameters, "label", false);                   
207                         if (label == "not found") { label = ""; allLines = 1;  }
208                         else { 
209                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
210                                 else { allLines = 1;  }
211                         }
212                         
213                         groupfile = validParameter.validFile(parameters, "group", true);
214                         if (groupfile == "not open") { groupfile = ""; abort = true; }
215                         else if (groupfile == "not found") { groupfile = ""; }
216                                                 
217                         sorted = validParameter.validFile(parameters, "sorted", false);         if (sorted == "not found"){     sorted = "";    }
218                         if ((sorted != "") && (sorted != "name") && (sorted != "bin") && (sorted != "size") && (sorted != "group")) {
219                                 m->mothurOut(sorted + " is not a valid option for the sorted parameter. The only options are: name, bin, size and group. I will not sort."); m->mothurOutEndLine();
220                                 sorted = "";
221                         }
222                         
223                         if ((sorted == "group") && (groupfile == "")) {
224                                 m->mothurOut("You must provide a groupfile to sort by group. I will not sort."); m->mothurOutEndLine();
225                                 sorted = "";
226                         }
227                         
228                         groups = validParameter.validFile(parameters, "groups", false);                 
229                         if (groups == "not found") { groups = ""; }
230                         else { 
231                                 if (groupfile == "") {
232                                         m->mothurOut("You must provide a groupfile to use groups."); m->mothurOutEndLine();
233                                         abort = true;
234                                 }else { 
235                                         m->splitAtDash(groups, Groups);
236                                 }
237                         }
238                         globaldata->Groups = Groups;
239                         
240                         string temp = validParameter.validFile(parameters, "large", false);             if (temp == "not found") {      temp = "F";     }
241                         large = m->isTrue(temp);
242                         
243                         temp = validParameter.validFile(parameters, "precision", false);                        if (temp == "not found") { temp = "100"; }
244                         convert(temp, precision); 
245                         
246                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "10.0"; }
247                         convert(temp, cutoff); 
248                         cutoff += (5 / (precision * 10.0));
249                 }
250         }
251         catch(exception& e) {
252                 m->errorOut(e, "GetOTURepCommand", "GetOTURepCommand");
253                 exit(1);
254         }
255 }
256
257 //**********************************************************************************************************************
258
259 void GetOTURepCommand::help(){
260         try {
261                 m->mothurOut("The get.oturep command parameters are phylip, column, list, fasta, name, group, large, cutoff, precision, groups, sorted and label.  The fasta and list parameters are required, as well as phylip or column and name.\n");
262                 m->mothurOut("The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n");
263                 m->mothurOut("The phylip or column parameter is required, but only one may be used.  If you use a column file the name filename is required. \n");
264                 m->mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n");
265                 m->mothurOut("The get.oturep command should be in the following format: get.oturep(phylip=yourDistanceMatrix, fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n");
266                 m->mothurOut("Example get.oturep(phylip=amazon.dist, fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups).\n");
267                 m->mothurOut("The default value for label is all labels in your inputfile.\n");
268                 m->mothurOut("The sorted parameter allows you to indicate you want the output sorted. You can sort by sequence name, bin number, bin size or group. The default is no sorting, but your options are name, number, size, or group.\n");
269                 m->mothurOut("The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n");
270                 m->mothurOut("The group parameter allows you provide a group file.\n");
271                 m->mothurOut("The groups parameter allows you to indicate that you want representative sequences for each group specified for each OTU, group name should be separated by dashes. ex. groups=A-B-C.\n");
272                 m->mothurOut("The get.oturep command outputs a .fastarep and .rep.names file for each distance you specify, selecting one OTU representative for each bin.\n");
273                 m->mothurOut("If you provide a groupfile, then it also appends the names of the groups present in that bin.\n");
274                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
275         }
276         catch(exception& e) {
277                 m->errorOut(e, "GetOTURepCommand", "help");
278                 exit(1);
279         }
280 }
281
282 //**********************************************************************************************************************
283
284 GetOTURepCommand::~GetOTURepCommand(){}
285
286 //**********************************************************************************************************************
287
288 int GetOTURepCommand::execute(){
289         try {
290         
291                 if (abort == true) { return 0; }
292                 int error;
293                 
294                 if (!large) {
295                         //read distance files
296                         if (format == "column") { readMatrix = new ReadColumnMatrix(distFile); }        
297                         else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(distFile); }
298                         else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0;  }
299                         
300                         readMatrix->setCutoff(cutoff);
301         
302                         if(namefile != ""){     
303                                 nameMap = new NameAssignment(namefile);
304                                 nameMap->readMap();
305                         }else{  nameMap = NULL;         }
306                         
307                         readMatrix->read(nameMap);
308                         
309                         if (m->control_pressed) { delete readMatrix; return 0; }
310
311                         //get matrix
312                         if (globaldata->gListVector != NULL) {  delete globaldata->gListVector;  }
313                         globaldata->gListVector = readMatrix->getListVector();
314
315                         SparseMatrix* matrix = readMatrix->getMatrix();
316                         
317                         // Create a data structure to quickly access the distance information.
318                         // It consists of a vector of distance maps, where each map contains
319                         // all distances of a certain sequence. Vector and maps are accessed
320                         // via the index of a sequence in the distance matrix
321                         seqVec = vector<SeqMap>(globaldata->gListVector->size()); 
322                         for (MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++) {
323                                 if (m->control_pressed) { delete readMatrix; return 0; }
324                                 seqVec[currentCell->row][currentCell->column] = currentCell->dist;
325                         }
326                         
327                         delete matrix;
328                         delete readMatrix;
329                         delete nameMap;
330                         
331                         if (m->control_pressed) { return 0; }
332                 }else {
333                         //process file and set up indexes
334                         if (format == "column") { formatMatrix = new FormatColumnMatrix(distFile); }    
335                         else if (format == "phylip") { formatMatrix = new FormatPhylipMatrix(distFile); }
336                         else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0;  }
337                         
338                         formatMatrix->setCutoff(cutoff);
339         
340                         if(namefile != ""){     
341                                 nameMap = new NameAssignment(namefile);
342                                 nameMap->readMap();
343                         }else{  nameMap = NULL;         }
344                         
345                         formatMatrix->read(nameMap);
346                         
347                         if (m->control_pressed) { delete formatMatrix;  return 0; }
348
349                         //get matrix
350                         if (globaldata->gListVector != NULL) {  delete globaldata->gListVector;  }
351                         globaldata->gListVector = formatMatrix->getListVector();
352                         
353                         distFile = formatMatrix->getFormattedFileName();
354                         
355                         //positions in file where the distances for each sequence begin
356                         //rowPositions[1] = position in file where distance related to sequence 1 start.
357                         rowPositions = formatMatrix->getRowPositions();
358                         
359                         delete formatMatrix;
360                         delete nameMap;
361                         
362                         //openfile for getMap to use
363                         m->openInputFile(distFile, inRow);
364                         
365                         if (m->control_pressed) { inRow.close(); remove(distFile.c_str()); return 0; }
366                 }
367                 
368                 
369                 //globaldata->gListVector bin 0 = first name read in distance matrix, globaldata->gListVector bin 1 = second name read in distance matrix
370                 if (globaldata->gListVector != NULL) {
371                         vector<string> names;
372                         string binnames;
373                         //map names to rows in sparsematrix
374                         for (int i = 0; i < globaldata->gListVector->size(); i++) {
375                                 names.clear();
376                                 binnames = globaldata->gListVector->get(i);
377                                 
378                                 m->splitAtComma(binnames, names);
379                                 
380                                 for (int j = 0; j < names.size(); j++) {
381                                         nameToIndex[names[j]] = i;
382                                 }
383                         }
384                 } else { m->mothurOut("error, no listvector."); m->mothurOutEndLine(); }
385                 
386                                 
387                 if (m->control_pressed) { 
388                         if (large) {  inRow.close(); remove(distFile.c_str());  }
389                         return 0; 
390                 }
391                 
392                 if (groupfile != "") {
393                         //read in group map info.
394                         groupMap = new GroupMap(groupfile);
395                         int error = groupMap->readMap();
396                         if (error == 1) { delete groupMap; m->mothurOut("Error reading your groupfile. Proceeding without groupfile."); m->mothurOutEndLine(); groupfile = "";  }
397                         
398                         if (Groups.size() != 0) {
399                                 SharedUtil* util = new SharedUtil();
400                                 util->setGroups(Groups, groupMap->namesOfGroups, "getoturep");
401                                 delete util;
402                         }
403                 }
404                                                                 
405                 //set format to list so input can get listvector
406                 globaldata->setFormat("list");
407         
408                 //read list file
409                 read = new ReadOTUFile(listfile);
410                 read->read(&*globaldata); 
411                 
412                 input = globaldata->ginput;
413                 list = globaldata->gListVector;
414                 string lastLabel = list->getLabel();
415
416                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
417                 set<string> processedLabels;
418                 set<string> userLabels = labels;
419                 
420                 if (m->control_pressed) { 
421                         if (large) {  inRow.close(); remove(distFile.c_str());  }
422                         delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; 
423                 }
424         
425                 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
426                         
427                         if (allLines == 1 || labels.count(list->getLabel()) == 1){
428                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
429                                         error = process(list);
430                                         if (error == 1) { return 0; } //there is an error in hte input files, abort command
431                                         
432                                         if (m->control_pressed) { 
433                                                 if (large) {  inRow.close(); remove(distFile.c_str());  }
434                                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  } outputTypes.clear();
435                                                 delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; 
436                                         }
437                                         
438                                         processedLabels.insert(list->getLabel());
439                                         userLabels.erase(list->getLabel());
440                         }
441                         
442                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
443                                         string saveLabel = list->getLabel();
444                                         
445                                         delete list;
446                                         list = input->getListVector(lastLabel);
447                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
448                                         error = process(list);
449                                         if (error == 1) { return 0; } //there is an error in hte input files, abort command
450                                         
451                                         if (m->control_pressed) { 
452                                                 if (large) {  inRow.close(); remove(distFile.c_str());  }
453                                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  } outputTypes.clear();
454                                                 delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; 
455                                         }
456                                         
457                                         processedLabels.insert(list->getLabel());
458                                         userLabels.erase(list->getLabel());
459                                         
460                                         //restore real lastlabel to save below
461                                         list->setLabel(saveLabel);
462                         }
463                         
464                         lastLabel = list->getLabel();
465         
466                         delete list;
467                         list = input->getListVector();
468                 }
469                 
470                 //output error messages about any remaining user labels
471                 bool needToRun = false;
472                 for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {  
473                         m->mothurOut("Your file does not include the label " + (*it)); 
474                         if (processedLabels.count(lastLabel) != 1) {
475                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
476                                 needToRun = true;
477                         }else {
478                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
479                         }
480                 }
481                 
482                 //run last label if you need to
483                 if (needToRun == true)  {
484                         if (list != NULL) {     delete list;    }
485                         list = input->getListVector(lastLabel);
486                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
487                         error = process(list);
488                         delete list;
489                         if (error == 1) { return 0; } //there is an error in hte input files, abort command
490                         
491                         if (m->control_pressed) { 
492                                         if (large) {  inRow.close(); remove(distFile.c_str());  }
493                                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  } outputTypes.clear();
494                                         delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; 
495                         }
496                 }
497                 
498                 //close and remove formatted matrix file
499                 if (large) {
500                         inRow.close();
501                         remove(distFile.c_str());
502                 }
503                 
504                 globaldata->gListVector = NULL;
505                 delete input;  globaldata->ginput = NULL;
506                 delete read;
507                 
508                 //read fastafile
509                 fasta = new FastaMap();
510                 fasta->readFastaFile(fastafile);
511                 
512                 //if user gave a namesfile then use it
513                 if (namefile != "") {   readNamesFile();        }
514                 
515                 //output create and output the .rep.fasta files
516                 map<string, string>::iterator itNameFile;
517                 for (itNameFile = outputNameFiles.begin(); itNameFile != outputNameFiles.end(); itNameFile++) {
518                         processNames(itNameFile->first, itNameFile->second);
519                 }
520                 
521                 delete fasta;
522                 if (groupfile != "") {
523                         delete groupMap;  globaldata->gGroupmap = NULL;
524                 }
525                 
526                 if (m->control_pressed) {  return 0; }
527                 
528                 m->mothurOutEndLine();
529                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
530                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
531                 m->mothurOutEndLine();
532                 
533                 return 0;
534         }
535         catch(exception& e) {
536                 m->errorOut(e, "GetOTURepCommand", "execute");
537                 exit(1);
538         }
539 }
540
541 //**********************************************************************************************************************
542 void GetOTURepCommand::readNamesFile() {
543         try {
544                 vector<string> dupNames;
545                 m->openInputFile(namefile, inNames);
546                 
547                 string name, names, sequence;
548         
549                 while(inNames){
550                         inNames >> name;                        //read from first column  A
551                         inNames >> names;               //read from second column  A,B,C,D
552                         
553                         dupNames.clear();
554                         
555                         //parse names into vector
556                         m->splitAtComma(names, dupNames);
557                         
558                         //store names in fasta map
559                         sequence = fasta->getSequence(name);
560                         for (int i = 0; i < dupNames.size(); i++) {
561                                 fasta->push_back(dupNames[i], sequence);
562                         }
563                 
564                         m->gobble(inNames);
565                 }
566                 inNames.close();
567
568         }
569         catch(exception& e) {
570                 m->errorOut(e, "GetOTURepCommand", "readNamesFile");
571                 exit(1);
572         }
573 }
574 //**********************************************************************************************************************
575 string GetOTURepCommand::findRep(vector<string> names) {
576         try{
577                 // if only 1 sequence in bin or processing the "unique" label, then 
578                 // the first sequence of the OTU is the representative one
579                 if ((names.size() == 2) || (names.size() == 1) || (list->getLabel() == "unique")) {
580                         return names[0];
581                 }else{
582                         vector<int> seqIndex(names.size());
583                         vector<float> max_dist(names.size());
584                         vector<float> total_dist(names.size());
585
586                         //fill seqIndex and initialize sums
587                         for (size_t i = 0; i < names.size(); i++) {
588                                 seqIndex[i] = nameToIndex[names[i]];
589                                 max_dist[i] = 0.0;
590                                 total_dist[i] = 0.0;
591                         }
592                         
593                         // loop through all entries in seqIndex
594                         SeqMap::iterator it;
595                         SeqMap currMap;
596                         for (size_t i=0; i < seqIndex.size(); i++) {
597                                 if (m->control_pressed) {  return  "control"; }
598                         
599                                 if (!large) {   currMap = seqVec[seqIndex[i]];  }
600                                 else            {       currMap = getMap(seqIndex[i]);  }
601                                 
602                                 for (size_t j=0; j < seqIndex.size(); j++) {
603                                         it = currMap.find(seqIndex[j]);         
604                                         if (it != currMap.end()) {
605                                                 max_dist[i] = max(max_dist[i], it->second);
606                                                 max_dist[j] = max(max_dist[j], it->second);
607                                                 total_dist[i] += it->second;
608                                                 total_dist[j] += it->second;
609                                         }else{ //if you can't find the distance make it the cutoff
610                                                 max_dist[i] = max(max_dist[i], cutoff);
611                                                 max_dist[j] = max(max_dist[j], cutoff);
612                                                 total_dist[i] += cutoff;
613                                                 total_dist[j] += cutoff;
614                                         }
615                                 }
616                         }
617                         
618                         // sequence with the smallest maximum distance is the representative
619                         //if tie occurs pick sequence with smallest average distance
620                         float min = 10000;
621                         int minIndex;
622                         for (size_t i=0; i < max_dist.size(); i++) {
623                                 if (m->control_pressed) {  return  "control"; }
624                                 if (max_dist[i] < min) {
625                                         min = max_dist[i];
626                                         minIndex = i;
627                                 }else if (max_dist[i] == min) {
628                                         float currentAverage = total_dist[minIndex] / (float) total_dist.size();
629                                         float newAverage = total_dist[i] / (float) total_dist.size();
630                                         
631                                         if (newAverage < currentAverage) {
632                                                 min = max_dist[i];
633                                                 minIndex = i;
634                                         }
635                                 }
636                         }
637                         
638                         return(names[minIndex]);
639                 }
640         }
641         catch(exception& e) {
642                 m->errorOut(e, "GetOTURepCommand", "FindRep");
643                 exit(1);
644         }
645 }
646
647 //**********************************************************************************************************************
648 int GetOTURepCommand::process(ListVector* processList) {
649         try{
650                 string name, sequence;
651                 string nameRep;
652
653                 //create output file
654                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
655                                 
656                 ofstream newNamesOutput;
657                 string outputNamesFile;
658                 map<string, ofstream*> filehandles;
659                 
660                 if (Groups.size() == 0) { //you don't want to use groups
661                         outputNamesFile  = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".rep.names";
662                         m->openOutputFile(outputNamesFile, newNamesOutput);
663                         outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile); 
664                         outputNameFiles[outputNamesFile] = processList->getLabel();
665                 }else{ //you want to use groups
666                         ofstream* temp;
667                         for (int i=0; i<Groups.size(); i++) {
668                                 temp = new ofstream;
669                                 filehandles[Groups[i]] = temp;
670                                 outputNamesFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + Groups[i] + ".rep.names";
671                                 
672                                 m->openOutputFile(outputNamesFile, *(temp));
673                                 outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile);
674                                 outputNameFiles[outputNamesFile] = processList->getLabel() + "." + Groups[i];
675                         }
676                 }
677                 
678                 //for each bin in the list vector
679                 for (int i = 0; i < processList->size(); i++) {
680                         if (m->control_pressed) { 
681                                 out.close();  
682                                 if (Groups.size() == 0) { //you don't want to use groups
683                                         newNamesOutput.close();
684                                 }else{
685                                         for (int j=0; j<Groups.size(); j++) {
686                                                 (*(filehandles[Groups[j]])).close();
687                                                 delete filehandles[Groups[j]];
688                                         }
689                                 }
690                                 return 0; 
691                         }
692                         
693                         string temp = processList->get(i);
694                         vector<string> namesInBin;
695                         m->splitAtComma(temp, namesInBin);
696                         
697                         if (Groups.size() == 0) {
698                                 nameRep = findRep(namesInBin);
699                                 newNamesOutput << i << '\t' << nameRep << '\t' << processList->get(i) << endl;
700                         }else{
701                                 map<string, vector<string> > NamesInGroup;
702                                 for (int j=0; j<Groups.size(); j++) { //initialize groups
703                                         NamesInGroup[Groups[j]].resize(0);
704                                 }
705                                 
706                                 for (int j=0; j<namesInBin.size(); j++) {
707                                         string thisgroup = groupMap->getGroup(namesInBin[j]);
708                                         
709                                         if (thisgroup == "not found") { m->mothurOut(namesInBin[j] + " is not in your groupfile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
710                                         
711                                         if (m->inUsersGroups(thisgroup, Groups)) { //add this name to correct group
712                                                 NamesInGroup[thisgroup].push_back(namesInBin[j]);
713                                         }
714                                 }
715                                 
716                                 //get rep for each group in otu
717                                 for (int j=0; j<Groups.size(); j++) {
718                                         if (NamesInGroup[Groups[j]].size() != 0) { //are there members from this group in this otu?
719                                                 //get rep for each group
720                                                 nameRep = findRep(NamesInGroup[Groups[j]]);
721                                                 
722                                                 //output group rep and other members of this group
723                                                 (*(filehandles[Groups[j]])) << i << '\t' << nameRep << '\t';
724                                                 
725                                                 for (int k=0; k<NamesInGroup[Groups[j]].size()-1; k++) {//output list of names in this otu from this group
726                                                         (*(filehandles[Groups[j]])) << NamesInGroup[Groups[j]][k] << ",";
727                                                 }
728                                                 //output last name
729                                                 (*(filehandles[Groups[j]])) << NamesInGroup[Groups[j]][NamesInGroup[Groups[j]].size()-1] << endl;
730                                         }
731                                 }
732                         }
733                 }
734                 
735                 if (Groups.size() == 0) { //you don't want to use groups
736                         newNamesOutput.close();
737                 }else{
738                         for (int i=0; i<Groups.size(); i++) {
739                                 (*(filehandles[Groups[i]])).close();
740                                 delete filehandles[Groups[i]];
741                         }
742                 }
743                 
744                 return 0;
745
746         }
747         catch(exception& e) {
748                 m->errorOut(e, "GetOTURepCommand", "process");
749                 exit(1);
750         }
751 }
752 //**********************************************************************************************************************
753 int GetOTURepCommand::processNames(string filename, string label) {
754         try{
755
756                 //create output file
757                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
758                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + label + ".rep.fasta";
759                 m->openOutputFile(outputFileName, out);
760                 vector<repStruct> reps;
761                 outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
762                 
763                 ofstream out2;
764                 string tempNameFile = filename + ".temp";
765                 m->openOutputFile(tempNameFile, out2);
766                 
767                 ifstream in;
768                 m->openInputFile(filename, in);
769                 
770                 int i = 0;
771                 while (!in.eof()) {
772                         string rep, binnames;
773                         in >> i >> rep >> binnames; m->gobble(in);
774                         out2 << rep << '\t' << binnames << endl;
775                         
776                         vector<string> names;
777                         m->splitAtComma(binnames, names);
778                         int binsize = names.size();
779                         
780                         //if you have a groupfile
781                         string group = "";
782                         if (groupfile != "") {
783                                 map<string, string> groups;
784                                 map<string, string>::iterator groupIt;
785                                 
786                                 //find the groups that are in this bin
787                                 for (size_t i = 0; i < names.size(); i++) {
788                                         string groupName = groupMap->getGroup(names[i]);
789                                         if (groupName == "not found") {  
790                                                 m->mothurOut(names[i] + " is missing from your group file. Please correct. "); m->mothurOutEndLine();
791                                                 groupError = true;
792                                         } else {
793                                                 groups[groupName] = groupName;
794                                         }
795                                 }
796                                 
797                                 //turn the groups into a string
798                                 for (groupIt = groups.begin(); groupIt != groups.end(); groupIt++) {
799                                         group += groupIt->first + "-";
800                                 }
801                                 //rip off last dash
802                                 group = group.substr(0, group.length()-1);
803                         }else{ group = ""; }
804
805                         
806                         //print out name and sequence for that bin
807                         string sequence = fasta->getSequence(rep);
808
809                         if (sequence != "not found") {
810                                 if (sorted == "") { //print them out
811                                         rep = rep + "\t" + toString(i+1);
812                                         rep = rep + "|" + toString(binsize);
813                                         if (groupfile != "") {
814                                                 rep = rep + "|" + group;
815                                         }
816                                         out << ">" << rep << endl;
817                                         out << sequence << endl;
818                                 }else { //save them
819                                         repStruct newRep(rep, i+1, binsize, group);
820                                         reps.push_back(newRep);
821                                 }
822                         }else { 
823                                 m->mothurOut(rep + " is missing from your fasta or name file, ignoring. Please correct."); m->mothurOutEndLine(); 
824                         }
825                 }
826                 
827                         
828                 if (sorted != "") { //then sort them and print them
829                         if (sorted == "name")           {  sort(reps.begin(), reps.end(), compareName);         }
830                         else if (sorted == "bin")       {  sort(reps.begin(), reps.end(), compareBin);          }
831                         else if (sorted == "size")      {  sort(reps.begin(), reps.end(), compareSize);         }
832                         else if (sorted == "group")     {  sort(reps.begin(), reps.end(), compareGroup);        }
833                         
834                         //print them
835                         for (int i = 0; i < reps.size(); i++) {
836                                 string sequence = fasta->getSequence(reps[i].name);
837                                 string outputName = reps[i].name + "\t" + toString(reps[i].bin);
838                                 outputName = outputName + "|" + toString(reps[i].size);
839                                 if (groupfile != "") {
840                                         outputName = outputName + "|" + reps[i].group;
841                                 }
842                                 out << ">" << outputName << endl;
843                                 out << sequence << endl;
844                         }
845                 }
846                 
847                 in.close();
848                 out.close();
849                 out2.close();
850                 
851                 remove(filename.c_str());
852                 rename(tempNameFile.c_str(), filename.c_str());
853                 
854                 return 0;
855
856         }
857         catch(exception& e) {
858                 m->errorOut(e, "GetOTURepCommand", "processNames");
859                 exit(1);
860         }
861 }
862 //**********************************************************************************************************************
863 SeqMap GetOTURepCommand::getMap(int row) {
864         try {
865                 SeqMap rowMap;
866                 
867                 //make sure this row exists in the file, it may not if the seq did not have any distances below the cutoff
868                 if (rowPositions[row] != -1){
869                         //go to row in file
870                         inRow.seekg(rowPositions[row]);
871                         
872                         int rowNum, numDists, colNum;
873                         float dist;
874                         
875                         inRow >> rowNum >> numDists;
876                         
877                         for(int i = 0; i < numDists; i++) {
878                                 inRow >> colNum >> dist;
879                                 rowMap[colNum] = dist;
880                                 
881                         }
882                 }
883                 
884                 return rowMap;
885         }
886         catch(exception& e) {
887                 m->errorOut(e, "GetOTURepCommand", "getMap");
888                 exit(1);
889         }
890 }
891 //**********************************************************************************************************************
892