]> git.donarmstrong.com Git - mothur.git/blob - getoturepcommand.cpp
fixes while testing
[mothur.git] / getoturepcommand.cpp
1 /*
2  *  getoturepcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/6/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "getoturepcommand.h"
11 #include "readphylip.h"
12 #include "readcolumn.h"
13 #include "formatphylip.h"
14 #include "formatcolumn.h"
15 #include "sharedutilities.h"
16
17
18 //********************************************************************************************************************
19 //sorts lowest to highest
20 inline bool compareName(repStruct left, repStruct right){
21         return (left.name < right.name);        
22 }
23 //********************************************************************************************************************
24 //sorts lowest to highest
25 inline bool compareBin(repStruct left, repStruct right){
26         return (left.bin < right.bin);  
27 }
28 //********************************************************************************************************************
29 //sorts lowest to highest
30 inline bool compareSize(repStruct left, repStruct right){
31         return (left.size < right.size);        
32 }
33 //********************************************************************************************************************
34 //sorts lowest to highest
35 inline bool compareGroup(repStruct left, repStruct right){
36         return (left.group < right.group);      
37 }
38 //**********************************************************************************************************************
39 GetOTURepCommand::GetOTURepCommand(){   
40         try {
41                 abort = true;
42                 //initialize outputTypes
43                 vector<string> tempOutNames;
44                 outputTypes["fasta"] = tempOutNames;
45                 outputTypes["name"] = tempOutNames;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "GetOTURepCommand", "GetOTURepCommand");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 vector<string> GetOTURepCommand::getValidParameters(){  
54         try {
55                 string Array[] =  {"fasta","list","label","name", "group", "sorted", "phylip","column","large","cutoff","precision","groups","outputdir","inputdir"};
56                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
57                 return myArray;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "GetOTURepCommand", "getValidParameters");
61                 exit(1);
62         }
63 }
64 //**********************************************************************************************************************
65 vector<string> GetOTURepCommand::getRequiredParameters(){       
66         try {
67                 string Array[] =  {"fasta","list"};
68                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
69                 return myArray;
70         }
71         catch(exception& e) {
72                 m->errorOut(e, "GetOTURepCommand", "getRequiredParameters");
73                 exit(1);
74         }
75 }
76 //**********************************************************************************************************************
77 vector<string> GetOTURepCommand::getRequiredFiles(){    
78         try {
79                 vector<string> myArray;
80                 return myArray;
81         }
82         catch(exception& e) {
83                 m->errorOut(e, "GetOTURepCommand", "getRequiredFiles");
84                 exit(1);
85         }
86 }
87 //**********************************************************************************************************************
88 GetOTURepCommand::GetOTURepCommand(string option)  {
89         try{
90                 globaldata = GlobalData::getInstance();
91                 abort = false;
92                 allLines = 1;
93                 labels.clear();
94                                 
95                 //allow user to run help
96                 if (option == "help") { 
97                         help(); abort = true;
98                 } else {
99                         //valid paramters for this command
100                         string Array[] =  {"fasta","list","label","name", "group", "sorted", "phylip","column","large","cutoff","precision","groups","outputdir","inputdir"};
101                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
102                         
103                         OptionParser parser(option);
104                         map<string, string> parameters = parser.getParameters();
105                         
106                         ValidParameters validParameter;
107                         map<string, string>::iterator it;
108                 
109                         //check to make sure all parameters are valid for command
110                         for (it = parameters.begin(); it != parameters.end(); it++) { 
111                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
112                         }
113                         
114                         //initialize outputTypes
115                         vector<string> tempOutNames;
116                         outputTypes["fasta"] = tempOutNames;
117                         outputTypes["name"] = tempOutNames;
118                         
119                         //if the user changes the input directory command factory will send this info to us in the output parameter 
120                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
121                         if (inputDir == "not found"){   inputDir = "";          }
122                         else {
123                                 string path;
124                                 it = parameters.find("list");
125                                 //user has given a template file
126                                 if(it != parameters.end()){ 
127                                         path = m->hasPath(it->second);
128                                         //if the user has not given a path then, add inputdir. else leave path alone.
129                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
130                                 }
131                                 
132                                 it = parameters.find("fasta");
133                                 //user has given a template file
134                                 if(it != parameters.end()){ 
135                                         path = m->hasPath(it->second);
136                                         //if the user has not given a path then, add inputdir. else leave path alone.
137                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
138                                 }
139                                 
140                                 it = parameters.find("phylip");
141                                 //user has given a template file
142                                 if(it != parameters.end()){ 
143                                         path = m->hasPath(it->second);
144                                         //if the user has not given a path then, add inputdir. else leave path alone.
145                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
146                                 }
147                                 
148                                 it = parameters.find("column");
149                                 //user has given a template file
150                                 if(it != parameters.end()){ 
151                                         path = m->hasPath(it->second);
152                                         //if the user has not given a path then, add inputdir. else leave path alone.
153                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
154                                 }
155                                 
156                                 it = parameters.find("name");
157                                 //user has given a template file
158                                 if(it != parameters.end()){ 
159                                         path = m->hasPath(it->second);
160                                         //if the user has not given a path then, add inputdir. else leave path alone.
161                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
162                                 }
163                                 
164                                 it = parameters.find("group");
165                                 //user has given a template file
166                                 if(it != parameters.end()){ 
167                                         path = m->hasPath(it->second);
168                                         //if the user has not given a path then, add inputdir. else leave path alone.
169                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
170                                 }
171                         }
172
173                         
174                         //if the user changes the output directory command factory will send this info to us in the output parameter 
175                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
176                         
177                         //check for required parameters
178                         fastafile = validParameter.validFile(parameters, "fasta", true);
179                         if (fastafile == "not found") { m->mothurOut("fasta is a required parameter for the get.oturep command."); m->mothurOutEndLine(); abort = true; }
180                         else if (fastafile == "not open") { abort = true; }     
181                 
182                         listfile = validParameter.validFile(parameters, "list", true);
183                         if (listfile == "not found") { m->mothurOut("list is a required parameter for the get.oturep command."); m->mothurOutEndLine(); abort = true; }
184                         else if (listfile == "not open") { abort = true; }      
185                         
186                         phylipfile = validParameter.validFile(parameters, "phylip", true);
187                         if (phylipfile == "not found") { phylipfile = "";  }
188                         else if (phylipfile == "not open") { abort = true; }    
189                         else { distFile = phylipfile; format = "phylip";   }
190                         
191                         columnfile = validParameter.validFile(parameters, "column", true);
192                         if (columnfile == "not found") { columnfile = ""; }
193                         else if (columnfile == "not open") { abort = true; }    
194                         else { distFile = columnfile; format = "column";   }
195                         
196                         namefile = validParameter.validFile(parameters, "name", true);
197                         if (namefile == "not open") { abort = true; }   
198                         else if (namefile == "not found") { namefile = ""; }
199                         
200                         if ((phylipfile == "") && (columnfile == "")) { m->mothurOut("When executing a get.oturep command you must enter a phylip or a column."); m->mothurOutEndLine(); abort = true; }
201                         else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a get.oturep command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
202                 
203                         if (columnfile != "") {  if (namefile == "") {  cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; }  }
204
205                         //check for optional parameter and set defaults
206                         // ...at some point should added some additional type checking...
207                         label = validParameter.validFile(parameters, "label", false);                   
208                         if (label == "not found") { label = ""; allLines = 1;  }
209                         else { 
210                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
211                                 else { allLines = 1;  }
212                         }
213                         
214                         groupfile = validParameter.validFile(parameters, "group", true);
215                         if (groupfile == "not open") { groupfile = ""; abort = true; }
216                         else if (groupfile == "not found") { groupfile = ""; }
217                                                 
218                         sorted = validParameter.validFile(parameters, "sorted", false);         if (sorted == "not found"){     sorted = "";    }
219                         if ((sorted != "") && (sorted != "name") && (sorted != "bin") && (sorted != "size") && (sorted != "group")) {
220                                 m->mothurOut(sorted + " is not a valid option for the sorted parameter. The only options are: name, bin, size and group. I will not sort."); m->mothurOutEndLine();
221                                 sorted = "";
222                         }
223                         
224                         if ((sorted == "group") && (groupfile == "")) {
225                                 m->mothurOut("You must provide a groupfile to sort by group. I will not sort."); m->mothurOutEndLine();
226                                 sorted = "";
227                         }
228                         
229                         groups = validParameter.validFile(parameters, "groups", false);                 
230                         if (groups == "not found") { groups = ""; }
231                         else { 
232                                 if (groupfile == "") {
233                                         m->mothurOut("You must provide a groupfile to use groups."); m->mothurOutEndLine();
234                                         abort = true;
235                                 }else { 
236                                         m->splitAtDash(groups, Groups);
237                                 }
238                         }
239                         globaldata->Groups = Groups;
240                         
241                         string temp = validParameter.validFile(parameters, "large", false);             if (temp == "not found") {      temp = "F";     }
242                         large = m->isTrue(temp);
243                         
244                         temp = validParameter.validFile(parameters, "precision", false);                        if (temp == "not found") { temp = "100"; }
245                         convert(temp, precision); 
246                         
247                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "10.0"; }
248                         convert(temp, cutoff); 
249                         cutoff += (5 / (precision * 10.0));
250                 }
251         }
252         catch(exception& e) {
253                 m->errorOut(e, "GetOTURepCommand", "GetOTURepCommand");
254                 exit(1);
255         }
256 }
257
258 //**********************************************************************************************************************
259
260 void GetOTURepCommand::help(){
261         try {
262                 m->mothurOut("The get.oturep command parameters are phylip, column, list, fasta, name, group, large, cutoff, precision, groups, sorted and label.  The fasta and list parameters are required, as well as phylip or column and name.\n");
263                 m->mothurOut("The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n");
264                 m->mothurOut("The phylip or column parameter is required, but only one may be used.  If you use a column file the name filename is required. \n");
265                 m->mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n");
266                 m->mothurOut("The get.oturep command should be in the following format: get.oturep(phylip=yourDistanceMatrix, fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n");
267                 m->mothurOut("Example get.oturep(phylip=amazon.dist, fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups).\n");
268                 m->mothurOut("The default value for label is all labels in your inputfile.\n");
269                 m->mothurOut("The sorted parameter allows you to indicate you want the output sorted. You can sort by sequence name, bin number, bin size or group. The default is no sorting, but your options are name, number, size, or group.\n");
270                 m->mothurOut("The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n");
271                 m->mothurOut("The group parameter allows you provide a group file.\n");
272                 m->mothurOut("The groups parameter allows you to indicate that you want representative sequences for each group specified for each OTU, group name should be separated by dashes. ex. groups=A-B-C.\n");
273                 m->mothurOut("The get.oturep command outputs a .fastarep and .rep.names file for each distance you specify, selecting one OTU representative for each bin.\n");
274                 m->mothurOut("If you provide a groupfile, then it also appends the names of the groups present in that bin.\n");
275                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
276         }
277         catch(exception& e) {
278                 m->errorOut(e, "GetOTURepCommand", "help");
279                 exit(1);
280         }
281 }
282
283 //**********************************************************************************************************************
284
285 GetOTURepCommand::~GetOTURepCommand(){}
286
287 //**********************************************************************************************************************
288
289 int GetOTURepCommand::execute(){
290         try {
291         
292                 if (abort == true) { return 0; }
293                 int error;
294                 
295                 if (!large) {
296                         //read distance files
297                         if (format == "column") { readMatrix = new ReadColumnMatrix(distFile); }        
298                         else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(distFile); }
299                         else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0;  }
300                         
301                         readMatrix->setCutoff(cutoff);
302         
303                         if(namefile != ""){     
304                                 nameMap = new NameAssignment(namefile);
305                                 nameMap->readMap();
306                         }else{  nameMap = NULL;         }
307                         
308                         readMatrix->read(nameMap);
309                         
310                         if (m->control_pressed) { delete readMatrix; return 0; }
311
312                         //get matrix
313                         if (globaldata->gListVector != NULL) {  delete globaldata->gListVector;  }
314                         globaldata->gListVector = readMatrix->getListVector();
315
316                         SparseMatrix* matrix = readMatrix->getMatrix();
317                         
318                         // Create a data structure to quickly access the distance information.
319                         // It consists of a vector of distance maps, where each map contains
320                         // all distances of a certain sequence. Vector and maps are accessed
321                         // via the index of a sequence in the distance matrix
322                         seqVec = vector<SeqMap>(globaldata->gListVector->size()); 
323                         for (MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++) {
324                                 if (m->control_pressed) { delete readMatrix; return 0; }
325                                 seqVec[currentCell->row][currentCell->column] = currentCell->dist;
326                         }
327                         
328                         delete matrix;
329                         delete readMatrix;
330                         delete nameMap;
331                         
332                         if (m->control_pressed) { return 0; }
333                 }else {
334                         //process file and set up indexes
335                         if (format == "column") { formatMatrix = new FormatColumnMatrix(distFile); }    
336                         else if (format == "phylip") { formatMatrix = new FormatPhylipMatrix(distFile); }
337                         else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0;  }
338                         
339                         formatMatrix->setCutoff(cutoff);
340         
341                         if(namefile != ""){     
342                                 nameMap = new NameAssignment(namefile);
343                                 nameMap->readMap();
344                         }else{  nameMap = NULL;         }
345                         
346                         formatMatrix->read(nameMap);
347                         
348                         if (m->control_pressed) { delete formatMatrix;  return 0; }
349
350                         //get matrix
351                         if (globaldata->gListVector != NULL) {  delete globaldata->gListVector;  }
352                         globaldata->gListVector = formatMatrix->getListVector();
353                         
354                         distFile = formatMatrix->getFormattedFileName();
355                         
356                         //positions in file where the distances for each sequence begin
357                         //rowPositions[1] = position in file where distance related to sequence 1 start.
358                         rowPositions = formatMatrix->getRowPositions();
359                         
360                         delete formatMatrix;
361                         delete nameMap;
362                         
363                         //openfile for getMap to use
364                         m->openInputFile(distFile, inRow);
365                         
366                         if (m->control_pressed) { inRow.close(); remove(distFile.c_str()); return 0; }
367                 }
368                 
369                 
370                 //globaldata->gListVector bin 0 = first name read in distance matrix, globaldata->gListVector bin 1 = second name read in distance matrix
371                 if (globaldata->gListVector != NULL) {
372                         vector<string> names;
373                         string binnames;
374                         //map names to rows in sparsematrix
375                         for (int i = 0; i < globaldata->gListVector->size(); i++) {
376                                 names.clear();
377                                 binnames = globaldata->gListVector->get(i);
378                                 
379                                 m->splitAtComma(binnames, names);
380                                 
381                                 for (int j = 0; j < names.size(); j++) {
382                                         nameToIndex[names[j]] = i;
383                                 }
384                         }
385                 } else { m->mothurOut("error, no listvector."); m->mothurOutEndLine(); }
386                 
387                                 
388                 if (m->control_pressed) { 
389                         if (large) {  inRow.close(); remove(distFile.c_str());  }
390                         return 0; 
391                 }
392                 
393                 if (groupfile != "") {
394                         //read in group map info.
395                         groupMap = new GroupMap(groupfile);
396                         int error = groupMap->readMap();
397                         if (error == 1) { delete groupMap; m->mothurOut("Error reading your groupfile. Proceeding without groupfile."); m->mothurOutEndLine(); groupfile = "";  }
398                         
399                         if (Groups.size() != 0) {
400                                 SharedUtil* util = new SharedUtil();
401                                 util->setGroups(Groups, groupMap->namesOfGroups, "getoturep");
402                                 delete util;
403                         }
404                 }
405                                                                 
406                 //set format to list so input can get listvector
407                 globaldata->setFormat("list");
408         
409                 //read list file
410                 read = new ReadOTUFile(listfile);
411                 read->read(&*globaldata); 
412                 
413                 input = globaldata->ginput;
414                 list = globaldata->gListVector;
415                 string lastLabel = list->getLabel();
416
417                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
418                 set<string> processedLabels;
419                 set<string> userLabels = labels;
420                 
421                 if (m->control_pressed) { 
422                         if (large) {  inRow.close(); remove(distFile.c_str());  }
423                         delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; 
424                 }
425         
426                 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
427                         
428                         if (allLines == 1 || labels.count(list->getLabel()) == 1){
429                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
430                                         error = process(list);
431                                         if (error == 1) { return 0; } //there is an error in hte input files, abort command
432                                         
433                                         if (m->control_pressed) { 
434                                                 if (large) {  inRow.close(); remove(distFile.c_str());  }
435                                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  } outputTypes.clear();
436                                                 delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; 
437                                         }
438                                         
439                                         processedLabels.insert(list->getLabel());
440                                         userLabels.erase(list->getLabel());
441                         }
442                         
443                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
444                                         string saveLabel = list->getLabel();
445                                         
446                                         delete list;
447                                         list = input->getListVector(lastLabel);
448                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
449                                         error = process(list);
450                                         if (error == 1) { return 0; } //there is an error in hte input files, abort command
451                                         
452                                         if (m->control_pressed) { 
453                                                 if (large) {  inRow.close(); remove(distFile.c_str());  }
454                                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  } outputTypes.clear();
455                                                 delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; 
456                                         }
457                                         
458                                         processedLabels.insert(list->getLabel());
459                                         userLabels.erase(list->getLabel());
460                                         
461                                         //restore real lastlabel to save below
462                                         list->setLabel(saveLabel);
463                         }
464                         
465                         lastLabel = list->getLabel();
466         
467                         delete list;
468                         list = input->getListVector();
469                 }
470                 
471                 //output error messages about any remaining user labels
472                 bool needToRun = false;
473                 for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {  
474                         m->mothurOut("Your file does not include the label " + (*it)); 
475                         if (processedLabels.count(lastLabel) != 1) {
476                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
477                                 needToRun = true;
478                         }else {
479                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
480                         }
481                 }
482                 
483                 //run last label if you need to
484                 if (needToRun == true)  {
485                         if (list != NULL) {     delete list;    }
486                         list = input->getListVector(lastLabel);
487                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
488                         error = process(list);
489                         delete list;
490                         if (error == 1) { return 0; } //there is an error in hte input files, abort command
491                         
492                         if (m->control_pressed) { 
493                                         if (large) {  inRow.close(); remove(distFile.c_str());  }
494                                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  } outputTypes.clear();
495                                         delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; 
496                         }
497                 }
498                 
499                 //close and remove formatted matrix file
500                 if (large) {
501                         inRow.close();
502                         remove(distFile.c_str());
503                 }
504                 
505                 globaldata->gListVector = NULL;
506                 delete input;  globaldata->ginput = NULL;
507                 delete read;
508                 
509                 //read fastafile
510                 fasta = new FastaMap();
511                 fasta->readFastaFile(fastafile);
512                 
513                 //if user gave a namesfile then use it
514                 if (namefile != "") {   readNamesFile();        }
515                 
516                 //output create and output the .rep.fasta files
517                 map<string, string>::iterator itNameFile;
518                 for (itNameFile = outputNameFiles.begin(); itNameFile != outputNameFiles.end(); itNameFile++) {
519                         processNames(itNameFile->first, itNameFile->second);
520                 }
521                 
522                 delete fasta;
523                 if (groupfile != "") {
524                         delete groupMap;  globaldata->gGroupmap = NULL;
525                 }
526                 
527                 if (m->control_pressed) {  return 0; }
528                 
529                 m->mothurOutEndLine();
530                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
531                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
532                 m->mothurOutEndLine();
533                 
534                 return 0;
535         }
536         catch(exception& e) {
537                 m->errorOut(e, "GetOTURepCommand", "execute");
538                 exit(1);
539         }
540 }
541
542 //**********************************************************************************************************************
543 void GetOTURepCommand::readNamesFile() {
544         try {
545                 vector<string> dupNames;
546                 m->openInputFile(namefile, inNames);
547                 
548                 string name, names, sequence;
549         
550                 while(inNames){
551                         inNames >> name;                        //read from first column  A
552                         inNames >> names;               //read from second column  A,B,C,D
553                         
554                         dupNames.clear();
555                         
556                         //parse names into vector
557                         m->splitAtComma(names, dupNames);
558                         
559                         //store names in fasta map
560                         sequence = fasta->getSequence(name);
561                         for (int i = 0; i < dupNames.size(); i++) {
562                                 fasta->push_back(dupNames[i], sequence);
563                         }
564                 
565                         m->gobble(inNames);
566                 }
567                 inNames.close();
568
569         }
570         catch(exception& e) {
571                 m->errorOut(e, "GetOTURepCommand", "readNamesFile");
572                 exit(1);
573         }
574 }
575 //**********************************************************************************************************************
576 string GetOTURepCommand::findRep(vector<string> names) {
577         try{
578                 // if only 1 sequence in bin or processing the "unique" label, then 
579                 // the first sequence of the OTU is the representative one
580                 if ((names.size() == 2) || (names.size() == 1) || (list->getLabel() == "unique")) {
581                         return names[0];
582                 }else{
583                         vector<int> seqIndex(names.size());
584                         vector<float> max_dist(names.size());
585                         vector<float> total_dist(names.size());
586
587                         //fill seqIndex and initialize sums
588                         for (size_t i = 0; i < names.size(); i++) {
589                                 seqIndex[i] = nameToIndex[names[i]];
590                                 max_dist[i] = 0.0;
591                                 total_dist[i] = 0.0;
592                         }
593                         
594                         // loop through all entries in seqIndex
595                         SeqMap::iterator it;
596                         SeqMap currMap;
597                         for (size_t i=0; i < seqIndex.size(); i++) {
598                                 if (m->control_pressed) {  return  "control"; }
599                         
600                                 if (!large) {   currMap = seqVec[seqIndex[i]];  }
601                                 else            {       currMap = getMap(seqIndex[i]);  }
602                                 
603                                 for (size_t j=0; j < seqIndex.size(); j++) {
604                                         it = currMap.find(seqIndex[j]);         
605                                         if (it != currMap.end()) {
606                                                 max_dist[i] = max(max_dist[i], it->second);
607                                                 max_dist[j] = max(max_dist[j], it->second);
608                                                 total_dist[i] += it->second;
609                                                 total_dist[j] += it->second;
610                                         }else{ //if you can't find the distance make it the cutoff
611                                                 max_dist[i] = max(max_dist[i], cutoff);
612                                                 max_dist[j] = max(max_dist[j], cutoff);
613                                                 total_dist[i] += cutoff;
614                                                 total_dist[j] += cutoff;
615                                         }
616                                 }
617                         }
618                         
619                         // sequence with the smallest maximum distance is the representative
620                         //if tie occurs pick sequence with smallest average distance
621                         float min = 10000;
622                         int minIndex;
623                         for (size_t i=0; i < max_dist.size(); i++) {
624                                 if (m->control_pressed) {  return  "control"; }
625                                 if (max_dist[i] < min) {
626                                         min = max_dist[i];
627                                         minIndex = i;
628                                 }else if (max_dist[i] == min) {
629                                         float currentAverage = total_dist[minIndex] / (float) total_dist.size();
630                                         float newAverage = total_dist[i] / (float) total_dist.size();
631                                         
632                                         if (newAverage < currentAverage) {
633                                                 min = max_dist[i];
634                                                 minIndex = i;
635                                         }
636                                 }
637                         }
638                         
639                         return(names[minIndex]);
640                 }
641         }
642         catch(exception& e) {
643                 m->errorOut(e, "GetOTURepCommand", "FindRep");
644                 exit(1);
645         }
646 }
647
648 //**********************************************************************************************************************
649 int GetOTURepCommand::process(ListVector* processList) {
650         try{
651                 string name, sequence;
652                 string nameRep;
653
654                 //create output file
655                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
656                                 
657                 ofstream newNamesOutput;
658                 string outputNamesFile;
659                 map<string, ofstream*> filehandles;
660                 
661                 if (Groups.size() == 0) { //you don't want to use groups
662                         outputNamesFile  = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".rep.names";
663                         m->openOutputFile(outputNamesFile, newNamesOutput);
664                         outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile); 
665                         outputNameFiles[outputNamesFile] = processList->getLabel();
666                 }else{ //you want to use groups
667                         ofstream* temp;
668                         for (int i=0; i<Groups.size(); i++) {
669                                 temp = new ofstream;
670                                 filehandles[Groups[i]] = temp;
671                                 outputNamesFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + Groups[i] + ".rep.names";
672                                 
673                                 m->openOutputFile(outputNamesFile, *(temp));
674                                 outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile);
675                                 outputNameFiles[outputNamesFile] = processList->getLabel() + "." + Groups[i];
676                         }
677                 }
678                 
679                 //for each bin in the list vector
680                 for (int i = 0; i < processList->size(); i++) {
681                         if (m->control_pressed) { 
682                                 out.close();  
683                                 if (Groups.size() == 0) { //you don't want to use groups
684                                         newNamesOutput.close();
685                                 }else{
686                                         for (int j=0; j<Groups.size(); j++) {
687                                                 (*(filehandles[Groups[j]])).close();
688                                                 delete filehandles[Groups[j]];
689                                         }
690                                 }
691                                 return 0; 
692                         }
693                         
694                         string temp = processList->get(i);
695                         vector<string> namesInBin;
696                         m->splitAtComma(temp, namesInBin);
697                         
698                         if (Groups.size() == 0) {
699                                 nameRep = findRep(namesInBin);
700                                 newNamesOutput << i << '\t' << nameRep << '\t' << processList->get(i) << endl;
701                         }else{
702                                 map<string, vector<string> > NamesInGroup;
703                                 for (int j=0; j<Groups.size(); j++) { //initialize groups
704                                         NamesInGroup[Groups[j]].resize(0);
705                                 }
706                                 
707                                 for (int j=0; j<namesInBin.size(); j++) {
708                                         string thisgroup = groupMap->getGroup(namesInBin[j]);
709                                         
710                                         if (thisgroup == "not found") { m->mothurOut(namesInBin[j] + " is not in your groupfile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
711                                         
712                                         if (m->inUsersGroups(thisgroup, Groups)) { //add this name to correct group
713                                                 NamesInGroup[thisgroup].push_back(namesInBin[j]);
714                                         }
715                                 }
716                                 
717                                 //get rep for each group in otu
718                                 for (int j=0; j<Groups.size(); j++) {
719                                         if (NamesInGroup[Groups[j]].size() != 0) { //are there members from this group in this otu?
720                                                 //get rep for each group
721                                                 nameRep = findRep(NamesInGroup[Groups[j]]);
722                                                 
723                                                 //output group rep and other members of this group
724                                                 (*(filehandles[Groups[j]])) << i << '\t' << nameRep << '\t';
725                                                 
726                                                 for (int k=0; k<NamesInGroup[Groups[j]].size()-1; k++) {//output list of names in this otu from this group
727                                                         (*(filehandles[Groups[j]])) << NamesInGroup[Groups[j]][k] << ",";
728                                                 }
729                                                 //output last name
730                                                 (*(filehandles[Groups[j]])) << NamesInGroup[Groups[j]][NamesInGroup[Groups[j]].size()-1] << endl;
731                                         }
732                                 }
733                         }
734                 }
735                 
736                 if (Groups.size() == 0) { //you don't want to use groups
737                         newNamesOutput.close();
738                 }else{
739                         for (int i=0; i<Groups.size(); i++) {
740                                 (*(filehandles[Groups[i]])).close();
741                                 delete filehandles[Groups[i]];
742                         }
743                 }
744                 
745                 return 0;
746
747         }
748         catch(exception& e) {
749                 m->errorOut(e, "GetOTURepCommand", "process");
750                 exit(1);
751         }
752 }
753 //**********************************************************************************************************************
754 int GetOTURepCommand::processNames(string filename, string label) {
755         try{
756
757                 //create output file
758                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
759                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + label + ".rep.fasta";
760                 m->openOutputFile(outputFileName, out);
761                 vector<repStruct> reps;
762                 outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
763                 
764                 ofstream out2;
765                 string tempNameFile = filename + ".temp";
766                 m->openOutputFile(tempNameFile, out2);
767                 
768                 ifstream in;
769                 m->openInputFile(filename, in);
770                 
771                 int i = 0;
772                 while (!in.eof()) {
773                         string rep, binnames;
774                         in >> i >> rep >> binnames; m->gobble(in);
775                         out2 << rep << '\t' << binnames << endl;
776                         
777                         vector<string> names;
778                         m->splitAtComma(binnames, names);
779                         int binsize = names.size();
780                         
781                         //if you have a groupfile
782                         string group = "";
783                         if (groupfile != "") {
784                                 map<string, string> groups;
785                                 map<string, string>::iterator groupIt;
786                                 
787                                 //find the groups that are in this bin
788                                 for (size_t i = 0; i < names.size(); i++) {
789                                         string groupName = groupMap->getGroup(names[i]);
790                                         if (groupName == "not found") {  
791                                                 m->mothurOut(names[i] + " is missing from your group file. Please correct. "); m->mothurOutEndLine();
792                                                 groupError = true;
793                                         } else {
794                                                 groups[groupName] = groupName;
795                                         }
796                                 }
797                                 
798                                 //turn the groups into a string
799                                 for (groupIt = groups.begin(); groupIt != groups.end(); groupIt++) {
800                                         group += groupIt->first + "-";
801                                 }
802                                 //rip off last dash
803                                 group = group.substr(0, group.length()-1);
804                         }else{ group = ""; }
805
806                         
807                         //print out name and sequence for that bin
808                         string sequence = fasta->getSequence(rep);
809
810                         if (sequence != "not found") {
811                                 if (sorted == "") { //print them out
812                                         rep = rep + "\t" + toString(i+1);
813                                         rep = rep + "|" + toString(binsize);
814                                         if (groupfile != "") {
815                                                 rep = rep + "|" + group;
816                                         }
817                                         out << ">" << rep << endl;
818                                         out << sequence << endl;
819                                 }else { //save them
820                                         repStruct newRep(rep, i+1, binsize, group);
821                                         reps.push_back(newRep);
822                                 }
823                         }else { 
824                                 m->mothurOut(rep + " is missing from your fasta or name file, ignoring. Please correct."); m->mothurOutEndLine(); 
825                         }
826                 }
827                 
828                         
829                 if (sorted != "") { //then sort them and print them
830                         if (sorted == "name")           {  sort(reps.begin(), reps.end(), compareName);         }
831                         else if (sorted == "bin")       {  sort(reps.begin(), reps.end(), compareBin);          }
832                         else if (sorted == "size")      {  sort(reps.begin(), reps.end(), compareSize);         }
833                         else if (sorted == "group")     {  sort(reps.begin(), reps.end(), compareGroup);        }
834                         
835                         //print them
836                         for (int i = 0; i < reps.size(); i++) {
837                                 string sequence = fasta->getSequence(reps[i].name);
838                                 string outputName = reps[i].name + "\t" + toString(reps[i].bin);
839                                 outputName = outputName + "|" + toString(reps[i].size);
840                                 if (groupfile != "") {
841                                         outputName = outputName + "|" + reps[i].group;
842                                 }
843                                 out << ">" << outputName << endl;
844                                 out << sequence << endl;
845                         }
846                 }
847                 
848                 in.close();
849                 out.close();
850                 out2.close();
851                 
852                 remove(filename.c_str());
853                 rename(tempNameFile.c_str(), filename.c_str());
854                 
855                 return 0;
856
857         }
858         catch(exception& e) {
859                 m->errorOut(e, "GetOTURepCommand", "processNames");
860                 exit(1);
861         }
862 }
863 //**********************************************************************************************************************
864 SeqMap GetOTURepCommand::getMap(int row) {
865         try {
866                 SeqMap rowMap;
867                 
868                 //make sure this row exists in the file, it may not if the seq did not have any distances below the cutoff
869                 if (rowPositions[row] != -1){
870                         //go to row in file
871                         inRow.seekg(rowPositions[row]);
872                         
873                         int rowNum, numDists, colNum;
874                         float dist;
875                         
876                         inRow >> rowNum >> numDists;
877                         
878                         for(int i = 0; i < numDists; i++) {
879                                 inRow >> colNum >> dist;
880                                 rowMap[colNum] = dist;
881                                 
882                         }
883                 }
884                 
885                 return rowMap;
886         }
887         catch(exception& e) {
888                 m->errorOut(e, "GetOTURepCommand", "getMap");
889                 exit(1);
890         }
891 }
892 //**********************************************************************************************************************
893