]> git.donarmstrong.com Git - mothur.git/blob - getoturepcommand.cpp
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / getoturepcommand.cpp
1 /*
2  *  getoturepcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/6/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "getoturepcommand.h"
11 #include "readphylip.h"
12 #include "readcolumn.h"
13 #include "formatphylip.h"
14 #include "formatcolumn.h"
15 #include "sharedutilities.h"
16
17
18 //********************************************************************************************************************
19 //sorts lowest to highest
20 inline bool compareName(repStruct left, repStruct right){
21         return (left.name < right.name);        
22 }
23 //********************************************************************************************************************
24 //sorts lowest to highest
25 inline bool compareBin(repStruct left, repStruct right){
26         return (left.bin < right.bin);  
27 }
28 //********************************************************************************************************************
29 //sorts lowest to highest
30 inline bool compareSize(repStruct left, repStruct right){
31         return (left.size < right.size);        
32 }
33 //********************************************************************************************************************
34 //sorts lowest to highest
35 inline bool compareGroup(repStruct left, repStruct right){
36         return (left.group < right.group);      
37 }
38
39 //**********************************************************************************************************************
40 vector<string> GetOTURepCommand::setParameters(){       
41         try {
42                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","name",false,true, true); parameters.push_back(plist);
43                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,false, true); parameters.push_back(pfasta);
44                 CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none","",false,false, true); parameters.push_back(pphylip);
45         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName","",false,false, true); parameters.push_back(pname);
46         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "ColumnName","count",false,false, true); parameters.push_back(pcount);
47                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false, true); parameters.push_back(pgroup);
48                 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "ColumnName","",false,false, true); parameters.push_back(pcolumn);
49                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
50                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
51                 CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pcutoff);
52                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision);
53                 CommandParameter pweighted("weighted", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pweighted);
54                 CommandParameter psorted("sorted", "Multiple", "none-name-bin-size-group", "none", "", "", "","",false,false); parameters.push_back(psorted);
55                 CommandParameter plarge("large", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(plarge);
56                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
57                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
58                 
59                 vector<string> myArray;
60                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
61                 return myArray;
62         }
63         catch(exception& e) {
64                 m->errorOut(e, "GetOTURepCommand", "setParameters");
65                 exit(1);
66         }
67 }
68 //**********************************************************************************************************************
69 string GetOTURepCommand::getHelpString(){       
70         try {
71                 string helpString = "";
72                 helpString += "The get.oturep command parameters are phylip, column, list, fasta, name, group, count, large, weighted, cutoff, precision, groups, sorted and label.  The list parameter is required, as well as phylip or column and name, unless you have valid current files.\n";
73                 helpString += "The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n";
74                 helpString += "The phylip or column parameter is required, but only one may be used.  If you use a column file the name or count filename is required. \n";
75                 helpString += "If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n";
76                 helpString += "The get.oturep command should be in the following format: get.oturep(phylip=yourDistanceMatrix, fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n";
77                 helpString += "Example get.oturep(phylip=amazon.dist, fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups).\n";
78                 helpString += "The default value for label is all labels in your inputfile.\n";
79                 helpString += "The sorted parameter allows you to indicate you want the output sorted. You can sort by sequence name, bin number, bin size or group. The default is no sorting, but your options are name, number, size, or group.\n";
80                 helpString += "The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n";
81                 helpString += "The weighted parameter allows you to indicate that want to find the weighted representative. You must provide a namesfile to set weighted to true.  The default value is false.\n";
82                 helpString += "The representative is found by selecting the sequence that has the smallest total distance to all other sequences in the OTU. If a tie occurs the smallest average distance is used.\n";
83                 helpString += "For weighted = false, mothur assumes the distance file contains only unique sequences, the list file may contain all sequences, but only the uniques are considered to become the representative. If your distance file contains all the sequences it would become weighted=true.\n";
84                 helpString += "For weighted = true, mothur assumes the distance file contains only unique sequences, the list file must contain all sequences, all sequences are considered to become the representative, but unique name will be used in the output for consistency.\n";
85                 helpString += "If your distance file contains all the sequence and you do not provide a name file, the weighted representative will be given, unless your listfile is unique. If you provide a namefile, then you can select weighted or unweighted.\n";
86                 helpString += "The group parameter allows you provide a group file.\n";
87                 helpString += "The groups parameter allows you to indicate that you want representative sequences for each group specified for each OTU, group name should be separated by dashes. ex. groups=A-B-C.\n";
88                 helpString += "The get.oturep command outputs a .fastarep and .rep.names file for each distance you specify, selecting one OTU representative for each bin.\n";
89                 helpString += "If you provide a groupfile, then it also appends the names of the groups present in that bin.\n";
90                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
91                 return helpString;
92         }
93         catch(exception& e) {
94                 m->errorOut(e, "GetOTURepCommand", "getHelpString");
95                 exit(1);
96         }
97 }
98 //**********************************************************************************************************************
99 string GetOTURepCommand::getOutputPattern(string type) {
100     try {
101         string pattern = "";
102         
103         if (type == "fasta") {  pattern = "[filename],[tag],rep.fasta-[filename],[tag],[group],rep.fasta"; } 
104         else if (type == "name") {  pattern = "[filename],[tag],rep.names-[filename],[tag],[group],rep.names"; } 
105         else if (type == "count") {  pattern = "[filename],[tag],rep.count_table-[filename],[tag],[group],rep.count_table"; }
106         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
107         
108         return pattern;
109     }
110     catch(exception& e) {
111         m->errorOut(e, "GetOTURepCommand", "getOutputPattern");
112         exit(1);
113     }
114 }
115 //**********************************************************************************************************************
116 GetOTURepCommand::GetOTURepCommand(){   
117         try {
118                 abort = true; calledHelp = true; 
119                 setParameters();
120                 vector<string> tempOutNames;
121                 outputTypes["fasta"] = tempOutNames;
122                 outputTypes["name"] = tempOutNames;
123         outputTypes["count"] = tempOutNames;
124         }
125         catch(exception& e) {
126                 m->errorOut(e, "GetOTURepCommand", "GetOTURepCommand");
127                 exit(1);
128         }
129 }
130 //**********************************************************************************************************************
131 GetOTURepCommand::GetOTURepCommand(string option)  {
132         try{
133                 abort = false; calledHelp = false;   
134                 allLines = 1;
135                                 
136                 //allow user to run help
137                 if (option == "help") { 
138                         help(); abort = true; calledHelp = true;
139                 }else if(option == "citation") { citation(); abort = true; calledHelp = true;
140                 } else {
141                         vector<string> myArray = setParameters();
142                         
143                         OptionParser parser(option);
144                         map<string, string> parameters = parser.getParameters();
145                         
146                         ValidParameters validParameter;
147                         map<string, string>::iterator it;
148                 
149                         //check to make sure all parameters are valid for command
150                         for (it = parameters.begin(); it != parameters.end(); it++) { 
151                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
152                         }
153                         
154                         //initialize outputTypes
155                         vector<string> tempOutNames;
156                         outputTypes["fasta"] = tempOutNames;
157                         outputTypes["name"] = tempOutNames;
158             outputTypes["count"] = tempOutNames;
159                         
160                         //if the user changes the input directory command factory will send this info to us in the output parameter 
161                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
162                         if (inputDir == "not found"){   inputDir = "";          }
163                         else {
164                                 string path;
165                                 it = parameters.find("list");
166                                 //user has given a template file
167                                 if(it != parameters.end()){ 
168                                         path = m->hasPath(it->second);
169                                         //if the user has not given a path then, add inputdir. else leave path alone.
170                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
171                                 }
172                                 
173                                 it = parameters.find("fasta");
174                                 //user has given a template file
175                                 if(it != parameters.end()){ 
176                                         path = m->hasPath(it->second);
177                                         //if the user has not given a path then, add inputdir. else leave path alone.
178                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
179                                 }
180                                 
181                                 it = parameters.find("phylip");
182                                 //user has given a template file
183                                 if(it != parameters.end()){ 
184                                         path = m->hasPath(it->second);
185                                         //if the user has not given a path then, add inputdir. else leave path alone.
186                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
187                                 }
188                                 
189                                 it = parameters.find("column");
190                                 //user has given a template file
191                                 if(it != parameters.end()){ 
192                                         path = m->hasPath(it->second);
193                                         //if the user has not given a path then, add inputdir. else leave path alone.
194                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
195                                 }
196                                 
197                                 it = parameters.find("name");
198                                 //user has given a template file
199                                 if(it != parameters.end()){ 
200                                         path = m->hasPath(it->second);
201                                         //if the user has not given a path then, add inputdir. else leave path alone.
202                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
203                                 }
204                                 
205                                 it = parameters.find("group");
206                                 //user has given a template file
207                                 if(it != parameters.end()){ 
208                                         path = m->hasPath(it->second);
209                                         //if the user has not given a path then, add inputdir. else leave path alone.
210                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
211                                 }
212                 
213                 it = parameters.find("count");
214                                 //user has given a template file
215                                 if(it != parameters.end()){ 
216                                         path = m->hasPath(it->second);
217                                         //if the user has not given a path then, add inputdir. else leave path alone.
218                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
219                                 }
220                         }
221
222                         
223                         //if the user changes the output directory command factory will send this info to us in the output parameter 
224                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
225                         
226                         //check for required parameters
227                         fastafile = validParameter.validFile(parameters, "fasta", true);
228                         if (fastafile == "not found") { fastafile = ""; }
229                         else if (fastafile == "not open") { abort = true; }     
230                         else { m->setFastaFile(fastafile); }
231                 
232                         listfile = validParameter.validFile(parameters, "list", true);
233                         if (listfile == "not found") {                  
234                                 listfile = m->getListFile(); 
235                                 if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
236                                 else {  m->mothurOut("You have no current list file and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
237                         }
238                         else if (listfile == "not open") { abort = true; }      
239                         else { m->setListFile(listfile); }
240                         
241                         phylipfile = validParameter.validFile(parameters, "phylip", true);
242                         if (phylipfile == "not found") { phylipfile = "";  }
243                         else if (phylipfile == "not open") { abort = true; }    
244                         else { distFile = phylipfile; format = "phylip"; m->setPhylipFile(phylipfile);   }
245                         
246                         columnfile = validParameter.validFile(parameters, "column", true);
247                         if (columnfile == "not found") { columnfile = ""; }
248                         else if (columnfile == "not open") { abort = true; }    
249                         else { distFile = columnfile; format = "column";  m->setColumnFile(columnfile); }
250                         
251                         namefile = validParameter.validFile(parameters, "name", true);
252                         if (namefile == "not open") { abort = true; }   
253                         else if (namefile == "not found") { namefile = ""; }
254                         else { m->setNameFile(namefile); }
255             
256             hasGroups = false;
257             countfile = validParameter.validFile(parameters, "count", true);
258                         if (countfile == "not found") { countfile =  "";   }
259                         else if (countfile == "not open") { abort = true; countfile =  ""; }    
260                         else {   
261                 m->setCountTableFile(countfile); 
262                 ct.readTable(countfile);
263                 if (ct.hasGroupInfo()) { hasGroups = true; }
264             }
265             
266             if ((namefile != "") && (countfile != "")) {
267                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
268             }
269             
270             if ((groupfile != "") && (countfile != "")) {
271                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
272             }
273                         
274                         if ((phylipfile == "") && (columnfile == "")) { //is there are current file available for either of these?
275                                 //give priority to column, then phylip
276                                 columnfile = m->getColumnFile(); 
277                                 if (columnfile != "") {  distFile = columnfile; format = "column"; m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
278                                 else { 
279                                         phylipfile = m->getPhylipFile(); 
280                                         if (phylipfile != "") {  distFile = phylipfile; format = "phylip"; m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
281                                         else { 
282                                                 m->mothurOut("No valid current files. You must provide a phylip or column file before you can use the get.oturep command."); m->mothurOutEndLine(); 
283                                                 abort = true;
284                                         }
285                                 }
286                         }else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a get.oturep command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
287                 
288                         if (columnfile != "") {  
289                                 if ((namefile == "") && (countfile == "")) { 
290                                         namefile = m->getNameFile(); 
291                                         if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
292                                         else { 
293                                                 countfile = m->getCountTableFile();
294                         if (countfile != "") {  m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
295                         else { 
296                             m->mothurOut("You need to provide a namefile or countfile if you are going to use the column format."); m->mothurOutEndLine(); 
297                             abort = true; 
298                         }       
299                                         }       
300                                 }
301                         }
302
303                         //check for optional parameter and set defaults
304                         // ...at some point should added some additional type checking...
305                         label = validParameter.validFile(parameters, "label", false);                   
306                         if (label == "not found") { label = ""; allLines = 1;  }
307                         else { 
308                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
309                                 else { allLines = 1;  }
310                         }
311                         
312                         groupfile = validParameter.validFile(parameters, "group", true);
313                         if (groupfile == "not open") { groupfile = ""; abort = true; }
314                         else if (groupfile == "not found") { groupfile = ""; }
315                         else { m->setGroupFile(groupfile); }
316                         
317                         sorted = validParameter.validFile(parameters, "sorted", false);         if (sorted == "not found"){     sorted = "";    }
318                         if (sorted == "none") { sorted=""; }
319                         if ((sorted != "") && (sorted != "name") && (sorted != "bin") && (sorted != "size") && (sorted != "group")) {
320                                 m->mothurOut(sorted + " is not a valid option for the sorted parameter. The only options are: name, bin, size and group. I will not sort."); m->mothurOutEndLine();
321                                 sorted = "";
322                         }
323                         
324                         if ((sorted == "group") && ((groupfile == "")&& !hasGroups)) {
325                                 m->mothurOut("You must provide a groupfile or have a count file with group info to sort by group. I will not sort."); m->mothurOutEndLine();
326                                 sorted = "";
327                         }
328                         
329                         groups = validParameter.validFile(parameters, "groups", false);                 
330                         if (groups == "not found") { groups = ""; }
331                         else { 
332                                 if ((groupfile == "") && (!hasGroups)) {
333                                         m->mothurOut("You must provide a groupfile to use groups."); m->mothurOutEndLine();
334                                         abort = true;
335                                 }else { 
336                                         m->splitAtDash(groups, Groups);
337                                 }
338                         }
339                         m->setGroups(Groups);
340                         
341                         string temp = validParameter.validFile(parameters, "large", false);             if (temp == "not found") {      temp = "F";     }
342                         large = m->isTrue(temp);
343                         
344                         temp = validParameter.validFile(parameters, "weighted", false);         if (temp == "not found") {       temp = "f";    }
345                         weighted = m->isTrue(temp);
346                         
347                         if ((weighted) && (namefile == "")) { m->mothurOut("You cannot set weighted to true unless you provide a namesfile."); m->mothurOutEndLine(); abort = true; }
348                         
349                         temp = validParameter.validFile(parameters, "precision", false);                        if (temp == "not found") { temp = "100"; }
350                         m->mothurConvert(temp, precision); 
351                         
352                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "10.0"; }
353                         m->mothurConvert(temp, cutoff); 
354                         cutoff += (5 / (precision * 10.0));
355                 }
356         }
357         catch(exception& e) {
358                 m->errorOut(e, "GetOTURepCommand", "GetOTURepCommand");
359                 exit(1);
360         }
361 }
362
363 //**********************************************************************************************************************
364
365 int GetOTURepCommand::execute(){
366         try {
367         
368                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
369                 int error;
370                 list = NULL;
371                 
372                 readDist();             
373                                 
374                 if (m->control_pressed) { if (large) {  inRow.close(); m->mothurRemove(distFile);  } return 0; }
375                 
376                 if (groupfile != "") {
377                         //read in group map info.
378                         groupMap = new GroupMap(groupfile);
379                         int error = groupMap->readMap();
380                         if (error == 1) { delete groupMap; m->mothurOut("Error reading your groupfile. Proceeding without groupfile."); m->mothurOutEndLine(); groupfile = "";  }
381                         
382                         if (Groups.size() != 0) {
383                                 SharedUtil util;
384                                 vector<string> gNamesOfGroups = groupMap->getNamesOfGroups();
385                                 util.setGroups(Groups, gNamesOfGroups, "getoturep");
386                                 groupMap->setNamesOfGroups(gNamesOfGroups);
387                         }
388                 }else if (hasGroups) {
389             if (Groups.size() != 0) {
390                                 SharedUtil util;
391                                 vector<string> gNamesOfGroups = ct.getNamesOfGroups();
392                                 util.setGroups(Groups, gNamesOfGroups, "getoturep");
393                         }
394         }
395                 
396                 //done with listvector from matrix
397                 if (list != NULL) { delete list; }
398                 
399                 input = new InputData(listfile, "list");
400                 list = input->getListVector();
401                 string lastLabel = list->getLabel();
402
403                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
404                 set<string> processedLabels;
405                 set<string> userLabels = labels;
406                 
407                 if (m->control_pressed) { 
408                         if (large) {  inRow.close(); m->mothurRemove(distFile);  }
409                         delete input; delete list; return 0; 
410                 }
411                 
412                 if ((!weighted) && (namefile != "")) { readNamesFile(weighted); }
413                 
414                 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
415                         
416                         if (allLines == 1 || labels.count(list->getLabel()) == 1){
417                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
418                                         error = process(list);
419                                         if (error == 1) { return 0; } //there is an error in hte input files, abort command
420                                         
421                                         if (m->control_pressed) { 
422                                                 if (large) {  inRow.close(); m->mothurRemove(distFile);  }
423                                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } outputTypes.clear();
424                                                 delete input; delete list; return 0; 
425                                         }
426                                         
427                                         processedLabels.insert(list->getLabel());
428                                         userLabels.erase(list->getLabel());
429                         }
430                         
431                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
432                                         string saveLabel = list->getLabel();
433                                         
434                                         delete list;
435                                         list = input->getListVector(lastLabel);
436                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
437                                         error = process(list);
438                                         if (error == 1) { return 0; } //there is an error in hte input files, abort command
439                                         
440                                         if (m->control_pressed) { 
441                                                 if (large) {  inRow.close(); m->mothurRemove(distFile);  }
442                                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } outputTypes.clear();
443                                                 delete input; delete list; return 0; 
444                                         }
445                                         
446                                         processedLabels.insert(list->getLabel());
447                                         userLabels.erase(list->getLabel());
448                                         
449                                         //restore real lastlabel to save below
450                                         list->setLabel(saveLabel);
451                         }
452                         
453                         lastLabel = list->getLabel();
454         
455                         delete list;
456                         list = input->getListVector();
457                 }
458                 
459                 //output error messages about any remaining user labels
460                 bool needToRun = false;
461                 for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {  
462                         m->mothurOut("Your file does not include the label " + (*it)); 
463                         if (processedLabels.count(lastLabel) != 1) {
464                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
465                                 needToRun = true;
466                         }else {
467                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
468                         }
469                 }
470                 
471                 //run last label if you need to
472                 if (needToRun == true)  {
473                         if (list != NULL) {     delete list;    }
474                         list = input->getListVector(lastLabel);
475                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
476                         error = process(list);
477                         delete list;
478                         if (error == 1) { return 0; } //there is an error in hte input files, abort command
479                         
480                         if (m->control_pressed) { 
481                                         if (large) {  inRow.close(); m->mothurRemove(distFile);  }
482                                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } outputTypes.clear();
483                                         delete input; delete list; return 0; 
484                         }
485                 }
486                 
487                 //close and remove formatted matrix file
488                 if (large) {
489                         inRow.close();
490                         m->mothurRemove(distFile);
491                 }
492                 
493                 delete input;  
494                 
495                 if (!weighted) { nameFileMap.clear(); }
496                 
497                                 
498                 if (fastafile != "") {
499                         //read fastafile
500                         fasta = new FastaMap();
501                         fasta->readFastaFile(fastafile);
502                         
503                         //if user gave a namesfile then use it
504                         if (namefile != "") {   readNamesFile();        }
505                         
506                         //output create and output the .rep.fasta files
507                         map<string, string>::iterator itNameFile;
508                         for (itNameFile = outputNameFiles.begin(); itNameFile != outputNameFiles.end(); itNameFile++) {
509                                 processFastaNames(itNameFile->first, itNameFile->second);
510                         }
511                 }else {
512                         //output create and output the .rep.fasta files
513                         map<string, string>::iterator itNameFile;
514                         for (itNameFile = outputNameFiles.begin(); itNameFile != outputNameFiles.end(); itNameFile++) {
515                                 processNames(itNameFile->first, itNameFile->second);
516                         }
517                 }
518                 
519                                 
520                 if (groupfile != "") { delete groupMap; }
521                 
522                 if (m->control_pressed) {  return 0; }
523                 
524                 //set fasta file as new current fastafile - use first one??
525                 string current = "";
526                 itTypes = outputTypes.find("fasta");
527                 if (itTypes != outputTypes.end()) {
528                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
529                 }
530                 
531                 itTypes = outputTypes.find("name");
532                 if (itTypes != outputTypes.end()) {
533                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
534                 }
535         
536         itTypes = outputTypes.find("count");
537                 if (itTypes != outputTypes.end()) {
538                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
539                 }
540                 
541                 m->mothurOutEndLine();
542                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
543                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
544                 m->mothurOutEndLine();
545                 
546                 return 0;
547         }
548         catch(exception& e) {
549                 m->errorOut(e, "GetOTURepCommand", "execute");
550                 exit(1);
551         }
552 }
553 //**********************************************************************************************************************
554 int GetOTURepCommand::readDist() {
555         try {
556         
557         if (!large) {
558                         //read distance files
559                         if (format == "column") { readMatrix = new ReadColumnMatrix(distFile); }        
560                         else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(distFile); }
561                         else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0;  }
562                         
563                         readMatrix->setCutoff(cutoff);
564             
565                         NameAssignment* nameMap = NULL;
566             if(namefile != ""){ 
567                 nameMap = new NameAssignment(namefile);
568                 nameMap->readMap();
569                 readMatrix->read(nameMap);
570             }else if (countfile != "") {
571                 readMatrix->read(&ct);
572             }else {
573                readMatrix->read(nameMap); 
574             }
575                         
576                         if (m->control_pressed) { delete readMatrix; return 0; }
577             
578                         list = readMatrix->getListVector();
579                         SparseDistanceMatrix* matrix = readMatrix->getDMatrix();
580                         
581                         // Create a data structure to quickly access the distance information.
582                         // It consists of a vector of distance maps, where each map contains
583                         // all distances of a certain sequence. Vector and maps are accessed
584                         // via the index of a sequence in the distance matrix
585                         seqVec = vector<SeqMap>(list->size()); 
586             for (int i = 0; i < matrix->seqVec.size(); i++) {
587                 for (int j = 0; j < matrix->seqVec[i].size(); j++) {
588                     if (m->control_pressed) { delete readMatrix; return 0; }
589                     //already added everyone else in row
590                     if (i < matrix->seqVec[i][j].index) {  seqVec[i][matrix->seqVec[i][j].index] = matrix->seqVec[i][j].dist;  }
591                 }
592                         }
593                         //add dummy map for unweighted calc
594                         SeqMap dummy;
595                         seqVec.push_back(dummy);
596                         
597                         delete matrix;
598                         delete readMatrix;
599                         delete nameMap;
600                         
601                         if (m->control_pressed) { return 0; }
602                 }else {
603                         //process file and set up indexes
604                         if (format == "column") { formatMatrix = new FormatColumnMatrix(distFile); }    
605                         else if (format == "phylip") { formatMatrix = new FormatPhylipMatrix(distFile); }
606                         else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0;  }
607                         
608                         formatMatrix->setCutoff(cutoff);
609             
610                         NameAssignment* nameMap = NULL;
611             if(namefile != ""){ 
612                 nameMap = new NameAssignment(namefile);
613                 nameMap->readMap();
614                 formatMatrix->read(nameMap);
615             }else if (countfile != "") {
616                 formatMatrix->read(&ct);
617             }else {
618                 formatMatrix->read(nameMap); 
619             }
620                         
621                         if (m->control_pressed) { delete formatMatrix;  return 0; }
622             
623                         list = formatMatrix->getListVector();
624                         distFile = formatMatrix->getFormattedFileName();
625                         
626                         //positions in file where the distances for each sequence begin
627                         //rowPositions[1] = position in file where distance related to sequence 1 start.
628                         rowPositions = formatMatrix->getRowPositions();
629                         rowPositions.push_back(-1); //dummy row for unweighted calc
630                         
631                         delete formatMatrix;
632                         delete nameMap;
633                         
634                         //openfile for getMap to use
635                         m->openInputFile(distFile, inRow);
636                         
637                         if (m->control_pressed) { inRow.close(); m->mothurRemove(distFile); return 0; }
638                 }
639                 
640                 
641                 //list bin 0 = first name read in distance matrix, list bin 1 = second name read in distance matrix
642                 if (list != NULL) {
643                         vector<string> names;
644                         string binnames;
645                         //map names to rows in sparsematrix
646                         for (int i = 0; i < list->size(); i++) {
647                                 names.clear();
648                                 binnames = list->get(i);
649                                 
650                                 m->splitAtComma(binnames, names);
651                                 
652                                 for (int j = 0; j < names.size(); j++) {
653                                         nameToIndex[names[j]] = i;
654                                 }
655                         }
656                 } else { m->mothurOut("error, no listvector."); m->mothurOutEndLine(); }
657
658         if (m->control_pressed) { if (large) {  inRow.close(); m->mothurRemove(distFile);  }return 0; }
659         
660         return 0;
661     }
662         catch(exception& e) {
663                 m->errorOut(e, "GetOTURepCommand", "execute");
664                 exit(1);
665         }
666 }
667 //**********************************************************************************************************************
668 void GetOTURepCommand::readNamesFile() {
669         try {
670                 ifstream in;
671                 vector<string> dupNames;
672                 m->openInputFile(namefile, in);
673                 
674                 string name, names, sequence;
675         
676                 while(!in.eof()){
677                         in >> name;                     //read from first column  A
678                         in >> names;            //read from second column  A,B,C,D
679                         
680                         dupNames.clear();
681                         
682                         //parse names into vector
683                         m->splitAtComma(names, dupNames);
684                         
685                         //store names in fasta map
686                         sequence = fasta->getSequence(name);
687                         for (int i = 0; i < dupNames.size(); i++) {
688                                 fasta->push_back(dupNames[i], sequence);
689                         }
690                 
691                         m->gobble(in);
692                 }
693                 in.close();
694
695         }
696         catch(exception& e) {
697                 m->errorOut(e, "GetOTURepCommand", "readNamesFile");
698                 exit(1);
699         }
700 }
701 //**********************************************************************************************************************
702 //read names file to find the weighted rep for each bin
703 void GetOTURepCommand::readNamesFile(bool w) {
704         try {
705                 ifstream in;
706                 vector<string> dupNames;
707                 m->openInputFile(namefile, in);
708                 
709                 string name, names, sequence;
710                 
711                 while(!in.eof()){
712                         in >> name;     m->gobble(in);          //read from first column  A
713                         in >> names;                                                    //read from second column  A,B,C,D
714                         
715                         dupNames.clear();
716                         
717                         //parse names into vector
718                         m->splitAtComma(names, dupNames);
719                         
720                         for (int i = 0; i < dupNames.size(); i++) {
721                                 nameFileMap[dupNames[i]] = name;
722                         }
723                         
724                         m->gobble(in);
725                 }
726                 in.close();
727                 
728         }
729         catch(exception& e) {
730                 m->errorOut(e, "GetOTURepCommand", "readNamesFile");
731                 exit(1);
732         }
733 }
734 //**********************************************************************************************************************
735 string GetOTURepCommand::findRep(vector<string> names, string group) {
736         try{
737                 // if only 1 sequence in bin or processing the "unique" label, then 
738                 // the first sequence of the OTU is the representative one
739                 if ((names.size() == 1)) {
740                         return names[0];
741                 }else{
742                         vector<int> seqIndex; //(names.size());
743                         map<string, string>::iterator itNameFile;
744                         map<string, int>::iterator itNameIndex;
745
746                         //fill seqIndex and initialize sums
747                         for (size_t i = 0; i < names.size(); i++) {
748                                 if (weighted) {
749                                         seqIndex.push_back(nameToIndex[names[i]]);
750                     if (countfile != "") {  //if countfile is not blank then we can assume the list file contains only uniques, otherwise we assume list file contains everyone.
751                         int numRep = 0;
752                         if (group != "") {  numRep = ct.getGroupCount(names[i], group);  }
753                         else { numRep = ct.getGroupCount(names[i]);  }
754                         for (int j = 1; j < numRep; j++) { //don't add yourself again
755                             seqIndex.push_back(nameToIndex[names[i]]);
756                         }
757                     }
758                                 }else { 
759                                         if (namefile == "") {
760                                                 itNameIndex = nameToIndex.find(names[i]);
761                                                 
762                                                 if (itNameIndex == nameToIndex.end()) { // you are not in the distance file and no namesfile, then assume you are not unique
763                                                         if (large) {  seqIndex.push_back((rowPositions.size()-1)); }
764                                                         else {  seqIndex.push_back((seqVec.size()-1)); }
765                                                 }else {
766                                                         seqIndex.push_back(itNameIndex->second);
767                                                 }
768                                                 
769                                         }else {
770                                                 itNameFile = nameFileMap.find(names[i]);
771                                                 
772                                                 if (itNameFile == nameFileMap.end()) {
773                                                         m->mothurOut("[ERROR]: " + names[i] + " is not in your namefile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; 
774                                                 }else{
775                                                         string name1 = itNameFile->first;
776                                                         string name2 = itNameFile->second;
777                                                         
778                                                         if (name1 == name2) { //then you are unique so add your real dists
779                                                                 seqIndex.push_back(nameToIndex[names[i]]);
780                                                         }else { //add dummy
781                                                                 if (large) {  seqIndex.push_back((rowPositions.size()-1)); }
782                                                                 else {  seqIndex.push_back((seqVec.size()-1)); }
783                                                         }
784                                                 }
785                                         }
786                                 }
787                         }
788             
789             vector<float> max_dist(seqIndex.size(), 0.0);
790                         vector<float> total_dist(seqIndex.size(), 0.0);
791                         
792                         // loop through all entries in seqIndex
793                         SeqMap::iterator it;
794                         SeqMap currMap;
795                         for (size_t i=0; i < seqIndex.size(); i++) {
796                                 if (m->control_pressed) {  return  "control"; }
797                         
798                                 if (!large) {   currMap = seqVec[seqIndex[i]];  }
799                                 else            {       currMap = getMap(seqIndex[i]);  }
800                                 
801                                 for (size_t j=0; j < seqIndex.size(); j++) {
802                                         it = currMap.find(seqIndex[j]);         
803                                         if (it != currMap.end()) {
804                                                 max_dist[i] = max(max_dist[i], it->second);
805                                                 max_dist[j] = max(max_dist[j], it->second);
806                                                 total_dist[i] += it->second;
807                                                 total_dist[j] += it->second;
808                                         }else{ //if you can't find the distance make it the cutoff
809                                                 max_dist[i] = max(max_dist[i], cutoff);
810                                                 max_dist[j] = max(max_dist[j], cutoff);
811                                                 total_dist[i] += cutoff;
812                                                 total_dist[j] += cutoff;
813                                         }
814                                 }
815                         }
816                         
817                         // sequence with the smallest maximum distance is the representative
818                         //if tie occurs pick sequence with smallest average distance
819                         float min = 10000;
820                         int minIndex;
821                         for (size_t i=0; i < max_dist.size(); i++) {
822                                 if (m->control_pressed) {  return  "control"; }
823                                 if (max_dist[i] < min) {
824                                         min = max_dist[i];
825                                         minIndex = i;
826                                 }else if (max_dist[i] == min) {
827                                         float currentAverage = total_dist[minIndex] / (float) total_dist.size();
828                                         float newAverage = total_dist[i] / (float) total_dist.size();
829                                         
830                                         if (newAverage < currentAverage) {
831                                                 min = max_dist[i];
832                                                 minIndex = i;
833                                         }
834                                 }
835                         }
836                         
837                         return(names[minIndex]);
838                 }
839         }
840         catch(exception& e) {
841                 m->errorOut(e, "GetOTURepCommand", "FindRep");
842                 exit(1);
843         }
844 }
845
846 //**********************************************************************************************************************
847 int GetOTURepCommand::process(ListVector* processList) {
848         try{
849                 string name, sequence;
850                 string nameRep;
851
852                 //create output file
853                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
854                                 
855                 ofstream newNamesOutput;
856                 string outputNamesFile;
857                 map<string, ofstream*> filehandles;
858                 
859         map<string, string> variables; 
860         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
861         
862                 if (Groups.size() == 0) { //you don't want to use groups
863             variables["[tag]"] = processList->getLabel();
864             if (countfile == "") { 
865                 outputNamesFile = getOutputFileName("name", variables);
866                 outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile); 
867             }else {
868                 outputNamesFile = getOutputFileName("count", variables);
869                 outputNames.push_back(outputNamesFile); outputTypes["count"].push_back(outputNamesFile); 
870             }
871                         outputNameFiles[outputNamesFile] = processList->getLabel();
872             m->openOutputFile(outputNamesFile, newNamesOutput);
873             newNamesOutput << "noGroup" << endl;
874                 }else{ //you want to use groups
875                         ofstream* temp;
876                         for (int i=0; i<Groups.size(); i++) {
877                                 temp = new ofstream;
878                 variables["[tag]"] = processList->getLabel();
879                 variables["[group]"] = Groups[i];
880                                 filehandles[Groups[i]] = temp;
881                                 outputNamesFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + Groups[i] + ".";
882                 if (countfile == "") { 
883                     outputNamesFile = getOutputFileName("name", variables);
884                     outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile); 
885                 }else {
886                     outputNamesFile = getOutputFileName("count", variables);
887                     outputNames.push_back(outputNamesFile); outputTypes["count"].push_back(outputNamesFile); 
888                 }
889                                 
890                                 m->openOutputFile(outputNamesFile, *(temp));
891                 *(temp) << Groups[i] << endl;
892                                 outputNameFiles[outputNamesFile] = processList->getLabel() + "." + Groups[i];
893                         }
894                 }
895                 
896                 //for each bin in the list vector
897                 for (int i = 0; i < processList->size(); i++) {
898                         if (m->control_pressed) { 
899                                 out.close();  
900                                 if (Groups.size() == 0) { //you don't want to use groups
901                                         newNamesOutput.close();
902                                 }else{
903                                         for (int j=0; j<Groups.size(); j++) {
904                                                 (*(filehandles[Groups[j]])).close();
905                                                 delete filehandles[Groups[j]];
906                                         }
907                                 }
908                                 return 0; 
909                         }
910                         
911                         string temp = processList->get(i);
912                         vector<string> namesInBin;
913                         m->splitAtComma(temp, namesInBin);
914                         
915                         if (Groups.size() == 0) {
916                                 nameRep = findRep(namesInBin, "");
917                                 newNamesOutput << i << '\t' << nameRep << '\t' << processList->get(i) << endl;
918                         }else{
919                                 map<string, vector<string> > NamesInGroup;
920                                 for (int j=0; j<Groups.size(); j++) { //initialize groups
921                                         NamesInGroup[Groups[j]].resize(0);
922                                 }
923                                 
924                                 for (int j=0; j<namesInBin.size(); j++) {
925                     if (groupfile != "") {
926                         string thisgroup = groupMap->getGroup(namesInBin[j]);
927                         if (thisgroup == "not found") { m->mothurOut(namesInBin[j] + " is not in your groupfile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
928                         
929                         //add this name to correct group
930                         if (m->inUsersGroups(thisgroup, Groups)) { NamesInGroup[thisgroup].push_back(namesInBin[j]);  }
931                     }else {
932                         vector<string> thisSeqsGroups = ct.getGroups(namesInBin[j]);
933                         for (int k = 0; k < thisSeqsGroups.size(); k++) {
934                             if (m->inUsersGroups(thisSeqsGroups[k], Groups)) { NamesInGroup[thisSeqsGroups[k]].push_back(namesInBin[j]);  }
935                         }
936                     }
937                                 }
938                                 
939                                 //get rep for each group in otu
940                                 for (int j=0; j<Groups.size(); j++) {
941                                         if (NamesInGroup[Groups[j]].size() != 0) { //are there members from this group in this otu?
942                                                 //get rep for each group
943                                                 nameRep = findRep(NamesInGroup[Groups[j]], Groups[j]);
944                                                 
945                                                 //output group rep and other members of this group
946                                                 (*(filehandles[Groups[j]])) << i << '\t' << nameRep << '\t';
947                                                 
948                                                 for (int k=0; k<NamesInGroup[Groups[j]].size()-1; k++) {//output list of names in this otu from this group
949                                                         (*(filehandles[Groups[j]])) << NamesInGroup[Groups[j]][k] << ",";
950                                                 }
951                                                 //output last name
952                                                 (*(filehandles[Groups[j]])) << NamesInGroup[Groups[j]][NamesInGroup[Groups[j]].size()-1] << endl;
953                                         }
954                                 }
955                         }
956                 }
957                 
958                 if (Groups.size() == 0) { //you don't want to use groups
959                         newNamesOutput.close();
960                 }else{
961                         for (int i=0; i<Groups.size(); i++) {
962                                 (*(filehandles[Groups[i]])).close();
963                                 delete filehandles[Groups[i]];
964                         }
965                 }
966                 
967                 return 0;
968
969         }
970         catch(exception& e) {
971                 m->errorOut(e, "GetOTURepCommand", "process");
972                 exit(1);
973         }
974 }
975 //**********************************************************************************************************************
976 int GetOTURepCommand::processFastaNames(string filename, string label) {
977         try{
978
979                 //create output file
980                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
981         map<string, string> variables; 
982         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
983         variables["[tag]"] = label;
984                 string outputFileName = getOutputFileName("fasta",variables);
985                 m->openOutputFile(outputFileName, out);
986                 vector<repStruct> reps;
987                 outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
988                 
989                 ofstream out2;
990                 string tempNameFile = filename + ".temp";
991                 m->openOutputFile(tempNameFile, out2);
992             
993                 ifstream in;
994                 m->openInputFile(filename, in);
995                 
996                 int i = 0;
997         string tempGroup = "";
998         in >> tempGroup; m->gobble(in);
999         
1000         CountTable thisCt;
1001         if (countfile != "") {
1002             thisCt.readTable(countfile);
1003             if (tempGroup != "noGroup") { out2 << "Representative_Sequence\ttotal\t" << tempGroup << endl; }
1004         }
1005     
1006         int thistotal = 0;
1007                 while (!in.eof()) {
1008                         string rep, binnames;
1009                         in >> i >> rep >> binnames; m->gobble(in);
1010                         
1011                         vector<string> names;
1012                         m->splitAtComma(binnames, names);
1013                         int binsize = names.size();
1014             
1015             if (countfile == "") { out2 << rep << '\t' << binnames << endl; }
1016             else {
1017                 if (tempGroup == "noGroup") {
1018                     for (int j = 0; j < names.size(); j++) {
1019                         if (names[j] != rep) { thisCt.mergeCounts(rep, names[j]); }
1020                     }
1021                     binsize = thisCt.getNumSeqs(rep);
1022                 }else {
1023                     int total = 0; 
1024                     for (int j = 0; j < names.size(); j++) {  total += thisCt.getGroupCount(names[j], tempGroup);  }
1025                     out2 << rep << '\t' << total << '\t' << total << endl;
1026                     binsize = total;
1027                 }
1028             }
1029                         thistotal += binsize;
1030                         //if you have a groupfile
1031                         string group = "";
1032             map<string, string> groups;
1033             map<string, string>::iterator groupIt;
1034                         if (groupfile != "") {
1035                                 //find the groups that are in this bin
1036                                 for (int i = 0; i < names.size(); i++) {
1037                                         string groupName = groupMap->getGroup(names[i]);
1038                                         if (groupName == "not found") {  
1039                                                 m->mothurOut(names[i] + " is missing from your group file. Please correct. "); m->mothurOutEndLine();
1040                                                 groupError = true;
1041                                         } else {
1042                                                 groups[groupName] = groupName;
1043                                         }
1044                                 }
1045                                 
1046                                 //turn the groups into a string
1047                                 for (groupIt = groups.begin(); groupIt != groups.end(); groupIt++) {
1048                                         group += groupIt->first + "-";
1049                                 }
1050                                 //rip off last dash
1051                                 group = group.substr(0, group.length()-1);
1052                         }else if (hasGroups) {
1053                 map<string, string> groups;
1054                 for (int i = 0; i < names.size(); i++) {
1055                     vector<string> thisSeqsGroups = ct.getGroups(names[i]);
1056                     for (int j = 0; j < thisSeqsGroups.size(); j++) { groups[thisSeqsGroups[j]] = thisSeqsGroups[j]; }
1057                 }
1058                 //turn the groups into a string
1059                                 for (groupIt = groups.begin(); groupIt != groups.end(); groupIt++) {
1060                                         group += groupIt->first + "-";
1061                                 }
1062                                 //rip off last dash
1063                                 group = group.substr(0, group.length()-1);
1064                 //cout << group << endl;
1065             }
1066             else{ group = ""; }
1067
1068                         
1069                         //print out name and sequence for that bin
1070                         string sequence = fasta->getSequence(rep);
1071
1072                         if (sequence != "not found") {
1073                                 if (sorted == "") { //print them out
1074                                         rep = rep + "\t" + toString(i+1);
1075                                         rep = rep + "|" + toString(binsize);
1076                                         if (group != "") {
1077                                                 rep = rep + "|" + group;
1078                                         }
1079                                         out << ">" << rep << endl;
1080                                         out << sequence << endl;
1081                                 }else { //save them
1082                                         repStruct newRep(rep, i+1, binsize, group);
1083                                         reps.push_back(newRep);
1084                                 }
1085                         }else { 
1086                                 m->mothurOut(rep + " is missing from your fasta or name file, ignoring. Please correct."); m->mothurOutEndLine(); 
1087                         }
1088                 }
1089                 
1090                         
1091                 if (sorted != "") { //then sort them and print them
1092                         if (sorted == "name")           {  sort(reps.begin(), reps.end(), compareName);         }
1093                         else if (sorted == "bin")       {  sort(reps.begin(), reps.end(), compareBin);          }
1094                         else if (sorted == "size")      {  sort(reps.begin(), reps.end(), compareSize);         }
1095                         else if (sorted == "group")     {  sort(reps.begin(), reps.end(), compareGroup);        }
1096                         
1097                         //print them
1098                         for (int i = 0; i < reps.size(); i++) {
1099                                 string sequence = fasta->getSequence(reps[i].name);
1100                                 string outputName = reps[i].name + "\t" + toString(reps[i].bin);
1101                                 outputName = outputName + "|" + toString(reps[i].size);
1102                                 if (reps[i].group != "") {
1103                                         outputName = outputName + "|" + reps[i].group;
1104                                 }
1105                                 out << ">" << outputName << endl;
1106                                 out << sequence << endl;
1107                         }
1108                 }
1109                 
1110                 in.close();
1111                 out.close();
1112                 out2.close();
1113                 
1114                 m->mothurRemove(filename);
1115                 rename(tempNameFile.c_str(), filename.c_str());
1116         
1117         if ((countfile != "") && (tempGroup == "noGroup")) { thisCt.printTable(filename); } 
1118                 
1119                 return 0;
1120
1121         }
1122         catch(exception& e) {
1123                 m->errorOut(e, "GetOTURepCommand", "processFastaNames");
1124                 exit(1);
1125         }
1126 }
1127 //**********************************************************************************************************************
1128 int GetOTURepCommand::processNames(string filename, string label) {
1129         try{
1130                 
1131                 //create output file
1132                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
1133                 
1134                 ofstream out2;
1135                 string tempNameFile = filename + ".temp";
1136                 m->openOutputFile(tempNameFile, out2);
1137                 
1138                 ifstream in;
1139                 m->openInputFile(filename, in);
1140                 
1141                 int i = 0;
1142                 string rep, binnames;
1143         
1144         string tempGroup = "";
1145         in >> tempGroup; m->gobble(in);
1146         
1147         CountTable thisCt;
1148         if (countfile != "") {
1149             thisCt.readTable(countfile);
1150             if (tempGroup != "noGroup") { out2 << "Representative_Sequence\ttotal\t" << tempGroup << endl; }
1151         }
1152         
1153                 while (!in.eof()) {
1154                         if (m->control_pressed) { break; }
1155                         in >> i >> rep >> binnames; m->gobble(in);
1156             
1157                         if (countfile == "") { out2 << rep << '\t' << binnames << endl; }
1158             else {
1159                 vector<string> names;
1160                 m->splitAtComma(binnames, names);
1161                 if (tempGroup == "noGroup") {
1162                     for (int j = 0; j < names.size(); j++) {
1163                         if (names[j] != rep) { thisCt.mergeCounts(rep, names[j]); }
1164                     }
1165                 }else {
1166                     int total = 0; 
1167                     for (int j = 0; j < names.size(); j++) {  total += thisCt.getGroupCount(names[j], tempGroup);  }
1168                     out2 << rep << '\t' << total << '\t' << total << endl;
1169                 }
1170             }
1171
1172                 }
1173                 in.close();
1174                 out2.close();
1175                 
1176                 m->mothurRemove(filename);
1177                 rename(tempNameFile.c_str(), filename.c_str());
1178                 
1179         if ((countfile != "") && (tempGroup == "noGroup")) { thisCt.printTable(filename); } 
1180         
1181                 return 0;
1182         }
1183         catch(exception& e) {
1184                 m->errorOut(e, "GetOTURepCommand", "processNames");
1185                 exit(1);
1186         }
1187 }
1188 //**********************************************************************************************************************
1189 SeqMap GetOTURepCommand::getMap(int row) {
1190         try {
1191                 SeqMap rowMap;
1192                 
1193                 //make sure this row exists in the file, it may not if the seq did not have any distances below the cutoff
1194                 if (rowPositions[row] != -1){
1195                         //go to row in file
1196                         inRow.seekg(rowPositions[row]);
1197                         
1198                         int rowNum, numDists, colNum;
1199                         float dist;
1200                         
1201                         inRow >> rowNum >> numDists;
1202                         
1203                         for(int i = 0; i < numDists; i++) {
1204                                 inRow >> colNum >> dist;
1205                                 rowMap[colNum] = dist;
1206                                 
1207                         }
1208                 }
1209                 
1210                 return rowMap;
1211         }
1212         catch(exception& e) {
1213                 m->errorOut(e, "GetOTURepCommand", "getMap");
1214                 exit(1);
1215         }
1216 }
1217 //**********************************************************************************************************************
1218