]> git.donarmstrong.com Git - mothur.git/blob - getoturepcommand.cpp
added count file to get.oturep, pre.cluster, screen.seqs, tree.shared. made remove...
[mothur.git] / getoturepcommand.cpp
1 /*
2  *  getoturepcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/6/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "getoturepcommand.h"
11 #include "readphylip.h"
12 #include "readcolumn.h"
13 #include "formatphylip.h"
14 #include "formatcolumn.h"
15 #include "sharedutilities.h"
16
17
18 //********************************************************************************************************************
19 //sorts lowest to highest
20 inline bool compareName(repStruct left, repStruct right){
21         return (left.name < right.name);        
22 }
23 //********************************************************************************************************************
24 //sorts lowest to highest
25 inline bool compareBin(repStruct left, repStruct right){
26         return (left.bin < right.bin);  
27 }
28 //********************************************************************************************************************
29 //sorts lowest to highest
30 inline bool compareSize(repStruct left, repStruct right){
31         return (left.size < right.size);        
32 }
33 //********************************************************************************************************************
34 //sorts lowest to highest
35 inline bool compareGroup(repStruct left, repStruct right){
36         return (left.group < right.group);      
37 }
38
39 //**********************************************************************************************************************
40 vector<string> GetOTURepCommand::setParameters(){       
41         try {
42                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
43                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pfasta);
44                 CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pphylip);
45         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName",false,false); parameters.push_back(pname);
46         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "ColumnName",false,false); parameters.push_back(pcount);
47                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
48                 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "ColumnName",false,false); parameters.push_back(pcolumn);
49                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
50                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
51                 CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff);
52                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
53                 CommandParameter pweighted("weighted", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pweighted);
54                 CommandParameter psorted("sorted", "Multiple", "none-name-bin-size-group", "none", "", "", "",false,false); parameters.push_back(psorted);
55                 CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
56                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
57                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
58                 
59                 vector<string> myArray;
60                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
61                 return myArray;
62         }
63         catch(exception& e) {
64                 m->errorOut(e, "GetOTURepCommand", "setParameters");
65                 exit(1);
66         }
67 }
68 //**********************************************************************************************************************
69 string GetOTURepCommand::getHelpString(){       
70         try {
71                 string helpString = "";
72                 helpString += "The get.oturep command parameters are phylip, column, list, fasta, name, group, count, large, weighted, cutoff, precision, groups, sorted and label.  The list parameter is required, as well as phylip or column and name, unless you have valid current files.\n";
73                 helpString += "The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n";
74                 helpString += "The phylip or column parameter is required, but only one may be used.  If you use a column file the name or count filename is required. \n";
75                 helpString += "If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n";
76                 helpString += "The get.oturep command should be in the following format: get.oturep(phylip=yourDistanceMatrix, fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n";
77                 helpString += "Example get.oturep(phylip=amazon.dist, fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups).\n";
78                 helpString += "The default value for label is all labels in your inputfile.\n";
79                 helpString += "The sorted parameter allows you to indicate you want the output sorted. You can sort by sequence name, bin number, bin size or group. The default is no sorting, but your options are name, number, size, or group.\n";
80                 helpString += "The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n";
81                 helpString += "The weighted parameter allows you to indicate that want to find the weighted representative. You must provide a namesfile to set weighted to true.  The default value is false.\n";
82                 helpString += "The representative is found by selecting the sequence that has the smallest total distance to all other sequences in the OTU. If a tie occurs the smallest average distance is used.\n";
83                 helpString += "For weighted = false, mothur assumes the distance file contains only unique sequences, the list file may contain all sequences, but only the uniques are considered to become the representative. If your distance file contains all the sequences it would become weighted=true.\n";
84                 helpString += "For weighted = true, mothur assumes the distance file contains only unique sequences, the list file must contain all sequences, all sequences are considered to become the representative, but unique name will be used in the output for consistency.\n";
85                 helpString += "If your distance file contains all the sequence and you do not provide a name file, the weighted representative will be given, unless your listfile is unique. If you provide a namefile, then you can select weighted or unweighted.\n";
86                 helpString += "The group parameter allows you provide a group file.\n";
87                 helpString += "The groups parameter allows you to indicate that you want representative sequences for each group specified for each OTU, group name should be separated by dashes. ex. groups=A-B-C.\n";
88                 helpString += "The get.oturep command outputs a .fastarep and .rep.names file for each distance you specify, selecting one OTU representative for each bin.\n";
89                 helpString += "If you provide a groupfile, then it also appends the names of the groups present in that bin.\n";
90                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
91                 return helpString;
92         }
93         catch(exception& e) {
94                 m->errorOut(e, "GetOTURepCommand", "getHelpString");
95                 exit(1);
96         }
97 }
98 //**********************************************************************************************************************
99 string GetOTURepCommand::getOutputFileNameTag(string type, string inputName=""){        
100         try {
101         string outputFileName = "";
102                 map<string, vector<string> >::iterator it;
103         
104         //is this a type this command creates
105         it = outputTypes.find(type);
106         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
107         else {
108             if (type == "fasta")            {   outputFileName =  "rep.fasta";   }
109             else if (type == "name")        {   outputFileName =  "rep.names";   }
110             else if (type == "count")        {   outputFileName =  "rep.count_table";   }
111             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
112         }
113         return outputFileName;
114         }
115         catch(exception& e) {
116                 m->errorOut(e, "GetOTURepCommand", "getOutputFileNameTag");
117                 exit(1);
118         }
119 }
120 //**********************************************************************************************************************
121 GetOTURepCommand::GetOTURepCommand(){   
122         try {
123                 abort = true; calledHelp = true; 
124                 setParameters();
125                 vector<string> tempOutNames;
126                 outputTypes["fasta"] = tempOutNames;
127                 outputTypes["name"] = tempOutNames;
128         outputTypes["count"] = tempOutNames;
129         }
130         catch(exception& e) {
131                 m->errorOut(e, "GetOTURepCommand", "GetOTURepCommand");
132                 exit(1);
133         }
134 }
135 //**********************************************************************************************************************
136 GetOTURepCommand::GetOTURepCommand(string option)  {
137         try{
138                 abort = false; calledHelp = false;   
139                 allLines = 1;
140                                 
141                 //allow user to run help
142                 if (option == "help") { 
143                         help(); abort = true; calledHelp = true;
144                 }else if(option == "citation") { citation(); abort = true; calledHelp = true;
145                 } else {
146                         vector<string> myArray = setParameters();
147                         
148                         OptionParser parser(option);
149                         map<string, string> parameters = parser.getParameters();
150                         
151                         ValidParameters validParameter;
152                         map<string, string>::iterator it;
153                 
154                         //check to make sure all parameters are valid for command
155                         for (it = parameters.begin(); it != parameters.end(); it++) { 
156                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
157                         }
158                         
159                         //initialize outputTypes
160                         vector<string> tempOutNames;
161                         outputTypes["fasta"] = tempOutNames;
162                         outputTypes["name"] = tempOutNames;
163             outputTypes["count"] = tempOutNames;
164                         
165                         //if the user changes the input directory command factory will send this info to us in the output parameter 
166                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
167                         if (inputDir == "not found"){   inputDir = "";          }
168                         else {
169                                 string path;
170                                 it = parameters.find("list");
171                                 //user has given a template file
172                                 if(it != parameters.end()){ 
173                                         path = m->hasPath(it->second);
174                                         //if the user has not given a path then, add inputdir. else leave path alone.
175                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
176                                 }
177                                 
178                                 it = parameters.find("fasta");
179                                 //user has given a template file
180                                 if(it != parameters.end()){ 
181                                         path = m->hasPath(it->second);
182                                         //if the user has not given a path then, add inputdir. else leave path alone.
183                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
184                                 }
185                                 
186                                 it = parameters.find("phylip");
187                                 //user has given a template file
188                                 if(it != parameters.end()){ 
189                                         path = m->hasPath(it->second);
190                                         //if the user has not given a path then, add inputdir. else leave path alone.
191                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
192                                 }
193                                 
194                                 it = parameters.find("column");
195                                 //user has given a template file
196                                 if(it != parameters.end()){ 
197                                         path = m->hasPath(it->second);
198                                         //if the user has not given a path then, add inputdir. else leave path alone.
199                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
200                                 }
201                                 
202                                 it = parameters.find("name");
203                                 //user has given a template file
204                                 if(it != parameters.end()){ 
205                                         path = m->hasPath(it->second);
206                                         //if the user has not given a path then, add inputdir. else leave path alone.
207                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
208                                 }
209                                 
210                                 it = parameters.find("group");
211                                 //user has given a template file
212                                 if(it != parameters.end()){ 
213                                         path = m->hasPath(it->second);
214                                         //if the user has not given a path then, add inputdir. else leave path alone.
215                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
216                                 }
217                 
218                 it = parameters.find("count");
219                                 //user has given a template file
220                                 if(it != parameters.end()){ 
221                                         path = m->hasPath(it->second);
222                                         //if the user has not given a path then, add inputdir. else leave path alone.
223                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
224                                 }
225                         }
226
227                         
228                         //if the user changes the output directory command factory will send this info to us in the output parameter 
229                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
230                         
231                         //check for required parameters
232                         fastafile = validParameter.validFile(parameters, "fasta", true);
233                         if (fastafile == "not found") { fastafile = ""; }
234                         else if (fastafile == "not open") { abort = true; }     
235                         else { m->setFastaFile(fastafile); }
236                 
237                         listfile = validParameter.validFile(parameters, "list", true);
238                         if (listfile == "not found") {                  
239                                 listfile = m->getListFile(); 
240                                 if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
241                                 else {  m->mothurOut("You have no current list file and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
242                         }
243                         else if (listfile == "not open") { abort = true; }      
244                         else { m->setListFile(listfile); }
245                         
246                         phylipfile = validParameter.validFile(parameters, "phylip", true);
247                         if (phylipfile == "not found") { phylipfile = "";  }
248                         else if (phylipfile == "not open") { abort = true; }    
249                         else { distFile = phylipfile; format = "phylip"; m->setPhylipFile(phylipfile);   }
250                         
251                         columnfile = validParameter.validFile(parameters, "column", true);
252                         if (columnfile == "not found") { columnfile = ""; }
253                         else if (columnfile == "not open") { abort = true; }    
254                         else { distFile = columnfile; format = "column";  m->setColumnFile(columnfile); }
255                         
256                         namefile = validParameter.validFile(parameters, "name", true);
257                         if (namefile == "not open") { abort = true; }   
258                         else if (namefile == "not found") { namefile = ""; }
259                         else { m->setNameFile(namefile); }
260             
261             hasGroups = false;
262             countfile = validParameter.validFile(parameters, "count", true);
263                         if (countfile == "not found") { countfile =  "";   }
264                         else if (countfile == "not open") { abort = true; countfile =  ""; }    
265                         else {   
266                 m->setCountTableFile(countfile); 
267                 ct.readTable(countfile);
268                 if (ct.hasGroupInfo()) { hasGroups = true; }
269             }
270             
271             if ((namefile != "") && (countfile != "")) {
272                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
273             }
274             
275             if ((groupfile != "") && (countfile != "")) {
276                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
277             }
278                         
279                         if ((phylipfile == "") && (columnfile == "")) { //is there are current file available for either of these?
280                                 //give priority to column, then phylip
281                                 columnfile = m->getColumnFile(); 
282                                 if (columnfile != "") {  distFile = columnfile; format = "column"; m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
283                                 else { 
284                                         phylipfile = m->getPhylipFile(); 
285                                         if (phylipfile != "") {  distFile = phylipfile; format = "phylip"; m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
286                                         else { 
287                                                 m->mothurOut("No valid current files. You must provide a phylip or column file before you can use the get.oturep command."); m->mothurOutEndLine(); 
288                                                 abort = true;
289                                         }
290                                 }
291                         }else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a get.oturep command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
292                 
293                         if (columnfile != "") {  
294                                 if ((namefile == "") && (countfile == "")) { 
295                                         namefile = m->getNameFile(); 
296                                         if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
297                                         else { 
298                                                 countfile = m->getCountTableFile();
299                         if (countfile != "") {  m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
300                         else { 
301                             m->mothurOut("You need to provide a namefile or countfile if you are going to use the column format."); m->mothurOutEndLine(); 
302                             abort = true; 
303                         }       
304                                         }       
305                                 }
306                         }
307
308                         //check for optional parameter and set defaults
309                         // ...at some point should added some additional type checking...
310                         label = validParameter.validFile(parameters, "label", false);                   
311                         if (label == "not found") { label = ""; allLines = 1;  }
312                         else { 
313                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
314                                 else { allLines = 1;  }
315                         }
316                         
317                         groupfile = validParameter.validFile(parameters, "group", true);
318                         if (groupfile == "not open") { groupfile = ""; abort = true; }
319                         else if (groupfile == "not found") { groupfile = ""; }
320                         else { m->setGroupFile(groupfile); }
321                         
322                         sorted = validParameter.validFile(parameters, "sorted", false);         if (sorted == "not found"){     sorted = "";    }
323                         if (sorted == "none") { sorted=""; }
324                         if ((sorted != "") && (sorted != "name") && (sorted != "bin") && (sorted != "size") && (sorted != "group")) {
325                                 m->mothurOut(sorted + " is not a valid option for the sorted parameter. The only options are: name, bin, size and group. I will not sort."); m->mothurOutEndLine();
326                                 sorted = "";
327                         }
328                         
329                         if ((sorted == "group") && ((groupfile == "")&& !hasGroups)) {
330                                 m->mothurOut("You must provide a groupfile or have a count file with group info to sort by group. I will not sort."); m->mothurOutEndLine();
331                                 sorted = "";
332                         }
333                         
334                         groups = validParameter.validFile(parameters, "groups", false);                 
335                         if (groups == "not found") { groups = ""; }
336                         else { 
337                                 if ((groupfile == "") && (!hasGroups)) {
338                                         m->mothurOut("You must provide a groupfile to use groups."); m->mothurOutEndLine();
339                                         abort = true;
340                                 }else { 
341                                         m->splitAtDash(groups, Groups);
342                                 }
343                         }
344                         m->setGroups(Groups);
345                         
346                         string temp = validParameter.validFile(parameters, "large", false);             if (temp == "not found") {      temp = "F";     }
347                         large = m->isTrue(temp);
348                         
349                         temp = validParameter.validFile(parameters, "weighted", false);         if (temp == "not found") {       temp = "f";    }
350                         weighted = m->isTrue(temp);
351                         
352                         if ((weighted) && (namefile == "")) { m->mothurOut("You cannot set weighted to true unless you provide a namesfile."); m->mothurOutEndLine(); abort = true; }
353                         
354                         temp = validParameter.validFile(parameters, "precision", false);                        if (temp == "not found") { temp = "100"; }
355                         m->mothurConvert(temp, precision); 
356                         
357                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "10.0"; }
358                         m->mothurConvert(temp, cutoff); 
359                         cutoff += (5 / (precision * 10.0));
360                 }
361         }
362         catch(exception& e) {
363                 m->errorOut(e, "GetOTURepCommand", "GetOTURepCommand");
364                 exit(1);
365         }
366 }
367
368 //**********************************************************************************************************************
369
370 int GetOTURepCommand::execute(){
371         try {
372         
373                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
374                 int error;
375                 list = NULL;
376                 
377                 readDist();             
378                                 
379                 if (m->control_pressed) { if (large) {  inRow.close(); m->mothurRemove(distFile);  } return 0; }
380                 
381                 if (groupfile != "") {
382                         //read in group map info.
383                         groupMap = new GroupMap(groupfile);
384                         int error = groupMap->readMap();
385                         if (error == 1) { delete groupMap; m->mothurOut("Error reading your groupfile. Proceeding without groupfile."); m->mothurOutEndLine(); groupfile = "";  }
386                         
387                         if (Groups.size() != 0) {
388                                 SharedUtil util;
389                                 vector<string> gNamesOfGroups = groupMap->getNamesOfGroups();
390                                 util.setGroups(Groups, gNamesOfGroups, "getoturep");
391                                 groupMap->setNamesOfGroups(gNamesOfGroups);
392                         }
393                 }else if (hasGroups) {
394             if (Groups.size() != 0) {
395                                 SharedUtil util;
396                                 vector<string> gNamesOfGroups = ct.getNamesOfGroups();
397                                 util.setGroups(Groups, gNamesOfGroups, "getoturep");
398                         }
399         }
400                 
401                 //done with listvector from matrix
402                 if (list != NULL) { delete list; }
403                 
404                 input = new InputData(listfile, "list");
405                 list = input->getListVector();
406                 string lastLabel = list->getLabel();
407
408                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
409                 set<string> processedLabels;
410                 set<string> userLabels = labels;
411                 
412                 if (m->control_pressed) { 
413                         if (large) {  inRow.close(); m->mothurRemove(distFile);  }
414                         delete input; delete list; return 0; 
415                 }
416                 
417                 if ((!weighted) && (namefile != "")) { readNamesFile(weighted); }
418                 
419                 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
420                         
421                         if (allLines == 1 || labels.count(list->getLabel()) == 1){
422                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
423                                         error = process(list);
424                                         if (error == 1) { return 0; } //there is an error in hte input files, abort command
425                                         
426                                         if (m->control_pressed) { 
427                                                 if (large) {  inRow.close(); m->mothurRemove(distFile);  }
428                                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } outputTypes.clear();
429                                                 delete input; delete list; return 0; 
430                                         }
431                                         
432                                         processedLabels.insert(list->getLabel());
433                                         userLabels.erase(list->getLabel());
434                         }
435                         
436                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
437                                         string saveLabel = list->getLabel();
438                                         
439                                         delete list;
440                                         list = input->getListVector(lastLabel);
441                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
442                                         error = process(list);
443                                         if (error == 1) { return 0; } //there is an error in hte input files, abort command
444                                         
445                                         if (m->control_pressed) { 
446                                                 if (large) {  inRow.close(); m->mothurRemove(distFile);  }
447                                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } outputTypes.clear();
448                                                 delete input; delete list; return 0; 
449                                         }
450                                         
451                                         processedLabels.insert(list->getLabel());
452                                         userLabels.erase(list->getLabel());
453                                         
454                                         //restore real lastlabel to save below
455                                         list->setLabel(saveLabel);
456                         }
457                         
458                         lastLabel = list->getLabel();
459         
460                         delete list;
461                         list = input->getListVector();
462                 }
463                 
464                 //output error messages about any remaining user labels
465                 bool needToRun = false;
466                 for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {  
467                         m->mothurOut("Your file does not include the label " + (*it)); 
468                         if (processedLabels.count(lastLabel) != 1) {
469                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
470                                 needToRun = true;
471                         }else {
472                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
473                         }
474                 }
475                 
476                 //run last label if you need to
477                 if (needToRun == true)  {
478                         if (list != NULL) {     delete list;    }
479                         list = input->getListVector(lastLabel);
480                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
481                         error = process(list);
482                         delete list;
483                         if (error == 1) { return 0; } //there is an error in hte input files, abort command
484                         
485                         if (m->control_pressed) { 
486                                         if (large) {  inRow.close(); m->mothurRemove(distFile);  }
487                                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } outputTypes.clear();
488                                         delete input; delete list; return 0; 
489                         }
490                 }
491                 
492                 //close and remove formatted matrix file
493                 if (large) {
494                         inRow.close();
495                         m->mothurRemove(distFile);
496                 }
497                 
498                 delete input;  
499                 
500                 if (!weighted) { nameFileMap.clear(); }
501                 
502                                 
503                 if (fastafile != "") {
504                         //read fastafile
505                         fasta = new FastaMap();
506                         fasta->readFastaFile(fastafile);
507                         
508                         //if user gave a namesfile then use it
509                         if (namefile != "") {   readNamesFile();        }
510                         
511                         //output create and output the .rep.fasta files
512                         map<string, string>::iterator itNameFile;
513                         for (itNameFile = outputNameFiles.begin(); itNameFile != outputNameFiles.end(); itNameFile++) {
514                                 processFastaNames(itNameFile->first, itNameFile->second);
515                         }
516                 }else {
517                         //output create and output the .rep.fasta files
518                         map<string, string>::iterator itNameFile;
519                         for (itNameFile = outputNameFiles.begin(); itNameFile != outputNameFiles.end(); itNameFile++) {
520                                 processNames(itNameFile->first, itNameFile->second);
521                         }
522                 }
523                 
524                                 
525                 if (groupfile != "") { delete groupMap; }
526                 
527                 if (m->control_pressed) {  return 0; }
528                 
529                 //set fasta file as new current fastafile - use first one??
530                 string current = "";
531                 itTypes = outputTypes.find("fasta");
532                 if (itTypes != outputTypes.end()) {
533                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
534                 }
535                 
536                 itTypes = outputTypes.find("name");
537                 if (itTypes != outputTypes.end()) {
538                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
539                 }
540         
541         itTypes = outputTypes.find("count");
542                 if (itTypes != outputTypes.end()) {
543                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
544                 }
545                 
546                 m->mothurOutEndLine();
547                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
548                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
549                 m->mothurOutEndLine();
550                 
551                 return 0;
552         }
553         catch(exception& e) {
554                 m->errorOut(e, "GetOTURepCommand", "execute");
555                 exit(1);
556         }
557 }
558 //**********************************************************************************************************************
559 int GetOTURepCommand::readDist() {
560         try {
561         
562         if (!large) {
563                         //read distance files
564                         if (format == "column") { readMatrix = new ReadColumnMatrix(distFile); }        
565                         else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(distFile); }
566                         else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0;  }
567                         
568                         readMatrix->setCutoff(cutoff);
569             
570                         NameAssignment* nameMap = NULL;
571             if(namefile != ""){ 
572                 nameMap = new NameAssignment(namefile);
573                 nameMap->readMap();
574                 readMatrix->read(nameMap);
575             }else if (countfile != "") {
576                 readMatrix->read(&ct);
577             }
578                         
579                         if (m->control_pressed) { delete readMatrix; return 0; }
580             
581                         list = readMatrix->getListVector();
582                         SparseDistanceMatrix* matrix = readMatrix->getDMatrix();
583                         
584                         // Create a data structure to quickly access the distance information.
585                         // It consists of a vector of distance maps, where each map contains
586                         // all distances of a certain sequence. Vector and maps are accessed
587                         // via the index of a sequence in the distance matrix
588                         seqVec = vector<SeqMap>(list->size()); 
589             for (int i = 0; i < matrix->seqVec.size(); i++) {
590                 for (int j = 0; j < matrix->seqVec[i].size(); j++) {
591                     if (m->control_pressed) { delete readMatrix; return 0; }
592                     //already added everyone else in row
593                     if (i < matrix->seqVec[i][j].index) {  seqVec[i][matrix->seqVec[i][j].index] = matrix->seqVec[i][j].dist;  }
594                 }
595                         }
596                         //add dummy map for unweighted calc
597                         SeqMap dummy;
598                         seqVec.push_back(dummy);
599                         
600                         delete matrix;
601                         delete readMatrix;
602                         delete nameMap;
603                         
604                         if (m->control_pressed) { return 0; }
605                 }else {
606                         //process file and set up indexes
607                         if (format == "column") { formatMatrix = new FormatColumnMatrix(distFile); }    
608                         else if (format == "phylip") { formatMatrix = new FormatPhylipMatrix(distFile); }
609                         else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0;  }
610                         
611                         formatMatrix->setCutoff(cutoff);
612             
613                         NameAssignment* nameMap = NULL;
614             if(namefile != ""){ 
615                 nameMap = new NameAssignment(namefile);
616                 nameMap->readMap();
617                 readMatrix->read(nameMap);
618             }else if (countfile != "") {
619                 readMatrix->read(&ct);
620             }
621                         
622                         if (m->control_pressed) { delete formatMatrix;  return 0; }
623             
624                         list = formatMatrix->getListVector();
625                         distFile = formatMatrix->getFormattedFileName();
626                         
627                         //positions in file where the distances for each sequence begin
628                         //rowPositions[1] = position in file where distance related to sequence 1 start.
629                         rowPositions = formatMatrix->getRowPositions();
630                         rowPositions.push_back(-1); //dummy row for unweighted calc
631                         
632                         delete formatMatrix;
633                         delete nameMap;
634                         
635                         //openfile for getMap to use
636                         m->openInputFile(distFile, inRow);
637                         
638                         if (m->control_pressed) { inRow.close(); m->mothurRemove(distFile); return 0; }
639                 }
640                 
641                 
642                 //list bin 0 = first name read in distance matrix, list bin 1 = second name read in distance matrix
643                 if (list != NULL) {
644                         vector<string> names;
645                         string binnames;
646                         //map names to rows in sparsematrix
647                         for (int i = 0; i < list->size(); i++) {
648                                 names.clear();
649                                 binnames = list->get(i);
650                                 
651                                 m->splitAtComma(binnames, names);
652                                 
653                                 for (int j = 0; j < names.size(); j++) {
654                                         nameToIndex[names[j]] = i;
655                                 }
656                         }
657                 } else { m->mothurOut("error, no listvector."); m->mothurOutEndLine(); }
658
659         if (m->control_pressed) { if (large) {  inRow.close(); m->mothurRemove(distFile);  }return 0; }
660         
661         return 0;
662     }
663         catch(exception& e) {
664                 m->errorOut(e, "GetOTURepCommand", "execute");
665                 exit(1);
666         }
667 }
668 //**********************************************************************************************************************
669 void GetOTURepCommand::readNamesFile() {
670         try {
671                 ifstream in;
672                 vector<string> dupNames;
673                 m->openInputFile(namefile, in);
674                 
675                 string name, names, sequence;
676         
677                 while(!in.eof()){
678                         in >> name;                     //read from first column  A
679                         in >> names;            //read from second column  A,B,C,D
680                         
681                         dupNames.clear();
682                         
683                         //parse names into vector
684                         m->splitAtComma(names, dupNames);
685                         
686                         //store names in fasta map
687                         sequence = fasta->getSequence(name);
688                         for (int i = 0; i < dupNames.size(); i++) {
689                                 fasta->push_back(dupNames[i], sequence);
690                         }
691                 
692                         m->gobble(in);
693                 }
694                 in.close();
695
696         }
697         catch(exception& e) {
698                 m->errorOut(e, "GetOTURepCommand", "readNamesFile");
699                 exit(1);
700         }
701 }
702 //**********************************************************************************************************************
703 //read names file to find the weighted rep for each bin
704 void GetOTURepCommand::readNamesFile(bool w) {
705         try {
706                 ifstream in;
707                 vector<string> dupNames;
708                 m->openInputFile(namefile, in);
709                 
710                 string name, names, sequence;
711                 
712                 while(!in.eof()){
713                         in >> name;     m->gobble(in);          //read from first column  A
714                         in >> names;                                                    //read from second column  A,B,C,D
715                         
716                         dupNames.clear();
717                         
718                         //parse names into vector
719                         m->splitAtComma(names, dupNames);
720                         
721                         for (int i = 0; i < dupNames.size(); i++) {
722                                 nameFileMap[dupNames[i]] = name;
723                         }
724                         
725                         m->gobble(in);
726                 }
727                 in.close();
728                 
729         }
730         catch(exception& e) {
731                 m->errorOut(e, "GetOTURepCommand", "readNamesFile");
732                 exit(1);
733         }
734 }
735 //**********************************************************************************************************************
736 string GetOTURepCommand::findRep(vector<string> names, string group) {
737         try{
738                 // if only 1 sequence in bin or processing the "unique" label, then 
739                 // the first sequence of the OTU is the representative one
740                 if ((names.size() == 1)) {
741                         return names[0];
742                 }else{
743                         vector<int> seqIndex; //(names.size());
744                         map<string, string>::iterator itNameFile;
745                         map<string, int>::iterator itNameIndex;
746
747                         //fill seqIndex and initialize sums
748                         for (size_t i = 0; i < names.size(); i++) {
749                                 if (weighted) {
750                                         seqIndex.push_back(nameToIndex[names[i]]);
751                     if (countfile != "") {  //if countfile is not blank then we can assume the list file contains only uniques, otherwise we assume list file contains everyone.
752                         int numRep = 0;
753                         if (group != "") {  numRep = ct.getGroupCount(names[i], group);  }
754                         else { numRep = ct.getGroupCount(names[i]);  }
755                         for (int j = 1; j < numRep; j++) { //don't add yourself again
756                             seqIndex.push_back(nameToIndex[names[i]]);
757                         }
758                     }
759                                 }else { 
760                                         if (namefile == "") {
761                                                 itNameIndex = nameToIndex.find(names[i]);
762                                                 
763                                                 if (itNameIndex == nameToIndex.end()) { // you are not in the distance file and no namesfile, then assume you are not unique
764                                                         if (large) {  seqIndex.push_back((rowPositions.size()-1)); }
765                                                         else {  seqIndex.push_back((seqVec.size()-1)); }
766                                                 }else {
767                                                         seqIndex.push_back(itNameIndex->second);
768                                                 }
769                                                 
770                                         }else {
771                                                 itNameFile = nameFileMap.find(names[i]);
772                                                 
773                                                 if (itNameFile == nameFileMap.end()) {
774                                                         m->mothurOut("[ERROR]: " + names[i] + " is not in your namefile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; 
775                                                 }else{
776                                                         string name1 = itNameFile->first;
777                                                         string name2 = itNameFile->second;
778                                                         
779                                                         if (name1 == name2) { //then you are unique so add your real dists
780                                                                 seqIndex.push_back(nameToIndex[names[i]]);
781                                                         }else { //add dummy
782                                                                 if (large) {  seqIndex.push_back((rowPositions.size()-1)); }
783                                                                 else {  seqIndex.push_back((seqVec.size()-1)); }
784                                                         }
785                                                 }
786                                         }
787                                 }
788                         }
789             
790             vector<float> max_dist(seqIndex.size(), 0.0);
791                         vector<float> total_dist(seqIndex.size(), 0.0);
792                         
793                         // loop through all entries in seqIndex
794                         SeqMap::iterator it;
795                         SeqMap currMap;
796                         for (size_t i=0; i < seqIndex.size(); i++) {
797                                 if (m->control_pressed) {  return  "control"; }
798                         
799                                 if (!large) {   currMap = seqVec[seqIndex[i]];  }
800                                 else            {       currMap = getMap(seqIndex[i]);  }
801                                 
802                                 for (size_t j=0; j < seqIndex.size(); j++) {
803                                         it = currMap.find(seqIndex[j]);         
804                                         if (it != currMap.end()) {
805                                                 max_dist[i] = max(max_dist[i], it->second);
806                                                 max_dist[j] = max(max_dist[j], it->second);
807                                                 total_dist[i] += it->second;
808                                                 total_dist[j] += it->second;
809                                         }else{ //if you can't find the distance make it the cutoff
810                                                 max_dist[i] = max(max_dist[i], cutoff);
811                                                 max_dist[j] = max(max_dist[j], cutoff);
812                                                 total_dist[i] += cutoff;
813                                                 total_dist[j] += cutoff;
814                                         }
815                                 }
816                         }
817                         
818                         // sequence with the smallest maximum distance is the representative
819                         //if tie occurs pick sequence with smallest average distance
820                         float min = 10000;
821                         int minIndex;
822                         for (size_t i=0; i < max_dist.size(); i++) {
823                                 if (m->control_pressed) {  return  "control"; }
824                                 if (max_dist[i] < min) {
825                                         min = max_dist[i];
826                                         minIndex = i;
827                                 }else if (max_dist[i] == min) {
828                                         float currentAverage = total_dist[minIndex] / (float) total_dist.size();
829                                         float newAverage = total_dist[i] / (float) total_dist.size();
830                                         
831                                         if (newAverage < currentAverage) {
832                                                 min = max_dist[i];
833                                                 minIndex = i;
834                                         }
835                                 }
836                         }
837                         
838                         return(names[minIndex]);
839                 }
840         }
841         catch(exception& e) {
842                 m->errorOut(e, "GetOTURepCommand", "FindRep");
843                 exit(1);
844         }
845 }
846
847 //**********************************************************************************************************************
848 int GetOTURepCommand::process(ListVector* processList) {
849         try{
850                 string name, sequence;
851                 string nameRep;
852
853                 //create output file
854                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
855                                 
856                 ofstream newNamesOutput;
857                 string outputNamesFile;
858                 map<string, ofstream*> filehandles;
859                 
860                 if (Groups.size() == 0) { //you don't want to use groups
861                         outputNamesFile  = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".";
862             if (countfile == "") { 
863                 outputNamesFile += getOutputFileNameTag("name");
864                 outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile); 
865             }else {
866                 outputNamesFile += getOutputFileNameTag("count");
867                 outputNames.push_back(outputNamesFile); outputTypes["count"].push_back(outputNamesFile); 
868             }
869                         outputNameFiles[outputNamesFile] = processList->getLabel();
870             m->openOutputFile(outputNamesFile, newNamesOutput);
871             newNamesOutput << "noGroup" << endl;
872                 }else{ //you want to use groups
873                         ofstream* temp;
874                         for (int i=0; i<Groups.size(); i++) {
875                                 temp = new ofstream;
876                                 filehandles[Groups[i]] = temp;
877                                 outputNamesFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + Groups[i] + ".";
878                 if (countfile == "") { 
879                     outputNamesFile += getOutputFileNameTag("name");
880                     outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile); 
881                 }else {
882                     outputNamesFile += getOutputFileNameTag("count");
883                     outputNames.push_back(outputNamesFile); outputTypes["count"].push_back(outputNamesFile); 
884                 }
885                                 
886                                 m->openOutputFile(outputNamesFile, *(temp));
887                 *(temp) << Groups[i] << endl;
888                                 outputNameFiles[outputNamesFile] = processList->getLabel() + "." + Groups[i];
889                         }
890                 }
891                 
892                 //for each bin in the list vector
893                 for (int i = 0; i < processList->size(); i++) {
894                         if (m->control_pressed) { 
895                                 out.close();  
896                                 if (Groups.size() == 0) { //you don't want to use groups
897                                         newNamesOutput.close();
898                                 }else{
899                                         for (int j=0; j<Groups.size(); j++) {
900                                                 (*(filehandles[Groups[j]])).close();
901                                                 delete filehandles[Groups[j]];
902                                         }
903                                 }
904                                 return 0; 
905                         }
906                         
907                         string temp = processList->get(i);
908                         vector<string> namesInBin;
909                         m->splitAtComma(temp, namesInBin);
910                         
911                         if (Groups.size() == 0) {
912                                 nameRep = findRep(namesInBin, "");
913                                 newNamesOutput << i << '\t' << nameRep << '\t' << processList->get(i) << endl;
914                         }else{
915                                 map<string, vector<string> > NamesInGroup;
916                                 for (int j=0; j<Groups.size(); j++) { //initialize groups
917                                         NamesInGroup[Groups[j]].resize(0);
918                                 }
919                                 
920                                 for (int j=0; j<namesInBin.size(); j++) {
921                     if (groupfile != "") {
922                         string thisgroup = groupMap->getGroup(namesInBin[j]);
923                         if (thisgroup == "not found") { m->mothurOut(namesInBin[j] + " is not in your groupfile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
924                         
925                         //add this name to correct group
926                         if (m->inUsersGroups(thisgroup, Groups)) { NamesInGroup[thisgroup].push_back(namesInBin[j]);  }
927                     }else {
928                         vector<string> thisSeqsGroups = ct.getGroups(namesInBin[j]);
929                         for (int k = 0; k < thisSeqsGroups.size(); k++) {
930                             if (m->inUsersGroups(thisSeqsGroups[k], Groups)) { NamesInGroup[thisSeqsGroups[k]].push_back(namesInBin[j]);  }
931                         }
932                     }
933                                 }
934                                 
935                                 //get rep for each group in otu
936                                 for (int j=0; j<Groups.size(); j++) {
937                                         if (NamesInGroup[Groups[j]].size() != 0) { //are there members from this group in this otu?
938                                                 //get rep for each group
939                                                 nameRep = findRep(NamesInGroup[Groups[j]], Groups[j]);
940                                                 
941                                                 //output group rep and other members of this group
942                                                 (*(filehandles[Groups[j]])) << i << '\t' << nameRep << '\t';
943                                                 
944                                                 for (int k=0; k<NamesInGroup[Groups[j]].size()-1; k++) {//output list of names in this otu from this group
945                                                         (*(filehandles[Groups[j]])) << NamesInGroup[Groups[j]][k] << ",";
946                                                 }
947                                                 //output last name
948                                                 (*(filehandles[Groups[j]])) << NamesInGroup[Groups[j]][NamesInGroup[Groups[j]].size()-1] << endl;
949                                         }
950                                 }
951                         }
952                 }
953                 
954                 if (Groups.size() == 0) { //you don't want to use groups
955                         newNamesOutput.close();
956                 }else{
957                         for (int i=0; i<Groups.size(); i++) {
958                                 (*(filehandles[Groups[i]])).close();
959                                 delete filehandles[Groups[i]];
960                         }
961                 }
962                 
963                 return 0;
964
965         }
966         catch(exception& e) {
967                 m->errorOut(e, "GetOTURepCommand", "process");
968                 exit(1);
969         }
970 }
971 //**********************************************************************************************************************
972 int GetOTURepCommand::processFastaNames(string filename, string label) {
973         try{
974
975                 //create output file
976                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
977                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + label + "." + getOutputFileNameTag("fasta");
978                 m->openOutputFile(outputFileName, out);
979                 vector<repStruct> reps;
980                 outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
981                 
982                 ofstream out2;
983                 string tempNameFile = filename + ".temp";
984                 m->openOutputFile(tempNameFile, out2);
985             
986                 ifstream in;
987                 m->openInputFile(filename, in);
988                 
989                 int i = 0;
990         string tempGroup = "";
991         in >> tempGroup; m->gobble(in);
992         
993         CountTable thisCt;
994         if (countfile != "") {
995             thisCt.readTable(countfile);
996             if (tempGroup != "noGroup") { out2 << "Representative_Sequence\ttotal\t" << tempGroup << endl; }
997         }
998     
999         int thistotal = 0;
1000                 while (!in.eof()) {
1001                         string rep, binnames;
1002                         in >> i >> rep >> binnames; m->gobble(in);
1003                         
1004                         vector<string> names;
1005                         m->splitAtComma(binnames, names);
1006                         int binsize = names.size();
1007             
1008             if (countfile == "") { out2 << rep << '\t' << binnames << endl; }
1009             else {
1010                 if (tempGroup == "noGroup") {
1011                     for (int j = 0; j < names.size(); j++) {
1012                         if (names[j] != rep) { thisCt.mergeCounts(rep, names[j]); }
1013                     }
1014                     binsize = thisCt.getNumSeqs(rep);
1015                 }else {
1016                     int total = 0; 
1017                     for (int j = 0; j < names.size(); j++) {  total += thisCt.getGroupCount(names[j], tempGroup);  }
1018                     out2 << rep << '\t' << total << '\t' << total << endl;
1019                     binsize = total;
1020                 }
1021             }
1022                         thistotal += binsize;
1023                         //if you have a groupfile
1024                         string group = "";
1025             map<string, string> groups;
1026             map<string, string>::iterator groupIt;
1027                         if (groupfile != "") {
1028                                 //find the groups that are in this bin
1029                                 for (int i = 0; i < names.size(); i++) {
1030                                         string groupName = groupMap->getGroup(names[i]);
1031                                         if (groupName == "not found") {  
1032                                                 m->mothurOut(names[i] + " is missing from your group file. Please correct. "); m->mothurOutEndLine();
1033                                                 groupError = true;
1034                                         } else {
1035                                                 groups[groupName] = groupName;
1036                                         }
1037                                 }
1038                                 
1039                                 //turn the groups into a string
1040                                 for (groupIt = groups.begin(); groupIt != groups.end(); groupIt++) {
1041                                         group += groupIt->first + "-";
1042                                 }
1043                                 //rip off last dash
1044                                 group = group.substr(0, group.length()-1);
1045                         }else if (hasGroups) {
1046                 map<string, string> groups;
1047                 for (int i = 0; i < names.size(); i++) {
1048                     vector<string> thisSeqsGroups = ct.getGroups(names[i]);
1049                     for (int j = 0; j < thisSeqsGroups.size(); j++) { groups[thisSeqsGroups[j]] = thisSeqsGroups[j]; }
1050                 }
1051                 //turn the groups into a string
1052                                 for (groupIt = groups.begin(); groupIt != groups.end(); groupIt++) {
1053                                         group += groupIt->first + "-";
1054                                 }
1055                                 //rip off last dash
1056                                 group = group.substr(0, group.length()-1);
1057                 //cout << group << endl;
1058             }
1059             else{ group = ""; }
1060
1061                         
1062                         //print out name and sequence for that bin
1063                         string sequence = fasta->getSequence(rep);
1064
1065                         if (sequence != "not found") {
1066                                 if (sorted == "") { //print them out
1067                                         rep = rep + "\t" + toString(i+1);
1068                                         rep = rep + "|" + toString(binsize);
1069                                         if (group != "") {
1070                                                 rep = rep + "|" + group;
1071                                         }
1072                                         out << ">" << rep << endl;
1073                                         out << sequence << endl;
1074                                 }else { //save them
1075                                         repStruct newRep(rep, i+1, binsize, group);
1076                                         reps.push_back(newRep);
1077                                 }
1078                         }else { 
1079                                 m->mothurOut(rep + " is missing from your fasta or name file, ignoring. Please correct."); m->mothurOutEndLine(); 
1080                         }
1081                 }
1082                 
1083                         
1084                 if (sorted != "") { //then sort them and print them
1085                         if (sorted == "name")           {  sort(reps.begin(), reps.end(), compareName);         }
1086                         else if (sorted == "bin")       {  sort(reps.begin(), reps.end(), compareBin);          }
1087                         else if (sorted == "size")      {  sort(reps.begin(), reps.end(), compareSize);         }
1088                         else if (sorted == "group")     {  sort(reps.begin(), reps.end(), compareGroup);        }
1089                         
1090                         //print them
1091                         for (int i = 0; i < reps.size(); i++) {
1092                                 string sequence = fasta->getSequence(reps[i].name);
1093                                 string outputName = reps[i].name + "\t" + toString(reps[i].bin);
1094                                 outputName = outputName + "|" + toString(reps[i].size);
1095                                 if (reps[i].group != "") {
1096                                         outputName = outputName + "|" + reps[i].group;
1097                                 }
1098                                 out << ">" << outputName << endl;
1099                                 out << sequence << endl;
1100                         }
1101                 }
1102                 
1103                 in.close();
1104                 out.close();
1105                 out2.close();
1106                 
1107                 m->mothurRemove(filename);
1108                 rename(tempNameFile.c_str(), filename.c_str());
1109         
1110         if ((countfile != "") && (tempGroup == "noGroup")) { thisCt.printTable(filename); } 
1111                 
1112                 return 0;
1113
1114         }
1115         catch(exception& e) {
1116                 m->errorOut(e, "GetOTURepCommand", "processFastaNames");
1117                 exit(1);
1118         }
1119 }
1120 //**********************************************************************************************************************
1121 int GetOTURepCommand::processNames(string filename, string label) {
1122         try{
1123                 
1124                 //create output file
1125                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
1126                 
1127                 ofstream out2;
1128                 string tempNameFile = filename + ".temp";
1129                 m->openOutputFile(tempNameFile, out2);
1130                 
1131                 ifstream in;
1132                 m->openInputFile(filename, in);
1133                 
1134                 int i = 0;
1135                 string rep, binnames;
1136         
1137         string tempGroup = "";
1138         in >> tempGroup; m->gobble(in);
1139         
1140         CountTable thisCt;
1141         if (countfile != "") {
1142             thisCt.readTable(countfile);
1143             if (tempGroup != "noGroup") { out2 << "Representative_Sequence\ttotal\t" << tempGroup << endl; }
1144         }
1145         
1146                 while (!in.eof()) {
1147                         if (m->control_pressed) { break; }
1148                         in >> i >> rep >> binnames; m->gobble(in);
1149             
1150                         if (countfile == "") { out2 << rep << '\t' << binnames << endl; }
1151             else {
1152                 vector<string> names;
1153                 m->splitAtComma(binnames, names);
1154                 if (tempGroup == "noGroup") {
1155                     for (int j = 0; j < names.size(); j++) {
1156                         if (names[j] != rep) { thisCt.mergeCounts(rep, names[j]); }
1157                     }
1158                 }else {
1159                     int total = 0; 
1160                     for (int j = 0; j < names.size(); j++) {  total += thisCt.getGroupCount(names[j], tempGroup);  }
1161                     out2 << rep << '\t' << total << '\t' << total << endl;
1162                 }
1163             }
1164
1165                 }
1166                 in.close();
1167                 out2.close();
1168                 
1169                 m->mothurRemove(filename);
1170                 rename(tempNameFile.c_str(), filename.c_str());
1171                 
1172         if ((countfile != "") && (tempGroup == "noGroup")) { thisCt.printTable(filename); } 
1173         
1174                 return 0;
1175         }
1176         catch(exception& e) {
1177                 m->errorOut(e, "GetOTURepCommand", "processNames");
1178                 exit(1);
1179         }
1180 }
1181 //**********************************************************************************************************************
1182 SeqMap GetOTURepCommand::getMap(int row) {
1183         try {
1184                 SeqMap rowMap;
1185                 
1186                 //make sure this row exists in the file, it may not if the seq did not have any distances below the cutoff
1187                 if (rowPositions[row] != -1){
1188                         //go to row in file
1189                         inRow.seekg(rowPositions[row]);
1190                         
1191                         int rowNum, numDists, colNum;
1192                         float dist;
1193                         
1194                         inRow >> rowNum >> numDists;
1195                         
1196                         for(int i = 0; i < numDists; i++) {
1197                                 inRow >> colNum >> dist;
1198                                 rowMap[colNum] = dist;
1199                                 
1200                         }
1201                 }
1202                 
1203                 return rowMap;
1204         }
1205         catch(exception& e) {
1206                 m->errorOut(e, "GetOTURepCommand", "getMap");
1207                 exit(1);
1208         }
1209 }
1210 //**********************************************************************************************************************
1211