]> git.donarmstrong.com Git - mothur.git/blob - getoturepcommand.cpp
added sets to amova and homova commands. added oligos to make.contigs. added metadat...
[mothur.git] / getoturepcommand.cpp
1 /*
2  *  getoturepcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/6/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "getoturepcommand.h"
11 #include "readphylip.h"
12 #include "readcolumn.h"
13 #include "formatphylip.h"
14 #include "formatcolumn.h"
15 #include "sharedutilities.h"
16
17
18 //********************************************************************************************************************
19 //sorts lowest to highest
20 inline bool compareName(repStruct left, repStruct right){
21         return (left.name < right.name);        
22 }
23 //********************************************************************************************************************
24 //sorts lowest to highest
25 inline bool compareBin(repStruct left, repStruct right){
26         return (left.bin < right.bin);  
27 }
28 //********************************************************************************************************************
29 //sorts lowest to highest
30 inline bool compareSize(repStruct left, repStruct right){
31         return (left.size < right.size);        
32 }
33 //********************************************************************************************************************
34 //sorts lowest to highest
35 inline bool compareGroup(repStruct left, repStruct right){
36         return (left.group < right.group);      
37 }
38
39 //**********************************************************************************************************************
40 vector<string> GetOTURepCommand::setParameters(){       
41         try {
42                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
43                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pfasta);
44                 CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pphylip);
45         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName",false,false); parameters.push_back(pname);
46         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "ColumnName",false,false); parameters.push_back(pcount);
47                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
48                 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "ColumnName",false,false); parameters.push_back(pcolumn);
49                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
50                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
51                 CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff);
52                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
53                 CommandParameter pweighted("weighted", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pweighted);
54                 CommandParameter psorted("sorted", "Multiple", "none-name-bin-size-group", "none", "", "", "",false,false); parameters.push_back(psorted);
55                 CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
56                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
57                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
58                 
59                 vector<string> myArray;
60                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
61                 return myArray;
62         }
63         catch(exception& e) {
64                 m->errorOut(e, "GetOTURepCommand", "setParameters");
65                 exit(1);
66         }
67 }
68 //**********************************************************************************************************************
69 string GetOTURepCommand::getHelpString(){       
70         try {
71                 string helpString = "";
72                 helpString += "The get.oturep command parameters are phylip, column, list, fasta, name, group, count, large, weighted, cutoff, precision, groups, sorted and label.  The list parameter is required, as well as phylip or column and name, unless you have valid current files.\n";
73                 helpString += "The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n";
74                 helpString += "The phylip or column parameter is required, but only one may be used.  If you use a column file the name or count filename is required. \n";
75                 helpString += "If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n";
76                 helpString += "The get.oturep command should be in the following format: get.oturep(phylip=yourDistanceMatrix, fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n";
77                 helpString += "Example get.oturep(phylip=amazon.dist, fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups).\n";
78                 helpString += "The default value for label is all labels in your inputfile.\n";
79                 helpString += "The sorted parameter allows you to indicate you want the output sorted. You can sort by sequence name, bin number, bin size or group. The default is no sorting, but your options are name, number, size, or group.\n";
80                 helpString += "The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n";
81                 helpString += "The weighted parameter allows you to indicate that want to find the weighted representative. You must provide a namesfile to set weighted to true.  The default value is false.\n";
82                 helpString += "The representative is found by selecting the sequence that has the smallest total distance to all other sequences in the OTU. If a tie occurs the smallest average distance is used.\n";
83                 helpString += "For weighted = false, mothur assumes the distance file contains only unique sequences, the list file may contain all sequences, but only the uniques are considered to become the representative. If your distance file contains all the sequences it would become weighted=true.\n";
84                 helpString += "For weighted = true, mothur assumes the distance file contains only unique sequences, the list file must contain all sequences, all sequences are considered to become the representative, but unique name will be used in the output for consistency.\n";
85                 helpString += "If your distance file contains all the sequence and you do not provide a name file, the weighted representative will be given, unless your listfile is unique. If you provide a namefile, then you can select weighted or unweighted.\n";
86                 helpString += "The group parameter allows you provide a group file.\n";
87                 helpString += "The groups parameter allows you to indicate that you want representative sequences for each group specified for each OTU, group name should be separated by dashes. ex. groups=A-B-C.\n";
88                 helpString += "The get.oturep command outputs a .fastarep and .rep.names file for each distance you specify, selecting one OTU representative for each bin.\n";
89                 helpString += "If you provide a groupfile, then it also appends the names of the groups present in that bin.\n";
90                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
91                 return helpString;
92         }
93         catch(exception& e) {
94                 m->errorOut(e, "GetOTURepCommand", "getHelpString");
95                 exit(1);
96         }
97 }
98 //**********************************************************************************************************************
99 string GetOTURepCommand::getOutputFileNameTag(string type, string inputName=""){        
100         try {
101         string outputFileName = "";
102                 map<string, vector<string> >::iterator it;
103         
104         //is this a type this command creates
105         it = outputTypes.find(type);
106         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
107         else {
108             if (type == "fasta")            {   outputFileName =  "rep.fasta";   }
109             else if (type == "name")        {   outputFileName =  "rep.names";   }
110             else if (type == "count")        {   outputFileName =  "rep.count_table";   }
111             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
112         }
113         return outputFileName;
114         }
115         catch(exception& e) {
116                 m->errorOut(e, "GetOTURepCommand", "getOutputFileNameTag");
117                 exit(1);
118         }
119 }
120 //**********************************************************************************************************************
121 GetOTURepCommand::GetOTURepCommand(){   
122         try {
123                 abort = true; calledHelp = true; 
124                 setParameters();
125                 vector<string> tempOutNames;
126                 outputTypes["fasta"] = tempOutNames;
127                 outputTypes["name"] = tempOutNames;
128         outputTypes["count"] = tempOutNames;
129         }
130         catch(exception& e) {
131                 m->errorOut(e, "GetOTURepCommand", "GetOTURepCommand");
132                 exit(1);
133         }
134 }
135 //**********************************************************************************************************************
136 GetOTURepCommand::GetOTURepCommand(string option)  {
137         try{
138                 abort = false; calledHelp = false;   
139                 allLines = 1;
140                                 
141                 //allow user to run help
142                 if (option == "help") { 
143                         help(); abort = true; calledHelp = true;
144                 }else if(option == "citation") { citation(); abort = true; calledHelp = true;
145                 } else {
146                         vector<string> myArray = setParameters();
147                         
148                         OptionParser parser(option);
149                         map<string, string> parameters = parser.getParameters();
150                         
151                         ValidParameters validParameter;
152                         map<string, string>::iterator it;
153                 
154                         //check to make sure all parameters are valid for command
155                         for (it = parameters.begin(); it != parameters.end(); it++) { 
156                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
157                         }
158                         
159                         //initialize outputTypes
160                         vector<string> tempOutNames;
161                         outputTypes["fasta"] = tempOutNames;
162                         outputTypes["name"] = tempOutNames;
163             outputTypes["count"] = tempOutNames;
164                         
165                         //if the user changes the input directory command factory will send this info to us in the output parameter 
166                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
167                         if (inputDir == "not found"){   inputDir = "";          }
168                         else {
169                                 string path;
170                                 it = parameters.find("list");
171                                 //user has given a template file
172                                 if(it != parameters.end()){ 
173                                         path = m->hasPath(it->second);
174                                         //if the user has not given a path then, add inputdir. else leave path alone.
175                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
176                                 }
177                                 
178                                 it = parameters.find("fasta");
179                                 //user has given a template file
180                                 if(it != parameters.end()){ 
181                                         path = m->hasPath(it->second);
182                                         //if the user has not given a path then, add inputdir. else leave path alone.
183                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
184                                 }
185                                 
186                                 it = parameters.find("phylip");
187                                 //user has given a template file
188                                 if(it != parameters.end()){ 
189                                         path = m->hasPath(it->second);
190                                         //if the user has not given a path then, add inputdir. else leave path alone.
191                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
192                                 }
193                                 
194                                 it = parameters.find("column");
195                                 //user has given a template file
196                                 if(it != parameters.end()){ 
197                                         path = m->hasPath(it->second);
198                                         //if the user has not given a path then, add inputdir. else leave path alone.
199                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
200                                 }
201                                 
202                                 it = parameters.find("name");
203                                 //user has given a template file
204                                 if(it != parameters.end()){ 
205                                         path = m->hasPath(it->second);
206                                         //if the user has not given a path then, add inputdir. else leave path alone.
207                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
208                                 }
209                                 
210                                 it = parameters.find("group");
211                                 //user has given a template file
212                                 if(it != parameters.end()){ 
213                                         path = m->hasPath(it->second);
214                                         //if the user has not given a path then, add inputdir. else leave path alone.
215                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
216                                 }
217                 
218                 it = parameters.find("count");
219                                 //user has given a template file
220                                 if(it != parameters.end()){ 
221                                         path = m->hasPath(it->second);
222                                         //if the user has not given a path then, add inputdir. else leave path alone.
223                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
224                                 }
225                         }
226
227                         
228                         //if the user changes the output directory command factory will send this info to us in the output parameter 
229                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
230                         
231                         //check for required parameters
232                         fastafile = validParameter.validFile(parameters, "fasta", true);
233                         if (fastafile == "not found") { fastafile = ""; }
234                         else if (fastafile == "not open") { abort = true; }     
235                         else { m->setFastaFile(fastafile); }
236                 
237                         listfile = validParameter.validFile(parameters, "list", true);
238                         if (listfile == "not found") {                  
239                                 listfile = m->getListFile(); 
240                                 if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
241                                 else {  m->mothurOut("You have no current list file and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
242                         }
243                         else if (listfile == "not open") { abort = true; }      
244                         else { m->setListFile(listfile); }
245                         
246                         phylipfile = validParameter.validFile(parameters, "phylip", true);
247                         if (phylipfile == "not found") { phylipfile = "";  }
248                         else if (phylipfile == "not open") { abort = true; }    
249                         else { distFile = phylipfile; format = "phylip"; m->setPhylipFile(phylipfile);   }
250                         
251                         columnfile = validParameter.validFile(parameters, "column", true);
252                         if (columnfile == "not found") { columnfile = ""; }
253                         else if (columnfile == "not open") { abort = true; }    
254                         else { distFile = columnfile; format = "column";  m->setColumnFile(columnfile); }
255                         
256                         namefile = validParameter.validFile(parameters, "name", true);
257                         if (namefile == "not open") { abort = true; }   
258                         else if (namefile == "not found") { namefile = ""; }
259                         else { m->setNameFile(namefile); }
260             
261             hasGroups = false;
262             countfile = validParameter.validFile(parameters, "count", true);
263                         if (countfile == "not found") { countfile =  "";   }
264                         else if (countfile == "not open") { abort = true; countfile =  ""; }    
265                         else {   
266                 m->setCountTableFile(countfile); 
267                 ct.readTable(countfile);
268                 if (ct.hasGroupInfo()) { hasGroups = true; }
269             }
270             
271             if ((namefile != "") && (countfile != "")) {
272                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
273             }
274             
275             if ((groupfile != "") && (countfile != "")) {
276                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
277             }
278                         
279                         if ((phylipfile == "") && (columnfile == "")) { //is there are current file available for either of these?
280                                 //give priority to column, then phylip
281                                 columnfile = m->getColumnFile(); 
282                                 if (columnfile != "") {  distFile = columnfile; format = "column"; m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
283                                 else { 
284                                         phylipfile = m->getPhylipFile(); 
285                                         if (phylipfile != "") {  distFile = phylipfile; format = "phylip"; m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
286                                         else { 
287                                                 m->mothurOut("No valid current files. You must provide a phylip or column file before you can use the get.oturep command."); m->mothurOutEndLine(); 
288                                                 abort = true;
289                                         }
290                                 }
291                         }else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a get.oturep command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
292                 
293                         if (columnfile != "") {  
294                                 if ((namefile == "") && (countfile == "")) { 
295                                         namefile = m->getNameFile(); 
296                                         if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
297                                         else { 
298                                                 countfile = m->getCountTableFile();
299                         if (countfile != "") {  m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
300                         else { 
301                             m->mothurOut("You need to provide a namefile or countfile if you are going to use the column format."); m->mothurOutEndLine(); 
302                             abort = true; 
303                         }       
304                                         }       
305                                 }
306                         }
307
308                         //check for optional parameter and set defaults
309                         // ...at some point should added some additional type checking...
310                         label = validParameter.validFile(parameters, "label", false);                   
311                         if (label == "not found") { label = ""; allLines = 1;  }
312                         else { 
313                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
314                                 else { allLines = 1;  }
315                         }
316                         
317                         groupfile = validParameter.validFile(parameters, "group", true);
318                         if (groupfile == "not open") { groupfile = ""; abort = true; }
319                         else if (groupfile == "not found") { groupfile = ""; }
320                         else { m->setGroupFile(groupfile); }
321                         
322                         sorted = validParameter.validFile(parameters, "sorted", false);         if (sorted == "not found"){     sorted = "";    }
323                         if (sorted == "none") { sorted=""; }
324                         if ((sorted != "") && (sorted != "name") && (sorted != "bin") && (sorted != "size") && (sorted != "group")) {
325                                 m->mothurOut(sorted + " is not a valid option for the sorted parameter. The only options are: name, bin, size and group. I will not sort."); m->mothurOutEndLine();
326                                 sorted = "";
327                         }
328                         
329                         if ((sorted == "group") && ((groupfile == "")&& !hasGroups)) {
330                                 m->mothurOut("You must provide a groupfile or have a count file with group info to sort by group. I will not sort."); m->mothurOutEndLine();
331                                 sorted = "";
332                         }
333                         
334                         groups = validParameter.validFile(parameters, "groups", false);                 
335                         if (groups == "not found") { groups = ""; }
336                         else { 
337                                 if ((groupfile == "") && (!hasGroups)) {
338                                         m->mothurOut("You must provide a groupfile to use groups."); m->mothurOutEndLine();
339                                         abort = true;
340                                 }else { 
341                                         m->splitAtDash(groups, Groups);
342                                 }
343                         }
344                         m->setGroups(Groups);
345                         
346                         string temp = validParameter.validFile(parameters, "large", false);             if (temp == "not found") {      temp = "F";     }
347                         large = m->isTrue(temp);
348                         
349                         temp = validParameter.validFile(parameters, "weighted", false);         if (temp == "not found") {       temp = "f";    }
350                         weighted = m->isTrue(temp);
351                         
352                         if ((weighted) && (namefile == "")) { m->mothurOut("You cannot set weighted to true unless you provide a namesfile."); m->mothurOutEndLine(); abort = true; }
353                         
354                         temp = validParameter.validFile(parameters, "precision", false);                        if (temp == "not found") { temp = "100"; }
355                         m->mothurConvert(temp, precision); 
356                         
357                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "10.0"; }
358                         m->mothurConvert(temp, cutoff); 
359                         cutoff += (5 / (precision * 10.0));
360                 }
361         }
362         catch(exception& e) {
363                 m->errorOut(e, "GetOTURepCommand", "GetOTURepCommand");
364                 exit(1);
365         }
366 }
367
368 //**********************************************************************************************************************
369
370 int GetOTURepCommand::execute(){
371         try {
372         
373                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
374                 int error;
375                 list = NULL;
376                 
377                 readDist();             
378                                 
379                 if (m->control_pressed) { if (large) {  inRow.close(); m->mothurRemove(distFile);  } return 0; }
380                 
381                 if (groupfile != "") {
382                         //read in group map info.
383                         groupMap = new GroupMap(groupfile);
384                         int error = groupMap->readMap();
385                         if (error == 1) { delete groupMap; m->mothurOut("Error reading your groupfile. Proceeding without groupfile."); m->mothurOutEndLine(); groupfile = "";  }
386                         
387                         if (Groups.size() != 0) {
388                                 SharedUtil util;
389                                 vector<string> gNamesOfGroups = groupMap->getNamesOfGroups();
390                                 util.setGroups(Groups, gNamesOfGroups, "getoturep");
391                                 groupMap->setNamesOfGroups(gNamesOfGroups);
392                         }
393                 }else if (hasGroups) {
394             if (Groups.size() != 0) {
395                                 SharedUtil util;
396                                 vector<string> gNamesOfGroups = ct.getNamesOfGroups();
397                                 util.setGroups(Groups, gNamesOfGroups, "getoturep");
398                         }
399         }
400                 
401                 //done with listvector from matrix
402                 if (list != NULL) { delete list; }
403                 
404                 input = new InputData(listfile, "list");
405                 list = input->getListVector();
406                 string lastLabel = list->getLabel();
407
408                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
409                 set<string> processedLabels;
410                 set<string> userLabels = labels;
411                 
412                 if (m->control_pressed) { 
413                         if (large) {  inRow.close(); m->mothurRemove(distFile);  }
414                         delete input; delete list; return 0; 
415                 }
416                 
417                 if ((!weighted) && (namefile != "")) { readNamesFile(weighted); }
418                 
419                 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
420                         
421                         if (allLines == 1 || labels.count(list->getLabel()) == 1){
422                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
423                                         error = process(list);
424                                         if (error == 1) { return 0; } //there is an error in hte input files, abort command
425                                         
426                                         if (m->control_pressed) { 
427                                                 if (large) {  inRow.close(); m->mothurRemove(distFile);  }
428                                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } outputTypes.clear();
429                                                 delete input; delete list; return 0; 
430                                         }
431                                         
432                                         processedLabels.insert(list->getLabel());
433                                         userLabels.erase(list->getLabel());
434                         }
435                         
436                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
437                                         string saveLabel = list->getLabel();
438                                         
439                                         delete list;
440                                         list = input->getListVector(lastLabel);
441                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
442                                         error = process(list);
443                                         if (error == 1) { return 0; } //there is an error in hte input files, abort command
444                                         
445                                         if (m->control_pressed) { 
446                                                 if (large) {  inRow.close(); m->mothurRemove(distFile);  }
447                                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } outputTypes.clear();
448                                                 delete input; delete list; return 0; 
449                                         }
450                                         
451                                         processedLabels.insert(list->getLabel());
452                                         userLabels.erase(list->getLabel());
453                                         
454                                         //restore real lastlabel to save below
455                                         list->setLabel(saveLabel);
456                         }
457                         
458                         lastLabel = list->getLabel();
459         
460                         delete list;
461                         list = input->getListVector();
462                 }
463                 
464                 //output error messages about any remaining user labels
465                 bool needToRun = false;
466                 for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {  
467                         m->mothurOut("Your file does not include the label " + (*it)); 
468                         if (processedLabels.count(lastLabel) != 1) {
469                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
470                                 needToRun = true;
471                         }else {
472                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
473                         }
474                 }
475                 
476                 //run last label if you need to
477                 if (needToRun == true)  {
478                         if (list != NULL) {     delete list;    }
479                         list = input->getListVector(lastLabel);
480                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
481                         error = process(list);
482                         delete list;
483                         if (error == 1) { return 0; } //there is an error in hte input files, abort command
484                         
485                         if (m->control_pressed) { 
486                                         if (large) {  inRow.close(); m->mothurRemove(distFile);  }
487                                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } outputTypes.clear();
488                                         delete input; delete list; return 0; 
489                         }
490                 }
491                 
492                 //close and remove formatted matrix file
493                 if (large) {
494                         inRow.close();
495                         m->mothurRemove(distFile);
496                 }
497                 
498                 delete input;  
499                 
500                 if (!weighted) { nameFileMap.clear(); }
501                 
502                                 
503                 if (fastafile != "") {
504                         //read fastafile
505                         fasta = new FastaMap();
506                         fasta->readFastaFile(fastafile);
507                         
508                         //if user gave a namesfile then use it
509                         if (namefile != "") {   readNamesFile();        }
510                         
511                         //output create and output the .rep.fasta files
512                         map<string, string>::iterator itNameFile;
513                         for (itNameFile = outputNameFiles.begin(); itNameFile != outputNameFiles.end(); itNameFile++) {
514                                 processFastaNames(itNameFile->first, itNameFile->second);
515                         }
516                 }else {
517                         //output create and output the .rep.fasta files
518                         map<string, string>::iterator itNameFile;
519                         for (itNameFile = outputNameFiles.begin(); itNameFile != outputNameFiles.end(); itNameFile++) {
520                                 processNames(itNameFile->first, itNameFile->second);
521                         }
522                 }
523                 
524                                 
525                 if (groupfile != "") { delete groupMap; }
526                 
527                 if (m->control_pressed) {  return 0; }
528                 
529                 //set fasta file as new current fastafile - use first one??
530                 string current = "";
531                 itTypes = outputTypes.find("fasta");
532                 if (itTypes != outputTypes.end()) {
533                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
534                 }
535                 
536                 itTypes = outputTypes.find("name");
537                 if (itTypes != outputTypes.end()) {
538                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
539                 }
540         
541         itTypes = outputTypes.find("count");
542                 if (itTypes != outputTypes.end()) {
543                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
544                 }
545                 
546                 m->mothurOutEndLine();
547                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
548                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
549                 m->mothurOutEndLine();
550                 
551                 return 0;
552         }
553         catch(exception& e) {
554                 m->errorOut(e, "GetOTURepCommand", "execute");
555                 exit(1);
556         }
557 }
558 //**********************************************************************************************************************
559 int GetOTURepCommand::readDist() {
560         try {
561         
562         if (!large) {
563                         //read distance files
564                         if (format == "column") { readMatrix = new ReadColumnMatrix(distFile); }        
565                         else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(distFile); }
566                         else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0;  }
567                         
568                         readMatrix->setCutoff(cutoff);
569             
570                         NameAssignment* nameMap = NULL;
571             if(namefile != ""){ 
572                 nameMap = new NameAssignment(namefile);
573                 nameMap->readMap();
574                 readMatrix->read(nameMap);
575             }else if (countfile != "") {
576                 readMatrix->read(&ct);
577             }else {
578                readMatrix->read(nameMap); 
579             }
580                         
581                         if (m->control_pressed) { delete readMatrix; return 0; }
582             
583                         list = readMatrix->getListVector();
584                         SparseDistanceMatrix* matrix = readMatrix->getDMatrix();
585                         
586                         // Create a data structure to quickly access the distance information.
587                         // It consists of a vector of distance maps, where each map contains
588                         // all distances of a certain sequence. Vector and maps are accessed
589                         // via the index of a sequence in the distance matrix
590                         seqVec = vector<SeqMap>(list->size()); 
591             for (int i = 0; i < matrix->seqVec.size(); i++) {
592                 for (int j = 0; j < matrix->seqVec[i].size(); j++) {
593                     if (m->control_pressed) { delete readMatrix; return 0; }
594                     //already added everyone else in row
595                     if (i < matrix->seqVec[i][j].index) {  seqVec[i][matrix->seqVec[i][j].index] = matrix->seqVec[i][j].dist;  }
596                 }
597                         }
598                         //add dummy map for unweighted calc
599                         SeqMap dummy;
600                         seqVec.push_back(dummy);
601                         
602                         delete matrix;
603                         delete readMatrix;
604                         delete nameMap;
605                         
606                         if (m->control_pressed) { return 0; }
607                 }else {
608                         //process file and set up indexes
609                         if (format == "column") { formatMatrix = new FormatColumnMatrix(distFile); }    
610                         else if (format == "phylip") { formatMatrix = new FormatPhylipMatrix(distFile); }
611                         else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0;  }
612                         
613                         formatMatrix->setCutoff(cutoff);
614             
615                         NameAssignment* nameMap = NULL;
616             if(namefile != ""){ 
617                 nameMap = new NameAssignment(namefile);
618                 nameMap->readMap();
619                 formatMatrix->read(nameMap);
620             }else if (countfile != "") {
621                 formatMatrix->read(&ct);
622             }else {
623                 formatMatrix->read(nameMap); 
624             }
625                         
626                         if (m->control_pressed) { delete formatMatrix;  return 0; }
627             
628                         list = formatMatrix->getListVector();
629                         distFile = formatMatrix->getFormattedFileName();
630                         
631                         //positions in file where the distances for each sequence begin
632                         //rowPositions[1] = position in file where distance related to sequence 1 start.
633                         rowPositions = formatMatrix->getRowPositions();
634                         rowPositions.push_back(-1); //dummy row for unweighted calc
635                         
636                         delete formatMatrix;
637                         delete nameMap;
638                         
639                         //openfile for getMap to use
640                         m->openInputFile(distFile, inRow);
641                         
642                         if (m->control_pressed) { inRow.close(); m->mothurRemove(distFile); return 0; }
643                 }
644                 
645                 
646                 //list bin 0 = first name read in distance matrix, list bin 1 = second name read in distance matrix
647                 if (list != NULL) {
648                         vector<string> names;
649                         string binnames;
650                         //map names to rows in sparsematrix
651                         for (int i = 0; i < list->size(); i++) {
652                                 names.clear();
653                                 binnames = list->get(i);
654                                 
655                                 m->splitAtComma(binnames, names);
656                                 
657                                 for (int j = 0; j < names.size(); j++) {
658                                         nameToIndex[names[j]] = i;
659                                 }
660                         }
661                 } else { m->mothurOut("error, no listvector."); m->mothurOutEndLine(); }
662
663         if (m->control_pressed) { if (large) {  inRow.close(); m->mothurRemove(distFile);  }return 0; }
664         
665         return 0;
666     }
667         catch(exception& e) {
668                 m->errorOut(e, "GetOTURepCommand", "execute");
669                 exit(1);
670         }
671 }
672 //**********************************************************************************************************************
673 void GetOTURepCommand::readNamesFile() {
674         try {
675                 ifstream in;
676                 vector<string> dupNames;
677                 m->openInputFile(namefile, in);
678                 
679                 string name, names, sequence;
680         
681                 while(!in.eof()){
682                         in >> name;                     //read from first column  A
683                         in >> names;            //read from second column  A,B,C,D
684                         
685                         dupNames.clear();
686                         
687                         //parse names into vector
688                         m->splitAtComma(names, dupNames);
689                         
690                         //store names in fasta map
691                         sequence = fasta->getSequence(name);
692                         for (int i = 0; i < dupNames.size(); i++) {
693                                 fasta->push_back(dupNames[i], sequence);
694                         }
695                 
696                         m->gobble(in);
697                 }
698                 in.close();
699
700         }
701         catch(exception& e) {
702                 m->errorOut(e, "GetOTURepCommand", "readNamesFile");
703                 exit(1);
704         }
705 }
706 //**********************************************************************************************************************
707 //read names file to find the weighted rep for each bin
708 void GetOTURepCommand::readNamesFile(bool w) {
709         try {
710                 ifstream in;
711                 vector<string> dupNames;
712                 m->openInputFile(namefile, in);
713                 
714                 string name, names, sequence;
715                 
716                 while(!in.eof()){
717                         in >> name;     m->gobble(in);          //read from first column  A
718                         in >> names;                                                    //read from second column  A,B,C,D
719                         
720                         dupNames.clear();
721                         
722                         //parse names into vector
723                         m->splitAtComma(names, dupNames);
724                         
725                         for (int i = 0; i < dupNames.size(); i++) {
726                                 nameFileMap[dupNames[i]] = name;
727                         }
728                         
729                         m->gobble(in);
730                 }
731                 in.close();
732                 
733         }
734         catch(exception& e) {
735                 m->errorOut(e, "GetOTURepCommand", "readNamesFile");
736                 exit(1);
737         }
738 }
739 //**********************************************************************************************************************
740 string GetOTURepCommand::findRep(vector<string> names, string group) {
741         try{
742                 // if only 1 sequence in bin or processing the "unique" label, then 
743                 // the first sequence of the OTU is the representative one
744                 if ((names.size() == 1)) {
745                         return names[0];
746                 }else{
747                         vector<int> seqIndex; //(names.size());
748                         map<string, string>::iterator itNameFile;
749                         map<string, int>::iterator itNameIndex;
750
751                         //fill seqIndex and initialize sums
752                         for (size_t i = 0; i < names.size(); i++) {
753                                 if (weighted) {
754                                         seqIndex.push_back(nameToIndex[names[i]]);
755                     if (countfile != "") {  //if countfile is not blank then we can assume the list file contains only uniques, otherwise we assume list file contains everyone.
756                         int numRep = 0;
757                         if (group != "") {  numRep = ct.getGroupCount(names[i], group);  }
758                         else { numRep = ct.getGroupCount(names[i]);  }
759                         for (int j = 1; j < numRep; j++) { //don't add yourself again
760                             seqIndex.push_back(nameToIndex[names[i]]);
761                         }
762                     }
763                                 }else { 
764                                         if (namefile == "") {
765                                                 itNameIndex = nameToIndex.find(names[i]);
766                                                 
767                                                 if (itNameIndex == nameToIndex.end()) { // you are not in the distance file and no namesfile, then assume you are not unique
768                                                         if (large) {  seqIndex.push_back((rowPositions.size()-1)); }
769                                                         else {  seqIndex.push_back((seqVec.size()-1)); }
770                                                 }else {
771                                                         seqIndex.push_back(itNameIndex->second);
772                                                 }
773                                                 
774                                         }else {
775                                                 itNameFile = nameFileMap.find(names[i]);
776                                                 
777                                                 if (itNameFile == nameFileMap.end()) {
778                                                         m->mothurOut("[ERROR]: " + names[i] + " is not in your namefile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; 
779                                                 }else{
780                                                         string name1 = itNameFile->first;
781                                                         string name2 = itNameFile->second;
782                                                         
783                                                         if (name1 == name2) { //then you are unique so add your real dists
784                                                                 seqIndex.push_back(nameToIndex[names[i]]);
785                                                         }else { //add dummy
786                                                                 if (large) {  seqIndex.push_back((rowPositions.size()-1)); }
787                                                                 else {  seqIndex.push_back((seqVec.size()-1)); }
788                                                         }
789                                                 }
790                                         }
791                                 }
792                         }
793             
794             vector<float> max_dist(seqIndex.size(), 0.0);
795                         vector<float> total_dist(seqIndex.size(), 0.0);
796                         
797                         // loop through all entries in seqIndex
798                         SeqMap::iterator it;
799                         SeqMap currMap;
800                         for (size_t i=0; i < seqIndex.size(); i++) {
801                                 if (m->control_pressed) {  return  "control"; }
802                         
803                                 if (!large) {   currMap = seqVec[seqIndex[i]];  }
804                                 else            {       currMap = getMap(seqIndex[i]);  }
805                                 
806                                 for (size_t j=0; j < seqIndex.size(); j++) {
807                                         it = currMap.find(seqIndex[j]);         
808                                         if (it != currMap.end()) {
809                                                 max_dist[i] = max(max_dist[i], it->second);
810                                                 max_dist[j] = max(max_dist[j], it->second);
811                                                 total_dist[i] += it->second;
812                                                 total_dist[j] += it->second;
813                                         }else{ //if you can't find the distance make it the cutoff
814                                                 max_dist[i] = max(max_dist[i], cutoff);
815                                                 max_dist[j] = max(max_dist[j], cutoff);
816                                                 total_dist[i] += cutoff;
817                                                 total_dist[j] += cutoff;
818                                         }
819                                 }
820                         }
821                         
822                         // sequence with the smallest maximum distance is the representative
823                         //if tie occurs pick sequence with smallest average distance
824                         float min = 10000;
825                         int minIndex;
826                         for (size_t i=0; i < max_dist.size(); i++) {
827                                 if (m->control_pressed) {  return  "control"; }
828                                 if (max_dist[i] < min) {
829                                         min = max_dist[i];
830                                         minIndex = i;
831                                 }else if (max_dist[i] == min) {
832                                         float currentAverage = total_dist[minIndex] / (float) total_dist.size();
833                                         float newAverage = total_dist[i] / (float) total_dist.size();
834                                         
835                                         if (newAverage < currentAverage) {
836                                                 min = max_dist[i];
837                                                 minIndex = i;
838                                         }
839                                 }
840                         }
841                         
842                         return(names[minIndex]);
843                 }
844         }
845         catch(exception& e) {
846                 m->errorOut(e, "GetOTURepCommand", "FindRep");
847                 exit(1);
848         }
849 }
850
851 //**********************************************************************************************************************
852 int GetOTURepCommand::process(ListVector* processList) {
853         try{
854                 string name, sequence;
855                 string nameRep;
856
857                 //create output file
858                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
859                                 
860                 ofstream newNamesOutput;
861                 string outputNamesFile;
862                 map<string, ofstream*> filehandles;
863                 
864                 if (Groups.size() == 0) { //you don't want to use groups
865                         outputNamesFile  = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".";
866             if (countfile == "") { 
867                 outputNamesFile += getOutputFileNameTag("name");
868                 outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile); 
869             }else {
870                 outputNamesFile += getOutputFileNameTag("count");
871                 outputNames.push_back(outputNamesFile); outputTypes["count"].push_back(outputNamesFile); 
872             }
873                         outputNameFiles[outputNamesFile] = processList->getLabel();
874             m->openOutputFile(outputNamesFile, newNamesOutput);
875             newNamesOutput << "noGroup" << endl;
876                 }else{ //you want to use groups
877                         ofstream* temp;
878                         for (int i=0; i<Groups.size(); i++) {
879                                 temp = new ofstream;
880                                 filehandles[Groups[i]] = temp;
881                                 outputNamesFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + Groups[i] + ".";
882                 if (countfile == "") { 
883                     outputNamesFile += getOutputFileNameTag("name");
884                     outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile); 
885                 }else {
886                     outputNamesFile += getOutputFileNameTag("count");
887                     outputNames.push_back(outputNamesFile); outputTypes["count"].push_back(outputNamesFile); 
888                 }
889                                 
890                                 m->openOutputFile(outputNamesFile, *(temp));
891                 *(temp) << Groups[i] << endl;
892                                 outputNameFiles[outputNamesFile] = processList->getLabel() + "." + Groups[i];
893                         }
894                 }
895                 
896                 //for each bin in the list vector
897                 for (int i = 0; i < processList->size(); i++) {
898                         if (m->control_pressed) { 
899                                 out.close();  
900                                 if (Groups.size() == 0) { //you don't want to use groups
901                                         newNamesOutput.close();
902                                 }else{
903                                         for (int j=0; j<Groups.size(); j++) {
904                                                 (*(filehandles[Groups[j]])).close();
905                                                 delete filehandles[Groups[j]];
906                                         }
907                                 }
908                                 return 0; 
909                         }
910                         
911                         string temp = processList->get(i);
912                         vector<string> namesInBin;
913                         m->splitAtComma(temp, namesInBin);
914                         
915                         if (Groups.size() == 0) {
916                                 nameRep = findRep(namesInBin, "");
917                                 newNamesOutput << i << '\t' << nameRep << '\t' << processList->get(i) << endl;
918                         }else{
919                                 map<string, vector<string> > NamesInGroup;
920                                 for (int j=0; j<Groups.size(); j++) { //initialize groups
921                                         NamesInGroup[Groups[j]].resize(0);
922                                 }
923                                 
924                                 for (int j=0; j<namesInBin.size(); j++) {
925                     if (groupfile != "") {
926                         string thisgroup = groupMap->getGroup(namesInBin[j]);
927                         if (thisgroup == "not found") { m->mothurOut(namesInBin[j] + " is not in your groupfile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
928                         
929                         //add this name to correct group
930                         if (m->inUsersGroups(thisgroup, Groups)) { NamesInGroup[thisgroup].push_back(namesInBin[j]);  }
931                     }else {
932                         vector<string> thisSeqsGroups = ct.getGroups(namesInBin[j]);
933                         for (int k = 0; k < thisSeqsGroups.size(); k++) {
934                             if (m->inUsersGroups(thisSeqsGroups[k], Groups)) { NamesInGroup[thisSeqsGroups[k]].push_back(namesInBin[j]);  }
935                         }
936                     }
937                                 }
938                                 
939                                 //get rep for each group in otu
940                                 for (int j=0; j<Groups.size(); j++) {
941                                         if (NamesInGroup[Groups[j]].size() != 0) { //are there members from this group in this otu?
942                                                 //get rep for each group
943                                                 nameRep = findRep(NamesInGroup[Groups[j]], Groups[j]);
944                                                 
945                                                 //output group rep and other members of this group
946                                                 (*(filehandles[Groups[j]])) << i << '\t' << nameRep << '\t';
947                                                 
948                                                 for (int k=0; k<NamesInGroup[Groups[j]].size()-1; k++) {//output list of names in this otu from this group
949                                                         (*(filehandles[Groups[j]])) << NamesInGroup[Groups[j]][k] << ",";
950                                                 }
951                                                 //output last name
952                                                 (*(filehandles[Groups[j]])) << NamesInGroup[Groups[j]][NamesInGroup[Groups[j]].size()-1] << endl;
953                                         }
954                                 }
955                         }
956                 }
957                 
958                 if (Groups.size() == 0) { //you don't want to use groups
959                         newNamesOutput.close();
960                 }else{
961                         for (int i=0; i<Groups.size(); i++) {
962                                 (*(filehandles[Groups[i]])).close();
963                                 delete filehandles[Groups[i]];
964                         }
965                 }
966                 
967                 return 0;
968
969         }
970         catch(exception& e) {
971                 m->errorOut(e, "GetOTURepCommand", "process");
972                 exit(1);
973         }
974 }
975 //**********************************************************************************************************************
976 int GetOTURepCommand::processFastaNames(string filename, string label) {
977         try{
978
979                 //create output file
980                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
981                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + label + "." + getOutputFileNameTag("fasta");
982                 m->openOutputFile(outputFileName, out);
983                 vector<repStruct> reps;
984                 outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
985                 
986                 ofstream out2;
987                 string tempNameFile = filename + ".temp";
988                 m->openOutputFile(tempNameFile, out2);
989             
990                 ifstream in;
991                 m->openInputFile(filename, in);
992                 
993                 int i = 0;
994         string tempGroup = "";
995         in >> tempGroup; m->gobble(in);
996         
997         CountTable thisCt;
998         if (countfile != "") {
999             thisCt.readTable(countfile);
1000             if (tempGroup != "noGroup") { out2 << "Representative_Sequence\ttotal\t" << tempGroup << endl; }
1001         }
1002     
1003         int thistotal = 0;
1004                 while (!in.eof()) {
1005                         string rep, binnames;
1006                         in >> i >> rep >> binnames; m->gobble(in);
1007                         
1008                         vector<string> names;
1009                         m->splitAtComma(binnames, names);
1010                         int binsize = names.size();
1011             
1012             if (countfile == "") { out2 << rep << '\t' << binnames << endl; }
1013             else {
1014                 if (tempGroup == "noGroup") {
1015                     for (int j = 0; j < names.size(); j++) {
1016                         if (names[j] != rep) { thisCt.mergeCounts(rep, names[j]); }
1017                     }
1018                     binsize = thisCt.getNumSeqs(rep);
1019                 }else {
1020                     int total = 0; 
1021                     for (int j = 0; j < names.size(); j++) {  total += thisCt.getGroupCount(names[j], tempGroup);  }
1022                     out2 << rep << '\t' << total << '\t' << total << endl;
1023                     binsize = total;
1024                 }
1025             }
1026                         thistotal += binsize;
1027                         //if you have a groupfile
1028                         string group = "";
1029             map<string, string> groups;
1030             map<string, string>::iterator groupIt;
1031                         if (groupfile != "") {
1032                                 //find the groups that are in this bin
1033                                 for (int i = 0; i < names.size(); i++) {
1034                                         string groupName = groupMap->getGroup(names[i]);
1035                                         if (groupName == "not found") {  
1036                                                 m->mothurOut(names[i] + " is missing from your group file. Please correct. "); m->mothurOutEndLine();
1037                                                 groupError = true;
1038                                         } else {
1039                                                 groups[groupName] = groupName;
1040                                         }
1041                                 }
1042                                 
1043                                 //turn the groups into a string
1044                                 for (groupIt = groups.begin(); groupIt != groups.end(); groupIt++) {
1045                                         group += groupIt->first + "-";
1046                                 }
1047                                 //rip off last dash
1048                                 group = group.substr(0, group.length()-1);
1049                         }else if (hasGroups) {
1050                 map<string, string> groups;
1051                 for (int i = 0; i < names.size(); i++) {
1052                     vector<string> thisSeqsGroups = ct.getGroups(names[i]);
1053                     for (int j = 0; j < thisSeqsGroups.size(); j++) { groups[thisSeqsGroups[j]] = thisSeqsGroups[j]; }
1054                 }
1055                 //turn the groups into a string
1056                                 for (groupIt = groups.begin(); groupIt != groups.end(); groupIt++) {
1057                                         group += groupIt->first + "-";
1058                                 }
1059                                 //rip off last dash
1060                                 group = group.substr(0, group.length()-1);
1061                 //cout << group << endl;
1062             }
1063             else{ group = ""; }
1064
1065                         
1066                         //print out name and sequence for that bin
1067                         string sequence = fasta->getSequence(rep);
1068
1069                         if (sequence != "not found") {
1070                                 if (sorted == "") { //print them out
1071                                         rep = rep + "\t" + toString(i+1);
1072                                         rep = rep + "|" + toString(binsize);
1073                                         if (group != "") {
1074                                                 rep = rep + "|" + group;
1075                                         }
1076                                         out << ">" << rep << endl;
1077                                         out << sequence << endl;
1078                                 }else { //save them
1079                                         repStruct newRep(rep, i+1, binsize, group);
1080                                         reps.push_back(newRep);
1081                                 }
1082                         }else { 
1083                                 m->mothurOut(rep + " is missing from your fasta or name file, ignoring. Please correct."); m->mothurOutEndLine(); 
1084                         }
1085                 }
1086                 
1087                         
1088                 if (sorted != "") { //then sort them and print them
1089                         if (sorted == "name")           {  sort(reps.begin(), reps.end(), compareName);         }
1090                         else if (sorted == "bin")       {  sort(reps.begin(), reps.end(), compareBin);          }
1091                         else if (sorted == "size")      {  sort(reps.begin(), reps.end(), compareSize);         }
1092                         else if (sorted == "group")     {  sort(reps.begin(), reps.end(), compareGroup);        }
1093                         
1094                         //print them
1095                         for (int i = 0; i < reps.size(); i++) {
1096                                 string sequence = fasta->getSequence(reps[i].name);
1097                                 string outputName = reps[i].name + "\t" + toString(reps[i].bin);
1098                                 outputName = outputName + "|" + toString(reps[i].size);
1099                                 if (reps[i].group != "") {
1100                                         outputName = outputName + "|" + reps[i].group;
1101                                 }
1102                                 out << ">" << outputName << endl;
1103                                 out << sequence << endl;
1104                         }
1105                 }
1106                 
1107                 in.close();
1108                 out.close();
1109                 out2.close();
1110                 
1111                 m->mothurRemove(filename);
1112                 rename(tempNameFile.c_str(), filename.c_str());
1113         
1114         if ((countfile != "") && (tempGroup == "noGroup")) { thisCt.printTable(filename); } 
1115                 
1116                 return 0;
1117
1118         }
1119         catch(exception& e) {
1120                 m->errorOut(e, "GetOTURepCommand", "processFastaNames");
1121                 exit(1);
1122         }
1123 }
1124 //**********************************************************************************************************************
1125 int GetOTURepCommand::processNames(string filename, string label) {
1126         try{
1127                 
1128                 //create output file
1129                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
1130                 
1131                 ofstream out2;
1132                 string tempNameFile = filename + ".temp";
1133                 m->openOutputFile(tempNameFile, out2);
1134                 
1135                 ifstream in;
1136                 m->openInputFile(filename, in);
1137                 
1138                 int i = 0;
1139                 string rep, binnames;
1140         
1141         string tempGroup = "";
1142         in >> tempGroup; m->gobble(in);
1143         
1144         CountTable thisCt;
1145         if (countfile != "") {
1146             thisCt.readTable(countfile);
1147             if (tempGroup != "noGroup") { out2 << "Representative_Sequence\ttotal\t" << tempGroup << endl; }
1148         }
1149         
1150                 while (!in.eof()) {
1151                         if (m->control_pressed) { break; }
1152                         in >> i >> rep >> binnames; m->gobble(in);
1153             
1154                         if (countfile == "") { out2 << rep << '\t' << binnames << endl; }
1155             else {
1156                 vector<string> names;
1157                 m->splitAtComma(binnames, names);
1158                 if (tempGroup == "noGroup") {
1159                     for (int j = 0; j < names.size(); j++) {
1160                         if (names[j] != rep) { thisCt.mergeCounts(rep, names[j]); }
1161                     }
1162                 }else {
1163                     int total = 0; 
1164                     for (int j = 0; j < names.size(); j++) {  total += thisCt.getGroupCount(names[j], tempGroup);  }
1165                     out2 << rep << '\t' << total << '\t' << total << endl;
1166                 }
1167             }
1168
1169                 }
1170                 in.close();
1171                 out2.close();
1172                 
1173                 m->mothurRemove(filename);
1174                 rename(tempNameFile.c_str(), filename.c_str());
1175                 
1176         if ((countfile != "") && (tempGroup == "noGroup")) { thisCt.printTable(filename); } 
1177         
1178                 return 0;
1179         }
1180         catch(exception& e) {
1181                 m->errorOut(e, "GetOTURepCommand", "processNames");
1182                 exit(1);
1183         }
1184 }
1185 //**********************************************************************************************************************
1186 SeqMap GetOTURepCommand::getMap(int row) {
1187         try {
1188                 SeqMap rowMap;
1189                 
1190                 //make sure this row exists in the file, it may not if the seq did not have any distances below the cutoff
1191                 if (rowPositions[row] != -1){
1192                         //go to row in file
1193                         inRow.seekg(rowPositions[row]);
1194                         
1195                         int rowNum, numDists, colNum;
1196                         float dist;
1197                         
1198                         inRow >> rowNum >> numDists;
1199                         
1200                         for(int i = 0; i < numDists; i++) {
1201                                 inRow >> colNum >> dist;
1202                                 rowMap[colNum] = dist;
1203                                 
1204                         }
1205                 }
1206                 
1207                 return rowMap;
1208         }
1209         catch(exception& e) {
1210                 m->errorOut(e, "GetOTURepCommand", "getMap");
1211                 exit(1);
1212         }
1213 }
1214 //**********************************************************************************************************************
1215