]> git.donarmstrong.com Git - mothur.git/blob - getoturepcommand.cpp
changing command name classify.shared to classifyrf.shared
[mothur.git] / getoturepcommand.cpp
1 /*
2  *  getoturepcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/6/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "getoturepcommand.h"
11 #include "readphylip.h"
12 #include "readcolumn.h"
13 #include "formatphylip.h"
14 #include "formatcolumn.h"
15 #include "sharedutilities.h"
16
17
18 //********************************************************************************************************************
19 //sorts lowest to highest
20 inline bool compareName(repStruct left, repStruct right){
21         return (left.name < right.name);        
22 }
23 //********************************************************************************************************************
24 //sorts lowest to highest
25 inline bool compareBin(repStruct left, repStruct right){
26         return (left.bin < right.bin);  
27 }
28 //********************************************************************************************************************
29 //sorts lowest to highest
30 inline bool compareSize(repStruct left, repStruct right){
31         return (left.size < right.size);        
32 }
33 //********************************************************************************************************************
34 //sorts lowest to highest
35 inline bool compareGroup(repStruct left, repStruct right){
36         return (left.group < right.group);      
37 }
38
39 //**********************************************************************************************************************
40 vector<string> GetOTURepCommand::setParameters(){       
41         try {
42                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","name",false,true, true); parameters.push_back(plist);
43                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,false, true); parameters.push_back(pfasta);
44                 CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none","",false,false, true); parameters.push_back(pphylip);
45         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName","",false,false, true); parameters.push_back(pname);
46         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "ColumnName","count",false,false, true); parameters.push_back(pcount);
47                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false, true); parameters.push_back(pgroup);
48                 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "ColumnName","",false,false, true); parameters.push_back(pcolumn);
49                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
50                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
51                 CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pcutoff);
52                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision);
53                 CommandParameter pweighted("weighted", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pweighted);
54                 CommandParameter psorted("sorted", "Multiple", "none-name-bin-size-group", "none", "", "", "","",false,false); parameters.push_back(psorted);
55         CommandParameter pmethod("method", "Multiple", "distance-abundance", "distance", "", "", "","",false,false); parameters.push_back(pmethod);
56                 CommandParameter plarge("large", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(plarge);
57                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
58                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
59                 
60                 vector<string> myArray;
61                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
62                 return myArray;
63         }
64         catch(exception& e) {
65                 m->errorOut(e, "GetOTURepCommand", "setParameters");
66                 exit(1);
67         }
68 }
69 //**********************************************************************************************************************
70 string GetOTURepCommand::getHelpString(){       
71         try {
72                 string helpString = "";
73                 helpString += "The get.oturep command parameters are phylip, column, list, fasta, name, group, count, large, weighted, cutoff, precision, groups, sorted, method and label.  The list parameter is required, as well as phylip or column and name if you are using method=distance. If method=abundance a name or count file is required.\n";
74                 helpString += "The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n";
75                 helpString += "The phylip or column parameter is required for method=distance, but only one may be used.  If you use a column file the name or count filename is required. \n";
76         helpString += "The method parameter allows you to select the method of selecting the representative sequence. Choices are distance and abundance.  The distance method finds the sequence with the smallest maximum distance to the other sequences. If tie occurs the sequence with smallest average distance is selected.  The abundance method chooses the most abundant sequence in the OTU as the representative.\n";
77                 helpString += "If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n";
78                 helpString += "The get.oturep command should be in the following format: get.oturep(phylip=yourDistanceMatrix, fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n";
79                 helpString += "Example get.oturep(phylip=amazon.dist, fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups).\n";
80                 helpString += "The default value for label is all labels in your inputfile.\n";
81                 helpString += "The sorted parameter allows you to indicate you want the output sorted. You can sort by sequence name, bin number, bin size or group. The default is no sorting, but your options are name, number, size, or group.\n";
82                 helpString += "The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n";
83                 helpString += "The weighted parameter allows you to indicate that want to find the weighted representative. You must provide a namesfile to set weighted to true.  The default value is false.\n";
84                 helpString += "The representative is found by selecting the sequence that has the smallest total distance to all other sequences in the OTU. If a tie occurs the smallest average distance is used.\n";
85                 helpString += "For weighted = false, mothur assumes the distance file contains only unique sequences, the list file may contain all sequences, but only the uniques are considered to become the representative. If your distance file contains all the sequences it would become weighted=true.\n";
86                 helpString += "For weighted = true, mothur assumes the distance file contains only unique sequences, the list file must contain all sequences, all sequences are considered to become the representative, but unique name will be used in the output for consistency.\n";
87                 helpString += "If your distance file contains all the sequence and you do not provide a name file, the weighted representative will be given, unless your listfile is unique. If you provide a namefile, then you can select weighted or unweighted.\n";
88                 helpString += "The group parameter allows you provide a group file.\n";
89                 helpString += "The groups parameter allows you to indicate that you want representative sequences for each group specified for each OTU, group name should be separated by dashes. ex. groups=A-B-C.\n";
90                 helpString += "The get.oturep command outputs a .fastarep and .rep.names file for each distance you specify, selecting one OTU representative for each bin.\n";
91                 helpString += "If you provide a groupfile, then it also appends the names of the groups present in that bin.\n";
92                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
93                 return helpString;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "GetOTURepCommand", "getHelpString");
97                 exit(1);
98         }
99 }
100 //**********************************************************************************************************************
101 string GetOTURepCommand::getOutputPattern(string type) {
102     try {
103         string pattern = "";
104         
105         if (type == "fasta") {  pattern = "[filename],[tag],rep.fasta-[filename],[tag],[group],rep.fasta"; } 
106         else if (type == "name") {  pattern = "[filename],[tag],rep.names-[filename],[tag],[group],rep.names"; } 
107         else if (type == "count") {  pattern = "[filename],[tag],rep.count_table-[filename],[tag],[group],rep.count_table"; }
108         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
109         
110         return pattern;
111     }
112     catch(exception& e) {
113         m->errorOut(e, "GetOTURepCommand", "getOutputPattern");
114         exit(1);
115     }
116 }
117 //**********************************************************************************************************************
118 GetOTURepCommand::GetOTURepCommand(){   
119         try {
120                 abort = true; calledHelp = true; 
121                 setParameters();
122                 vector<string> tempOutNames;
123                 outputTypes["fasta"] = tempOutNames;
124                 outputTypes["name"] = tempOutNames;
125         outputTypes["count"] = tempOutNames;
126         }
127         catch(exception& e) {
128                 m->errorOut(e, "GetOTURepCommand", "GetOTURepCommand");
129                 exit(1);
130         }
131 }
132 //**********************************************************************************************************************
133 GetOTURepCommand::GetOTURepCommand(string option)  {
134         try{
135                 abort = false; calledHelp = false;   
136                 allLines = 1;
137                                 
138                 //allow user to run help
139                 if (option == "help") { 
140                         help(); abort = true; calledHelp = true;
141                 }else if(option == "citation") { citation(); abort = true; calledHelp = true;
142                 } else {
143                         vector<string> myArray = setParameters();
144                         
145                         OptionParser parser(option);
146                         map<string, string> parameters = parser.getParameters();
147                         
148                         ValidParameters validParameter;
149                         map<string, string>::iterator it;
150                 
151                         //check to make sure all parameters are valid for command
152                         for (it = parameters.begin(); it != parameters.end(); it++) { 
153                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
154                         }
155                         
156                         //initialize outputTypes
157                         vector<string> tempOutNames;
158                         outputTypes["fasta"] = tempOutNames;
159                         outputTypes["name"] = tempOutNames;
160             outputTypes["count"] = tempOutNames;
161                         
162                         //if the user changes the input directory command factory will send this info to us in the output parameter 
163                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
164                         if (inputDir == "not found"){   inputDir = "";          }
165                         else {
166                                 string path;
167                                 it = parameters.find("list");
168                                 //user has given a template file
169                                 if(it != parameters.end()){ 
170                                         path = m->hasPath(it->second);
171                                         //if the user has not given a path then, add inputdir. else leave path alone.
172                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
173                                 }
174                                 
175                                 it = parameters.find("fasta");
176                                 //user has given a template file
177                                 if(it != parameters.end()){ 
178                                         path = m->hasPath(it->second);
179                                         //if the user has not given a path then, add inputdir. else leave path alone.
180                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
181                                 }
182                                 
183                                 it = parameters.find("phylip");
184                                 //user has given a template file
185                                 if(it != parameters.end()){ 
186                                         path = m->hasPath(it->second);
187                                         //if the user has not given a path then, add inputdir. else leave path alone.
188                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
189                                 }
190                                 
191                                 it = parameters.find("column");
192                                 //user has given a template file
193                                 if(it != parameters.end()){ 
194                                         path = m->hasPath(it->second);
195                                         //if the user has not given a path then, add inputdir. else leave path alone.
196                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
197                                 }
198                                 
199                                 it = parameters.find("name");
200                                 //user has given a template file
201                                 if(it != parameters.end()){ 
202                                         path = m->hasPath(it->second);
203                                         //if the user has not given a path then, add inputdir. else leave path alone.
204                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
205                                 }
206                                 
207                                 it = parameters.find("group");
208                                 //user has given a template file
209                                 if(it != parameters.end()){ 
210                                         path = m->hasPath(it->second);
211                                         //if the user has not given a path then, add inputdir. else leave path alone.
212                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
213                                 }
214                 
215                 it = parameters.find("count");
216                                 //user has given a template file
217                                 if(it != parameters.end()){ 
218                                         path = m->hasPath(it->second);
219                                         //if the user has not given a path then, add inputdir. else leave path alone.
220                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
221                                 }
222                         }
223
224                         
225                         //if the user changes the output directory command factory will send this info to us in the output parameter 
226                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
227                         
228                         //check for required parameters
229                         fastafile = validParameter.validFile(parameters, "fasta", true);
230                         if (fastafile == "not found") { fastafile = ""; }
231                         else if (fastafile == "not open") { abort = true; }     
232                         else { m->setFastaFile(fastafile); }
233                 
234                         listfile = validParameter.validFile(parameters, "list", true);
235                         if (listfile == "not found") {                  
236                                 listfile = m->getListFile(); 
237                                 if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
238                                 else {  m->mothurOut("You have no current list file and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
239                         }
240                         else if (listfile == "not open") { abort = true; }      
241                         else { m->setListFile(listfile); }
242                         
243                         phylipfile = validParameter.validFile(parameters, "phylip", true);
244                         if (phylipfile == "not found") { phylipfile = "";  }
245                         else if (phylipfile == "not open") { abort = true; }    
246                         else { distFile = phylipfile; format = "phylip"; m->setPhylipFile(phylipfile);   }
247                         
248                         columnfile = validParameter.validFile(parameters, "column", true);
249                         if (columnfile == "not found") { columnfile = ""; }
250                         else if (columnfile == "not open") { abort = true; }    
251                         else { distFile = columnfile; format = "column";  m->setColumnFile(columnfile); }
252                         
253                         namefile = validParameter.validFile(parameters, "name", true);
254                         if (namefile == "not open") { abort = true; }   
255                         else if (namefile == "not found") { namefile = ""; }
256                         else { m->setNameFile(namefile); }
257             
258             hasGroups = false;
259             countfile = validParameter.validFile(parameters, "count", true);
260                         if (countfile == "not found") { countfile =  "";   }
261                         else if (countfile == "not open") { abort = true; countfile =  ""; }    
262                         else {   
263                 m->setCountTableFile(countfile); 
264                 ct.readTable(countfile, true);
265                 if (ct.hasGroupInfo()) { hasGroups = true; }
266             }
267             
268             groupfile = validParameter.validFile(parameters, "group", true);
269                         if (groupfile == "not open") { groupfile = ""; abort = true; }
270                         else if (groupfile == "not found") { groupfile = ""; }
271                         else { m->setGroupFile(groupfile); }
272                         
273             method = validParameter.validFile(parameters, "method", false);             if (method == "not found"){     method = "distance";    }
274                         if ((method != "distance") && (method != "abundance")) {
275                                 m->mothurOut(method + " is not a valid option for the method parameter. The only options are: distance and abundance, aborting."); m->mothurOutEndLine(); abort = true;
276                         }
277             
278             if (method == "distance") {
279                 if ((phylipfile == "") && (columnfile == "")) { //is there are current file available for either of these?
280                     //give priority to column, then phylip
281                     columnfile = m->getColumnFile();
282                     if (columnfile != "") {  distFile = columnfile; format = "column"; m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
283                     else {
284                         phylipfile = m->getPhylipFile();
285                         if (phylipfile != "") {  distFile = phylipfile; format = "phylip"; m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
286                         else {
287                             m->mothurOut("No valid current files. You must provide a phylip or column file before you can use the get.oturep command."); m->mothurOutEndLine();
288                             abort = true;
289                         }
290                     }
291                 }else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a get.oturep command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
292                 
293                 if (columnfile != "") {
294                     if ((namefile == "") && (countfile == "")) {
295                         namefile = m->getNameFile();
296                         if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
297                         else {
298                             countfile = m->getCountTableFile();
299                             if (countfile != "") {  m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
300                             else {
301                                 m->mothurOut("You need to provide a namefile or countfile if you are going to use the column format."); m->mothurOutEndLine();
302                                 abort = true; 
303                             }   
304                         }       
305                     }
306                 }
307             }else if (method == "abundance") {
308                 if ((namefile == "") && (countfile == "")) {
309                                         namefile = m->getNameFile();
310                                         if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
311                                         else {
312                                                 countfile = m->getCountTableFile();
313                         if (countfile != "") {  m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
314                         else {
315                             m->mothurOut("You need to provide a namefile or countfile if you are going to use the abundance method."); m->mothurOutEndLine();
316                             abort = true;
317                         }
318                                         }
319                                 }
320                 if ((phylipfile != "") || (columnfile != "")) {
321                     m->mothurOut("[WARNING]: A phylip or column file is not needed to use the abundance method, ignoring."); m->mothurOutEndLine();
322                     phylipfile = ""; columnfile = "";
323                 }
324             }
325             
326             if ((namefile != "") && (countfile != "")) {
327                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
328             }
329             
330             if ((groupfile != "") && (countfile != "")) {
331                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
332             }
333
334         
335                         //check for optional parameter and set defaults
336                         // ...at some point should added some additional type checking...
337                         label = validParameter.validFile(parameters, "label", false);                   
338                         if (label == "not found") { label = ""; allLines = 1;  }
339                         else { 
340                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
341                                 else { allLines = 1;  }
342                         }
343                         
344                                                 
345                         sorted = validParameter.validFile(parameters, "sorted", false);         if (sorted == "not found"){     sorted = "";    }
346                         if (sorted == "none") { sorted=""; }
347                         if ((sorted != "") && (sorted != "name") && (sorted != "bin") && (sorted != "size") && (sorted != "group")) {
348                                 m->mothurOut(sorted + " is not a valid option for the sorted parameter. The only options are: name, bin, size and group. I will not sort."); m->mothurOutEndLine();
349                                 sorted = "";
350                         }
351             
352             
353                         
354                         if ((sorted == "group") && ((groupfile == "")&& !hasGroups)) {
355                                 m->mothurOut("You must provide a groupfile or have a count file with group info to sort by group. I will not sort."); m->mothurOutEndLine();
356                                 sorted = "";
357                         }
358                         
359                         groups = validParameter.validFile(parameters, "groups", false);                 
360                         if (groups == "not found") { groups = ""; }
361                         else { 
362                                 if ((groupfile == "") && (!hasGroups)) {
363                                         m->mothurOut("You must provide a groupfile to use groups."); m->mothurOutEndLine();
364                                         abort = true;
365                                 }else { 
366                                         m->splitAtDash(groups, Groups);
367                                 }
368                         }
369                         m->setGroups(Groups);
370                         
371                         string temp = validParameter.validFile(parameters, "large", false);             if (temp == "not found") {      temp = "F";     }
372                         large = m->isTrue(temp);
373                         
374                         temp = validParameter.validFile(parameters, "weighted", false);         if (temp == "not found") {       temp = "f";    }
375                         weighted = m->isTrue(temp);
376                         
377                         if ((weighted) && (namefile == "")) { m->mothurOut("You cannot set weighted to true unless you provide a namesfile."); m->mothurOutEndLine(); abort = true; }
378                         
379                         temp = validParameter.validFile(parameters, "precision", false);                        if (temp == "not found") { temp = "100"; }
380                         m->mothurConvert(temp, precision); 
381                         
382                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "10.0"; }
383                         m->mothurConvert(temp, cutoff); 
384                         cutoff += (5 / (precision * 10.0));
385                 }
386         }
387         catch(exception& e) {
388                 m->errorOut(e, "GetOTURepCommand", "GetOTURepCommand");
389                 exit(1);
390         }
391 }
392
393 //**********************************************************************************************************************
394
395 int GetOTURepCommand::execute(){
396         try {
397         
398                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
399                 int error;
400                 list = NULL;
401                 
402         if (method=="distance") {
403             readDist();
404             if ((!weighted) && (namefile != "")) { readNamesFile(weighted); }
405         }else {
406             //map name -> abundance for use if findRepAbund
407             if (namefile != "") { nameToIndex = m->readNames(namefile); }
408         }
409         
410         if (m->control_pressed) { if (method=="distance") { if (large) {  inRow.close(); m->mothurRemove(distFile);  } }return 0; }
411         
412         if (groupfile != "") {
413             //read in group map info.
414             groupMap = new GroupMap(groupfile);
415             int error = groupMap->readMap();
416             if (error == 1) { delete groupMap; m->mothurOut("Error reading your groupfile. Proceeding without groupfile."); m->mothurOutEndLine(); groupfile = "";  }
417             
418             if (Groups.size() != 0) {
419                 SharedUtil util;
420                 vector<string> gNamesOfGroups = groupMap->getNamesOfGroups();
421                 util.setGroups(Groups, gNamesOfGroups, "getoturep");
422                 groupMap->setNamesOfGroups(gNamesOfGroups);
423             }
424         }else if (hasGroups) {
425             if (Groups.size() != 0) {
426                 SharedUtil util;
427                 vector<string> gNamesOfGroups = ct.getNamesOfGroups();
428                 util.setGroups(Groups, gNamesOfGroups, "getoturep");
429             }
430         }
431         
432         //done with listvector from matrix
433         if (list != NULL) { delete list; }
434         
435         InputData input(listfile, "list");
436         list = input.getListVector();
437         string lastLabel = list->getLabel();
438         
439         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
440         set<string> processedLabels;
441         set<string> userLabels = labels;
442         
443         if (m->control_pressed) { if (method=="distance") {  if (large) {  inRow.close(); m->mothurRemove(distFile);  } }  delete list; return 0; }
444         
445         while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
446             
447             if (allLines == 1 || labels.count(list->getLabel()) == 1){
448                 m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
449                 error = process(list);
450                 if (error == 1) { return 0; } //there is an error in hte input files, abort command
451                 
452                 if (m->control_pressed) {
453                     if (method=="distance") { if (large) {  inRow.close(); m->mothurRemove(distFile);  } }
454                     for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } outputTypes.clear();
455                     delete list; return 0;
456                 }
457                 
458                 processedLabels.insert(list->getLabel());
459                 userLabels.erase(list->getLabel());
460             }
461             
462             if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
463                 string saveLabel = list->getLabel();
464                 
465                 delete list;
466                 list = input.getListVector(lastLabel);
467                 m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
468                 error = process(list);
469                 if (error == 1) { return 0; } //there is an error in hte input files, abort command
470                 
471                 if (m->control_pressed) {
472                     if (method=="distance") { if (large) {  inRow.close(); m->mothurRemove(distFile);  } }
473                     for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } outputTypes.clear();
474                     delete list; return 0;
475                 }
476                 
477                 processedLabels.insert(list->getLabel());
478                 userLabels.erase(list->getLabel());
479                 
480                 //restore real lastlabel to save below
481                 list->setLabel(saveLabel);
482             }
483             
484             lastLabel = list->getLabel();
485             
486             delete list;
487             list = input.getListVector();
488         }
489         
490         //output error messages about any remaining user labels
491         bool needToRun = false;
492         for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {
493             m->mothurOut("Your file does not include the label " + (*it));
494             if (processedLabels.count(lastLabel) != 1) {
495                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
496                 needToRun = true;
497             }else {
498                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
499             }
500         }
501         
502         //run last label if you need to
503         if (needToRun == true)  {
504             if (list != NULL) { delete list;    }
505             list = input.getListVector(lastLabel);
506             m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
507             error = process(list);
508             delete list;
509             if (error == 1) { return 0; } //there is an error in hte input files, abort command
510             
511             if (m->control_pressed) {
512                 if (method=="distance") { if (large) {  inRow.close(); m->mothurRemove(distFile);  } }
513                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } outputTypes.clear();
514                 delete list; return 0;
515             }
516         }
517         
518         //close and remove formatted matrix file
519         if (method=="distance") { if (large) { inRow.close(); m->mothurRemove(distFile); } if (!weighted) { nameFileMap.clear(); } }
520          
521         if (fastafile != "") {
522             //read fastafile
523             FastaMap* fasta = new FastaMap();
524             fasta->readFastaFile(fastafile);
525             
526             //if user gave a namesfile then use it
527             if (namefile != "") {       readNamesFile(fasta);   }
528             
529             //output create and output the .rep.fasta files
530             map<string, string>::iterator itNameFile;
531             for (itNameFile = outputNameFiles.begin(); itNameFile != outputNameFiles.end(); itNameFile++) {
532                 processFastaNames(itNameFile->first, itNameFile->second, fasta);
533             }
534             delete fasta;
535         }else {
536             //output create and output the .rep.fasta files
537             map<string, string>::iterator itNameFile;
538             for (itNameFile = outputNameFiles.begin(); itNameFile != outputNameFiles.end(); itNameFile++) {
539                 processNames(itNameFile->first, itNameFile->second);
540             }
541         }
542         
543         
544         if (groupfile != "") { delete groupMap; }
545                 
546                 if (m->control_pressed) {  return 0; }
547                 
548                 //set fasta file as new current fastafile - use first one??
549                 string current = "";
550                 itTypes = outputTypes.find("fasta");
551                 if (itTypes != outputTypes.end()) {
552                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
553                 }
554                 
555                 itTypes = outputTypes.find("name");
556                 if (itTypes != outputTypes.end()) {
557                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
558                 }
559         
560         itTypes = outputTypes.find("count");
561                 if (itTypes != outputTypes.end()) {
562                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
563                 }
564                 
565                 m->mothurOutEndLine();
566                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
567                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
568                 m->mothurOutEndLine();
569                 
570                 return 0;
571         }
572         catch(exception& e) {
573                 m->errorOut(e, "GetOTURepCommand", "execute");
574                 exit(1);
575         }
576 }
577 //**********************************************************************************************************************
578 int GetOTURepCommand::readDist() {
579         try {
580         
581         if (!large) {
582                         //read distance files
583                         if (format == "column") { readMatrix = new ReadColumnMatrix(distFile); }        
584                         else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(distFile); }
585                         else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0;  }
586                         
587                         readMatrix->setCutoff(cutoff);
588             
589                         NameAssignment* nameMap = NULL;
590             if(namefile != ""){ 
591                 nameMap = new NameAssignment(namefile);
592                 nameMap->readMap();
593                 readMatrix->read(nameMap);
594             }else if (countfile != "") {
595                 readMatrix->read(&ct);
596             }else {
597                readMatrix->read(nameMap); 
598             }
599                         
600                         if (m->control_pressed) { delete readMatrix; return 0; }
601             
602                         list = readMatrix->getListVector();
603                         SparseDistanceMatrix* matrix = readMatrix->getDMatrix();
604                         
605                         // Create a data structure to quickly access the distance information.
606                         // It consists of a vector of distance maps, where each map contains
607                         // all distances of a certain sequence. Vector and maps are accessed
608                         // via the index of a sequence in the distance matrix
609                         seqVec = vector<SeqMap>(list->size()); 
610             for (int i = 0; i < matrix->seqVec.size(); i++) {
611                 for (int j = 0; j < matrix->seqVec[i].size(); j++) {
612                     if (m->control_pressed) { delete readMatrix; return 0; }
613                     //already added everyone else in row
614                     if (i < matrix->seqVec[i][j].index) {  seqVec[i][matrix->seqVec[i][j].index] = matrix->seqVec[i][j].dist;  }
615                 }
616                         }
617                         //add dummy map for unweighted calc
618                         SeqMap dummy;
619                         seqVec.push_back(dummy);
620                         
621                         delete matrix;
622                         delete readMatrix;
623                         delete nameMap;
624                         
625                         if (m->control_pressed) { return 0; }
626                 }else {
627                         //process file and set up indexes
628                         if (format == "column") { formatMatrix = new FormatColumnMatrix(distFile); }    
629                         else if (format == "phylip") { formatMatrix = new FormatPhylipMatrix(distFile); }
630                         else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0;  }
631                         
632                         formatMatrix->setCutoff(cutoff);
633             
634                         NameAssignment* nameMap = NULL;
635             if(namefile != ""){ 
636                 nameMap = new NameAssignment(namefile);
637                 nameMap->readMap();
638                 formatMatrix->read(nameMap);
639             }else if (countfile != "") {
640                 formatMatrix->read(&ct);
641             }else {
642                 formatMatrix->read(nameMap); 
643             }
644                         
645                         if (m->control_pressed) { delete formatMatrix;  return 0; }
646             
647                         list = formatMatrix->getListVector();
648                         distFile = formatMatrix->getFormattedFileName();
649                         
650                         //positions in file where the distances for each sequence begin
651                         //rowPositions[1] = position in file where distance related to sequence 1 start.
652                         rowPositions = formatMatrix->getRowPositions();
653                         rowPositions.push_back(-1); //dummy row for unweighted calc
654                         
655                         delete formatMatrix;
656                         delete nameMap;
657                         
658                         //openfile for getMap to use
659                         m->openInputFile(distFile, inRow);
660                         
661                         if (m->control_pressed) { inRow.close(); m->mothurRemove(distFile); return 0; }
662                 }
663                 
664                 
665                 //list bin 0 = first name read in distance matrix, list bin 1 = second name read in distance matrix
666                 if (list != NULL) {
667                         vector<string> names;
668                         string binnames;
669                         //map names to rows in sparsematrix
670                         for (int i = 0; i < list->size(); i++) {
671                                 names.clear();
672                                 binnames = list->get(i);
673                                 
674                                 m->splitAtComma(binnames, names);
675                                 
676                                 for (int j = 0; j < names.size(); j++) {
677                                         nameToIndex[names[j]] = i;
678                                 }
679                         }
680                 } else { m->mothurOut("error, no listvector."); m->mothurOutEndLine(); }
681
682         if (m->control_pressed) { if (large) {  inRow.close(); m->mothurRemove(distFile);  }return 0; }
683         
684         return 0;
685     }
686         catch(exception& e) {
687                 m->errorOut(e, "GetOTURepCommand", "readDist");
688                 exit(1);
689         }
690 }
691 //**********************************************************************************************************************
692 void GetOTURepCommand::readNamesFile(FastaMap*& fasta) {
693         try {
694                 ifstream in;
695                 vector<string> dupNames;
696                 m->openInputFile(namefile, in);
697                 
698                 string name, names, sequence;
699         
700                 while(!in.eof()){
701                         in >> name;                     //read from first column  A
702                         in >> names;            //read from second column  A,B,C,D
703                         
704                         dupNames.clear();
705                         
706                         //parse names into vector
707                         m->splitAtComma(names, dupNames);
708                         
709                         //store names in fasta map
710                         sequence = fasta->getSequence(name);
711                         for (int i = 0; i < dupNames.size(); i++) {
712                                 fasta->push_back(dupNames[i], sequence);
713                         }
714                 
715                         m->gobble(in);
716                 }
717                 in.close();
718
719         }
720         catch(exception& e) {
721                 m->errorOut(e, "GetOTURepCommand", "readNamesFile");
722                 exit(1);
723         }
724 }
725 //**********************************************************************************************************************
726 //read names file to find the weighted rep for each bin
727 void GetOTURepCommand::readNamesFile(bool w) {
728         try {
729                 ifstream in;
730                 vector<string> dupNames;
731                 m->openInputFile(namefile, in);
732                 
733                 string name, names, sequence;
734                 
735                 while(!in.eof()){
736                         in >> name;     m->gobble(in);          //read from first column  A
737                         in >> names;                                                    //read from second column  A,B,C,D
738                         
739                         dupNames.clear();
740                         
741                         //parse names into vector
742                         m->splitAtComma(names, dupNames);
743                         
744                         for (int i = 0; i < dupNames.size(); i++) {
745                                 nameFileMap[dupNames[i]] = name;
746                         }
747                         
748                         m->gobble(in);
749                 }
750                 in.close();
751                 
752         }
753         catch(exception& e) {
754                 m->errorOut(e, "GetOTURepCommand", "readNamesFile");
755                 exit(1);
756         }
757 }
758 //**********************************************************************************************************************
759 string GetOTURepCommand::findRepAbund(vector<string> names, string group) {
760         try{
761         vector<string> reps;
762         string rep = "notFound";
763         
764         if ((names.size() == 1)) {
765             return names[0];
766         }else{
767             //fill seqIndex and initialize sums
768             int maxAbund = 0;
769             for (int i = 0; i < names.size(); i++) {
770                 
771                 if (m->control_pressed) { return "control"; }
772                 
773                 if (countfile != "") {  //if countfile is not blank then we can assume the list file contains only uniques, otherwise we assume list file contains everyone.
774                     int numRep = 0;
775                     if (group != "") {  numRep = ct.getGroupCount(names[i], group);  }
776                     else { numRep = ct.getGroupCount(names[i]);  }
777                     if (numRep > maxAbund) {
778                         reps.clear();
779                         reps.push_back(names[i]);
780                         maxAbund = numRep;
781                     }else if(numRep == maxAbund) { //tie
782                         reps.push_back(names[i]);
783                     }
784                 }else { //name file used, we assume list file contains all sequences
785                     map<string, int>::iterator itNameMap = nameToIndex.find(names[i]);
786                     if (itNameMap == nameToIndex.end()) {} //assume that this sequence is not a unique
787                     else {
788                         if (itNameMap->second > maxAbund) {
789                             reps.clear();
790                             reps.push_back(names[i]);
791                             maxAbund = itNameMap->second;
792                         }else if(itNameMap->second == maxAbund) { //tie
793                             reps.push_back(names[i]);
794                         }
795                     }
796                 }
797             }
798             
799             if (reps.size() == 0) { m->mothurOut("[ERROR]: no rep found, file mismatch?? Quitting.\n"); m->control_pressed = true; }
800             else if (reps.size() == 1) { rep = reps[0]; }
801             else { //tie
802                 int index = m->getRandomIndex(reps.size()-1);
803                 rep = reps[index];
804             }
805         }
806         
807         return rep;
808     }
809         catch(exception& e) {
810                 m->errorOut(e, "GetOTURepCommand", "findRepAbund");
811                 exit(1);
812         }
813 }
814 //**********************************************************************************************************************
815 string GetOTURepCommand::findRep(vector<string> names, string group) {
816         try{
817         //if using abundance 
818         if (method == "abundance") { return (findRepAbund(names, group)); }
819         else { //find rep based on distance
820             
821             // if only 1 sequence in bin or processing the "unique" label, then
822             // the first sequence of the OTU is the representative one
823             if ((names.size() == 1)) {
824                 return names[0];
825             }else{
826                 vector<int> seqIndex; //(names.size());
827                 map<string, string>::iterator itNameFile;
828                 map<string, int>::iterator itNameIndex;
829                 
830                 //fill seqIndex and initialize sums
831                 for (size_t i = 0; i < names.size(); i++) {
832                     if (weighted) {
833                         seqIndex.push_back(nameToIndex[names[i]]);
834                         if (countfile != "") {  //if countfile is not blank then we can assume the list file contains only uniques, otherwise we assume list file contains everyone.
835                             int numRep = 0;
836                             if (group != "") {  numRep = ct.getGroupCount(names[i], group);  }
837                             else { numRep = ct.getGroupCount(names[i]);  }
838                             for (int j = 1; j < numRep; j++) { //don't add yourself again
839                                 seqIndex.push_back(nameToIndex[names[i]]);
840                             }
841                         }
842                     }else {
843                         if (namefile == "") {
844                             itNameIndex = nameToIndex.find(names[i]);
845                             
846                             if (itNameIndex == nameToIndex.end()) { // you are not in the distance file and no namesfile, then assume you are not unique
847                                 if (large) {  seqIndex.push_back((rowPositions.size()-1)); }
848                                 else {  seqIndex.push_back((seqVec.size()-1)); }
849                             }else {
850                                 seqIndex.push_back(itNameIndex->second);
851                             }
852                             
853                         }else {
854                             itNameFile = nameFileMap.find(names[i]);
855                             
856                             if (itNameFile == nameFileMap.end()) {
857                                 m->mothurOut("[ERROR]: " + names[i] + " is not in your namefile, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
858                             }else{
859                                 string name1 = itNameFile->first;
860                                 string name2 = itNameFile->second;
861                                 
862                                 if (name1 == name2) { //then you are unique so add your real dists
863                                     seqIndex.push_back(nameToIndex[names[i]]);
864                                 }else { //add dummy
865                                     if (large) {  seqIndex.push_back((rowPositions.size()-1)); }
866                                     else {  seqIndex.push_back((seqVec.size()-1)); }
867                                 }
868                             }
869                         }
870                     }
871                 }
872                 
873                 vector<float> max_dist(seqIndex.size(), 0.0);
874                 vector<float> total_dist(seqIndex.size(), 0.0);
875                 
876                 // loop through all entries in seqIndex
877                 SeqMap::iterator it;
878                 SeqMap currMap;
879                 for (size_t i=0; i < seqIndex.size(); i++) {
880                     if (m->control_pressed) {  return  "control"; }
881                     
882                     if (!large) {       currMap = seqVec[seqIndex[i]];  }
883                     else                {       currMap = getMap(seqIndex[i]);  }
884                     
885                     for (size_t j=0; j < seqIndex.size(); j++) {
886                         it = currMap.find(seqIndex[j]);
887                         if (it != currMap.end()) {
888                             max_dist[i] = max(max_dist[i], it->second);
889                             max_dist[j] = max(max_dist[j], it->second);
890                             total_dist[i] += it->second;
891                             total_dist[j] += it->second;
892                         }else{ //if you can't find the distance make it the cutoff
893                             max_dist[i] = max(max_dist[i], cutoff);
894                             max_dist[j] = max(max_dist[j], cutoff);
895                             total_dist[i] += cutoff;
896                             total_dist[j] += cutoff;
897                         }
898                     }
899                 }
900                 
901                 // sequence with the smallest maximum distance is the representative
902                 //if tie occurs pick sequence with smallest average distance
903                 float min = 10000;
904                 int minIndex;
905                 for (size_t i=0; i < max_dist.size(); i++) {
906                     if (m->control_pressed) {  return  "control"; }
907                     if (max_dist[i] < min) {
908                         min = max_dist[i];
909                         minIndex = i;
910                     }else if (max_dist[i] == min) {
911                         float currentAverage = total_dist[minIndex] / (float) total_dist.size();
912                         float newAverage = total_dist[i] / (float) total_dist.size();
913                         
914                         if (newAverage < currentAverage) {
915                             min = max_dist[i];
916                             minIndex = i;
917                         }
918                     }
919                 }
920                 
921                 return(names[minIndex]);
922             }
923         }
924         }
925         catch(exception& e) {
926                 m->errorOut(e, "GetOTURepCommand", "FindRep");
927                 exit(1);
928         }
929 }
930
931 //**********************************************************************************************************************
932 int GetOTURepCommand::process(ListVector* processList) {
933         try{
934                 string name, sequence;
935                 string nameRep;
936
937                 //create output file
938                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
939                                 
940                 ofstream newNamesOutput;
941                 string outputNamesFile;
942                 map<string, ofstream*> filehandles;
943                 
944         map<string, string> variables; 
945         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
946         
947                 if (Groups.size() == 0) { //you don't want to use groups
948             variables["[tag]"] = processList->getLabel();
949             if (countfile == "") { 
950                 outputNamesFile = getOutputFileName("name", variables);
951                 outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile); 
952             }else {
953                 outputNamesFile = getOutputFileName("count", variables);
954                 outputNames.push_back(outputNamesFile); outputTypes["count"].push_back(outputNamesFile); 
955             }
956                         outputNameFiles[outputNamesFile] = processList->getLabel();
957             m->openOutputFile(outputNamesFile, newNamesOutput);
958             newNamesOutput << "noGroup" << endl;
959                 }else{ //you want to use groups
960                         ofstream* temp;
961                         for (int i=0; i<Groups.size(); i++) {
962                                 temp = new ofstream;
963                 variables["[tag]"] = processList->getLabel();
964                 variables["[group]"] = Groups[i];
965                                 filehandles[Groups[i]] = temp;
966                                 outputNamesFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + Groups[i] + ".";
967                 if (countfile == "") { 
968                     outputNamesFile = getOutputFileName("name", variables);
969                     outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile); 
970                 }else {
971                     outputNamesFile = getOutputFileName("count", variables);
972                     outputNames.push_back(outputNamesFile); outputTypes["count"].push_back(outputNamesFile); 
973                 }
974                                 
975                                 m->openOutputFile(outputNamesFile, *(temp));
976                 *(temp) << Groups[i] << endl;
977                                 outputNameFiles[outputNamesFile] = processList->getLabel() + "." + Groups[i];
978                         }
979                 }
980                 
981                 //for each bin in the list vector
982                 for (int i = 0; i < processList->size(); i++) {
983                         if (m->control_pressed) { 
984                                 out.close();  
985                                 if (Groups.size() == 0) { //you don't want to use groups
986                                         newNamesOutput.close();
987                                 }else{
988                                         for (int j=0; j<Groups.size(); j++) {
989                                                 (*(filehandles[Groups[j]])).close();
990                                                 delete filehandles[Groups[j]];
991                                         }
992                                 }
993                                 return 0; 
994                         }
995                         
996                         string temp = processList->get(i);
997                         vector<string> namesInBin;
998                         m->splitAtComma(temp, namesInBin);
999                         
1000                         if (Groups.size() == 0) {
1001                                 nameRep = findRep(namesInBin, "");
1002                                 newNamesOutput << i << '\t' << nameRep << '\t';
1003                 
1004                 //put rep at first position in names line
1005                 string outputString = nameRep + ",";
1006                 for (int k=0; k<namesInBin.size()-1; k++) {//output list of names in this otu
1007                     if (namesInBin[k] != nameRep) { outputString += namesInBin[k] + ","; }
1008                 }
1009                 
1010                 //output last name
1011                 if (namesInBin[namesInBin.size()-1] != nameRep) { outputString += namesInBin[namesInBin.size()-1]; }
1012                 
1013                 if (outputString[outputString.length()-1] == ',') { //rip off comma
1014                     outputString = outputString.substr(0, outputString.length()-1);
1015                 }
1016                 newNamesOutput << outputString << endl;
1017                         }else{
1018                                 map<string, vector<string> > NamesInGroup;
1019                                 for (int j=0; j<Groups.size(); j++) { //initialize groups
1020                                         NamesInGroup[Groups[j]].resize(0);
1021                                 }
1022                                 
1023                                 for (int j=0; j<namesInBin.size(); j++) {
1024                     if (groupfile != "") {
1025                         string thisgroup = groupMap->getGroup(namesInBin[j]);
1026                         if (thisgroup == "not found") { m->mothurOut(namesInBin[j] + " is not in your groupfile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
1027                         
1028                         //add this name to correct group
1029                         if (m->inUsersGroups(thisgroup, Groups)) { NamesInGroup[thisgroup].push_back(namesInBin[j]);  }
1030                     }else {
1031                         vector<string> thisSeqsGroups = ct.getGroups(namesInBin[j]);
1032                         for (int k = 0; k < thisSeqsGroups.size(); k++) {
1033                             if (m->inUsersGroups(thisSeqsGroups[k], Groups)) { NamesInGroup[thisSeqsGroups[k]].push_back(namesInBin[j]);  }
1034                         }
1035                     }
1036                                 }
1037                                 
1038                                 //get rep for each group in otu
1039                                 for (int j=0; j<Groups.size(); j++) {
1040                                         if (NamesInGroup[Groups[j]].size() != 0) { //are there members from this group in this otu?
1041                                                 //get rep for each group
1042                                                 nameRep = findRep(NamesInGroup[Groups[j]], Groups[j]);
1043                                                 
1044                                                 //output group rep and other members of this group
1045                                                 (*(filehandles[Groups[j]])) << i << '\t' << nameRep << '\t';
1046                                                 
1047                         //put rep at first position in names line
1048                         string outputString = nameRep + ",";
1049                         for (int k=0; k<NamesInGroup[Groups[j]].size()-1; k++) {//output list of names in this otu from this group
1050                             if (NamesInGroup[Groups[j]][k] != nameRep) { outputString +=  NamesInGroup[Groups[j]][k] + ","; }
1051                         }
1052                         
1053                         //output last name
1054                         if (NamesInGroup[Groups[j]][NamesInGroup[Groups[j]].size()-1] != nameRep) { outputString += NamesInGroup[Groups[j]][NamesInGroup[Groups[j]].size()-1]; }
1055                         
1056                         if (outputString[outputString.length()-1] == ',') { //rip off comma
1057                             outputString = outputString.substr(0, outputString.length()-1);
1058                         }
1059                         (*(filehandles[Groups[j]])) << outputString << endl;
1060                                         }
1061                                 }
1062                         }
1063                 }
1064                 
1065                 if (Groups.size() == 0) { //you don't want to use groups
1066                         newNamesOutput.close();
1067                 }else{
1068                         for (int i=0; i<Groups.size(); i++) {
1069                                 (*(filehandles[Groups[i]])).close();
1070                                 delete filehandles[Groups[i]];
1071                         }
1072                 }
1073                 
1074                 return 0;
1075
1076         }
1077         catch(exception& e) {
1078                 m->errorOut(e, "GetOTURepCommand", "process");
1079                 exit(1);
1080         }
1081 }
1082 //**********************************************************************************************************************
1083 int GetOTURepCommand::processFastaNames(string filename, string label, FastaMap*& fasta) {
1084         try{
1085
1086                 //create output file
1087                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
1088         map<string, string> variables; 
1089         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
1090         variables["[tag]"] = label;
1091                 string outputFileName = getOutputFileName("fasta",variables);
1092                 m->openOutputFile(outputFileName, out);
1093                 vector<repStruct> reps;
1094                 outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
1095                 
1096                 ofstream out2;
1097                 string tempNameFile = filename + ".temp";
1098                 m->openOutputFile(tempNameFile, out2);
1099             
1100                 ifstream in;
1101                 m->openInputFile(filename, in);
1102                 
1103                 int i = 0;
1104         string tempGroup = "";
1105         in >> tempGroup; m->gobble(in);
1106         
1107         CountTable thisCt;
1108         if (countfile != "") {
1109             thisCt.readTable(countfile, true);
1110             if (tempGroup != "noGroup") { out2 << "Representative_Sequence\ttotal\t" << tempGroup << endl; }
1111         }
1112     
1113         int thistotal = 0;
1114                 while (!in.eof()) {
1115                         string rep, binnames;
1116                         in >> i >> rep >> binnames; m->gobble(in);
1117                         
1118                         vector<string> names;
1119                         m->splitAtComma(binnames, names);
1120                         int binsize = names.size();
1121             
1122             if (countfile == "") { out2 << rep << '\t' << binnames << endl; }
1123             else {
1124                 if (tempGroup == "noGroup") {
1125                     for (int j = 0; j < names.size(); j++) {
1126                         if (names[j] != rep) { thisCt.mergeCounts(rep, names[j]); }
1127                     }
1128                     binsize = thisCt.getNumSeqs(rep);
1129                 }else {
1130                     int total = 0; 
1131                     for (int j = 0; j < names.size(); j++) {  total += thisCt.getGroupCount(names[j], tempGroup);  }
1132                     out2 << rep << '\t' << total << '\t' << total << endl;
1133                     binsize = total;
1134                 }
1135             }
1136                         thistotal += binsize;
1137                         //if you have a groupfile
1138                         string group = "";
1139             map<string, string> groups;
1140             map<string, string>::iterator groupIt;
1141                         if (groupfile != "") {
1142                                 //find the groups that are in this bin
1143                                 for (int i = 0; i < names.size(); i++) {
1144                                         string groupName = groupMap->getGroup(names[i]);
1145                                         if (groupName == "not found") {  
1146                                                 m->mothurOut(names[i] + " is missing from your group file. Please correct. "); m->mothurOutEndLine();
1147                                                 groupError = true;
1148                                         } else {
1149                                                 groups[groupName] = groupName;
1150                                         }
1151                                 }
1152                                 
1153                                 //turn the groups into a string
1154                                 for (groupIt = groups.begin(); groupIt != groups.end(); groupIt++) {
1155                                         group += groupIt->first + "-";
1156                                 }
1157                                 //rip off last dash
1158                                 group = group.substr(0, group.length()-1);
1159                         }else if (hasGroups) {
1160                 map<string, string> groups;
1161                 for (int i = 0; i < names.size(); i++) {
1162                     vector<string> thisSeqsGroups = ct.getGroups(names[i]);
1163                     for (int j = 0; j < thisSeqsGroups.size(); j++) { groups[thisSeqsGroups[j]] = thisSeqsGroups[j]; }
1164                 }
1165                 //turn the groups into a string
1166                                 for (groupIt = groups.begin(); groupIt != groups.end(); groupIt++) {
1167                                         group += groupIt->first + "-";
1168                                 }
1169                                 //rip off last dash
1170                                 group = group.substr(0, group.length()-1);
1171                 //cout << group << endl;
1172             }
1173             else{ group = ""; }
1174
1175                         
1176                         //print out name and sequence for that bin
1177                         string sequence = fasta->getSequence(rep);
1178
1179                         if (sequence != "not found") {
1180                                 if (sorted == "") { //print them out
1181                                         rep = rep + "\t" + toString(i+1);
1182                                         rep = rep + "|" + toString(binsize);
1183                                         if (group != "") {
1184                                                 rep = rep + "|" + group;
1185                                         }
1186                                         out << ">" << rep << endl;
1187                                         out << sequence << endl;
1188                                 }else { //save them
1189                                         repStruct newRep(rep, i+1, binsize, group);
1190                                         reps.push_back(newRep);
1191                                 }
1192                         }else { 
1193                                 m->mothurOut(rep + " is missing from your fasta or name file, ignoring. Please correct."); m->mothurOutEndLine(); 
1194                         }
1195                 }
1196                 
1197                         
1198                 if (sorted != "") { //then sort them and print them
1199                         if (sorted == "name")           {  sort(reps.begin(), reps.end(), compareName);         }
1200                         else if (sorted == "bin")       {  sort(reps.begin(), reps.end(), compareBin);          }
1201                         else if (sorted == "size")      {  sort(reps.begin(), reps.end(), compareSize);         }
1202                         else if (sorted == "group")     {  sort(reps.begin(), reps.end(), compareGroup);        }
1203                         
1204                         //print them
1205                         for (int i = 0; i < reps.size(); i++) {
1206                                 string sequence = fasta->getSequence(reps[i].name);
1207                                 string outputName = reps[i].name + "\t" + toString(reps[i].bin);
1208                                 outputName = outputName + "|" + toString(reps[i].size);
1209                                 if (reps[i].group != "") {
1210                                         outputName = outputName + "|" + reps[i].group;
1211                                 }
1212                                 out << ">" << outputName << endl;
1213                                 out << sequence << endl;
1214                         }
1215                 }
1216                 
1217                 in.close();
1218                 out.close();
1219                 out2.close();
1220                 
1221                 m->mothurRemove(filename);
1222                 rename(tempNameFile.c_str(), filename.c_str());
1223         
1224         if ((countfile != "") && (tempGroup == "noGroup")) { thisCt.printTable(filename); } 
1225                 
1226                 return 0;
1227
1228         }
1229         catch(exception& e) {
1230                 m->errorOut(e, "GetOTURepCommand", "processFastaNames");
1231                 exit(1);
1232         }
1233 }
1234 //**********************************************************************************************************************
1235 int GetOTURepCommand::processNames(string filename, string label) {
1236         try{
1237                 
1238                 //create output file
1239                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
1240                 
1241                 ofstream out2;
1242                 string tempNameFile = filename + ".temp";
1243                 m->openOutputFile(tempNameFile, out2);
1244                 
1245                 ifstream in;
1246                 m->openInputFile(filename, in);
1247                 
1248                 int i = 0;
1249                 string rep, binnames;
1250         
1251         string tempGroup = "";
1252         in >> tempGroup; m->gobble(in);
1253         
1254         CountTable thisCt;
1255         if (countfile != "") {
1256             thisCt.readTable(countfile, true);
1257             if (tempGroup != "noGroup") { out2 << "Representative_Sequence\ttotal\t" << tempGroup << endl; }
1258         }
1259         
1260                 while (!in.eof()) {
1261                         if (m->control_pressed) { break; }
1262                         in >> i >> rep >> binnames; m->gobble(in);
1263             
1264                         if (countfile == "") { out2 << rep << '\t' << binnames << endl; }
1265             else {
1266                 vector<string> names;
1267                 m->splitAtComma(binnames, names);
1268                 if (tempGroup == "noGroup") {
1269                     for (int j = 0; j < names.size(); j++) {
1270                         if (names[j] != rep) { thisCt.mergeCounts(rep, names[j]); }
1271                     }
1272                 }else {
1273                     int total = 0; 
1274                     for (int j = 0; j < names.size(); j++) {  total += thisCt.getGroupCount(names[j], tempGroup);  }
1275                     out2 << rep << '\t' << total << '\t' << total << endl;
1276                 }
1277             }
1278
1279                 }
1280                 in.close();
1281                 out2.close();
1282                 
1283                 m->mothurRemove(filename);
1284                 rename(tempNameFile.c_str(), filename.c_str());
1285                 
1286         if ((countfile != "") && (tempGroup == "noGroup")) { thisCt.printTable(filename); } 
1287         
1288                 return 0;
1289         }
1290         catch(exception& e) {
1291                 m->errorOut(e, "GetOTURepCommand", "processNames");
1292                 exit(1);
1293         }
1294 }
1295 //**********************************************************************************************************************
1296 SeqMap GetOTURepCommand::getMap(int row) {
1297         try {
1298                 SeqMap rowMap;
1299                 
1300                 //make sure this row exists in the file, it may not if the seq did not have any distances below the cutoff
1301                 if (rowPositions[row] != -1){
1302                         //go to row in file
1303                         inRow.seekg(rowPositions[row]);
1304                         
1305                         int rowNum, numDists, colNum;
1306                         float dist;
1307                         
1308                         inRow >> rowNum >> numDists;
1309                         
1310                         for(int i = 0; i < numDists; i++) {
1311                                 inRow >> colNum >> dist;
1312                                 rowMap[colNum] = dist;
1313                                 
1314                         }
1315                 }
1316                 
1317                 return rowMap;
1318         }
1319         catch(exception& e) {
1320                 m->errorOut(e, "GetOTURepCommand", "getMap");
1321                 exit(1);
1322         }
1323 }
1324 //**********************************************************************************************************************
1325