]> git.donarmstrong.com Git - mothur.git/blob - getoturepcommand.cpp
working on pam
[mothur.git] / getoturepcommand.cpp
1 /*
2  *  getoturepcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/6/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "getoturepcommand.h"
11 #include "readphylip.h"
12 #include "readcolumn.h"
13 #include "formatphylip.h"
14 #include "formatcolumn.h"
15 #include "sharedutilities.h"
16
17
18 //********************************************************************************************************************
19 //sorts lowest to highest
20 inline bool compareName(repStruct left, repStruct right){
21         return (left.name < right.name);        
22 }
23 //********************************************************************************************************************
24 //sorts lowest to highest
25 inline bool compareBin(repStruct left, repStruct right){
26         return (left.simpleBin < right.simpleBin);
27 }
28 //********************************************************************************************************************
29 //sorts lowest to highest
30 inline bool compareSize(repStruct left, repStruct right){
31         return (left.size < right.size);        
32 }
33 //********************************************************************************************************************
34 //sorts lowest to highest
35 inline bool compareGroup(repStruct left, repStruct right){
36         return (left.group < right.group);      
37 }
38
39 //**********************************************************************************************************************
40 vector<string> GetOTURepCommand::setParameters(){       
41         try {
42                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","name",false,true, true); parameters.push_back(plist);
43                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,false, true); parameters.push_back(pfasta);
44                 CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none","",false,false, true); parameters.push_back(pphylip);
45         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName","",false,false, true); parameters.push_back(pname);
46         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "ColumnName","count",false,false, true); parameters.push_back(pcount);
47                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false, true); parameters.push_back(pgroup);
48                 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "ColumnName","",false,false, true); parameters.push_back(pcolumn);
49                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
50                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
51                 CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pcutoff);
52                 CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision);
53                 CommandParameter pweighted("weighted", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pweighted);
54                 CommandParameter psorted("sorted", "Multiple", "none-name-bin-size-group", "none", "", "", "","",false,false); parameters.push_back(psorted);
55         CommandParameter pmethod("method", "Multiple", "distance-abundance", "distance", "", "", "","",false,false); parameters.push_back(pmethod);
56                 CommandParameter plarge("large", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(plarge);
57                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
58                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
59                 
60                 vector<string> myArray;
61                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
62                 return myArray;
63         }
64         catch(exception& e) {
65                 m->errorOut(e, "GetOTURepCommand", "setParameters");
66                 exit(1);
67         }
68 }
69 //**********************************************************************************************************************
70 string GetOTURepCommand::getHelpString(){       
71         try {
72                 string helpString = "";
73                 helpString += "The get.oturep command parameters are phylip, column, list, fasta, name, group, count, large, weighted, cutoff, precision, groups, sorted, method and label.  The list parameter is required, as well as phylip or column and name if you are using method=distance. If method=abundance a name or count file is required.\n";
74                 helpString += "The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n";
75                 helpString += "The phylip or column parameter is required for method=distance, but only one may be used.  If you use a column file the name or count filename is required. \n";
76         helpString += "The method parameter allows you to select the method of selecting the representative sequence. Choices are distance and abundance.  The distance method finds the sequence with the smallest maximum distance to the other sequences. If tie occurs the sequence with smallest average distance is selected.  The abundance method chooses the most abundant sequence in the OTU as the representative.\n";
77                 helpString += "If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n";
78                 helpString += "The get.oturep command should be in the following format: get.oturep(phylip=yourDistanceMatrix, fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n";
79                 helpString += "Example get.oturep(phylip=amazon.dist, fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups).\n";
80                 helpString += "The default value for label is all labels in your inputfile.\n";
81                 helpString += "The sorted parameter allows you to indicate you want the output sorted. You can sort by sequence name, bin number, bin size or group. The default is no sorting, but your options are name, number, size, or group.\n";
82                 helpString += "The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n";
83                 helpString += "The weighted parameter allows you to indicate that want to find the weighted representative. You must provide a namesfile to set weighted to true.  The default value is false.\n";
84                 helpString += "The representative is found by selecting the sequence that has the smallest total distance to all other sequences in the OTU. If a tie occurs the smallest average distance is used.\n";
85                 helpString += "For weighted = false, mothur assumes the distance file contains only unique sequences, the list file may contain all sequences, but only the uniques are considered to become the representative. If your distance file contains all the sequences it would become weighted=true.\n";
86                 helpString += "For weighted = true, mothur assumes the distance file contains only unique sequences, the list file must contain all sequences, all sequences are considered to become the representative, but unique name will be used in the output for consistency.\n";
87                 helpString += "If your distance file contains all the sequence and you do not provide a name file, the weighted representative will be given, unless your listfile is unique. If you provide a namefile, then you can select weighted or unweighted.\n";
88                 helpString += "The group parameter allows you provide a group file.\n";
89                 helpString += "The groups parameter allows you to indicate that you want representative sequences for each group specified for each OTU, group name should be separated by dashes. ex. groups=A-B-C.\n";
90                 helpString += "The get.oturep command outputs a .fastarep and .rep.names file for each distance you specify, selecting one OTU representative for each bin.\n";
91                 helpString += "If you provide a groupfile, then it also appends the names of the groups present in that bin.\n";
92                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
93                 return helpString;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "GetOTURepCommand", "getHelpString");
97                 exit(1);
98         }
99 }
100 //**********************************************************************************************************************
101 string GetOTURepCommand::getOutputPattern(string type) {
102     try {
103         string pattern = "";
104         
105         if (type == "fasta") {  pattern = "[filename],[tag],rep.fasta-[filename],[tag],[group],rep.fasta"; } 
106         else if (type == "name") {  pattern = "[filename],[tag],rep.names-[filename],[tag],[group],rep.names"; } 
107         else if (type == "count") {  pattern = "[filename],[tag],rep.count_table-[filename],[tag],[group],rep.count_table"; }
108         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
109         
110         return pattern;
111     }
112     catch(exception& e) {
113         m->errorOut(e, "GetOTURepCommand", "getOutputPattern");
114         exit(1);
115     }
116 }
117 //**********************************************************************************************************************
118 GetOTURepCommand::GetOTURepCommand(){   
119         try {
120                 abort = true; calledHelp = true; 
121                 setParameters();
122                 vector<string> tempOutNames;
123                 outputTypes["fasta"] = tempOutNames;
124                 outputTypes["name"] = tempOutNames;
125         outputTypes["count"] = tempOutNames;
126         }
127         catch(exception& e) {
128                 m->errorOut(e, "GetOTURepCommand", "GetOTURepCommand");
129                 exit(1);
130         }
131 }
132 //**********************************************************************************************************************
133 GetOTURepCommand::GetOTURepCommand(string option)  {
134         try{
135                 abort = false; calledHelp = false;   
136                 allLines = 1;
137                                 
138                 //allow user to run help
139                 if (option == "help") { 
140                         help(); abort = true; calledHelp = true;
141                 }else if(option == "citation") { citation(); abort = true; calledHelp = true;
142                 } else {
143                         vector<string> myArray = setParameters();
144                         
145                         OptionParser parser(option);
146                         map<string, string> parameters = parser.getParameters();
147                         
148                         ValidParameters validParameter;
149                         map<string, string>::iterator it;
150                 
151                         //check to make sure all parameters are valid for command
152                         for (it = parameters.begin(); it != parameters.end(); it++) { 
153                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
154                         }
155                         
156                         //initialize outputTypes
157                         vector<string> tempOutNames;
158                         outputTypes["fasta"] = tempOutNames;
159                         outputTypes["name"] = tempOutNames;
160             outputTypes["count"] = tempOutNames;
161                         
162                         //if the user changes the input directory command factory will send this info to us in the output parameter 
163                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
164                         if (inputDir == "not found"){   inputDir = "";          }
165                         else {
166                                 string path;
167                                 it = parameters.find("list");
168                                 //user has given a template file
169                                 if(it != parameters.end()){ 
170                                         path = m->hasPath(it->second);
171                                         //if the user has not given a path then, add inputdir. else leave path alone.
172                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
173                                 }
174                                 
175                                 it = parameters.find("fasta");
176                                 //user has given a template file
177                                 if(it != parameters.end()){ 
178                                         path = m->hasPath(it->second);
179                                         //if the user has not given a path then, add inputdir. else leave path alone.
180                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
181                                 }
182                                 
183                                 it = parameters.find("phylip");
184                                 //user has given a template file
185                                 if(it != parameters.end()){ 
186                                         path = m->hasPath(it->second);
187                                         //if the user has not given a path then, add inputdir. else leave path alone.
188                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
189                                 }
190                                 
191                                 it = parameters.find("column");
192                                 //user has given a template file
193                                 if(it != parameters.end()){ 
194                                         path = m->hasPath(it->second);
195                                         //if the user has not given a path then, add inputdir. else leave path alone.
196                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
197                                 }
198                                 
199                                 it = parameters.find("name");
200                                 //user has given a template file
201                                 if(it != parameters.end()){ 
202                                         path = m->hasPath(it->second);
203                                         //if the user has not given a path then, add inputdir. else leave path alone.
204                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
205                                 }
206                                 
207                                 it = parameters.find("group");
208                                 //user has given a template file
209                                 if(it != parameters.end()){ 
210                                         path = m->hasPath(it->second);
211                                         //if the user has not given a path then, add inputdir. else leave path alone.
212                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
213                                 }
214                 
215                 it = parameters.find("count");
216                                 //user has given a template file
217                                 if(it != parameters.end()){ 
218                                         path = m->hasPath(it->second);
219                                         //if the user has not given a path then, add inputdir. else leave path alone.
220                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
221                                 }
222                         }
223
224                         
225                         //if the user changes the output directory command factory will send this info to us in the output parameter 
226                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
227                         
228                         //check for required parameters
229                         fastafile = validParameter.validFile(parameters, "fasta", true);
230                         if (fastafile == "not found") { fastafile = ""; }
231                         else if (fastafile == "not open") { abort = true; }     
232                         else { m->setFastaFile(fastafile); }
233                 
234                         listfile = validParameter.validFile(parameters, "list", true);
235                         if (listfile == "not found") {                  
236                                 listfile = m->getListFile(); 
237                                 if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
238                                 else {  m->mothurOut("You have no current list file and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
239                         }
240                         else if (listfile == "not open") { abort = true; }      
241                         else { m->setListFile(listfile); }
242                         
243                         phylipfile = validParameter.validFile(parameters, "phylip", true);
244                         if (phylipfile == "not found") { phylipfile = "";  }
245                         else if (phylipfile == "not open") { abort = true; }    
246                         else { distFile = phylipfile; format = "phylip"; m->setPhylipFile(phylipfile);   }
247                         
248                         columnfile = validParameter.validFile(parameters, "column", true);
249                         if (columnfile == "not found") { columnfile = ""; }
250                         else if (columnfile == "not open") { abort = true; }    
251                         else { distFile = columnfile; format = "column";  m->setColumnFile(columnfile); }
252                         
253                         namefile = validParameter.validFile(parameters, "name", true);
254                         if (namefile == "not open") { abort = true; }   
255                         else if (namefile == "not found") { namefile = ""; }
256                         else { m->setNameFile(namefile); }
257             
258             hasGroups = false;
259             countfile = validParameter.validFile(parameters, "count", true);
260                         if (countfile == "not found") { countfile =  "";   }
261                         else if (countfile == "not open") { abort = true; countfile =  ""; }    
262                         else {   
263                 m->setCountTableFile(countfile); 
264                 ct.readTable(countfile, true, false);
265                 if (ct.hasGroupInfo()) { hasGroups = true; }
266             }
267             
268             groupfile = validParameter.validFile(parameters, "group", true);
269                         if (groupfile == "not open") { groupfile = ""; abort = true; }
270                         else if (groupfile == "not found") { groupfile = ""; }
271                         else { m->setGroupFile(groupfile); }
272                         
273             method = validParameter.validFile(parameters, "method", false);             if (method == "not found"){     method = "distance";    }
274                         if ((method != "distance") && (method != "abundance")) {
275                                 m->mothurOut(method + " is not a valid option for the method parameter. The only options are: distance and abundance, aborting."); m->mothurOutEndLine(); abort = true;
276                         }
277             
278             if (method == "distance") {
279                 if ((phylipfile == "") && (columnfile == "")) { //is there are current file available for either of these?
280                     //give priority to column, then phylip
281                     columnfile = m->getColumnFile();
282                     if (columnfile != "") {  distFile = columnfile; format = "column"; m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
283                     else {
284                         phylipfile = m->getPhylipFile();
285                         if (phylipfile != "") {  distFile = phylipfile; format = "phylip"; m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
286                         else {
287                             m->mothurOut("No valid current files. You must provide a phylip or column file before you can use the get.oturep command."); m->mothurOutEndLine();
288                             abort = true;
289                         }
290                     }
291                 }else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a get.oturep command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
292                 
293                 if (columnfile != "") {
294                     if ((namefile == "") && (countfile == "")) {
295                         namefile = m->getNameFile();
296                         if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
297                         else {
298                             countfile = m->getCountTableFile();
299                             if (countfile != "") {  m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
300                             else {
301                                 m->mothurOut("You need to provide a namefile or countfile if you are going to use the column format."); m->mothurOutEndLine();
302                                 abort = true; 
303                             }   
304                         }       
305                     }
306                 }
307             }else if (method == "abundance") {
308                 if ((namefile == "") && (countfile == "")) {
309                                         namefile = m->getNameFile();
310                                         if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
311                                         else {
312                                                 countfile = m->getCountTableFile();
313                         if (countfile != "") {  m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
314                         else {
315                             m->mothurOut("You need to provide a namefile or countfile if you are going to use the abundance method."); m->mothurOutEndLine();
316                             abort = true;
317                         }
318                                         }
319                                 }
320                 if ((phylipfile != "") || (columnfile != "")) {
321                     m->mothurOut("[WARNING]: A phylip or column file is not needed to use the abundance method, ignoring."); m->mothurOutEndLine();
322                     phylipfile = ""; columnfile = "";
323                 }
324             }
325             
326             if ((namefile != "") && (countfile != "")) {
327                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
328             }
329             
330             if ((groupfile != "") && (countfile != "")) {
331                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
332             }
333
334         
335                         //check for optional parameter and set defaults
336                         // ...at some point should added some additional type checking...
337                         label = validParameter.validFile(parameters, "label", false);                   
338                         if (label == "not found") { label = ""; allLines = 1;  }
339                         else { 
340                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
341                                 else { allLines = 1;  }
342                         }
343                         
344                                                 
345                         sorted = validParameter.validFile(parameters, "sorted", false);         if (sorted == "not found"){     sorted = "";    }
346                         if (sorted == "none") { sorted=""; }
347                         if ((sorted != "") && (sorted != "name") && (sorted != "bin") && (sorted != "size") && (sorted != "group")) {
348                                 m->mothurOut(sorted + " is not a valid option for the sorted parameter. The only options are: name, bin, size and group. I will not sort."); m->mothurOutEndLine();
349                                 sorted = "";
350                         }
351             
352             
353                         
354                         if ((sorted == "group") && ((groupfile == "")&& !hasGroups)) {
355                                 m->mothurOut("You must provide a groupfile or have a count file with group info to sort by group. I will not sort."); m->mothurOutEndLine();
356                                 sorted = "";
357                         }
358                         
359                         groups = validParameter.validFile(parameters, "groups", false);                 
360                         if (groups == "not found") { groups = ""; }
361                         else { 
362                                 if ((groupfile == "") && (!hasGroups)) {
363                                         m->mothurOut("You must provide a groupfile to use groups."); m->mothurOutEndLine();
364                                         abort = true;
365                                 }else { 
366                                         m->splitAtDash(groups, Groups);
367                                 }
368                         }
369                         m->setGroups(Groups);
370                         
371                         string temp = validParameter.validFile(parameters, "large", false);             if (temp == "not found") {      temp = "F";     }
372                         large = m->isTrue(temp);
373                         
374                         temp = validParameter.validFile(parameters, "weighted", false);         if (temp == "not found") {       temp = "f";    }
375                         weighted = m->isTrue(temp);
376                         
377                         if ((weighted) && (namefile == "")) { m->mothurOut("You cannot set weighted to true unless you provide a namesfile."); m->mothurOutEndLine(); abort = true; }
378                         
379                         temp = validParameter.validFile(parameters, "precision", false);                        if (temp == "not found") { temp = "100"; }
380                         m->mothurConvert(temp, precision); 
381                         
382                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "10.0"; }
383                         m->mothurConvert(temp, cutoff); 
384                         cutoff += (5 / (precision * 10.0));
385                 }
386         }
387         catch(exception& e) {
388                 m->errorOut(e, "GetOTURepCommand", "GetOTURepCommand");
389                 exit(1);
390         }
391 }
392
393 //**********************************************************************************************************************
394
395 int GetOTURepCommand::execute(){
396         try {
397         
398                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
399                 int error;
400                 list = NULL;
401                 
402         if (method=="distance") {
403             readDist();
404             if ((!weighted) && (namefile != "")) { readNamesFile(weighted); }
405         }else {
406             //map name -> abundance for use if findRepAbund
407             if (namefile != "") { nameToIndex = m->readNames(namefile); }
408         }
409         
410         if (m->control_pressed) { if (method=="distance") { if (large) {  inRow.close(); m->mothurRemove(distFile);  } }return 0; }
411         
412         if (groupfile != "") {
413             //read in group map info.
414             groupMap = new GroupMap(groupfile);
415             int error = groupMap->readMap();
416             if (error == 1) { delete groupMap; m->mothurOut("Error reading your groupfile. Proceeding without groupfile."); m->mothurOutEndLine(); groupfile = "";  }
417             
418             if (Groups.size() != 0) {
419                 SharedUtil util;
420                 vector<string> gNamesOfGroups = groupMap->getNamesOfGroups();
421                 util.setGroups(Groups, gNamesOfGroups, "getoturep");
422                 groupMap->setNamesOfGroups(gNamesOfGroups);
423             }
424         }else if (hasGroups) {
425             if (Groups.size() != 0) {
426                 SharedUtil util;
427                 vector<string> gNamesOfGroups = ct.getNamesOfGroups();
428                 util.setGroups(Groups, gNamesOfGroups, "getoturep");
429             }
430         }
431         
432         //done with listvector from matrix
433         if (list != NULL) { delete list; }
434         
435         InputData input(listfile, "list");
436         list = input.getListVector();
437         string lastLabel = list->getLabel();
438         
439         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
440         set<string> processedLabels;
441         set<string> userLabels = labels;
442         
443         if (m->control_pressed) { if (method=="distance") {  if (large) {  inRow.close(); m->mothurRemove(distFile);  } }  delete list; return 0; }
444         
445         while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
446             
447             if (allLines == 1 || labels.count(list->getLabel()) == 1){
448                 m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
449                 error = process(list);
450                 if (error == 1) { return 0; } //there is an error in hte input files, abort command
451                 
452                 if (m->control_pressed) {
453                     if (method=="distance") { if (large) {  inRow.close(); m->mothurRemove(distFile);  } }
454                     for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } outputTypes.clear();
455                     delete list; return 0;
456                 }
457                 
458                 processedLabels.insert(list->getLabel());
459                 userLabels.erase(list->getLabel());
460             }
461             
462             if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
463                 string saveLabel = list->getLabel();
464                 
465                 delete list;
466                 list = input.getListVector(lastLabel);
467                 m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
468                 error = process(list);
469                 if (error == 1) { return 0; } //there is an error in hte input files, abort command
470                 
471                 if (m->control_pressed) {
472                     if (method=="distance") { if (large) {  inRow.close(); m->mothurRemove(distFile);  } }
473                     for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } outputTypes.clear();
474                     delete list; return 0;
475                 }
476                 
477                 processedLabels.insert(list->getLabel());
478                 userLabels.erase(list->getLabel());
479                 
480                 //restore real lastlabel to save below
481                 list->setLabel(saveLabel);
482             }
483             
484             lastLabel = list->getLabel();
485             
486             delete list;
487             list = input.getListVector();
488         }
489         
490         //output error messages about any remaining user labels
491         bool needToRun = false;
492         for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {
493             m->mothurOut("Your file does not include the label " + (*it));
494             if (processedLabels.count(lastLabel) != 1) {
495                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
496                 needToRun = true;
497             }else {
498                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
499             }
500         }
501         
502         //run last label if you need to
503         if (needToRun == true)  {
504             if (list != NULL) { delete list;    }
505             list = input.getListVector(lastLabel);
506             m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
507             error = process(list);
508             delete list;
509             if (error == 1) { return 0; } //there is an error in hte input files, abort command
510             
511             if (m->control_pressed) {
512                 if (method=="distance") { if (large) {  inRow.close(); m->mothurRemove(distFile);  } }
513                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } outputTypes.clear();
514                 delete list; return 0;
515             }
516         }
517         
518         //close and remove formatted matrix file
519         if (method=="distance") { if (large) { inRow.close(); m->mothurRemove(distFile); } if (!weighted) { nameFileMap.clear(); } }
520          
521         if (fastafile != "") {
522             //read fastafile
523             FastaMap* fasta = new FastaMap();
524             fasta->readFastaFile(fastafile);
525             
526             //if user gave a namesfile then use it
527             if (namefile != "") {       readNamesFile(fasta);   }
528             
529             //output create and output the .rep.fasta files
530             map<string, string>::iterator itNameFile;
531             for (itNameFile = outputNameFiles.begin(); itNameFile != outputNameFiles.end(); itNameFile++) {
532                 processFastaNames(itNameFile->first, itNameFile->second, fasta);
533             }
534             delete fasta;
535         }else {
536             //output create and output the .rep.fasta files
537             map<string, string>::iterator itNameFile;
538             for (itNameFile = outputNameFiles.begin(); itNameFile != outputNameFiles.end(); itNameFile++) {
539                 processNames(itNameFile->first, itNameFile->second);
540             }
541         }
542         
543         
544         if (groupfile != "") { delete groupMap; }
545                 
546                 if (m->control_pressed) {  return 0; }
547                 
548                 //set fasta file as new current fastafile - use first one??
549                 string current = "";
550                 itTypes = outputTypes.find("fasta");
551                 if (itTypes != outputTypes.end()) {
552                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
553                 }
554                 
555                 itTypes = outputTypes.find("name");
556                 if (itTypes != outputTypes.end()) {
557                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
558                 }
559         
560         itTypes = outputTypes.find("count");
561                 if (itTypes != outputTypes.end()) {
562                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
563                 }
564                 
565                 m->mothurOutEndLine();
566                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
567                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
568                 m->mothurOutEndLine();
569                 
570                 return 0;
571         }
572         catch(exception& e) {
573                 m->errorOut(e, "GetOTURepCommand", "execute");
574                 exit(1);
575         }
576 }
577 //**********************************************************************************************************************
578 int GetOTURepCommand::readDist() {
579         try {
580         
581         if (!large) {
582                         //read distance files
583                         if (format == "column") { readMatrix = new ReadColumnMatrix(distFile); }        
584                         else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(distFile); }
585                         else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0;  }
586                         
587                         readMatrix->setCutoff(cutoff);
588             
589                         NameAssignment* nameMap = NULL;
590             if(namefile != ""){ 
591                 nameMap = new NameAssignment(namefile);
592                 nameMap->readMap();
593                 readMatrix->read(nameMap);
594             }else if (countfile != "") {
595                 readMatrix->read(&ct);
596             }else {
597                readMatrix->read(nameMap); 
598             }
599                         
600                         if (m->control_pressed) { delete readMatrix; return 0; }
601             
602                         list = readMatrix->getListVector();
603                         SparseDistanceMatrix* matrix = readMatrix->getDMatrix();
604                         
605                         // Create a data structure to quickly access the distance information.
606                         // It consists of a vector of distance maps, where each map contains
607                         // all distances of a certain sequence. Vector and maps are accessed
608                         // via the index of a sequence in the distance matrix
609                         seqVec = vector<SeqMap>(list->size()); 
610             for (int i = 0; i < matrix->seqVec.size(); i++) {
611                 for (int j = 0; j < matrix->seqVec[i].size(); j++) {
612                     if (m->control_pressed) { delete readMatrix; return 0; }
613                     //already added everyone else in row
614                     if (i < matrix->seqVec[i][j].index) {  seqVec[i][matrix->seqVec[i][j].index] = matrix->seqVec[i][j].dist;  }
615                 }
616                         }
617                         //add dummy map for unweighted calc
618                         SeqMap dummy;
619                         seqVec.push_back(dummy);
620                         
621                         delete matrix;
622                         delete readMatrix;
623                         delete nameMap;
624                         
625                         if (m->control_pressed) { return 0; }
626                 }else {
627                         //process file and set up indexes
628                         if (format == "column") { formatMatrix = new FormatColumnMatrix(distFile); }    
629                         else if (format == "phylip") { formatMatrix = new FormatPhylipMatrix(distFile); }
630                         else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0;  }
631                         
632                         formatMatrix->setCutoff(cutoff);
633             
634                         NameAssignment* nameMap = NULL;
635             if(namefile != ""){ 
636                 nameMap = new NameAssignment(namefile);
637                 nameMap->readMap();
638                 formatMatrix->read(nameMap);
639             }else if (countfile != "") {
640                 formatMatrix->read(&ct);
641             }else {
642                 formatMatrix->read(nameMap); 
643             }
644                         
645                         if (m->control_pressed) { delete formatMatrix;  return 0; }
646             
647                         list = formatMatrix->getListVector();
648                         distFile = formatMatrix->getFormattedFileName();
649                         
650                         //positions in file where the distances for each sequence begin
651                         //rowPositions[1] = position in file where distance related to sequence 1 start.
652                         rowPositions = formatMatrix->getRowPositions();
653                         rowPositions.push_back(-1); //dummy row for unweighted calc
654                         
655                         delete formatMatrix;
656                         delete nameMap;
657                         
658                         //openfile for getMap to use
659                         m->openInputFile(distFile, inRow);
660                         
661                         if (m->control_pressed) { inRow.close(); m->mothurRemove(distFile); return 0; }
662                 }
663                 
664                 
665                 //list bin 0 = first name read in distance matrix, list bin 1 = second name read in distance matrix
666                 if (list != NULL) {
667                         vector<string> names;
668                         string binnames;
669                         //map names to rows in sparsematrix
670                         for (int i = 0; i < list->size(); i++) {
671                                 names.clear();
672                                 binnames = list->get(i);
673                                 
674                                 m->splitAtComma(binnames, names);
675                                 
676                                 for (int j = 0; j < names.size(); j++) {
677                                         nameToIndex[names[j]] = i;
678                                 }
679                         }
680                 } else { m->mothurOut("error, no listvector."); m->mothurOutEndLine(); }
681
682         if (m->control_pressed) { if (large) {  inRow.close(); m->mothurRemove(distFile);  }return 0; }
683         
684         return 0;
685     }
686         catch(exception& e) {
687                 m->errorOut(e, "GetOTURepCommand", "readDist");
688                 exit(1);
689         }
690 }
691 //**********************************************************************************************************************
692 void GetOTURepCommand::readNamesFile(FastaMap*& fasta) {
693         try {
694                 ifstream in;
695                 vector<string> dupNames;
696                 m->openInputFile(namefile, in);
697                 
698                 string name, names, sequence;
699         
700                 while(!in.eof()){
701                         in >> name;                     //read from first column  A
702                         in >> names;            //read from second column  A,B,C,D
703                         
704                         dupNames.clear();
705                         
706                         //parse names into vector
707                         m->splitAtComma(names, dupNames);
708                         
709                         //store names in fasta map
710                         sequence = fasta->getSequence(name);
711                         for (int i = 0; i < dupNames.size(); i++) {
712                                 fasta->push_back(dupNames[i], sequence);
713                         }
714                 
715                         m->gobble(in);
716                 }
717                 in.close();
718
719         }
720         catch(exception& e) {
721                 m->errorOut(e, "GetOTURepCommand", "readNamesFile");
722                 exit(1);
723         }
724 }
725 //**********************************************************************************************************************
726 //read names file to find the weighted rep for each bin
727 void GetOTURepCommand::readNamesFile(bool w) {
728         try {
729                 ifstream in;
730                 vector<string> dupNames;
731                 m->openInputFile(namefile, in);
732                 
733                 string name, names, sequence;
734                 
735                 while(!in.eof()){
736                         in >> name;     m->gobble(in);          //read from first column  A
737                         in >> names;                                                    //read from second column  A,B,C,D
738                         
739                         dupNames.clear();
740                         
741                         //parse names into vector
742                         m->splitAtComma(names, dupNames);
743                         
744                         for (int i = 0; i < dupNames.size(); i++) {
745                                 nameFileMap[dupNames[i]] = name;
746                         }
747                         
748                         m->gobble(in);
749                 }
750                 in.close();
751                 
752         }
753         catch(exception& e) {
754                 m->errorOut(e, "GetOTURepCommand", "readNamesFile");
755                 exit(1);
756         }
757 }
758 //**********************************************************************************************************************
759 string GetOTURepCommand::findRepAbund(vector<string> names, string group) {
760         try{
761         vector<string> reps;
762         string rep = "notFound";
763     
764         if (m->debug) { m->mothurOut("[DEBUG]: group=" + group + " names.size() = " + toString(names.size()) + " " + names[0] + "\n"); }
765         
766         if ((names.size() == 1)) {
767             return names[0];
768         }else{
769             //fill seqIndex and initialize sums
770             int maxAbund = 0;
771             for (int i = 0; i < names.size(); i++) {
772                 
773                 if (m->control_pressed) { return "control"; }
774                 
775                 if (countfile != "") {  //if countfile is not blank then we can assume the list file contains only uniques, otherwise we assume list file contains everyone.
776                     int numRep = 0;
777                     if (group != "") {  numRep = ct.getGroupCount(names[i], group);  }
778                     else { numRep = ct.getNumSeqs(names[i]);  }
779                     if (numRep > maxAbund) {
780                         reps.clear();
781                         reps.push_back(names[i]);
782                         maxAbund = numRep;
783                     }else if(numRep == maxAbund) { //tie
784                         reps.push_back(names[i]);
785                     }
786                 }else { //name file used, we assume list file contains all sequences
787                     map<string, int>::iterator itNameMap = nameToIndex.find(names[i]);
788                     if (itNameMap == nameToIndex.end()) {} //assume that this sequence is not a unique
789                     else {
790                         if (itNameMap->second > maxAbund) {
791                             reps.clear();
792                             reps.push_back(names[i]);
793                             maxAbund = itNameMap->second;
794                         }else if(itNameMap->second == maxAbund) { //tie
795                             reps.push_back(names[i]);
796                         }
797                     }
798                 }
799             }
800             
801             if (reps.size() == 0) { m->mothurOut("[ERROR]: no rep found, file mismatch?? Quitting.\n"); m->control_pressed = true; }
802             else if (reps.size() == 1) { rep = reps[0]; }
803             else { //tie
804                 int index = m->getRandomIndex(reps.size()-1);
805                 rep = reps[index];
806             }
807         }
808         
809         return rep;
810     }
811         catch(exception& e) {
812                 m->errorOut(e, "GetOTURepCommand", "findRepAbund");
813                 exit(1);
814         }
815 }
816 //**********************************************************************************************************************
817 string GetOTURepCommand::findRep(vector<string> names, string group) {
818         try{
819         //if using abundance 
820         if (method == "abundance") { return (findRepAbund(names, group)); }
821         else { //find rep based on distance
822             
823             // if only 1 sequence in bin or processing the "unique" label, then
824             // the first sequence of the OTU is the representative one
825             if ((names.size() == 1)) {
826                 return names[0];
827             }else{
828                 vector<int> seqIndex; //(names.size());
829                 map<string, string>::iterator itNameFile;
830                 map<string, int>::iterator itNameIndex;
831                 
832                 //fill seqIndex and initialize sums
833                 for (size_t i = 0; i < names.size(); i++) {
834                     if (weighted) {
835                         seqIndex.push_back(nameToIndex[names[i]]);
836                         if (countfile != "") {  //if countfile is not blank then we can assume the list file contains only uniques, otherwise we assume list file contains everyone.
837                             int numRep = 0;
838                             if (group != "") {  numRep = ct.getGroupCount(names[i], group);  }
839                             else { numRep = ct.getNumSeqs(names[i]);  }
840                             for (int j = 1; j < numRep; j++) { //don't add yourself again
841                                 seqIndex.push_back(nameToIndex[names[i]]);
842                             }
843                         }
844                     }else {
845                         if (namefile == "") {
846                             itNameIndex = nameToIndex.find(names[i]);
847                             
848                             if (itNameIndex == nameToIndex.end()) { // you are not in the distance file and no namesfile, then assume you are not unique
849                                 if (large) {  seqIndex.push_back((rowPositions.size()-1)); }
850                                 else {  seqIndex.push_back((seqVec.size()-1)); }
851                             }else {
852                                 seqIndex.push_back(itNameIndex->second);
853                             }
854                             
855                         }else {
856                             itNameFile = nameFileMap.find(names[i]);
857                             
858                             if (itNameFile == nameFileMap.end()) {
859                                 m->mothurOut("[ERROR]: " + names[i] + " is not in your namefile, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
860                             }else{
861                                 string name1 = itNameFile->first;
862                                 string name2 = itNameFile->second;
863                                 
864                                 if (name1 == name2) { //then you are unique so add your real dists
865                                     seqIndex.push_back(nameToIndex[names[i]]);
866                                 }else { //add dummy
867                                     if (large) {  seqIndex.push_back((rowPositions.size()-1)); }
868                                     else {  seqIndex.push_back((seqVec.size()-1)); }
869                                 }
870                             }
871                         }
872                     }
873                 }
874                 
875                 vector<float> max_dist(seqIndex.size(), 0.0);
876                 vector<float> total_dist(seqIndex.size(), 0.0);
877                 
878                 // loop through all entries in seqIndex
879                 SeqMap::iterator it;
880                 SeqMap currMap;
881                 for (size_t i=0; i < seqIndex.size(); i++) {
882                     if (m->control_pressed) {  return  "control"; }
883                     
884                     if (!large) {       currMap = seqVec[seqIndex[i]];  }
885                     else                {       currMap = getMap(seqIndex[i]);  }
886                     
887                     for (size_t j=0; j < seqIndex.size(); j++) {
888                         it = currMap.find(seqIndex[j]);
889                         if (it != currMap.end()) {
890                             max_dist[i] = max(max_dist[i], it->second);
891                             max_dist[j] = max(max_dist[j], it->second);
892                             total_dist[i] += it->second;
893                             total_dist[j] += it->second;
894                         }else{ //if you can't find the distance make it the cutoff
895                             max_dist[i] = max(max_dist[i], cutoff);
896                             max_dist[j] = max(max_dist[j], cutoff);
897                             total_dist[i] += cutoff;
898                             total_dist[j] += cutoff;
899                         }
900                     }
901                 }
902                 
903                 // sequence with the smallest maximum distance is the representative
904                 //if tie occurs pick sequence with smallest average distance
905                 float min = 10000;
906                 int minIndex;
907                 for (size_t i=0; i < max_dist.size(); i++) {
908                     if (m->control_pressed) {  return  "control"; }
909                     if (max_dist[i] < min) {
910                         min = max_dist[i];
911                         minIndex = i;
912                     }else if (max_dist[i] == min) {
913                         float currentAverage = total_dist[minIndex] / (float) total_dist.size();
914                         float newAverage = total_dist[i] / (float) total_dist.size();
915                         
916                         if (newAverage < currentAverage) {
917                             min = max_dist[i];
918                             minIndex = i;
919                         }
920                     }
921                 }
922                 
923                 return(names[minIndex]);
924             }
925         }
926         }
927         catch(exception& e) {
928                 m->errorOut(e, "GetOTURepCommand", "FindRep");
929                 exit(1);
930         }
931 }
932
933 //**********************************************************************************************************************
934 int GetOTURepCommand::process(ListVector* processList) {
935         try{
936                 string name, sequence;
937                 string nameRep;
938
939                 //create output file
940                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
941                                 
942                 ofstream newNamesOutput;
943                 string outputNamesFile;
944                 map<string, ofstream*> filehandles;
945                 
946         map<string, string> variables; 
947         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
948         
949                 if (Groups.size() == 0) { //you don't want to use groups
950             variables["[tag]"] = processList->getLabel();
951             if (countfile == "") { 
952                 outputNamesFile = getOutputFileName("name", variables);
953                 outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile); 
954             }else {
955                 outputNamesFile = getOutputFileName("count", variables);
956                 outputNames.push_back(outputNamesFile); outputTypes["count"].push_back(outputNamesFile); 
957             }
958                         outputNameFiles[outputNamesFile] = processList->getLabel();
959             m->openOutputFile(outputNamesFile, newNamesOutput);
960             newNamesOutput << "noGroup" << endl;
961                 }else{ //you want to use groups
962                         ofstream* temp;
963                         for (int i=0; i<Groups.size(); i++) {
964                                 temp = new ofstream;
965                 variables["[tag]"] = processList->getLabel();
966                 variables["[group]"] = Groups[i];
967                                 filehandles[Groups[i]] = temp;
968                                 outputNamesFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + Groups[i] + ".";
969                 if (countfile == "") { 
970                     outputNamesFile = getOutputFileName("name", variables);
971                     outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile); 
972                 }else {
973                     outputNamesFile = getOutputFileName("count", variables);
974                     outputNames.push_back(outputNamesFile); outputTypes["count"].push_back(outputNamesFile); 
975                 }
976                                 
977                                 m->openOutputFile(outputNamesFile, *(temp));
978                 *(temp) << Groups[i] << endl;
979                                 outputNameFiles[outputNamesFile] = processList->getLabel() + "." + Groups[i];
980                         }
981                 }
982                 
983                 //for each bin in the list vector
984         vector<string> binLabels = processList->getLabels();
985                 for (int i = 0; i < processList->size(); i++) {
986                         if (m->control_pressed) { 
987                                 out.close();  
988                                 if (Groups.size() == 0) { //you don't want to use groups
989                                         newNamesOutput.close();
990                                 }else{
991                                         for (int j=0; j<Groups.size(); j++) {
992                                                 (*(filehandles[Groups[j]])).close();
993                                                 delete filehandles[Groups[j]];
994                                         }
995                                 }
996                                 return 0; 
997                         }
998                         
999                         string temp = processList->get(i);
1000                         vector<string> namesInBin;
1001                         m->splitAtComma(temp, namesInBin);
1002                         
1003                         if (Groups.size() == 0) {
1004                                 nameRep = findRep(namesInBin, "");
1005                                 newNamesOutput << binLabels[i] << '\t' << nameRep << '\t';
1006                 
1007                 //put rep at first position in names line
1008                 string outputString = nameRep + ",";
1009                 for (int k=0; k<namesInBin.size()-1; k++) {//output list of names in this otu
1010                     if (namesInBin[k] != nameRep) { outputString += namesInBin[k] + ","; }
1011                 }
1012                 
1013                 //output last name
1014                 if (namesInBin[namesInBin.size()-1] != nameRep) { outputString += namesInBin[namesInBin.size()-1]; }
1015                 
1016                 if (outputString[outputString.length()-1] == ',') { //rip off comma
1017                     outputString = outputString.substr(0, outputString.length()-1);
1018                 }
1019                 newNamesOutput << outputString << endl;
1020                         }else{
1021                                 map<string, vector<string> > NamesInGroup;
1022                                 for (int j=0; j<Groups.size(); j++) { //initialize groups
1023                                         NamesInGroup[Groups[j]].resize(0);
1024                                 }
1025                                 
1026                                 for (int j=0; j<namesInBin.size(); j++) {
1027                     if (groupfile != "") {
1028                         string thisgroup = groupMap->getGroup(namesInBin[j]);
1029                         if (thisgroup == "not found") { m->mothurOut(namesInBin[j] + " is not in your groupfile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
1030                         
1031                         //add this name to correct group
1032                         if (m->inUsersGroups(thisgroup, Groups)) { NamesInGroup[thisgroup].push_back(namesInBin[j]);  }
1033                     }else {
1034                         vector<string> thisSeqsGroups = ct.getGroups(namesInBin[j]);
1035                         for (int k = 0; k < thisSeqsGroups.size(); k++) {
1036                             if (m->inUsersGroups(thisSeqsGroups[k], Groups)) { NamesInGroup[thisSeqsGroups[k]].push_back(namesInBin[j]);  }
1037                         }
1038                     }
1039                                 }
1040                                 
1041                                 //get rep for each group in otu
1042                                 for (int j=0; j<Groups.size(); j++) {
1043                                         if (NamesInGroup[Groups[j]].size() != 0) { //are there members from this group in this otu?
1044                                                 //get rep for each group
1045                                                 nameRep = findRep(NamesInGroup[Groups[j]], Groups[j]);
1046                                                 
1047                                                 //output group rep and other members of this group
1048                                                 (*(filehandles[Groups[j]])) << binLabels[i] << '\t' << nameRep << '\t';
1049                                                 
1050                         //put rep at first position in names line
1051                         string outputString = nameRep + ",";
1052                         for (int k=0; k<NamesInGroup[Groups[j]].size()-1; k++) {//output list of names in this otu from this group
1053                             if (NamesInGroup[Groups[j]][k] != nameRep) { outputString +=  NamesInGroup[Groups[j]][k] + ","; }
1054                         }
1055                         
1056                         //output last name
1057                         if (NamesInGroup[Groups[j]][NamesInGroup[Groups[j]].size()-1] != nameRep) { outputString += NamesInGroup[Groups[j]][NamesInGroup[Groups[j]].size()-1]; }
1058                         
1059                         if (outputString[outputString.length()-1] == ',') { //rip off comma
1060                             outputString = outputString.substr(0, outputString.length()-1);
1061                         }
1062                         (*(filehandles[Groups[j]])) << outputString << endl;
1063                                         }
1064                                 }
1065                         }
1066                 }
1067                 
1068                 if (Groups.size() == 0) { //you don't want to use groups
1069                         newNamesOutput.close();
1070                 }else{
1071                         for (int i=0; i<Groups.size(); i++) {
1072                                 (*(filehandles[Groups[i]])).close();
1073                                 delete filehandles[Groups[i]];
1074                         }
1075                 }
1076                 
1077                 return 0;
1078
1079         }
1080         catch(exception& e) {
1081                 m->errorOut(e, "GetOTURepCommand", "process");
1082                 exit(1);
1083         }
1084 }
1085 //**********************************************************************************************************************
1086 int GetOTURepCommand::processFastaNames(string filename, string label, FastaMap*& fasta) {
1087         try{
1088
1089                 //create output file
1090                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
1091         map<string, string> variables; 
1092         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
1093         variables["[tag]"] = label;
1094                 string outputFileName = getOutputFileName("fasta",variables);
1095                 m->openOutputFile(outputFileName, out);
1096                 vector<repStruct> reps;
1097                 outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
1098                 
1099                 ofstream out2;
1100                 string tempNameFile = filename + ".temp";
1101                 m->openOutputFile(tempNameFile, out2);
1102             
1103                 ifstream in;
1104                 m->openInputFile(filename, in);
1105                 
1106         string tempGroup = "";
1107         in >> tempGroup; m->gobble(in);
1108         
1109         CountTable thisCt;
1110         if (countfile != "") {
1111             thisCt.readTable(countfile, true, false);
1112             if (tempGroup != "noGroup") { out2 << "Representative_Sequence\ttotal\t" << tempGroup << endl; }
1113         }
1114     
1115         int thistotal = 0;
1116                 while (!in.eof()) {
1117                         string rep, binnames, binLabel;
1118                         in >> binLabel >> rep >> binnames; m->gobble(in);
1119                         
1120                         vector<string> names;
1121                         m->splitAtComma(binnames, names);
1122                         int binsize = names.size();
1123             
1124             if (countfile == "") { out2 << rep << '\t' << binnames << endl; }
1125             else {
1126                 if (tempGroup == "noGroup") {
1127                     for (int j = 0; j < names.size(); j++) {
1128                         if (names[j] != rep) { thisCt.mergeCounts(rep, names[j]); }
1129                     }
1130                     binsize = thisCt.getNumSeqs(rep);
1131                 }else {
1132                     int total = 0; 
1133                     for (int j = 0; j < names.size(); j++) {  total += thisCt.getGroupCount(names[j], tempGroup);  }
1134                     out2 << rep << '\t' << total << '\t' << total << endl;
1135                     binsize = total;
1136                 }
1137             }
1138                         thistotal += binsize;
1139                         //if you have a groupfile
1140                         string group = "";
1141             map<string, string> groups;
1142             map<string, string>::iterator groupIt;
1143                         if (groupfile != "") {
1144                                 //find the groups that are in this bin
1145                                 for (int i = 0; i < names.size(); i++) {
1146                                         string groupName = groupMap->getGroup(names[i]);
1147                                         if (groupName == "not found") {  
1148                                                 m->mothurOut(names[i] + " is missing from your group file. Please correct. "); m->mothurOutEndLine();
1149                                                 groupError = true;
1150                                         } else {
1151                                                 groups[groupName] = groupName;
1152                                         }
1153                                 }
1154                                 
1155                                 //turn the groups into a string
1156                                 for (groupIt = groups.begin(); groupIt != groups.end(); groupIt++) {
1157                                         group += groupIt->first + "-";
1158                                 }
1159                                 //rip off last dash
1160                                 group = group.substr(0, group.length()-1);
1161                         }else if (hasGroups) {
1162                 map<string, string> groups;
1163                 for (int i = 0; i < names.size(); i++) {
1164                     vector<string> thisSeqsGroups = ct.getGroups(names[i]);
1165                     for (int j = 0; j < thisSeqsGroups.size(); j++) { groups[thisSeqsGroups[j]] = thisSeqsGroups[j]; }
1166                 }
1167                 //turn the groups into a string
1168                                 for (groupIt = groups.begin(); groupIt != groups.end(); groupIt++) {
1169                                         group += groupIt->first + "-";
1170                                 }
1171                                 //rip off last dash
1172                                 group = group.substr(0, group.length()-1);
1173                 //cout << group << endl;
1174             }
1175             else{ group = ""; }
1176
1177                         
1178                         //print out name and sequence for that bin
1179                         string sequence = fasta->getSequence(rep);
1180
1181                         if (sequence != "not found") {
1182                                 if (sorted == "") { //print them out
1183                                         rep = rep + "\t" + binLabel;
1184                                         rep = rep + "|" + toString(binsize);
1185                                         if (group != "") {
1186                                                 rep = rep + "|" + group;
1187                                         }
1188                                         out << ">" << rep << endl;
1189                                         out << sequence << endl;
1190                                 }else { //save them
1191                     int simpleLabel;
1192                     m->mothurConvert(m->getSimpleLabel(binLabel), simpleLabel);
1193                                         repStruct newRep(rep, binLabel, simpleLabel, binsize, group);
1194                                         reps.push_back(newRep);
1195                                 }
1196                         }else { 
1197                                 m->mothurOut(rep + " is missing from your fasta or name file, ignoring. Please correct."); m->mothurOutEndLine(); 
1198                         }
1199                 }
1200                 
1201                         
1202                 if (sorted != "") { //then sort them and print them
1203                         if (sorted == "name")           {  sort(reps.begin(), reps.end(), compareName);         }
1204                         else if (sorted == "bin")       {  sort(reps.begin(), reps.end(), compareBin);          }
1205                         else if (sorted == "size")      {  sort(reps.begin(), reps.end(), compareSize);         }
1206                         else if (sorted == "group")     {  sort(reps.begin(), reps.end(), compareGroup);        }
1207                         
1208                         //print them
1209                         for (int i = 0; i < reps.size(); i++) {
1210                                 string sequence = fasta->getSequence(reps[i].name);
1211                                 string outputName = reps[i].name + "\t" + reps[i].bin;
1212                                 outputName = outputName + "|" + toString(reps[i].size);
1213                                 if (reps[i].group != "") {
1214                                         outputName = outputName + "|" + reps[i].group;
1215                                 }
1216                                 out << ">" << outputName << endl;
1217                                 out << sequence << endl;
1218                         }
1219                 }
1220                 
1221                 in.close();
1222                 out.close();
1223                 out2.close();
1224                 
1225                 m->mothurRemove(filename);
1226                 rename(tempNameFile.c_str(), filename.c_str());
1227         
1228         if ((countfile != "") && (tempGroup == "noGroup")) { thisCt.printTable(filename); } 
1229                 
1230                 return 0;
1231
1232         }
1233         catch(exception& e) {
1234                 m->errorOut(e, "GetOTURepCommand", "processFastaNames");
1235                 exit(1);
1236         }
1237 }
1238 //**********************************************************************************************************************
1239 int GetOTURepCommand::processNames(string filename, string label) {
1240         try{
1241                 
1242                 //create output file
1243                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
1244                 
1245                 ofstream out2;
1246                 string tempNameFile = filename + ".temp";
1247                 m->openOutputFile(tempNameFile, out2);
1248                 
1249                 ifstream in;
1250                 m->openInputFile(filename, in);
1251                 
1252                 string rep, binnames;
1253         
1254         string tempGroup = "";
1255         in >> tempGroup; m->gobble(in);
1256         
1257         CountTable thisCt;
1258         if (countfile != "") {
1259             thisCt.readTable(countfile, true, false);
1260             if (tempGroup != "noGroup") { out2 << "Representative_Sequence\ttotal\t" << tempGroup << endl; }
1261         }
1262         
1263                 while (!in.eof()) {
1264                         if (m->control_pressed) { break; }
1265             string binLabel;
1266                         in >> binLabel >> rep >> binnames; m->gobble(in);
1267             
1268                         if (countfile == "") { out2 << rep << '\t' << binnames << endl; }
1269             else {
1270                 vector<string> names;
1271                 m->splitAtComma(binnames, names);
1272                 if (tempGroup == "noGroup") {
1273                     for (int j = 0; j < names.size(); j++) {
1274                         if (names[j] != rep) { thisCt.mergeCounts(rep, names[j]); }
1275                     }
1276                 }else {
1277                     int total = 0; 
1278                     for (int j = 0; j < names.size(); j++) {  total += thisCt.getGroupCount(names[j], tempGroup);  }
1279                     out2 << rep << '\t' << total << '\t' << total << endl;
1280                 }
1281             }
1282
1283                 }
1284                 in.close();
1285                 out2.close();
1286                 
1287                 m->mothurRemove(filename);
1288                 rename(tempNameFile.c_str(), filename.c_str());
1289                 
1290         if ((countfile != "") && (tempGroup == "noGroup")) { thisCt.printTable(filename); } 
1291         
1292                 return 0;
1293         }
1294         catch(exception& e) {
1295                 m->errorOut(e, "GetOTURepCommand", "processNames");
1296                 exit(1);
1297         }
1298 }
1299 //**********************************************************************************************************************
1300 SeqMap GetOTURepCommand::getMap(int row) {
1301         try {
1302                 SeqMap rowMap;
1303                 
1304                 //make sure this row exists in the file, it may not if the seq did not have any distances below the cutoff
1305                 if (rowPositions[row] != -1){
1306                         //go to row in file
1307                         inRow.seekg(rowPositions[row]);
1308                         
1309                         int rowNum, numDists, colNum;
1310                         float dist;
1311                         
1312                         inRow >> rowNum >> numDists;
1313                         
1314                         for(int i = 0; i < numDists; i++) {
1315                                 inRow >> colNum >> dist;
1316                                 rowMap[colNum] = dist;
1317                                 
1318                         }
1319                 }
1320                 
1321                 return rowMap;
1322         }
1323         catch(exception& e) {
1324                 m->errorOut(e, "GetOTURepCommand", "getMap");
1325                 exit(1);
1326         }
1327 }
1328 //**********************************************************************************************************************
1329