]> git.donarmstrong.com Git - mothur.git/blob - getoturepcommand.cpp
latest version
[mothur.git] / getoturepcommand.cpp
1 /*
2  *  getoturepcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/6/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "getoturepcommand.h"
11 #include "readphylip.h"
12 #include "readcolumn.h"
13 #include "formatphylip.h"
14 #include "formatcolumn.h"
15
16
17
18 //********************************************************************************************************************
19 //sorts lowest to highest
20 inline bool compareName(repStruct left, repStruct right){
21         return (left.name < right.name);        
22 }
23 //********************************************************************************************************************
24 //sorts lowest to highest
25 inline bool compareBin(repStruct left, repStruct right){
26         return (left.bin < right.bin);  
27 }
28 //********************************************************************************************************************
29 //sorts lowest to highest
30 inline bool compareSize(repStruct left, repStruct right){
31         return (left.size < right.size);        
32 }
33 //********************************************************************************************************************
34 //sorts lowest to highest
35 inline bool compareGroup(repStruct left, repStruct right){
36         return (left.group < right.group);      
37 }
38 //**********************************************************************************************************************
39 GetOTURepCommand::GetOTURepCommand(string option)  {
40         try{
41                 globaldata = GlobalData::getInstance();
42                 abort = false;
43                 allLines = 1;
44                 labels.clear();
45                                 
46                 //allow user to run help
47                 if (option == "help") { 
48                         help(); abort = true;
49                 } else {
50                         //valid paramters for this command
51                         string Array[] =  {"fasta","list","label","name", "group", "sorted", "phylip","column","large","cutoff","precision","outputdir","inputdir"};
52                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
53                         
54                         OptionParser parser(option);
55                         map<string, string> parameters = parser.getParameters();
56                         
57                         ValidParameters validParameter;
58                         map<string, string>::iterator it;
59                 
60                         //check to make sure all parameters are valid for command
61                         for (it = parameters.begin(); it != parameters.end(); it++) { 
62                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
63                         }
64                         
65                         //if the user changes the input directory command factory will send this info to us in the output parameter 
66                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
67                         if (inputDir == "not found"){   inputDir = "";          }
68                         else {
69                                 string path;
70                                 it = parameters.find("list");
71                                 //user has given a template file
72                                 if(it != parameters.end()){ 
73                                         path = hasPath(it->second);
74                                         //if the user has not given a path then, add inputdir. else leave path alone.
75                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
76                                 }
77                                 
78                                 it = parameters.find("fasta");
79                                 //user has given a template file
80                                 if(it != parameters.end()){ 
81                                         path = hasPath(it->second);
82                                         //if the user has not given a path then, add inputdir. else leave path alone.
83                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
84                                 }
85                                 
86                                 it = parameters.find("phylip");
87                                 //user has given a template file
88                                 if(it != parameters.end()){ 
89                                         path = hasPath(it->second);
90                                         //if the user has not given a path then, add inputdir. else leave path alone.
91                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
92                                 }
93                                 
94                                 it = parameters.find("column");
95                                 //user has given a template file
96                                 if(it != parameters.end()){ 
97                                         path = hasPath(it->second);
98                                         //if the user has not given a path then, add inputdir. else leave path alone.
99                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
100                                 }
101                                 
102                                 it = parameters.find("name");
103                                 //user has given a template file
104                                 if(it != parameters.end()){ 
105                                         path = hasPath(it->second);
106                                         //if the user has not given a path then, add inputdir. else leave path alone.
107                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
108                                 }
109                                 
110                                 it = parameters.find("group");
111                                 //user has given a template file
112                                 if(it != parameters.end()){ 
113                                         path = hasPath(it->second);
114                                         //if the user has not given a path then, add inputdir. else leave path alone.
115                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
116                                 }
117                         }
118
119                         
120                         //if the user changes the output directory command factory will send this info to us in the output parameter 
121                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
122                         
123                         //check for required parameters
124                         fastafile = validParameter.validFile(parameters, "fasta", true);
125                         if (fastafile == "not found") { m->mothurOut("fasta is a required parameter for the get.oturep command."); m->mothurOutEndLine(); abort = true; }
126                         else if (fastafile == "not open") { abort = true; }     
127                 
128                         listfile = validParameter.validFile(parameters, "list", true);
129                         if (listfile == "not found") { m->mothurOut("list is a required parameter for the get.oturep command."); m->mothurOutEndLine(); abort = true; }
130                         else if (listfile == "not open") { abort = true; }      
131                         
132                         phylipfile = validParameter.validFile(parameters, "phylip", true);
133                         if (phylipfile == "not found") { phylipfile = "";  }
134                         else if (phylipfile == "not open") { abort = true; }    
135                         else { distFile = phylipfile; format = "phylip";   }
136                         
137                         columnfile = validParameter.validFile(parameters, "column", true);
138                         if (columnfile == "not found") { columnfile = ""; }
139                         else if (columnfile == "not open") { abort = true; }    
140                         else { distFile = columnfile; format = "column";   }
141                         
142                         namefile = validParameter.validFile(parameters, "name", true);
143                         if (namefile == "not open") { abort = true; }   
144                         else if (namefile == "not found") { namefile = ""; }
145                         
146                         if ((phylipfile == "") && (columnfile == "")) { m->mothurOut("When executing a get.oturep command you must enter a phylip or a column."); m->mothurOutEndLine(); abort = true; }
147                         else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a get.oturep command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
148                 
149                         if (columnfile != "") {  if (namefile == "") {  cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; }  }
150
151                         //check for optional parameter and set defaults
152                         // ...at some point should added some additional type checking...
153                         label = validParameter.validFile(parameters, "label", false);                   
154                         if (label == "not found") { label = ""; allLines = 1;  }
155                         else { 
156                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
157                                 else { allLines = 1;  }
158                         }
159                         
160                         groupfile = validParameter.validFile(parameters, "group", true);
161                         if (groupfile == "not open") { groupfile = ""; abort = true; }
162                         else if (groupfile == "not found") { groupfile = ""; }
163                                                 
164                         sorted = validParameter.validFile(parameters, "sorted", false);         if (sorted == "not found"){     sorted = "";    }
165                         if ((sorted != "") && (sorted != "name") && (sorted != "bin") && (sorted != "size") && (sorted != "group")) {
166                                 m->mothurOut(sorted + " is not a valid option for the sorted parameter. The only options are: name, bin, size and group. I will not sort."); m->mothurOutEndLine();
167                                 sorted = "";
168                         }
169                         
170                         if ((sorted == "group") && (groupfile == "")) {
171                                 m->mothurOut("You must provide a groupfile to sort by group. I will not sort."); m->mothurOutEndLine();
172                                 sorted = "";
173                         }
174                         
175                         string temp = validParameter.validFile(parameters, "large", false);             if (temp == "not found") {      temp = "F";     }
176                         large = isTrue(temp);
177                         
178                         temp = validParameter.validFile(parameters, "precision", false);                        if (temp == "not found") { temp = "100"; }
179                         convert(temp, precision); 
180                         
181                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "10.0"; }
182                         convert(temp, cutoff); 
183                         cutoff += (5 / (precision * 10.0));
184                 }
185         }
186         catch(exception& e) {
187                 m->errorOut(e, "GetOTURepCommand", "GetOTURepCommand");
188                 exit(1);
189         }
190 }
191
192 //**********************************************************************************************************************
193
194 void GetOTURepCommand::help(){
195         try {
196                 m->mothurOut("The get.oturep command parameters are phylip, column, list, fasta, name, group, large, cutoff, precision, sorted and label.  The fasta and list parameters are required, as well as phylip or column and name.\n");
197                 m->mothurOut("The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n");
198                 m->mothurOut("The phylip or column parameter is required, but only one may be used.  If you use a column file the name filename is required. \n");
199                 m->mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n");
200                 m->mothurOut("The get.oturep command should be in the following format: get.oturep(phylip=yourDistanceMatrix, fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n");
201                 m->mothurOut("Example get.oturep(phylip=amazon.dist, fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups).\n");
202                 m->mothurOut("The default value for label is all labels in your inputfile.\n");
203                 m->mothurOut("The sorted parameter allows you to indicate you want the output sorted. You can sort by sequence name, bin number, bin size or group. The default is no sorting, but your options are name, number, size, or group.\n");
204                 m->mothurOut("The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n");
205                 m->mothurOut("The get.oturep command outputs a .fastarep and .rep.names file for each distance you specify, selecting one OTU representative for each bin.\n");
206                 m->mothurOut("If you provide a groupfile, then it also appends the names of the groups present in that bin.\n");
207                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
208         }
209         catch(exception& e) {
210                 m->errorOut(e, "GetOTURepCommand", "help");
211                 exit(1);
212         }
213 }
214
215 //**********************************************************************************************************************
216
217 GetOTURepCommand::~GetOTURepCommand(){}
218
219 //**********************************************************************************************************************
220
221 int GetOTURepCommand::execute(){
222         try {
223         
224                 if (abort == true) { return 0; }
225                 int error;
226                 
227                 if (!large) {
228                         //read distance files
229                         if (format == "column") { readMatrix = new ReadColumnMatrix(distFile); }        
230                         else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(distFile); }
231                         else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0;  }
232                         
233                         readMatrix->setCutoff(cutoff);
234         
235                         if(namefile != ""){     
236                                 nameMap = new NameAssignment(namefile);
237                                 nameMap->readMap();
238                         }else{  nameMap = NULL;         }
239                         
240                         readMatrix->read(nameMap);
241                         
242                         if (m->control_pressed) { delete readMatrix; return 0; }
243
244                         //get matrix
245                         if (globaldata->gListVector != NULL) {  delete globaldata->gListVector;  }
246                         globaldata->gListVector = readMatrix->getListVector();
247
248                         SparseMatrix* matrix = readMatrix->getMatrix();
249                         
250                         // Create a data structure to quickly access the distance information.
251                         // It consists of a vector of distance maps, where each map contains
252                         // all distances of a certain sequence. Vector and maps are accessed
253                         // via the index of a sequence in the distance matrix
254                         seqVec = vector<SeqMap>(globaldata->gListVector->size()); 
255                         for (MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++) {
256                                 if (m->control_pressed) { delete readMatrix; return 0; }
257                                 seqVec[currentCell->row][currentCell->column] = currentCell->dist;
258                         }
259                         
260                         delete matrix;
261                         delete readMatrix;
262                         delete nameMap;
263                         
264                         if (m->control_pressed) { return 0; }
265                 }else {
266                         //process file and set up indexes
267                         if (format == "column") { formatMatrix = new FormatColumnMatrix(distFile); }    
268                         else if (format == "phylip") { formatMatrix = new FormatPhylipMatrix(distFile); }
269                         else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0;  }
270                         
271                         formatMatrix->setCutoff(cutoff);
272         
273                         if(namefile != ""){     
274                                 nameMap = new NameAssignment(namefile);
275                                 nameMap->readMap();
276                         }else{  nameMap = NULL;         }
277                         
278                         formatMatrix->read(nameMap);
279                         
280                         if (m->control_pressed) { delete formatMatrix;  return 0; }
281
282                         //get matrix
283                         if (globaldata->gListVector != NULL) {  delete globaldata->gListVector;  }
284                         globaldata->gListVector = formatMatrix->getListVector();
285                         
286                         distFile = formatMatrix->getFormattedFileName();
287                         
288                         //positions in file where the distances for each sequence begin
289                         //rowPositions[1] = position in file where distance related to sequence 1 start.
290                         rowPositions = formatMatrix->getRowPositions();
291                         
292                         delete formatMatrix;
293                         delete nameMap;
294                         
295                         //openfile for getMap to use
296                         openInputFile(distFile, inRow);
297                         
298                         if (m->control_pressed) { inRow.close(); remove(distFile.c_str()); return 0; }
299                 }
300                 
301                 
302                 //globaldata->gListVector bin 0 = first name read in distance matrix, globaldata->gListVector bin 1 = second name read in distance matrix
303                 if (globaldata->gListVector != NULL) {
304                         vector<string> names;
305                         string binnames;
306                         //map names to rows in sparsematrix
307                         for (int i = 0; i < globaldata->gListVector->size(); i++) {
308                                 names.clear();
309                                 binnames = globaldata->gListVector->get(i);
310                                 
311                                 splitAtComma(binnames, names);
312                                 
313                                 for (int j = 0; j < names.size(); j++) {
314                                         nameToIndex[names[j]] = i;
315                                 }
316                         }
317                 } else { m->mothurOut("error, no listvector."); m->mothurOutEndLine(); }
318                 
319                                 
320                 if (m->control_pressed) { 
321                         if (large) {  inRow.close(); remove(distFile.c_str());  }
322                         return 0; 
323                 }
324                                 
325                 //set format to list so input can get listvector
326                 globaldata->setFormat("list");
327         
328                 //read list file
329                 read = new ReadOTUFile(listfile);
330                 read->read(&*globaldata); 
331                 
332                 input = globaldata->ginput;
333                 list = globaldata->gListVector;
334                 string lastLabel = list->getLabel();
335
336                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
337                 set<string> processedLabels;
338                 set<string> userLabels = labels;
339                 
340                 if (m->control_pressed) { 
341                         if (large) {  inRow.close(); remove(distFile.c_str());  }
342                         delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; 
343                 }
344         
345                 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
346                         
347                         if (allLines == 1 || labels.count(list->getLabel()) == 1){
348                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
349                                         error = process(list);
350                                         if (error == 1) { return 0; } //there is an error in hte input files, abort command
351                                         
352                                         if (m->control_pressed) { 
353                                                 if (large) {  inRow.close(); remove(distFile.c_str());  }
354                                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  }
355                                                 delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; 
356                                         }
357                                         
358                                         processedLabels.insert(list->getLabel());
359                                         userLabels.erase(list->getLabel());
360                         }
361                         
362                         if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
363                                         string saveLabel = list->getLabel();
364                                         
365                                         delete list;
366                                         list = input->getListVector(lastLabel);
367                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
368                                         error = process(list);
369                                         if (error == 1) { return 0; } //there is an error in hte input files, abort command
370                                         
371                                         if (m->control_pressed) { 
372                                                 if (large) {  inRow.close(); remove(distFile.c_str());  }
373                                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  }
374                                                 delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; 
375                                         }
376                                         
377                                         processedLabels.insert(list->getLabel());
378                                         userLabels.erase(list->getLabel());
379                                         
380                                         //restore real lastlabel to save below
381                                         list->setLabel(saveLabel);
382                         }
383                         
384                         lastLabel = list->getLabel();
385         
386                         delete list;
387                         list = input->getListVector();
388                 }
389                 
390                 //output error messages about any remaining user labels
391                 bool needToRun = false;
392                 for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {  
393                         m->mothurOut("Your file does not include the label " + (*it)); 
394                         if (processedLabels.count(lastLabel) != 1) {
395                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
396                                 needToRun = true;
397                         }else {
398                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
399                         }
400                 }
401                 
402                 //run last label if you need to
403                 if (needToRun == true)  {
404                         if (list != NULL) {     delete list;    }
405                         list = input->getListVector(lastLabel);
406                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
407                         error = process(list);
408                         delete list;
409                         if (error == 1) { return 0; } //there is an error in hte input files, abort command
410                         
411                         if (m->control_pressed) { 
412                                         if (large) {  inRow.close(); remove(distFile.c_str());  }
413                                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  }
414                                         delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; 
415                         }
416                 }
417                 
418                 //close and remove formatted matrix file
419                 if (large) {
420                         inRow.close();
421                         remove(distFile.c_str());
422                 }
423                 
424                 globaldata->gListVector = NULL;
425                 delete input;  globaldata->ginput = NULL;
426                 delete read;
427                 
428                 if (groupfile != "") {
429                         //read in group map info.
430                         groupMap = new GroupMap(groupfile);
431                         int error = groupMap->readMap();
432                         if (error == 1) { delete groupMap; m->mothurOut("Error reading your groupfile. Proceeding without groupfile."); m->mothurOutEndLine(); groupfile = "";  }
433                 }
434
435                 //read fastafile
436                 fasta = new FastaMap();
437                 fasta->readFastaFile(fastafile);
438                 
439                 //if user gave a namesfile then use it
440                 if (namefile != "") {   readNamesFile();        }
441                 
442                 //output create and output the .rep.fasta files
443                 map<string, string>::iterator itNameFile;
444                 for (itNameFile = outputNameFiles.begin(); itNameFile != outputNameFiles.end(); itNameFile++) {
445                         processNames(itNameFile->first, itNameFile->second);
446                 }
447                 
448                 delete fasta;
449                 if (groupfile != "") {
450                         delete groupMap;  globaldata->gGroupmap = NULL;
451                 }
452                 
453                 if (m->control_pressed) {  return 0; }
454                 
455                 m->mothurOutEndLine();
456                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
457                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
458                 m->mothurOutEndLine();
459                 
460                 return 0;
461         }
462         catch(exception& e) {
463                 m->errorOut(e, "GetOTURepCommand", "execute");
464                 exit(1);
465         }
466 }
467
468 //**********************************************************************************************************************
469 void GetOTURepCommand::readNamesFile() {
470         try {
471                 vector<string> dupNames;
472                 openInputFile(namefile, inNames);
473                 
474                 string name, names, sequence;
475         
476                 while(inNames){
477                         inNames >> name;                        //read from first column  A
478                         inNames >> names;               //read from second column  A,B,C,D
479                         
480                         dupNames.clear();
481                         
482                         //parse names into vector
483                         splitAtComma(names, dupNames);
484                         
485                         //store names in fasta map
486                         sequence = fasta->getSequence(name);
487                         for (int i = 0; i < dupNames.size(); i++) {
488                                 fasta->push_back(dupNames[i], sequence);
489                         }
490                 
491                         gobble(inNames);
492                 }
493                 inNames.close();
494
495         }
496         catch(exception& e) {
497                 m->errorOut(e, "GetOTURepCommand", "readNamesFile");
498                 exit(1);
499         }
500 }
501 //**********************************************************************************************************************
502 string GetOTURepCommand::findRep(int bin, ListVector* thisList) {
503         try{
504                 vector<string> names;
505                 map<string, string> groups;
506                 map<string, string>::iterator groupIt;
507
508                 //parse names into vector
509                 string binnames = thisList->get(bin);
510                 splitAtComma(binnames, names);
511
512                 // if only 1 sequence in bin or processing the "unique" label, then 
513                 // the first sequence of the OTU is the representative one
514                 if ((names.size() == 1) || (list->getLabel() == "unique")) {
515                         return names[0];
516                 }else{
517                         vector<int> seqIndex(names.size());
518                         vector<float> max_dist(names.size());
519                         vector<float> total_dist(names.size());
520
521                         //fill seqIndex and initialize sums
522                         for (size_t i = 0; i < names.size(); i++) {
523                                 seqIndex[i] = nameToIndex[names[i]];
524                                 max_dist[i] = 0.0;
525                                 total_dist[i] = 0.0;
526                         }
527                         
528                         // loop through all entries in seqIndex
529                         SeqMap::iterator it;
530                         SeqMap currMap;
531                         for (size_t i=0; i < seqIndex.size(); i++) {
532                                 if (m->control_pressed) {  return  "control"; }
533                         
534                                 if (!large) {   currMap = seqVec[seqIndex[i]];  }
535                                 else            {       currMap = getMap(seqIndex[i]);  }
536                                 
537                                 for (size_t j=0; j < seqIndex.size(); j++) {
538                                         it = currMap.find(seqIndex[j]);         
539                                         if (it != currMap.end()) {
540                                                 max_dist[i] = max(max_dist[i], it->second);
541                                                 max_dist[j] = max(max_dist[j], it->second);
542                                                 total_dist[i] += it->second;
543                                                 total_dist[j] += it->second;
544                                         }else{ //if you can't find the distance make it the cutoff
545                                                 max_dist[i] = max(max_dist[i], cutoff);
546                                                 max_dist[j] = max(max_dist[j], cutoff);
547                                                 total_dist[i] += cutoff;
548                                                 total_dist[j] += cutoff;
549                                         }
550                                 }
551                         }
552                         
553                         // sequence with the smallest maximum distance is the representative
554                         //if tie occurs pick sequence with smallest average distance
555                         float min = 10000;
556                         int minIndex;
557                         for (size_t i=0; i < max_dist.size(); i++) {
558                                 if (m->control_pressed) {  return  "control"; }
559                                 if (max_dist[i] < min) {
560                                         min = max_dist[i];
561                                         minIndex = i;
562                                 }else if (max_dist[i] == min) {
563                                         float currentAverage = total_dist[minIndex] / (float) total_dist.size();
564                                         float newAverage = total_dist[i] / (float) total_dist.size();
565                                         
566                                         if (newAverage < currentAverage) {
567                                                 min = max_dist[i];
568                                                 minIndex = i;
569                                         }
570                                 }
571                         }
572
573                         return(names[minIndex]);
574                 }
575         }
576         catch(exception& e) {
577                 m->errorOut(e, "GetOTURepCommand", "FindRep");
578                 exit(1);
579         }
580 }
581
582 //**********************************************************************************************************************
583 int GetOTURepCommand::process(ListVector* processList) {
584         try{
585                 string nameRep, name, sequence;
586
587                 //create output file
588                 if (outputDir == "") { outputDir += hasPath(listfile); }
589                                 
590                 ofstream newNamesOutput;
591                 string outputNamesFile = outputDir + getRootName(getSimpleName(listfile)) + processList->getLabel() + ".rep.names";
592                 openOutputFile(outputNamesFile, newNamesOutput);
593                 outputNames.push_back(outputNamesFile);
594                 outputNameFiles[outputNamesFile] = processList->getLabel();
595                 
596                 //for each bin in the list vector
597                 for (int i = 0; i < processList->size(); i++) {
598                         nameRep = findRep(i, processList);
599                         
600                         if (m->control_pressed) { out.close();  newNamesOutput.close(); return 0; }
601                         
602                         //output to new names file
603                         newNamesOutput << nameRep << '\t' << processList->get(i) << endl;
604                 }
605
606                 newNamesOutput.close();
607                 return 0;
608
609         }
610         catch(exception& e) {
611                 m->errorOut(e, "GetOTURepCommand", "process");
612                 exit(1);
613         }
614 }
615 //**********************************************************************************************************************
616 int GetOTURepCommand::processNames(string filename, string label) {
617         try{
618
619                 //create output file
620                 if (outputDir == "") { outputDir += hasPath(listfile); }
621                 string outputFileName = outputDir + getRootName(getSimpleName(listfile)) + label + ".rep.fasta";
622                 openOutputFile(outputFileName, out);
623                 vector<repStruct> reps;
624                 outputNames.push_back(outputFileName);
625                 
626                 ifstream in;
627                 openInputFile(filename, in);
628                 
629                 int i = 0;
630                 while (!in.eof()) {
631                         string rep, binnames;
632                         in >> rep >> binnames; gobble(in);
633                         
634                         vector<string> names;
635                         splitAtComma(binnames, names);
636                         int binsize = names.size();
637                         
638                         //if you have a groupfile
639                         string group = "";
640                         if (groupfile != "") {
641                                 map<string, string> groups;
642                                 map<string, string>::iterator groupIt;
643                                 
644                                 //find the groups that are in this bin
645                                 for (size_t i = 0; i < names.size(); i++) {
646                                         string groupName = groupMap->getGroup(names[i]);
647                                         if (groupName == "not found") {  
648                                                 m->mothurOut(names[i] + " is missing from your group file. Please correct. "); m->mothurOutEndLine();
649                                                 groupError = true;
650                                         } else {
651                                                 groups[groupName] = groupName;
652                                         }
653                                 }
654                                 
655                                 //turn the groups into a string
656                                 for (groupIt = groups.begin(); groupIt != groups.end(); groupIt++) {
657                                         group += groupIt->first + "-";
658                                 }
659                                 //rip off last dash
660                                 group = group.substr(0, group.length()-1);
661                         }else{ group = ""; }
662
663                         
664                         //print out name and sequence for that bin
665                         string sequence = fasta->getSequence(rep);
666
667                         if (sequence != "not found") {
668                                 if (sorted == "") { //print them out
669                                         rep = rep + "|" + toString(i+1);
670                                         rep = rep + "|" + toString(binsize);
671                                         if (groupfile != "") {
672                                                 rep = rep + "|" + group;
673                                         }
674                                         out << ">" << rep << endl;
675                                         out << sequence << endl;
676                                 }else { //save them
677                                         repStruct newRep(rep, i+1, binsize, group);
678                                         reps.push_back(newRep);
679                                 }
680                         }else { 
681                                 m->mothurOut(rep + " is missing from your fasta or name file, ignoring. Please correct."); m->mothurOutEndLine(); 
682                         }
683                         i++;
684                 }
685                 
686                         
687                 if (sorted != "") { //then sort them and print them
688                         if (sorted == "name")           {  sort(reps.begin(), reps.end(), compareName);         }
689                         else if (sorted == "bin")       {  sort(reps.begin(), reps.end(), compareBin);          }
690                         else if (sorted == "size")      {  sort(reps.begin(), reps.end(), compareSize);         }
691                         else if (sorted == "group")     {  sort(reps.begin(), reps.end(), compareGroup);        }
692                         
693                         //print them
694                         for (int i = 0; i < reps.size(); i++) {
695                                 string sequence = fasta->getSequence(reps[i].name);
696                                 string outputName = reps[i].name + "|" + toString(reps[i].bin);
697                                 outputName = outputName + "|" + toString(reps[i].size);
698                                 if (groupfile != "") {
699                                         outputName = outputName + "|" + reps[i].group;
700                                 }
701                                 out << ">" << outputName << endl;
702                                 out << sequence << endl;
703                         }
704                 }
705                         
706                 out.close();
707                 
708                 return 0;
709
710         }
711         catch(exception& e) {
712                 m->errorOut(e, "GetOTURepCommand", "processNames");
713                 exit(1);
714         }
715 }
716 //**********************************************************************************************************************
717 SeqMap GetOTURepCommand::getMap(int row) {
718         try {
719                 SeqMap rowMap;
720                 
721                 //make sure this row exists in the file, it may not if the seq did not have any distances below the cutoff
722                 if (rowPositions[row] != -1){
723                         //go to row in file
724                         inRow.seekg(rowPositions[row]);
725                         
726                         int rowNum, numDists, colNum;
727                         float dist;
728                         
729                         inRow >> rowNum >> numDists;
730                         
731                         for(int i = 0; i < numDists; i++) {
732                                 inRow >> colNum >> dist;
733                                 rowMap[colNum] = dist;
734                                 
735                         }
736                 }
737                 
738                 return rowMap;
739         }
740         catch(exception& e) {
741                 m->errorOut(e, "GetOTURepCommand", "getMap");
742                 exit(1);
743         }
744 }
745 //**********************************************************************************************************************
746