]> git.donarmstrong.com Git - mothur.git/blob - getoturepcommand.cpp
created mothurOut class to handle logfiles
[mothur.git] / getoturepcommand.cpp
1 /*
2  *  getoturepcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/6/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "getoturepcommand.h"
11 #include "readphylip.h"
12 #include "readcolumn.h"
13 #include "formatphylip.h"
14 #include "formatcolumn.h"
15
16
17
18 //********************************************************************************************************************
19 //sorts lowest to highest
20 inline bool compareName(repStruct left, repStruct right){
21         return (left.name < right.name);        
22 }
23 //********************************************************************************************************************
24 //sorts lowest to highest
25 inline bool compareBin(repStruct left, repStruct right){
26         return (left.bin < right.bin);  
27 }
28 //********************************************************************************************************************
29 //sorts lowest to highest
30 inline bool compareSize(repStruct left, repStruct right){
31         return (left.size < right.size);        
32 }
33 //********************************************************************************************************************
34 //sorts lowest to highest
35 inline bool compareGroup(repStruct left, repStruct right){
36         return (left.group < right.group);      
37 }
38 //**********************************************************************************************************************
39 GetOTURepCommand::GetOTURepCommand(string option)  {
40         try{
41                 globaldata = GlobalData::getInstance();
42                 abort = false;
43                 allLines = 1;
44                 labels.clear();
45                                 
46                 //allow user to run help
47                 if (option == "help") { 
48                         help(); abort = true;
49                 } else {
50                         //valid paramters for this command
51                         string Array[] =  {"fasta","list","label","name", "group", "sorted", "phylip","column","large","cutoff","precision","outputdir","inputdir"};
52                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
53                         
54                         OptionParser parser(option);
55                         map<string, string> parameters = parser.getParameters();
56                         
57                         ValidParameters validParameter;
58                         map<string, string>::iterator it;
59                 
60                         //check to make sure all parameters are valid for command
61                         for (it = parameters.begin(); it != parameters.end(); it++) { 
62                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
63                         }
64                         
65                         //if the user changes the input directory command factory will send this info to us in the output parameter 
66                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
67                         if (inputDir == "not found"){   inputDir = "";          }
68                         else {
69                                 string path;
70                                 it = parameters.find("list");
71                                 //user has given a template file
72                                 if(it != parameters.end()){ 
73                                         path = hasPath(it->second);
74                                         //if the user has not given a path then, add inputdir. else leave path alone.
75                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
76                                 }
77                                 
78                                 it = parameters.find("fasta");
79                                 //user has given a template file
80                                 if(it != parameters.end()){ 
81                                         path = hasPath(it->second);
82                                         //if the user has not given a path then, add inputdir. else leave path alone.
83                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
84                                 }
85                                 
86                                 it = parameters.find("phylip");
87                                 //user has given a template file
88                                 if(it != parameters.end()){ 
89                                         path = hasPath(it->second);
90                                         //if the user has not given a path then, add inputdir. else leave path alone.
91                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
92                                 }
93                                 
94                                 it = parameters.find("column");
95                                 //user has given a template file
96                                 if(it != parameters.end()){ 
97                                         path = hasPath(it->second);
98                                         //if the user has not given a path then, add inputdir. else leave path alone.
99                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
100                                 }
101                                 
102                                 it = parameters.find("name");
103                                 //user has given a template file
104                                 if(it != parameters.end()){ 
105                                         path = hasPath(it->second);
106                                         //if the user has not given a path then, add inputdir. else leave path alone.
107                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
108                                 }
109                                 
110                                 it = parameters.find("group");
111                                 //user has given a template file
112                                 if(it != parameters.end()){ 
113                                         path = hasPath(it->second);
114                                         //if the user has not given a path then, add inputdir. else leave path alone.
115                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
116                                 }
117                         }
118
119                         
120                         //if the user changes the output directory command factory will send this info to us in the output parameter 
121                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
122                         
123                         //check for required parameters
124                         fastafile = validParameter.validFile(parameters, "fasta", true);
125                         if (fastafile == "not found") { m->mothurOut("fasta is a required parameter for the get.oturep command."); m->mothurOutEndLine(); abort = true; }
126                         else if (fastafile == "not open") { abort = true; }     
127                 
128                         listfile = validParameter.validFile(parameters, "list", true);
129                         if (listfile == "not found") { m->mothurOut("list is a required parameter for the get.oturep command."); m->mothurOutEndLine(); abort = true; }
130                         else if (listfile == "not open") { abort = true; }      
131                         
132                         phylipfile = validParameter.validFile(parameters, "phylip", true);
133                         if (phylipfile == "not found") { phylipfile = "";  }
134                         else if (phylipfile == "not open") { abort = true; }    
135                         else { distFile = phylipfile; format = "phylip";   }
136                         
137                         columnfile = validParameter.validFile(parameters, "column", true);
138                         if (columnfile == "not found") { columnfile = ""; }
139                         else if (columnfile == "not open") { abort = true; }    
140                         else { distFile = columnfile; format = "column";   }
141                         
142                         namefile = validParameter.validFile(parameters, "name", true);
143                         if (namefile == "not open") { abort = true; }   
144                         else if (namefile == "not found") { namefile = ""; }
145                         
146                         if ((phylipfile == "") && (columnfile == "")) { m->mothurOut("When executing a get.oturep command you must enter a phylip or a column."); m->mothurOutEndLine(); abort = true; }
147                         else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a get.oturep command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
148                 
149                         if (columnfile != "") {  if (namefile == "") {  cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; }  }
150
151                         //check for optional parameter and set defaults
152                         // ...at some point should added some additional type checking...
153                         label = validParameter.validFile(parameters, "label", false);                   
154                         if (label == "not found") { label = ""; allLines = 1;  }
155                         else { 
156                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
157                                 else { allLines = 1;  }
158                         }
159                         
160                         groupfile = validParameter.validFile(parameters, "group", true);
161                         if (groupfile == "not open") { groupfile = ""; abort = true; }
162                         else if (groupfile == "not found") { groupfile = ""; }
163                         else {
164                                 //read in group map info.
165                                 groupMap = new GroupMap(groupfile);
166                                 int error = groupMap->readMap();
167                                 if (error == 1) { delete groupMap; abort = true; }
168                         }
169                         
170                         sorted = validParameter.validFile(parameters, "sorted", false);         if (sorted == "not found"){     sorted = "";    }
171                         if ((sorted != "") && (sorted != "name") && (sorted != "bin") && (sorted != "size") && (sorted != "group")) {
172                                 m->mothurOut(sorted + " is not a valid option for the sorted parameter. The only options are: name, bin, size and group. I will not sort."); m->mothurOutEndLine();
173                                 sorted = "";
174                         }
175                         
176                         if ((sorted == "group") && (groupfile == "")) {
177                                 m->mothurOut("You must provide a groupfile to sort by group. I will not sort."); m->mothurOutEndLine();
178                                 sorted = "";
179                         }
180                         
181                         string temp = validParameter.validFile(parameters, "large", false);             if (temp == "not found") {      temp = "F";     }
182                         large = isTrue(temp);
183                         
184                         temp = validParameter.validFile(parameters, "precision", false);                        if (temp == "not found") { temp = "100"; }
185                         convert(temp, precision); 
186                         
187                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "10.0"; }
188                         convert(temp, cutoff); 
189                         cutoff += (5 / (precision * 10.0));
190                 }
191         }
192         catch(exception& e) {
193                 m->errorOut(e, "GetOTURepCommand", "GetOTURepCommand");
194                 exit(1);
195         }
196 }
197
198 //**********************************************************************************************************************
199
200 void GetOTURepCommand::help(){
201         try {
202                 m->mothurOut("The get.oturep command parameters are phylip, column, list, fasta, name, group, large, cutoff, precision, sorted and label.  The fasta and list parameters are required, as well as phylip or column and name.\n");
203                 m->mothurOut("The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n");
204                 m->mothurOut("The phylip or column parameter is required, but only one may be used.  If you use a column file the name filename is required. \n");
205                 m->mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n");
206                 m->mothurOut("The get.oturep command should be in the following format: get.oturep(phylip=yourDistanceMatrix, fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n");
207                 m->mothurOut("Example get.oturep(phylip=amazon.dist, fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups).\n");
208                 m->mothurOut("The default value for label is all labels in your inputfile.\n");
209                 m->mothurOut("The sorted parameter allows you to indicate you want the output sorted. You can sort by sequence name, bin number, bin size or group. The default is no sorting, but your options are name, number, size, or group.\n");
210                 m->mothurOut("The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n");
211                 m->mothurOut("The get.oturep command outputs a .fastarep and .rep.names file for each distance you specify, selecting one OTU representative for each bin.\n");
212                 m->mothurOut("If you provide a groupfile, then it also appends the names of the groups present in that bin.\n");
213                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
214         }
215         catch(exception& e) {
216                 m->errorOut(e, "GetOTURepCommand", "help");
217                 exit(1);
218         }
219 }
220
221 //**********************************************************************************************************************
222
223 GetOTURepCommand::~GetOTURepCommand(){}
224
225 //**********************************************************************************************************************
226
227 int GetOTURepCommand::execute(){
228         try {
229         
230                 if (abort == true) { return 0; }
231                 int error;
232                 
233                 if (!large) {
234                         //read distance files
235                         if (format == "column") { readMatrix = new ReadColumnMatrix(distFile); }        
236                         else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(distFile); }
237                         else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0;  }
238                         
239                         readMatrix->setCutoff(cutoff);
240         
241                         if(namefile != ""){     
242                                 nameMap = new NameAssignment(namefile);
243                                 nameMap->readMap();
244                         }else{  nameMap = NULL;         }
245                         
246                         readMatrix->read(nameMap);
247
248                         //get matrix
249                         if (globaldata->gListVector != NULL) {  delete globaldata->gListVector;  }
250                         globaldata->gListVector = readMatrix->getListVector();
251
252                         SparseMatrix* matrix = readMatrix->getMatrix();
253                         
254                         // Create a data structure to quickly access the distance information.
255                         // It consists of a vector of distance maps, where each map contains
256                         // all distances of a certain sequence. Vector and maps are accessed
257                         // via the index of a sequence in the distance matrix
258                         seqVec = vector<SeqMap>(globaldata->gListVector->size()); 
259                         for (MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++) {
260                                 seqVec[currentCell->row][currentCell->column] = currentCell->dist;
261                         }
262                         
263                         delete matrix;
264                         delete readMatrix;
265                 }else {
266                         //process file and set up indexes
267                         if (format == "column") { formatMatrix = new FormatColumnMatrix(distFile); }    
268                         else if (format == "phylip") { formatMatrix = new FormatPhylipMatrix(distFile); }
269                         else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0;  }
270                         
271                         formatMatrix->setCutoff(cutoff);
272         
273                         if(namefile != ""){     
274                                 nameMap = new NameAssignment(namefile);
275                                 nameMap->readMap();
276                         }else{  nameMap = NULL;         }
277                         
278                         formatMatrix->read(nameMap);
279
280                         //get matrix
281                         if (globaldata->gListVector != NULL) {  delete globaldata->gListVector;  }
282                         globaldata->gListVector = formatMatrix->getListVector();
283                         
284                         distFile = formatMatrix->getFormattedFileName();
285                         
286                         //positions in file where the distances for each sequence begin
287                         //rowPositions[1] = position in file where distance related to sequence 1 start.
288                         rowPositions = formatMatrix->getRowPositions();
289                         
290                         delete formatMatrix;
291                         
292                         //openfile for getMap to use
293                         openInputFile(distFile, inRow);
294                 }
295                 
296                 //globaldata->gListVector bin 0 = first name read in distance matrix, globaldata->gListVector bin 1 = second name read in distance matrix
297                 if (globaldata->gListVector != NULL) {
298                         vector<string> names;
299                         string binnames;
300                         //map names to rows in sparsematrix
301                         for (int i = 0; i < globaldata->gListVector->size(); i++) {
302                                 names.clear();
303                                 binnames = globaldata->gListVector->get(i);
304                                 
305                                 splitAtComma(binnames, names);
306                                 
307                                 for (int j = 0; j < names.size(); j++) {
308                                         nameToIndex[names[j]] = i;
309                                 }
310                         }
311                 } else { m->mothurOut("error, no listvector."); m->mothurOutEndLine(); }
312                 
313                 fasta = new FastaMap();
314                 
315                 //read fastafile
316                 fasta->readFastaFile(fastafile);
317                                 
318                 //if user gave a namesfile then use it
319                 if (namefile != "") {   readNamesFile();        }
320                 
321                 //set format to list so input can get listvector
322                 globaldata->setFormat("list");
323
324                 //read list file
325                 read = new ReadOTUFile(listfile);
326                 read->read(&*globaldata); 
327                 
328                 input = globaldata->ginput;
329                 list = globaldata->gListVector;
330                 string lastLabel = list->getLabel();
331                 
332                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
333                 set<string> processedLabels;
334                 set<string> userLabels = labels;
335         
336                 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
337                         
338                         if (allLines == 1 || labels.count(list->getLabel()) == 1){
339                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
340                                         error = process(list);
341                                         if (error == 1) { return 0; } //there is an error in hte input files, abort command
342                                         
343                                         processedLabels.insert(list->getLabel());
344                                         userLabels.erase(list->getLabel());
345                         }
346                         
347                         if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
348                                         string saveLabel = list->getLabel();
349                                         
350                                         delete list;
351                                         list = input->getListVector(lastLabel);
352                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
353                                         error = process(list);
354                                         if (error == 1) { return 0; } //there is an error in hte input files, abort command
355                                         
356                                         processedLabels.insert(list->getLabel());
357                                         userLabels.erase(list->getLabel());
358                                         
359                                         //restore real lastlabel to save below
360                                         list->setLabel(saveLabel);
361                         }
362                         
363                         lastLabel = list->getLabel();
364                         
365                         delete list;
366                         list = input->getListVector();
367                 }
368                 
369                 //output error messages about any remaining user labels
370                 bool needToRun = false;
371                 for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {  
372                         m->mothurOut("Your file does not include the label " + *it); 
373                         if (processedLabels.count(list->getLabel()) != 1) {
374                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
375                                 needToRun = true;
376                         }else {
377                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
378                         }
379                 }
380                 
381                 //run last label if you need to
382                 if (needToRun == true)  {
383                         if (list != NULL) {     delete list;    }
384                         list = input->getListVector(lastLabel);
385                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
386                         error = process(list);
387                         delete list;
388                         if (error == 1) { return 0; } //there is an error in hte input files, abort command
389                 }
390                 
391                 //close and remove formatted matrix file
392                 if (large) {
393                         inRow.close();
394                         remove(distFile.c_str());
395                 }
396                 
397                 globaldata->gListVector = NULL;
398                 delete input;  globaldata->ginput = NULL;
399                 delete read;
400                 delete fasta;
401                 if (groupfile != "") {
402                         delete groupMap;  globaldata->gGroupmap = NULL;
403                 }
404                 
405                 m->mothurOutEndLine();
406                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
407                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
408                 m->mothurOutEndLine();
409                 
410                 return 0;
411         }
412         catch(exception& e) {
413                 m->errorOut(e, "GetOTURepCommand", "execute");
414                 exit(1);
415         }
416 }
417
418 //**********************************************************************************************************************
419 void GetOTURepCommand::readNamesFile() {
420         try {
421                 vector<string> dupNames;
422                 openInputFile(namefile, inNames);
423                 
424                 string name, names, sequence;
425         
426                 while(inNames){
427                         inNames >> name;                        //read from first column  A
428                         inNames >> names;               //read from second column  A,B,C,D
429                         
430                         dupNames.clear();
431                         
432                         //parse names into vector
433                         splitAtComma(names, dupNames);
434                         
435                         //store names in fasta map
436                         sequence = fasta->getSequence(name);
437                         for (int i = 0; i < dupNames.size(); i++) {
438                                 fasta->push_back(dupNames[i], sequence);
439                         }
440                 
441                         gobble(inNames);
442                 }
443                 inNames.close();
444
445         }
446         catch(exception& e) {
447                 m->errorOut(e, "GetOTURepCommand", "readNamesFile");
448                 exit(1);
449         }
450 }
451 //**********************************************************************************************************************
452 string GetOTURepCommand::findRep(int bin, string& group, ListVector* thisList, int& binsize) {
453         try{
454                 vector<string> names;
455                 map<string, string> groups;
456                 map<string, string>::iterator groupIt;
457
458                 //parse names into vector
459                 string binnames = thisList->get(bin);
460                 splitAtComma(binnames, names);
461                 binsize = names.size();
462
463                 //if you have a groupfile
464                 if (groupfile != "") {
465                         //find the groups that are in this bin
466                         for (size_t i = 0; i < names.size(); i++) {
467                                 string groupName = groupMap->getGroup(names[i]);
468                                 if (groupName == "not found") {  
469                                         m->mothurOut(names[i] + " is missing from your group file. Please correct. "); m->mothurOutEndLine();
470                                         groupError = true;
471                                 } else {
472                                         groups[groupName] = groupName;
473                                 }
474                         }
475                         
476                         //turn the groups into a string
477                         for (groupIt = groups.begin(); groupIt != groups.end(); groupIt++) {
478                                 group += groupIt->first + "-";
479                         }
480                         //rip off last dash
481                         group = group.substr(0, group.length()-1);
482                 }else{ group = ""; }
483
484                 // if only 1 sequence in bin or processing the "unique" label, then 
485                 // the first sequence of the OTU is the representative one
486                 if ((names.size() == 1) || (list->getLabel() == "unique")) {
487                         return names[0];
488                 }else{
489                         vector<int> seqIndex(names.size());
490                         vector<float> max_dist(names.size());
491                         vector<float> total_dist(names.size());
492
493                         //fill seqIndex and initialize sums
494                         for (size_t i = 0; i < names.size(); i++) {
495                                 seqIndex[i] = nameToIndex[names[i]];
496                                 max_dist[i] = 0.0;
497                                 total_dist[i] = 0.0;
498                         }
499                         
500                         // loop through all entries in seqIndex
501                         SeqMap::iterator it;
502                         SeqMap currMap;
503                         for (size_t i=0; i < seqIndex.size(); i++) {
504                         
505                                 if (!large) {   currMap = seqVec[seqIndex[i]];  }
506                                 else            {       currMap = getMap(seqIndex[i]);  }
507                                 
508                                 for (size_t j=0; j < seqIndex.size(); j++) {
509                                         it = currMap.find(seqIndex[j]);         
510                                         if (it != currMap.end()) {
511                                                 max_dist[i] = max(max_dist[i], it->second);
512                                                 max_dist[j] = max(max_dist[j], it->second);
513                                                 total_dist[i] += it->second;
514                                                 total_dist[j] += it->second;
515                                         }else{ //if you can't find the distance make it the cutoff
516                                                 max_dist[i] = max(max_dist[i], cutoff);
517                                                 max_dist[j] = max(max_dist[j], cutoff);
518                                                 total_dist[i] += cutoff;
519                                                 total_dist[j] += cutoff;
520                                         }
521                                 }
522                         }
523                         
524                         // sequence with the smallest maximum distance is the representative
525                         //if tie occurs pick sequence with smallest average distance
526                         float min = 10000;
527                         int minIndex;
528                         for (size_t i=0; i < max_dist.size(); i++) {
529                                 if (max_dist[i] < min) {
530                                         min = max_dist[i];
531                                         minIndex = i;
532                                 }else if (max_dist[i] == min) {
533                                         float currentAverage = total_dist[minIndex] / (float) total_dist.size();
534                                         float newAverage = total_dist[i] / (float) total_dist.size();
535                                         
536                                         if (newAverage < currentAverage) {
537                                                 min = max_dist[i];
538                                                 minIndex = i;
539                                         }
540                                 }
541                         }
542
543                         return(names[minIndex]);
544                 }
545         }
546         catch(exception& e) {
547                 m->errorOut(e, "GetOTURepCommand", "FindRep");
548                 exit(1);
549         }
550 }
551
552 //**********************************************************************************************************************
553 int GetOTURepCommand::process(ListVector* processList) {
554         try{
555                 string nameRep, name, sequence;
556
557                 //create output file
558                 if (outputDir == "") { outputDir += hasPath(listfile); }
559                 string outputFileName = outputDir + getRootName(getSimpleName(listfile)) + processList->getLabel() + ".rep.fasta";
560                 openOutputFile(outputFileName, out);
561                 vector<repStruct> reps;
562                 outputNames.push_back(outputFileName);
563                 
564                 ofstream newNamesOutput;
565                 string outputNamesFile = outputDir + getRootName(getSimpleName(listfile)) + processList->getLabel() + ".rep.names";
566                 openOutputFile(outputNamesFile, newNamesOutput);
567                 outputNames.push_back(outputNamesFile);
568                 
569                 //for each bin in the list vector
570                 for (int i = 0; i < processList->size(); i++) {
571                         string groups;
572                         int binsize;
573                         nameRep = findRep(i, groups, processList, binsize);
574                         
575                         //output to new names file
576                         newNamesOutput << nameRep << '\t' << processList->get(i) << endl;
577
578                         //print out name and sequence for that bin
579                         sequence = fasta->getSequence(nameRep);
580
581                         if (sequence != "not found") {
582                                 if (sorted == "") { //print them out
583                                         nameRep = nameRep + "|" + toString(i+1);
584                                         nameRep = nameRep + "|" + toString(binsize);
585                                         if (groupfile != "") {
586                                                 nameRep = nameRep + "|" + groups;
587                                         }
588                                         out << ">" << nameRep << endl;
589                                         out << sequence << endl;
590                                 }else { //save them
591                                         repStruct newRep(nameRep, i+1, binsize, groups);
592                                         reps.push_back(newRep);
593                                 }
594                         }else { 
595                                 m->mothurOut(nameRep + " is missing from your fasta or name file. Please correct. "); m->mothurOutEndLine(); 
596                                 remove(outputFileName.c_str());
597                                 remove(outputNamesFile.c_str());
598                                 return 1;
599                         }
600                 }
601                 
602                 if (sorted != "") { //then sort them and print them
603                         if (sorted == "name")           {  sort(reps.begin(), reps.end(), compareName);         }
604                         else if (sorted == "bin")       {  sort(reps.begin(), reps.end(), compareBin);          }
605                         else if (sorted == "size")      {  sort(reps.begin(), reps.end(), compareSize);         }
606                         else if (sorted == "group")     {  sort(reps.begin(), reps.end(), compareGroup);        }
607                         
608                         //print them
609                         for (int i = 0; i < reps.size(); i++) {
610                                 string sequence = fasta->getSequence(reps[i].name);
611                                 string outputName = reps[i].name + "|" + toString(reps[i].bin);
612                                 outputName = outputName + "|" + toString(reps[i].size);
613                                 if (groupfile != "") {
614                                         outputName = outputName + "|" + reps[i].group;
615                                 }
616                                 out << ">" << outputName << endl;
617                                 out << sequence << endl;
618                         }
619                 }
620
621                 out.close();
622                 newNamesOutput.close();
623                 return 0;
624
625         }
626         catch(exception& e) {
627                 m->errorOut(e, "GetOTURepCommand", "process");
628                 exit(1);
629         }
630 }
631
632 //**********************************************************************************************************************
633 SeqMap GetOTURepCommand::getMap(int row) {
634         try {
635                 SeqMap rowMap;
636                 
637                 //make sure this row exists in the file, it may not if the seq did not have any distances below the cutoff
638                 if (rowPositions[row] != -1){
639                         //go to row in file
640                         inRow.seekg(rowPositions[row]);
641                         
642                         int rowNum, numDists, colNum;
643                         float dist;
644                         
645                         inRow >> rowNum >> numDists;
646                         
647                         for(int i = 0; i < numDists; i++) {
648                                 inRow >> colNum >> dist;
649                                 rowMap[colNum] = dist;
650                                 
651                         }
652                 }
653                 
654                 return rowMap;
655         }
656         catch(exception& e) {
657                 m->errorOut(e, "GetOTURepCommand", "getMap");
658                 exit(1);
659         }
660 }
661 //**********************************************************************************************************************
662