]> git.donarmstrong.com Git - mothur.git/blob - getoturepcommand.cpp
fixed some bugs found while testing 1.8
[mothur.git] / getoturepcommand.cpp
1 /*
2  *  getoturepcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/6/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "getoturepcommand.h"
11 #include "readphylip.h"
12 #include "readcolumn.h"
13 #include "formatphylip.h"
14 #include "formatcolumn.h"
15
16
17
18 //********************************************************************************************************************
19 //sorts lowest to highest
20 inline bool compareName(repStruct left, repStruct right){
21         return (left.name < right.name);        
22 }
23 //********************************************************************************************************************
24 //sorts lowest to highest
25 inline bool compareBin(repStruct left, repStruct right){
26         return (left.bin < right.bin);  
27 }
28 //********************************************************************************************************************
29 //sorts lowest to highest
30 inline bool compareSize(repStruct left, repStruct right){
31         return (left.size < right.size);        
32 }
33 //********************************************************************************************************************
34 //sorts lowest to highest
35 inline bool compareGroup(repStruct left, repStruct right){
36         return (left.group < right.group);      
37 }
38 //**********************************************************************************************************************
39 GetOTURepCommand::GetOTURepCommand(string option){
40         try{
41                 globaldata = GlobalData::getInstance();
42                 abort = false;
43                 allLines = 1;
44                 labels.clear();
45                 
46                 //allow user to run help
47                 if (option == "help") { 
48                         help(); abort = true;
49                 } else {
50                         //valid paramters for this command
51                         string Array[] =  {"fasta","list","label","name", "group", "sorted", "phylip","column","large","cutoff","precision","outputdir","inputdir"};
52                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
53                         
54                         OptionParser parser(option);
55                         map<string, string> parameters = parser.getParameters();
56                         
57                         ValidParameters validParameter;
58                         map<string, string>::iterator it;
59                 
60                         //check to make sure all parameters are valid for command
61                         for (it = parameters.begin(); it != parameters.end(); it++) { 
62                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
63                         }
64                         
65                         //if the user changes the input directory command factory will send this info to us in the output parameter 
66                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
67                         if (inputDir == "not found"){   inputDir = "";          }
68                         else {
69                                 string path;
70                                 it = parameters.find("list");
71                                 //user has given a template file
72                                 if(it != parameters.end()){ 
73                                         path = hasPath(it->second);
74                                         //if the user has not given a path then, add inputdir. else leave path alone.
75                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
76                                 }
77                                 
78                                 it = parameters.find("fasta");
79                                 //user has given a template file
80                                 if(it != parameters.end()){ 
81                                         path = hasPath(it->second);
82                                         //if the user has not given a path then, add inputdir. else leave path alone.
83                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
84                                 }
85                                 
86                                 it = parameters.find("phylip");
87                                 //user has given a template file
88                                 if(it != parameters.end()){ 
89                                         path = hasPath(it->second);
90                                         //if the user has not given a path then, add inputdir. else leave path alone.
91                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
92                                 }
93                                 
94                                 it = parameters.find("column");
95                                 //user has given a template file
96                                 if(it != parameters.end()){ 
97                                         path = hasPath(it->second);
98                                         //if the user has not given a path then, add inputdir. else leave path alone.
99                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
100                                 }
101                                 
102                                 it = parameters.find("name");
103                                 //user has given a template file
104                                 if(it != parameters.end()){ 
105                                         path = hasPath(it->second);
106                                         //if the user has not given a path then, add inputdir. else leave path alone.
107                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
108                                 }
109                                 
110                                 it = parameters.find("group");
111                                 //user has given a template file
112                                 if(it != parameters.end()){ 
113                                         path = hasPath(it->second);
114                                         //if the user has not given a path then, add inputdir. else leave path alone.
115                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
116                                 }
117                         }
118
119                         
120                         //if the user changes the output directory command factory will send this info to us in the output parameter 
121                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
122                         
123                         //check for required parameters
124                         fastafile = validParameter.validFile(parameters, "fasta", true);
125                         if (fastafile == "not found") { mothurOut("fasta is a required parameter for the get.oturep command."); mothurOutEndLine(); abort = true; }
126                         else if (fastafile == "not open") { abort = true; }     
127                 
128                         listfile = validParameter.validFile(parameters, "list", true);
129                         if (listfile == "not found") { mothurOut("list is a required parameter for the get.oturep command."); mothurOutEndLine(); abort = true; }
130                         else if (listfile == "not open") { abort = true; }      
131                         
132                         phylipfile = validParameter.validFile(parameters, "phylip", true);
133                         if (phylipfile == "not found") { phylipfile = "";  }
134                         else if (phylipfile == "not open") { abort = true; }    
135                         else { distFile = phylipfile; format = "phylip";   }
136                         
137                         columnfile = validParameter.validFile(parameters, "column", true);
138                         if (columnfile == "not found") { columnfile = ""; }
139                         else if (columnfile == "not open") { abort = true; }    
140                         else { distFile = columnfile; format = "column";   }
141                         
142                         namefile = validParameter.validFile(parameters, "name", true);
143                         if (namefile == "not open") { abort = true; }   
144                         else if (namefile == "not found") { namefile = ""; }
145                         
146                         if ((phylipfile == "") && (columnfile == "")) { mothurOut("When executing a get.oturep command you must enter a phylip or a column."); mothurOutEndLine(); abort = true; }
147                         else if ((phylipfile != "") && (columnfile != "")) { mothurOut("When executing a get.oturep command you must enter ONLY ONE of the following: phylip or column."); mothurOutEndLine(); abort = true; }
148                 
149                         if (columnfile != "") {  if (namefile == "") {  cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; }  }
150
151                         //check for optional parameter and set defaults
152                         // ...at some point should added some additional type checking...
153                         label = validParameter.validFile(parameters, "label", false);                   
154                         if (label == "not found") { label = ""; allLines = 1;  }
155                         else { 
156                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
157                                 else { allLines = 1;  }
158                         }
159                         
160                         groupfile = validParameter.validFile(parameters, "group", true);
161                         if (groupfile == "not open") { groupfile = ""; abort = true; }
162                         else if (groupfile == "not found") { groupfile = ""; }
163                         else {
164                                 //read in group map info.
165                                 groupMap = new GroupMap(groupfile);
166                                 int error = groupMap->readMap();
167                                 if (error == 1) { delete groupMap; abort = true; }
168                         }
169                         
170                         sorted = validParameter.validFile(parameters, "sorted", false);         if (sorted == "not found"){     sorted = "";    }
171                         if ((sorted != "") && (sorted != "name") && (sorted != "bin") && (sorted != "size") && (sorted != "group")) {
172                                 mothurOut(sorted + " is not a valid option for the sorted parameter. The only options are: name, bin, size and group. I will not sort."); mothurOutEndLine();
173                                 sorted = "";
174                         }
175                         
176                         if ((sorted == "group") && (groupfile == "")) {
177                                 mothurOut("You must provide a groupfile to sort by group. I will not sort."); mothurOutEndLine();
178                                 sorted = "";
179                         }
180                         
181                         string temp = validParameter.validFile(parameters, "large", false);             if (temp == "not found") {      temp = "F";     }
182                         large = isTrue(temp);
183                         
184                         temp = validParameter.validFile(parameters, "precision", false);                        if (temp == "not found") { temp = "100"; }
185                         convert(temp, precision); 
186                         
187                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "10.0"; }
188                         convert(temp, cutoff); 
189                         cutoff += (5 / (precision * 10.0));
190                 }
191         }
192         catch(exception& e) {
193                 errorOut(e, "GetOTURepCommand", "GetOTURepCommand");
194                 exit(1);
195         }
196 }
197
198 //**********************************************************************************************************************
199
200 void GetOTURepCommand::help(){
201         try {
202                 mothurOut("The get.oturep command parameters are phylip, column, list, fasta, name, group, large, cutoff, precision, sorted and label.  The fasta and list parameters are required, as well as phylip or column and name.\n");
203                 mothurOut("The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n");
204                 mothurOut("The phylip or column parameter is required, but only one may be used.  If you use a column file the name filename is required. \n");
205                 mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n");
206                 mothurOut("The get.oturep command should be in the following format: get.oturep(phylip=yourDistanceMatrix, fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n");
207                 mothurOut("Example get.oturep(phylip=amazon.dist, fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups).\n");
208                 mothurOut("The default value for label is all labels in your inputfile.\n");
209                 mothurOut("The sorted parameter allows you to indicate you want the output sorted. You can sort by sequence name, bin number, bin size or group. The default is no sorting, but your options are name, number, size, or group.\n");
210                 mothurOut("The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n");
211                 mothurOut("The get.oturep command outputs a .fastarep and .rep.names file for each distance you specify, selecting one OTU representative for each bin.\n");
212                 mothurOut("If you provide a groupfile, then it also appends the names of the groups present in that bin.\n");
213                 mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
214         }
215         catch(exception& e) {
216                 errorOut(e, "GetOTURepCommand", "help");
217                 exit(1);
218         }
219 }
220
221 //**********************************************************************************************************************
222
223 GetOTURepCommand::~GetOTURepCommand(){}
224
225 //**********************************************************************************************************************
226
227 int GetOTURepCommand::execute(){
228         try {
229         
230                 if (abort == true) { return 0; }
231                 int error;
232                 
233                 if (!large) {
234                         //read distance files
235                         if (format == "column") { readMatrix = new ReadColumnMatrix(distFile); }        
236                         else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(distFile); }
237                         else { mothurOut("File format error."); mothurOutEndLine(); return 0;  }
238                         
239                         readMatrix->setCutoff(cutoff);
240         
241                         if(namefile != ""){     
242                                 nameMap = new NameAssignment(namefile);
243                                 nameMap->readMap();
244                         }else{  nameMap = NULL;         }
245                         
246                         readMatrix->read(nameMap);
247
248                         //get matrix
249                         if (globaldata->gListVector != NULL) {  delete globaldata->gListVector;  }
250                         globaldata->gListVector = readMatrix->getListVector();
251
252                         SparseMatrix* matrix = readMatrix->getMatrix();
253                         
254                         // Create a data structure to quickly access the distance information.
255                         // It consists of a vector of distance maps, where each map contains
256                         // all distances of a certain sequence. Vector and maps are accessed
257                         // via the index of a sequence in the distance matrix
258                         seqVec = vector<SeqMap>(globaldata->gListVector->size()); 
259                         for (MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++) {
260                                 seqVec[currentCell->row][currentCell->column] = currentCell->dist;
261                         }
262                         
263                         delete matrix;
264                         delete readMatrix;
265                 }else {
266                         //process file and set up indexes
267                         if (format == "column") { formatMatrix = new FormatColumnMatrix(distFile); }    
268                         else if (format == "phylip") { formatMatrix = new FormatPhylipMatrix(distFile); }
269                         else { mothurOut("File format error."); mothurOutEndLine(); return 0;  }
270                         
271                         formatMatrix->setCutoff(cutoff);
272         
273                         if(namefile != ""){     
274                                 nameMap = new NameAssignment(namefile);
275                                 nameMap->readMap();
276                         }else{  nameMap = NULL;         }
277                         
278                         formatMatrix->read(nameMap);
279
280                         //get matrix
281                         if (globaldata->gListVector != NULL) {  delete globaldata->gListVector;  }
282                         globaldata->gListVector = formatMatrix->getListVector();
283                         
284                         distFile = formatMatrix->getFormattedFileName();
285                         
286                         //positions in file where the distances for each sequence begin
287                         //rowPositions[1] = position in file where distance related to sequence 1 start.
288                         rowPositions = formatMatrix->getRowPositions();
289                         
290                         delete formatMatrix;
291                         
292                         //openfile for getMap to use
293                         openInputFile(distFile, inRow);
294                 }
295                 
296                 //globaldata->gListVector bin 0 = first name read in distance matrix, globaldata->gListVector bin 1 = second name read in distance matrix
297                 if (globaldata->gListVector != NULL) {
298                         vector<string> names;
299                         string binnames;
300                         //map names to rows in sparsematrix
301                         for (int i = 0; i < globaldata->gListVector->size(); i++) {
302                                 names.clear();
303                                 binnames = globaldata->gListVector->get(i);
304                                 
305                                 splitAtComma(binnames, names);
306                                 
307                                 for (int j = 0; j < names.size(); j++) {
308                                         nameToIndex[names[j]] = i;
309                                 }
310                         }
311                 } else { mothurOut("error, no listvector."); mothurOutEndLine(); }
312                 
313                 fasta = new FastaMap();
314                 
315                 //read fastafile
316                 fasta->readFastaFile(fastafile);
317                                 
318                 //if user gave a namesfile then use it
319                 if (namefile != "") {   readNamesFile();        }
320                 
321                 //set format to list so input can get listvector
322                 globaldata->setFormat("list");
323
324                 //read list file
325                 read = new ReadOTUFile(listfile);
326                 read->read(&*globaldata); 
327                 
328                 input = globaldata->ginput;
329                 list = globaldata->gListVector;
330                 string lastLabel = list->getLabel();
331                 
332                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
333                 set<string> processedLabels;
334                 set<string> userLabels = labels;
335         
336                 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
337                         
338                         if (allLines == 1 || labels.count(list->getLabel()) == 1){
339                                         mothurOut(list->getLabel() + "\t" + toString(list->size())); mothurOutEndLine();
340                                         error = process(list);
341                                         if (error == 1) { return 0; } //there is an error in hte input files, abort command
342                                         
343                                         processedLabels.insert(list->getLabel());
344                                         userLabels.erase(list->getLabel());
345                         }
346                         
347                         if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
348                                         string saveLabel = list->getLabel();
349                                         
350                                         delete list;
351                                         list = input->getListVector(lastLabel);
352                                         mothurOut(list->getLabel() + "\t" + toString(list->size())); mothurOutEndLine();
353                                         error = process(list);
354                                         if (error == 1) { return 0; } //there is an error in hte input files, abort command
355                                         
356                                         processedLabels.insert(list->getLabel());
357                                         userLabels.erase(list->getLabel());
358                                         
359                                         //restore real lastlabel to save below
360                                         list->setLabel(saveLabel);
361                         }
362                         
363                         lastLabel = list->getLabel();
364                         
365                         delete list;
366                         list = input->getListVector();
367                 }
368                 
369                 //output error messages about any remaining user labels
370                 bool needToRun = false;
371                 for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {  
372                         mothurOut("Your file does not include the label " + *it); 
373                         if (processedLabels.count(list->getLabel()) != 1) {
374                                 mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
375                                 needToRun = true;
376                         }else {
377                                 mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
378                         }
379                 }
380                 
381                 //run last label if you need to
382                 if (needToRun == true)  {
383                         if (list != NULL) {     delete list;    }
384                         list = input->getListVector(lastLabel);
385                         mothurOut(list->getLabel() + "\t" + toString(list->size())); mothurOutEndLine();
386                         error = process(list);
387                         delete list;
388                         if (error == 1) { return 0; } //there is an error in hte input files, abort command
389                 }
390                 
391                 //close and remove formatted matrix file
392                 if (large) {
393                         inRow.close();
394                         remove(distFile.c_str());
395                 }
396                 
397                 globaldata->gListVector = NULL;
398                 delete input;  globaldata->ginput = NULL;
399                 delete read;
400                 delete fasta;
401                 if (groupfile != "") {
402                         delete groupMap;  globaldata->gGroupmap = NULL;
403                 }
404                 
405                 return 0;
406         }
407         catch(exception& e) {
408                 errorOut(e, "GetOTURepCommand", "execute");
409                 exit(1);
410         }
411 }
412
413 //**********************************************************************************************************************
414 void GetOTURepCommand::readNamesFile() {
415         try {
416                 vector<string> dupNames;
417                 openInputFile(namefile, inNames);
418                 
419                 string name, names, sequence;
420         
421                 while(inNames){
422                         inNames >> name;                        //read from first column  A
423                         inNames >> names;               //read from second column  A,B,C,D
424                         
425                         dupNames.clear();
426                         
427                         //parse names into vector
428                         splitAtComma(names, dupNames);
429                         
430                         //store names in fasta map
431                         sequence = fasta->getSequence(name);
432                         for (int i = 0; i < dupNames.size(); i++) {
433                                 fasta->push_back(dupNames[i], sequence);
434                         }
435                 
436                         gobble(inNames);
437                 }
438                 inNames.close();
439
440         }
441         catch(exception& e) {
442                 errorOut(e, "GetOTURepCommand", "readNamesFile");
443                 exit(1);
444         }
445 }
446 //**********************************************************************************************************************
447 string GetOTURepCommand::findRep(int bin, string& group, ListVector* thisList, int& binsize) {
448         try{
449                 vector<string> names;
450                 map<string, string> groups;
451                 map<string, string>::iterator groupIt;
452
453                 //parse names into vector
454                 string binnames = thisList->get(bin);
455                 splitAtComma(binnames, names);
456                 binsize = names.size();
457
458                 //if you have a groupfile
459                 if (groupfile != "") {
460                         //find the groups that are in this bin
461                         for (size_t i = 0; i < names.size(); i++) {
462                                 string groupName = groupMap->getGroup(names[i]);
463                                 if (groupName == "not found") {  
464                                         mothurOut(names[i] + " is missing from your group file. Please correct. "); mothurOutEndLine();
465                                         groupError = true;
466                                 } else {
467                                         groups[groupName] = groupName;
468                                 }
469                         }
470                         
471                         //turn the groups into a string
472                         for (groupIt = groups.begin(); groupIt != groups.end(); groupIt++) {
473                                 group += groupIt->first + "-";
474                         }
475                         //rip off last dash
476                         group = group.substr(0, group.length()-1);
477                 }else{ group = ""; }
478
479                 // if only 1 sequence in bin or processing the "unique" label, then 
480                 // the first sequence of the OTU is the representative one
481                 if ((names.size() == 1) || (list->getLabel() == "unique")) {
482                         return names[0];
483                 }else{
484                         vector<int> seqIndex(names.size());
485                         vector<float> max_dist(names.size());
486                         vector<float> total_dist(names.size());
487
488                         //fill seqIndex and initialize sums
489                         for (size_t i = 0; i < names.size(); i++) {
490                                 seqIndex[i] = nameToIndex[names[i]];
491                                 max_dist[i] = 0.0;
492                                 total_dist[i] = 0.0;
493                         }
494                         
495                         // loop through all entries in seqIndex
496                         SeqMap::iterator it;
497                         SeqMap currMap;
498                         for (size_t i=0; i < seqIndex.size(); i++) {
499                         
500                                 if (!large) {   currMap = seqVec[seqIndex[i]];  }
501                                 else            {       currMap = getMap(seqIndex[i]);  }
502                                 
503                                 for (size_t j=0; j < seqIndex.size(); j++) {
504                                         it = currMap.find(seqIndex[j]);         
505                                         if (it != currMap.end()) {
506                                                 max_dist[i] = max(max_dist[i], it->second);
507                                                 max_dist[j] = max(max_dist[j], it->second);
508                                                 total_dist[i] += it->second;
509                                                 total_dist[j] += it->second;
510                                         }else{ //if you can't find the distance make it the cutoff
511                                                 max_dist[i] = max(max_dist[i], cutoff);
512                                                 max_dist[j] = max(max_dist[j], cutoff);
513                                                 total_dist[i] += cutoff;
514                                                 total_dist[j] += cutoff;
515                                         }
516                                 }
517                         }
518                         
519                         // sequence with the smallest maximum distance is the representative
520                         //if tie occurs pick sequence with smallest average distance
521                         float min = 10000;
522                         int minIndex;
523                         for (size_t i=0; i < max_dist.size(); i++) {
524                                 if (max_dist[i] < min) {
525                                         min = max_dist[i];
526                                         minIndex = i;
527                                 }else if (max_dist[i] == min) {
528                                         float currentAverage = total_dist[minIndex] / (float) total_dist.size();
529                                         float newAverage = total_dist[i] / (float) total_dist.size();
530                                         
531                                         if (newAverage < currentAverage) {
532                                                 min = max_dist[i];
533                                                 minIndex = i;
534                                         }
535                                 }
536                         }
537
538                         return(names[minIndex]);
539                 }
540         }
541         catch(exception& e) {
542                 errorOut(e, "GetOTURepCommand", "FindRep");
543                 exit(1);
544         }
545 }
546
547 //**********************************************************************************************************************
548 int GetOTURepCommand::process(ListVector* processList) {
549         try{
550                 string nameRep, name, sequence;
551
552                 //create output file
553                 if (outputDir == "") { outputDir += hasPath(listfile); }
554                 string outputFileName = outputDir + getRootName(getSimpleName(listfile)) + processList->getLabel() + ".rep.fasta";
555                 openOutputFile(outputFileName, out);
556                 vector<repStruct> reps;
557                 
558                 ofstream newNamesOutput;
559                 string outputNamesFile = outputDir + getRootName(getSimpleName(listfile)) + processList->getLabel() + ".rep.names";
560                 openOutputFile(outputNamesFile, newNamesOutput);
561                 
562                 //for each bin in the list vector
563                 for (int i = 0; i < processList->size(); i++) {
564                         string groups;
565                         int binsize;
566                         nameRep = findRep(i, groups, processList, binsize);
567                         
568                         //output to new names file
569                         newNamesOutput << nameRep << '\t' << processList->get(i) << endl;
570
571                         //print out name and sequence for that bin
572                         sequence = fasta->getSequence(nameRep);
573
574                         if (sequence != "not found") {
575                                 if (sorted == "") { //print them out
576                                         nameRep = nameRep + "|" + toString(i+1);
577                                         nameRep = nameRep + "|" + toString(binsize);
578                                         if (groupfile != "") {
579                                                 nameRep = nameRep + "|" + groups;
580                                         }
581                                         out << ">" << nameRep << endl;
582                                         out << sequence << endl;
583                                 }else { //save them
584                                         repStruct newRep(nameRep, i+1, binsize, groups);
585                                         reps.push_back(newRep);
586                                 }
587                         }else { 
588                                 mothurOut(nameRep + " is missing from your fasta or name file. Please correct. "); mothurOutEndLine(); 
589                                 remove(outputFileName.c_str());
590                                 remove(outputNamesFile.c_str());
591                                 return 1;
592                         }
593                 }
594                 
595                 if (sorted != "") { //then sort them and print them
596                         if (sorted == "name")           {  sort(reps.begin(), reps.end(), compareName);         }
597                         else if (sorted == "bin")       {  sort(reps.begin(), reps.end(), compareBin);          }
598                         else if (sorted == "size")      {  sort(reps.begin(), reps.end(), compareSize);         }
599                         else if (sorted == "group")     {  sort(reps.begin(), reps.end(), compareGroup);        }
600                         
601                         //print them
602                         for (int i = 0; i < reps.size(); i++) {
603                                 string sequence = fasta->getSequence(reps[i].name);
604                                 string outputName = reps[i].name + "|" + toString(reps[i].bin);
605                                 outputName = outputName + "|" + toString(reps[i].size);
606                                 if (groupfile != "") {
607                                         outputName = outputName + "|" + reps[i].group;
608                                 }
609                                 out << ">" << outputName << endl;
610                                 out << sequence << endl;
611                         }
612                 }
613
614                 out.close();
615                 newNamesOutput.close();
616                 return 0;
617
618         }
619         catch(exception& e) {
620                 errorOut(e, "GetOTURepCommand", "process");
621                 exit(1);
622         }
623 }
624
625 //**********************************************************************************************************************
626 SeqMap GetOTURepCommand::getMap(int row) {
627         try {
628                 SeqMap rowMap;
629                 
630                 //make sure this row exists in the file, it may not if the seq did not have any distances below the cutoff
631                 if (rowPositions[row] != -1){
632                         //go to row in file
633                         inRow.seekg(rowPositions[row]);
634                         
635                         int rowNum, numDists, colNum;
636                         float dist;
637                         
638                         inRow >> rowNum >> numDists;
639                         
640                         for(int i = 0; i < numDists; i++) {
641                                 inRow >> colNum >> dist;
642                                 rowMap[colNum] = dist;
643                                 
644                         }
645                 }
646                 
647                 return rowMap;
648         }
649         catch(exception& e) {
650                 errorOut(e, "GetOTURepCommand", "getMap");
651                 exit(1);
652         }
653 }
654 //**********************************************************************************************************************
655