5 * Created by Sarah Westcott on 4/6/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "getoturepcommand.h"
11 #include "readphylip.h"
12 #include "readcolumn.h"
13 #include "formatphylip.h"
14 #include "formatcolumn.h"
15 #include "sharedutilities.h"
18 //********************************************************************************************************************
19 //sorts lowest to highest
20 inline bool compareName(repStruct left, repStruct right){
21 return (left.name < right.name);
23 //********************************************************************************************************************
24 //sorts lowest to highest
25 inline bool compareBin(repStruct left, repStruct right){
26 return (left.bin < right.bin);
28 //********************************************************************************************************************
29 //sorts lowest to highest
30 inline bool compareSize(repStruct left, repStruct right){
31 return (left.size < right.size);
33 //********************************************************************************************************************
34 //sorts lowest to highest
35 inline bool compareGroup(repStruct left, repStruct right){
36 return (left.group < right.group);
38 //**********************************************************************************************************************
39 GetOTURepCommand::GetOTURepCommand(string option) {
41 globaldata = GlobalData::getInstance();
46 //allow user to run help
47 if (option == "help") {
50 //valid paramters for this command
51 string Array[] = {"fasta","list","label","name", "group", "sorted", "phylip","column","large","cutoff","precision","groups","outputdir","inputdir"};
52 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
54 OptionParser parser(option);
55 map<string, string> parameters = parser.getParameters();
57 ValidParameters validParameter;
58 map<string, string>::iterator it;
60 //check to make sure all parameters are valid for command
61 for (it = parameters.begin(); it != parameters.end(); it++) {
62 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
65 //if the user changes the input directory command factory will send this info to us in the output parameter
66 string inputDir = validParameter.validFile(parameters, "inputdir", false);
67 if (inputDir == "not found"){ inputDir = ""; }
70 it = parameters.find("list");
71 //user has given a template file
72 if(it != parameters.end()){
73 path = hasPath(it->second);
74 //if the user has not given a path then, add inputdir. else leave path alone.
75 if (path == "") { parameters["list"] = inputDir + it->second; }
78 it = parameters.find("fasta");
79 //user has given a template file
80 if(it != parameters.end()){
81 path = hasPath(it->second);
82 //if the user has not given a path then, add inputdir. else leave path alone.
83 if (path == "") { parameters["fasta"] = inputDir + it->second; }
86 it = parameters.find("phylip");
87 //user has given a template file
88 if(it != parameters.end()){
89 path = hasPath(it->second);
90 //if the user has not given a path then, add inputdir. else leave path alone.
91 if (path == "") { parameters["phylip"] = inputDir + it->second; }
94 it = parameters.find("column");
95 //user has given a template file
96 if(it != parameters.end()){
97 path = hasPath(it->second);
98 //if the user has not given a path then, add inputdir. else leave path alone.
99 if (path == "") { parameters["column"] = inputDir + it->second; }
102 it = parameters.find("name");
103 //user has given a template file
104 if(it != parameters.end()){
105 path = hasPath(it->second);
106 //if the user has not given a path then, add inputdir. else leave path alone.
107 if (path == "") { parameters["name"] = inputDir + it->second; }
110 it = parameters.find("group");
111 //user has given a template file
112 if(it != parameters.end()){
113 path = hasPath(it->second);
114 //if the user has not given a path then, add inputdir. else leave path alone.
115 if (path == "") { parameters["group"] = inputDir + it->second; }
120 //if the user changes the output directory command factory will send this info to us in the output parameter
121 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
123 //check for required parameters
124 fastafile = validParameter.validFile(parameters, "fasta", true);
125 if (fastafile == "not found") { m->mothurOut("fasta is a required parameter for the get.oturep command."); m->mothurOutEndLine(); abort = true; }
126 else if (fastafile == "not open") { abort = true; }
128 listfile = validParameter.validFile(parameters, "list", true);
129 if (listfile == "not found") { m->mothurOut("list is a required parameter for the get.oturep command."); m->mothurOutEndLine(); abort = true; }
130 else if (listfile == "not open") { abort = true; }
132 phylipfile = validParameter.validFile(parameters, "phylip", true);
133 if (phylipfile == "not found") { phylipfile = ""; }
134 else if (phylipfile == "not open") { abort = true; }
135 else { distFile = phylipfile; format = "phylip"; }
137 columnfile = validParameter.validFile(parameters, "column", true);
138 if (columnfile == "not found") { columnfile = ""; }
139 else if (columnfile == "not open") { abort = true; }
140 else { distFile = columnfile; format = "column"; }
142 namefile = validParameter.validFile(parameters, "name", true);
143 if (namefile == "not open") { abort = true; }
144 else if (namefile == "not found") { namefile = ""; }
146 if ((phylipfile == "") && (columnfile == "")) { m->mothurOut("When executing a get.oturep command you must enter a phylip or a column."); m->mothurOutEndLine(); abort = true; }
147 else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a get.oturep command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
149 if (columnfile != "") { if (namefile == "") { cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; } }
151 //check for optional parameter and set defaults
152 // ...at some point should added some additional type checking...
153 label = validParameter.validFile(parameters, "label", false);
154 if (label == "not found") { label = ""; allLines = 1; }
156 if(label != "all") { splitAtDash(label, labels); allLines = 0; }
157 else { allLines = 1; }
160 groupfile = validParameter.validFile(parameters, "group", true);
161 if (groupfile == "not open") { groupfile = ""; abort = true; }
162 else if (groupfile == "not found") { groupfile = ""; }
164 sorted = validParameter.validFile(parameters, "sorted", false); if (sorted == "not found"){ sorted = ""; }
165 if ((sorted != "") && (sorted != "name") && (sorted != "bin") && (sorted != "size") && (sorted != "group")) {
166 m->mothurOut(sorted + " is not a valid option for the sorted parameter. The only options are: name, bin, size and group. I will not sort."); m->mothurOutEndLine();
170 if ((sorted == "group") && (groupfile == "")) {
171 m->mothurOut("You must provide a groupfile to sort by group. I will not sort."); m->mothurOutEndLine();
175 groups = validParameter.validFile(parameters, "groups", false);
176 if (groups == "not found") { groups = ""; }
178 if (groupfile == "") {
179 m->mothurOut("You must provide a groupfile to use groups."); m->mothurOutEndLine();
182 splitAtDash(groups, Groups);
185 globaldata->Groups = Groups;
187 string temp = validParameter.validFile(parameters, "large", false); if (temp == "not found") { temp = "F"; }
188 large = isTrue(temp);
190 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
191 convert(temp, precision);
193 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "10.0"; }
194 convert(temp, cutoff);
195 cutoff += (5 / (precision * 10.0));
198 catch(exception& e) {
199 m->errorOut(e, "GetOTURepCommand", "GetOTURepCommand");
204 //**********************************************************************************************************************
206 void GetOTURepCommand::help(){
208 m->mothurOut("The get.oturep command parameters are phylip, column, list, fasta, name, group, large, cutoff, precision, groups, sorted and label. The fasta and list parameters are required, as well as phylip or column and name.\n");
209 m->mothurOut("The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n");
210 m->mothurOut("The phylip or column parameter is required, but only one may be used. If you use a column file the name filename is required. \n");
211 m->mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n");
212 m->mothurOut("The get.oturep command should be in the following format: get.oturep(phylip=yourDistanceMatrix, fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n");
213 m->mothurOut("Example get.oturep(phylip=amazon.dist, fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups).\n");
214 m->mothurOut("The default value for label is all labels in your inputfile.\n");
215 m->mothurOut("The sorted parameter allows you to indicate you want the output sorted. You can sort by sequence name, bin number, bin size or group. The default is no sorting, but your options are name, number, size, or group.\n");
216 m->mothurOut("The large parameter allows you to indicate that your distance matrix is too large to fit in RAM. The default value is false.\n");
217 m->mothurOut("The group parameter allows you provide a group file.\n");
218 m->mothurOut("The groups parameter allows you to indicate that you want representative sequences for each group specified for each OTU, group name should be separated by dashes. ex. groups=A-B-C.\n");
219 m->mothurOut("The get.oturep command outputs a .fastarep and .rep.names file for each distance you specify, selecting one OTU representative for each bin.\n");
220 m->mothurOut("If you provide a groupfile, then it also appends the names of the groups present in that bin.\n");
221 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
223 catch(exception& e) {
224 m->errorOut(e, "GetOTURepCommand", "help");
229 //**********************************************************************************************************************
231 GetOTURepCommand::~GetOTURepCommand(){}
233 //**********************************************************************************************************************
235 int GetOTURepCommand::execute(){
238 if (abort == true) { return 0; }
242 //read distance files
243 if (format == "column") { readMatrix = new ReadColumnMatrix(distFile); }
244 else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(distFile); }
245 else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0; }
247 readMatrix->setCutoff(cutoff);
250 nameMap = new NameAssignment(namefile);
252 }else{ nameMap = NULL; }
254 readMatrix->read(nameMap);
256 if (m->control_pressed) { delete readMatrix; return 0; }
259 if (globaldata->gListVector != NULL) { delete globaldata->gListVector; }
260 globaldata->gListVector = readMatrix->getListVector();
262 SparseMatrix* matrix = readMatrix->getMatrix();
264 // Create a data structure to quickly access the distance information.
265 // It consists of a vector of distance maps, where each map contains
266 // all distances of a certain sequence. Vector and maps are accessed
267 // via the index of a sequence in the distance matrix
268 seqVec = vector<SeqMap>(globaldata->gListVector->size());
269 for (MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++) {
270 if (m->control_pressed) { delete readMatrix; return 0; }
271 seqVec[currentCell->row][currentCell->column] = currentCell->dist;
278 if (m->control_pressed) { return 0; }
280 //process file and set up indexes
281 if (format == "column") { formatMatrix = new FormatColumnMatrix(distFile); }
282 else if (format == "phylip") { formatMatrix = new FormatPhylipMatrix(distFile); }
283 else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0; }
285 formatMatrix->setCutoff(cutoff);
288 nameMap = new NameAssignment(namefile);
290 }else{ nameMap = NULL; }
292 formatMatrix->read(nameMap);
294 if (m->control_pressed) { delete formatMatrix; return 0; }
297 if (globaldata->gListVector != NULL) { delete globaldata->gListVector; }
298 globaldata->gListVector = formatMatrix->getListVector();
300 distFile = formatMatrix->getFormattedFileName();
302 //positions in file where the distances for each sequence begin
303 //rowPositions[1] = position in file where distance related to sequence 1 start.
304 rowPositions = formatMatrix->getRowPositions();
309 //openfile for getMap to use
310 openInputFile(distFile, inRow);
312 if (m->control_pressed) { inRow.close(); remove(distFile.c_str()); return 0; }
316 //globaldata->gListVector bin 0 = first name read in distance matrix, globaldata->gListVector bin 1 = second name read in distance matrix
317 if (globaldata->gListVector != NULL) {
318 vector<string> names;
320 //map names to rows in sparsematrix
321 for (int i = 0; i < globaldata->gListVector->size(); i++) {
323 binnames = globaldata->gListVector->get(i);
325 splitAtComma(binnames, names);
327 for (int j = 0; j < names.size(); j++) {
328 nameToIndex[names[j]] = i;
331 } else { m->mothurOut("error, no listvector."); m->mothurOutEndLine(); }
334 if (m->control_pressed) {
335 if (large) { inRow.close(); remove(distFile.c_str()); }
339 if (groupfile != "") {
340 //read in group map info.
341 groupMap = new GroupMap(groupfile);
342 int error = groupMap->readMap();
343 if (error == 1) { delete groupMap; m->mothurOut("Error reading your groupfile. Proceeding without groupfile."); m->mothurOutEndLine(); groupfile = ""; }
345 if (Groups.size() != 0) {
346 SharedUtil* util = new SharedUtil();
347 util->setGroups(Groups, groupMap->namesOfGroups, "getoturep");
352 //set format to list so input can get listvector
353 globaldata->setFormat("list");
356 read = new ReadOTUFile(listfile);
357 read->read(&*globaldata);
359 input = globaldata->ginput;
360 list = globaldata->gListVector;
361 string lastLabel = list->getLabel();
363 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
364 set<string> processedLabels;
365 set<string> userLabels = labels;
367 if (m->control_pressed) {
368 if (large) { inRow.close(); remove(distFile.c_str()); }
369 delete read; delete input; delete list; globaldata->gListVector = NULL; return 0;
372 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
374 if (allLines == 1 || labels.count(list->getLabel()) == 1){
375 m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
376 error = process(list);
377 if (error == 1) { return 0; } //there is an error in hte input files, abort command
379 if (m->control_pressed) {
380 if (large) { inRow.close(); remove(distFile.c_str()); }
381 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
382 delete read; delete input; delete list; globaldata->gListVector = NULL; return 0;
385 processedLabels.insert(list->getLabel());
386 userLabels.erase(list->getLabel());
389 if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
390 string saveLabel = list->getLabel();
393 list = input->getListVector(lastLabel);
394 m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
395 error = process(list);
396 if (error == 1) { return 0; } //there is an error in hte input files, abort command
398 if (m->control_pressed) {
399 if (large) { inRow.close(); remove(distFile.c_str()); }
400 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
401 delete read; delete input; delete list; globaldata->gListVector = NULL; return 0;
404 processedLabels.insert(list->getLabel());
405 userLabels.erase(list->getLabel());
407 //restore real lastlabel to save below
408 list->setLabel(saveLabel);
411 lastLabel = list->getLabel();
414 list = input->getListVector();
417 //output error messages about any remaining user labels
418 bool needToRun = false;
419 for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {
420 m->mothurOut("Your file does not include the label " + (*it));
421 if (processedLabels.count(lastLabel) != 1) {
422 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
425 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
429 //run last label if you need to
430 if (needToRun == true) {
431 if (list != NULL) { delete list; }
432 list = input->getListVector(lastLabel);
433 m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
434 error = process(list);
436 if (error == 1) { return 0; } //there is an error in hte input files, abort command
438 if (m->control_pressed) {
439 if (large) { inRow.close(); remove(distFile.c_str()); }
440 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
441 delete read; delete input; delete list; globaldata->gListVector = NULL; return 0;
445 //close and remove formatted matrix file
448 remove(distFile.c_str());
451 globaldata->gListVector = NULL;
452 delete input; globaldata->ginput = NULL;
456 fasta = new FastaMap();
457 fasta->readFastaFile(fastafile);
459 //if user gave a namesfile then use it
460 if (namefile != "") { readNamesFile(); }
462 //output create and output the .rep.fasta files
463 map<string, string>::iterator itNameFile;
464 for (itNameFile = outputNameFiles.begin(); itNameFile != outputNameFiles.end(); itNameFile++) {
465 processNames(itNameFile->first, itNameFile->second);
469 if (groupfile != "") {
470 delete groupMap; globaldata->gGroupmap = NULL;
473 if (m->control_pressed) { return 0; }
475 m->mothurOutEndLine();
476 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
477 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
478 m->mothurOutEndLine();
482 catch(exception& e) {
483 m->errorOut(e, "GetOTURepCommand", "execute");
488 //**********************************************************************************************************************
489 void GetOTURepCommand::readNamesFile() {
491 vector<string> dupNames;
492 openInputFile(namefile, inNames);
494 string name, names, sequence;
497 inNames >> name; //read from first column A
498 inNames >> names; //read from second column A,B,C,D
502 //parse names into vector
503 splitAtComma(names, dupNames);
505 //store names in fasta map
506 sequence = fasta->getSequence(name);
507 for (int i = 0; i < dupNames.size(); i++) {
508 fasta->push_back(dupNames[i], sequence);
516 catch(exception& e) {
517 m->errorOut(e, "GetOTURepCommand", "readNamesFile");
521 //**********************************************************************************************************************
522 string GetOTURepCommand::findRep(vector<string> names) {
524 // if only 1 sequence in bin or processing the "unique" label, then
525 // the first sequence of the OTU is the representative one
526 if ((names.size() == 2) || (names.size() == 1) || (list->getLabel() == "unique")) {
529 vector<int> seqIndex(names.size());
530 vector<float> max_dist(names.size());
531 vector<float> total_dist(names.size());
533 //fill seqIndex and initialize sums
534 for (size_t i = 0; i < names.size(); i++) {
535 seqIndex[i] = nameToIndex[names[i]];
540 // loop through all entries in seqIndex
543 for (size_t i=0; i < seqIndex.size(); i++) {
544 if (m->control_pressed) { return "control"; }
546 if (!large) { currMap = seqVec[seqIndex[i]]; }
547 else { currMap = getMap(seqIndex[i]); }
549 for (size_t j=0; j < seqIndex.size(); j++) {
550 it = currMap.find(seqIndex[j]);
551 if (it != currMap.end()) {
552 max_dist[i] = max(max_dist[i], it->second);
553 max_dist[j] = max(max_dist[j], it->second);
554 total_dist[i] += it->second;
555 total_dist[j] += it->second;
556 }else{ //if you can't find the distance make it the cutoff
557 max_dist[i] = max(max_dist[i], cutoff);
558 max_dist[j] = max(max_dist[j], cutoff);
559 total_dist[i] += cutoff;
560 total_dist[j] += cutoff;
565 // sequence with the smallest maximum distance is the representative
566 //if tie occurs pick sequence with smallest average distance
569 for (size_t i=0; i < max_dist.size(); i++) {
570 if (m->control_pressed) { return "control"; }
571 if (max_dist[i] < min) {
574 }else if (max_dist[i] == min) {
575 float currentAverage = total_dist[minIndex] / (float) total_dist.size();
576 float newAverage = total_dist[i] / (float) total_dist.size();
578 if (newAverage < currentAverage) {
585 return(names[minIndex]);
588 catch(exception& e) {
589 m->errorOut(e, "GetOTURepCommand", "FindRep");
594 //**********************************************************************************************************************
595 int GetOTURepCommand::process(ListVector* processList) {
597 string name, sequence;
601 if (outputDir == "") { outputDir += hasPath(listfile); }
603 ofstream newNamesOutput;
604 string outputNamesFile;
605 map<string, ofstream*> filehandles;
607 if (Groups.size() == 0) { //you don't want to use groups
608 outputNamesFile = outputDir + getRootName(getSimpleName(listfile)) + processList->getLabel() + ".rep.names";
609 openOutputFile(outputNamesFile, newNamesOutput);
610 outputNames.push_back(outputNamesFile);
611 outputNameFiles[outputNamesFile] = processList->getLabel();
612 }else{ //you want to use groups
614 for (int i=0; i<Groups.size(); i++) {
616 filehandles[Groups[i]] = temp;
617 outputNamesFile = outputDir + getRootName(getSimpleName(listfile)) + processList->getLabel() + "." + Groups[i] + ".rep.names";
619 openOutputFile(outputNamesFile, *(temp));
620 outputNames.push_back(outputNamesFile);
621 outputNameFiles[outputNamesFile] = processList->getLabel() + "." + Groups[i];
625 //for each bin in the list vector
626 for (int i = 0; i < processList->size(); i++) {
627 if (m->control_pressed) {
629 if (Groups.size() == 0) { //you don't want to use groups
630 newNamesOutput.close();
632 for (int j=0; j<Groups.size(); j++) {
633 (*(filehandles[Groups[j]])).close();
634 delete filehandles[Groups[j]];
640 string temp = processList->get(i);
641 vector<string> namesInBin;
642 splitAtComma(temp, namesInBin);
644 if (Groups.size() == 0) {
645 nameRep = findRep(namesInBin);
646 newNamesOutput << i << '\t' << nameRep << '\t' << processList->get(i) << endl;
648 map<string, vector<string> > NamesInGroup;
649 for (int j=0; j<Groups.size(); j++) { //initialize groups
650 NamesInGroup[Groups[j]].resize(0);
653 for (int j=0; j<namesInBin.size(); j++) {
654 string thisgroup = groupMap->getGroup(namesInBin[j]);
656 if (thisgroup == "not found") { m->mothurOut(namesInBin[j] + " is not in your groupfile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
658 if (inUsersGroups(thisgroup, Groups)) { //add this name to correct group
659 NamesInGroup[thisgroup].push_back(namesInBin[j]);
663 //get rep for each group in otu
664 for (int j=0; j<Groups.size(); j++) {
665 if (NamesInGroup[Groups[j]].size() != 0) { //are there members from this group in this otu?
666 //get rep for each group
667 nameRep = findRep(NamesInGroup[Groups[j]]);
669 //output group rep and other members of this group
670 (*(filehandles[Groups[j]])) << i << '\t' << nameRep << '\t';
672 for (int k=0; k<NamesInGroup[Groups[j]].size()-1; k++) {//output list of names in this otu from this group
673 (*(filehandles[Groups[j]])) << NamesInGroup[Groups[j]][k] << ",";
676 (*(filehandles[Groups[j]])) << NamesInGroup[Groups[j]][NamesInGroup[Groups[j]].size()-1] << endl;
682 if (Groups.size() == 0) { //you don't want to use groups
683 newNamesOutput.close();
685 for (int i=0; i<Groups.size(); i++) {
686 (*(filehandles[Groups[i]])).close();
687 delete filehandles[Groups[i]];
694 catch(exception& e) {
695 m->errorOut(e, "GetOTURepCommand", "process");
699 //**********************************************************************************************************************
700 int GetOTURepCommand::processNames(string filename, string label) {
704 if (outputDir == "") { outputDir += hasPath(listfile); }
705 string outputFileName = outputDir + getRootName(getSimpleName(listfile)) + label + ".rep.fasta";
706 openOutputFile(outputFileName, out);
707 vector<repStruct> reps;
708 outputNames.push_back(outputFileName);
711 string tempNameFile = filename + ".temp";
712 openOutputFile(tempNameFile, out2);
715 openInputFile(filename, in);
719 string rep, binnames;
720 in >> i >> rep >> binnames; gobble(in);
721 out2 << rep << '\t' << binnames << endl;
723 vector<string> names;
724 splitAtComma(binnames, names);
725 int binsize = names.size();
727 //if you have a groupfile
729 if (groupfile != "") {
730 map<string, string> groups;
731 map<string, string>::iterator groupIt;
733 //find the groups that are in this bin
734 for (size_t i = 0; i < names.size(); i++) {
735 string groupName = groupMap->getGroup(names[i]);
736 if (groupName == "not found") {
737 m->mothurOut(names[i] + " is missing from your group file. Please correct. "); m->mothurOutEndLine();
740 groups[groupName] = groupName;
744 //turn the groups into a string
745 for (groupIt = groups.begin(); groupIt != groups.end(); groupIt++) {
746 group += groupIt->first + "-";
749 group = group.substr(0, group.length()-1);
753 //print out name and sequence for that bin
754 string sequence = fasta->getSequence(rep);
756 if (sequence != "not found") {
757 if (sorted == "") { //print them out
758 rep = rep + "\t" + toString(i+1);
759 rep = rep + "|" + toString(binsize);
760 if (groupfile != "") {
761 rep = rep + "|" + group;
763 out << ">" << rep << endl;
764 out << sequence << endl;
766 repStruct newRep(rep, i+1, binsize, group);
767 reps.push_back(newRep);
770 m->mothurOut(rep + " is missing from your fasta or name file, ignoring. Please correct."); m->mothurOutEndLine();
775 if (sorted != "") { //then sort them and print them
776 if (sorted == "name") { sort(reps.begin(), reps.end(), compareName); }
777 else if (sorted == "bin") { sort(reps.begin(), reps.end(), compareBin); }
778 else if (sorted == "size") { sort(reps.begin(), reps.end(), compareSize); }
779 else if (sorted == "group") { sort(reps.begin(), reps.end(), compareGroup); }
782 for (int i = 0; i < reps.size(); i++) {
783 string sequence = fasta->getSequence(reps[i].name);
784 string outputName = reps[i].name + "\t" + toString(reps[i].bin);
785 outputName = outputName + "|" + toString(reps[i].size);
786 if (groupfile != "") {
787 outputName = outputName + "|" + reps[i].group;
789 out << ">" << outputName << endl;
790 out << sequence << endl;
798 remove(filename.c_str());
799 rename(tempNameFile.c_str(), filename.c_str());
804 catch(exception& e) {
805 m->errorOut(e, "GetOTURepCommand", "processNames");
809 //**********************************************************************************************************************
810 SeqMap GetOTURepCommand::getMap(int row) {
814 //make sure this row exists in the file, it may not if the seq did not have any distances below the cutoff
815 if (rowPositions[row] != -1){
817 inRow.seekg(rowPositions[row]);
819 int rowNum, numDists, colNum;
822 inRow >> rowNum >> numDists;
824 for(int i = 0; i < numDists; i++) {
825 inRow >> colNum >> dist;
826 rowMap[colNum] = dist;
833 catch(exception& e) {
834 m->errorOut(e, "GetOTURepCommand", "getMap");
838 //**********************************************************************************************************************