5 * Created by Sarah Westcott on 4/6/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "getoturepcommand.h"
11 #include "readphylip.h"
12 #include "readcolumn.h"
13 #include "formatphylip.h"
14 #include "formatcolumn.h"
15 #include "sharedutilities.h"
18 //********************************************************************************************************************
19 //sorts lowest to highest
20 inline bool compareName(repStruct left, repStruct right){
21 return (left.name < right.name);
23 //********************************************************************************************************************
24 //sorts lowest to highest
25 inline bool compareBin(repStruct left, repStruct right){
26 return (left.bin < right.bin);
28 //********************************************************************************************************************
29 //sorts lowest to highest
30 inline bool compareSize(repStruct left, repStruct right){
31 return (left.size < right.size);
33 //********************************************************************************************************************
34 //sorts lowest to highest
35 inline bool compareGroup(repStruct left, repStruct right){
36 return (left.group < right.group);
38 //**********************************************************************************************************************
39 GetOTURepCommand::GetOTURepCommand(string option) {
41 globaldata = GlobalData::getInstance();
46 //allow user to run help
47 if (option == "help") {
50 //valid paramters for this command
51 string Array[] = {"fasta","list","label","name", "group", "sorted", "phylip","column","large","cutoff","precision","groups","outputdir","inputdir"};
52 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
54 OptionParser parser(option);
55 map<string, string> parameters = parser.getParameters();
57 ValidParameters validParameter;
58 map<string, string>::iterator it;
60 //check to make sure all parameters are valid for command
61 for (it = parameters.begin(); it != parameters.end(); it++) {
62 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
65 //if the user changes the input directory command factory will send this info to us in the output parameter
66 string inputDir = validParameter.validFile(parameters, "inputdir", false);
67 if (inputDir == "not found"){ inputDir = ""; }
70 it = parameters.find("list");
71 //user has given a template file
72 if(it != parameters.end()){
73 path = hasPath(it->second);
74 //if the user has not given a path then, add inputdir. else leave path alone.
75 if (path == "") { parameters["list"] = inputDir + it->second; }
78 it = parameters.find("fasta");
79 //user has given a template file
80 if(it != parameters.end()){
81 path = hasPath(it->second);
82 //if the user has not given a path then, add inputdir. else leave path alone.
83 if (path == "") { parameters["fasta"] = inputDir + it->second; }
86 it = parameters.find("phylip");
87 //user has given a template file
88 if(it != parameters.end()){
89 path = hasPath(it->second);
90 //if the user has not given a path then, add inputdir. else leave path alone.
91 if (path == "") { parameters["phylip"] = inputDir + it->second; }
94 it = parameters.find("column");
95 //user has given a template file
96 if(it != parameters.end()){
97 path = hasPath(it->second);
98 //if the user has not given a path then, add inputdir. else leave path alone.
99 if (path == "") { parameters["column"] = inputDir + it->second; }
102 it = parameters.find("name");
103 //user has given a template file
104 if(it != parameters.end()){
105 path = hasPath(it->second);
106 //if the user has not given a path then, add inputdir. else leave path alone.
107 if (path == "") { parameters["name"] = inputDir + it->second; }
110 it = parameters.find("group");
111 //user has given a template file
112 if(it != parameters.end()){
113 path = hasPath(it->second);
114 //if the user has not given a path then, add inputdir. else leave path alone.
115 if (path == "") { parameters["group"] = inputDir + it->second; }
120 //if the user changes the output directory command factory will send this info to us in the output parameter
121 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
123 //check for required parameters
124 fastafile = validParameter.validFile(parameters, "fasta", true);
125 if (fastafile == "not found") { m->mothurOut("fasta is a required parameter for the get.oturep command."); m->mothurOutEndLine(); abort = true; }
126 else if (fastafile == "not open") { abort = true; }
128 listfile = validParameter.validFile(parameters, "list", true);
129 if (listfile == "not found") { m->mothurOut("list is a required parameter for the get.oturep command."); m->mothurOutEndLine(); abort = true; }
130 else if (listfile == "not open") { abort = true; }
132 phylipfile = validParameter.validFile(parameters, "phylip", true);
133 if (phylipfile == "not found") { phylipfile = ""; }
134 else if (phylipfile == "not open") { abort = true; }
135 else { distFile = phylipfile; format = "phylip"; }
137 columnfile = validParameter.validFile(parameters, "column", true);
138 if (columnfile == "not found") { columnfile = ""; }
139 else if (columnfile == "not open") { abort = true; }
140 else { distFile = columnfile; format = "column"; }
142 namefile = validParameter.validFile(parameters, "name", true);
143 if (namefile == "not open") { abort = true; }
144 else if (namefile == "not found") { namefile = ""; }
146 if ((phylipfile == "") && (columnfile == "")) { m->mothurOut("When executing a get.oturep command you must enter a phylip or a column."); m->mothurOutEndLine(); abort = true; }
147 else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a get.oturep command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
149 if (columnfile != "") { if (namefile == "") { cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; } }
151 //check for optional parameter and set defaults
152 // ...at some point should added some additional type checking...
153 label = validParameter.validFile(parameters, "label", false);
154 if (label == "not found") { label = ""; allLines = 1; }
156 if(label != "all") { splitAtDash(label, labels); allLines = 0; }
157 else { allLines = 1; }
160 groupfile = validParameter.validFile(parameters, "group", true);
161 if (groupfile == "not open") { groupfile = ""; abort = true; }
162 else if (groupfile == "not found") { groupfile = ""; }
164 sorted = validParameter.validFile(parameters, "sorted", false); if (sorted == "not found"){ sorted = ""; }
165 if ((sorted != "") && (sorted != "name") && (sorted != "bin") && (sorted != "size") && (sorted != "group")) {
166 m->mothurOut(sorted + " is not a valid option for the sorted parameter. The only options are: name, bin, size and group. I will not sort."); m->mothurOutEndLine();
170 if ((sorted == "group") && (groupfile == "")) {
171 m->mothurOut("You must provide a groupfile to sort by group. I will not sort."); m->mothurOutEndLine();
175 groups = validParameter.validFile(parameters, "groups", false);
176 if (groups == "not found") { groups = ""; }
178 if (groupfile == "") {
179 m->mothurOut("You must provide a groupfile to use groups."); m->mothurOutEndLine();
182 splitAtDash(groups, Groups);
185 globaldata->Groups = Groups;
187 string temp = validParameter.validFile(parameters, "large", false); if (temp == "not found") { temp = "F"; }
188 large = isTrue(temp);
190 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
191 convert(temp, precision);
193 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "10.0"; }
194 convert(temp, cutoff);
195 cutoff += (5 / (precision * 10.0));
198 catch(exception& e) {
199 m->errorOut(e, "GetOTURepCommand", "GetOTURepCommand");
204 //**********************************************************************************************************************
206 void GetOTURepCommand::help(){
208 m->mothurOut("The get.oturep command parameters are phylip, column, list, fasta, name, group, large, cutoff, precision, groups, sorted and label. The fasta and list parameters are required, as well as phylip or column and name.\n");
209 m->mothurOut("The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n");
210 m->mothurOut("The phylip or column parameter is required, but only one may be used. If you use a column file the name filename is required. \n");
211 m->mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n");
212 m->mothurOut("The get.oturep command should be in the following format: get.oturep(phylip=yourDistanceMatrix, fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n");
213 m->mothurOut("Example get.oturep(phylip=amazon.dist, fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups).\n");
214 m->mothurOut("The default value for label is all labels in your inputfile.\n");
215 m->mothurOut("The sorted parameter allows you to indicate you want the output sorted. You can sort by sequence name, bin number, bin size or group. The default is no sorting, but your options are name, number, size, or group.\n");
216 m->mothurOut("The large parameter allows you to indicate that your distance matrix is too large to fit in RAM. The default value is false.\n");
217 m->mothurOut("The groups parameter allows you to indicate that you want representative sequences for each group specified for each OTU, group name should be separated by dashes. ex. groups=A-B-C.\n");
218 m->mothurOut("The get.oturep command outputs a .fastarep and .rep.names file for each distance you specify, selecting one OTU representative for each bin.\n");
219 m->mothurOut("If you provide a groupfile, then it also appends the names of the groups present in that bin.\n");
220 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
222 catch(exception& e) {
223 m->errorOut(e, "GetOTURepCommand", "help");
228 //**********************************************************************************************************************
230 GetOTURepCommand::~GetOTURepCommand(){}
232 //**********************************************************************************************************************
234 int GetOTURepCommand::execute(){
237 if (abort == true) { return 0; }
241 //read distance files
242 if (format == "column") { readMatrix = new ReadColumnMatrix(distFile); }
243 else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(distFile); }
244 else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0; }
246 readMatrix->setCutoff(cutoff);
249 nameMap = new NameAssignment(namefile);
251 }else{ nameMap = NULL; }
253 readMatrix->read(nameMap);
255 if (m->control_pressed) { delete readMatrix; return 0; }
258 if (globaldata->gListVector != NULL) { delete globaldata->gListVector; }
259 globaldata->gListVector = readMatrix->getListVector();
261 SparseMatrix* matrix = readMatrix->getMatrix();
263 // Create a data structure to quickly access the distance information.
264 // It consists of a vector of distance maps, where each map contains
265 // all distances of a certain sequence. Vector and maps are accessed
266 // via the index of a sequence in the distance matrix
267 seqVec = vector<SeqMap>(globaldata->gListVector->size());
268 for (MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++) {
269 if (m->control_pressed) { delete readMatrix; return 0; }
270 seqVec[currentCell->row][currentCell->column] = currentCell->dist;
277 if (m->control_pressed) { return 0; }
279 //process file and set up indexes
280 if (format == "column") { formatMatrix = new FormatColumnMatrix(distFile); }
281 else if (format == "phylip") { formatMatrix = new FormatPhylipMatrix(distFile); }
282 else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0; }
284 formatMatrix->setCutoff(cutoff);
287 nameMap = new NameAssignment(namefile);
289 }else{ nameMap = NULL; }
291 formatMatrix->read(nameMap);
293 if (m->control_pressed) { delete formatMatrix; return 0; }
296 if (globaldata->gListVector != NULL) { delete globaldata->gListVector; }
297 globaldata->gListVector = formatMatrix->getListVector();
299 distFile = formatMatrix->getFormattedFileName();
301 //positions in file where the distances for each sequence begin
302 //rowPositions[1] = position in file where distance related to sequence 1 start.
303 rowPositions = formatMatrix->getRowPositions();
308 //openfile for getMap to use
309 openInputFile(distFile, inRow);
311 if (m->control_pressed) { inRow.close(); remove(distFile.c_str()); return 0; }
315 //globaldata->gListVector bin 0 = first name read in distance matrix, globaldata->gListVector bin 1 = second name read in distance matrix
316 if (globaldata->gListVector != NULL) {
317 vector<string> names;
319 //map names to rows in sparsematrix
320 for (int i = 0; i < globaldata->gListVector->size(); i++) {
322 binnames = globaldata->gListVector->get(i);
324 splitAtComma(binnames, names);
326 for (int j = 0; j < names.size(); j++) {
327 nameToIndex[names[j]] = i;
330 } else { m->mothurOut("error, no listvector."); m->mothurOutEndLine(); }
333 if (m->control_pressed) {
334 if (large) { inRow.close(); remove(distFile.c_str()); }
338 if (groupfile != "") {
339 //read in group map info.
340 groupMap = new GroupMap(groupfile);
341 int error = groupMap->readMap();
342 if (error == 1) { delete groupMap; m->mothurOut("Error reading your groupfile. Proceeding without groupfile."); m->mothurOutEndLine(); groupfile = ""; }
344 if (Groups.size() != 0) {
345 SharedUtil* util = new SharedUtil();
346 util->setGroups(Groups, groupMap->namesOfGroups, "getoturep");
351 //set format to list so input can get listvector
352 globaldata->setFormat("list");
355 read = new ReadOTUFile(listfile);
356 read->read(&*globaldata);
358 input = globaldata->ginput;
359 list = globaldata->gListVector;
360 string lastLabel = list->getLabel();
362 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
363 set<string> processedLabels;
364 set<string> userLabels = labels;
366 if (m->control_pressed) {
367 if (large) { inRow.close(); remove(distFile.c_str()); }
368 delete read; delete input; delete list; globaldata->gListVector = NULL; return 0;
371 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
373 if (allLines == 1 || labels.count(list->getLabel()) == 1){
374 m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
375 error = process(list);
376 if (error == 1) { return 0; } //there is an error in hte input files, abort command
378 if (m->control_pressed) {
379 if (large) { inRow.close(); remove(distFile.c_str()); }
380 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
381 delete read; delete input; delete list; globaldata->gListVector = NULL; return 0;
384 processedLabels.insert(list->getLabel());
385 userLabels.erase(list->getLabel());
388 if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
389 string saveLabel = list->getLabel();
392 list = input->getListVector(lastLabel);
393 m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
394 error = process(list);
395 if (error == 1) { return 0; } //there is an error in hte input files, abort command
397 if (m->control_pressed) {
398 if (large) { inRow.close(); remove(distFile.c_str()); }
399 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
400 delete read; delete input; delete list; globaldata->gListVector = NULL; return 0;
403 processedLabels.insert(list->getLabel());
404 userLabels.erase(list->getLabel());
406 //restore real lastlabel to save below
407 list->setLabel(saveLabel);
410 lastLabel = list->getLabel();
413 list = input->getListVector();
416 //output error messages about any remaining user labels
417 bool needToRun = false;
418 for (set<string>::iterator it = userLabels.begin(); it != userLabels.end(); it++) {
419 m->mothurOut("Your file does not include the label " + (*it));
420 if (processedLabels.count(lastLabel) != 1) {
421 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
424 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
428 //run last label if you need to
429 if (needToRun == true) {
430 if (list != NULL) { delete list; }
431 list = input->getListVector(lastLabel);
432 m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
433 error = process(list);
435 if (error == 1) { return 0; } //there is an error in hte input files, abort command
437 if (m->control_pressed) {
438 if (large) { inRow.close(); remove(distFile.c_str()); }
439 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
440 delete read; delete input; delete list; globaldata->gListVector = NULL; return 0;
444 //close and remove formatted matrix file
447 remove(distFile.c_str());
450 globaldata->gListVector = NULL;
451 delete input; globaldata->ginput = NULL;
455 fasta = new FastaMap();
456 fasta->readFastaFile(fastafile);
458 //if user gave a namesfile then use it
459 if (namefile != "") { readNamesFile(); }
461 //output create and output the .rep.fasta files
462 map<string, string>::iterator itNameFile;
463 for (itNameFile = outputNameFiles.begin(); itNameFile != outputNameFiles.end(); itNameFile++) {
464 processNames(itNameFile->first, itNameFile->second);
468 if (groupfile != "") {
469 delete groupMap; globaldata->gGroupmap = NULL;
472 if (m->control_pressed) { return 0; }
474 m->mothurOutEndLine();
475 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
476 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
477 m->mothurOutEndLine();
481 catch(exception& e) {
482 m->errorOut(e, "GetOTURepCommand", "execute");
487 //**********************************************************************************************************************
488 void GetOTURepCommand::readNamesFile() {
490 vector<string> dupNames;
491 openInputFile(namefile, inNames);
493 string name, names, sequence;
496 inNames >> name; //read from first column A
497 inNames >> names; //read from second column A,B,C,D
501 //parse names into vector
502 splitAtComma(names, dupNames);
504 //store names in fasta map
505 sequence = fasta->getSequence(name);
506 for (int i = 0; i < dupNames.size(); i++) {
507 fasta->push_back(dupNames[i], sequence);
515 catch(exception& e) {
516 m->errorOut(e, "GetOTURepCommand", "readNamesFile");
520 //**********************************************************************************************************************
521 string GetOTURepCommand::findRep(vector<string> names) {
523 // if only 1 sequence in bin or processing the "unique" label, then
524 // the first sequence of the OTU is the representative one
525 if ((names.size() == 2) || (names.size() == 1) || (list->getLabel() == "unique")) {
528 vector<int> seqIndex(names.size());
529 vector<float> max_dist(names.size());
530 vector<float> total_dist(names.size());
532 //fill seqIndex and initialize sums
533 for (size_t i = 0; i < names.size(); i++) {
534 seqIndex[i] = nameToIndex[names[i]];
539 // loop through all entries in seqIndex
542 for (size_t i=0; i < seqIndex.size(); i++) {
543 if (m->control_pressed) { return "control"; }
545 if (!large) { currMap = seqVec[seqIndex[i]]; }
546 else { currMap = getMap(seqIndex[i]); }
548 for (size_t j=0; j < seqIndex.size(); j++) {
549 it = currMap.find(seqIndex[j]);
550 if (it != currMap.end()) {
551 max_dist[i] = max(max_dist[i], it->second);
552 max_dist[j] = max(max_dist[j], it->second);
553 total_dist[i] += it->second;
554 total_dist[j] += it->second;
555 }else{ //if you can't find the distance make it the cutoff
556 max_dist[i] = max(max_dist[i], cutoff);
557 max_dist[j] = max(max_dist[j], cutoff);
558 total_dist[i] += cutoff;
559 total_dist[j] += cutoff;
564 // sequence with the smallest maximum distance is the representative
565 //if tie occurs pick sequence with smallest average distance
568 for (size_t i=0; i < max_dist.size(); i++) {
569 if (m->control_pressed) { return "control"; }
570 if (max_dist[i] < min) {
573 }else if (max_dist[i] == min) {
574 float currentAverage = total_dist[minIndex] / (float) total_dist.size();
575 float newAverage = total_dist[i] / (float) total_dist.size();
577 if (newAverage < currentAverage) {
584 return(names[minIndex]);
587 catch(exception& e) {
588 m->errorOut(e, "GetOTURepCommand", "FindRep");
593 //**********************************************************************************************************************
594 int GetOTURepCommand::process(ListVector* processList) {
596 string name, sequence;
600 if (outputDir == "") { outputDir += hasPath(listfile); }
602 ofstream newNamesOutput;
603 string outputNamesFile;
604 map<string, ofstream*> filehandles;
606 if (Groups.size() == 0) { //you don't want to use groups
607 outputNamesFile = outputDir + getRootName(getSimpleName(listfile)) + processList->getLabel() + ".rep.names";
608 openOutputFile(outputNamesFile, newNamesOutput);
609 outputNames.push_back(outputNamesFile);
610 outputNameFiles[outputNamesFile] = processList->getLabel();
611 }else{ //you want to use groups
613 for (int i=0; i<Groups.size(); i++) {
615 filehandles[Groups[i]] = temp;
616 outputNamesFile = outputDir + getRootName(getSimpleName(listfile)) + processList->getLabel() + "." + Groups[i] + ".rep.names";
618 openOutputFile(outputNamesFile, *(temp));
619 outputNames.push_back(outputNamesFile);
620 outputNameFiles[outputNamesFile] = processList->getLabel() + "." + Groups[i];
624 //for each bin in the list vector
625 for (int i = 0; i < processList->size(); i++) {
626 if (m->control_pressed) {
628 if (Groups.size() == 0) { //you don't want to use groups
629 newNamesOutput.close();
631 for (int j=0; j<Groups.size(); j++) {
632 (*(filehandles[Groups[j]])).close();
633 delete filehandles[Groups[j]];
639 string temp = processList->get(i);
640 vector<string> namesInBin;
641 splitAtComma(temp, namesInBin);
643 if (Groups.size() == 0) {
644 nameRep = findRep(namesInBin);
645 newNamesOutput << i << '\t' << nameRep << '\t' << processList->get(i) << endl;
647 map<string, vector<string> > NamesInGroup;
648 for (int j=0; j<Groups.size(); j++) { //initialize groups
649 NamesInGroup[Groups[j]].resize(0);
652 for (int j=0; j<namesInBin.size(); j++) {
653 string thisgroup = groupMap->getGroup(namesInBin[j]);
655 if (thisgroup == "not found") { m->mothurOut(namesInBin[j] + " is not in your groupfile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
657 if (inUsersGroups(thisgroup, Groups)) { //add this name to correct group
658 NamesInGroup[thisgroup].push_back(namesInBin[j]);
662 //get rep for each group in otu
663 for (int j=0; j<Groups.size(); j++) {
664 if (NamesInGroup[Groups[j]].size() != 0) { //are there members from this group in this otu?
665 //get rep for each group
666 nameRep = findRep(NamesInGroup[Groups[j]]);
668 //output group rep and other members of this group
669 (*(filehandles[Groups[j]])) << i << '\t' << nameRep << '\t';
671 for (int k=0; k<NamesInGroup[Groups[j]].size()-1; k++) {//output list of names in this otu from this group
672 (*(filehandles[Groups[j]])) << NamesInGroup[Groups[j]][k] << ",";
675 (*(filehandles[Groups[j]])) << NamesInGroup[Groups[j]][NamesInGroup[Groups[j]].size()-1] << endl;
681 if (Groups.size() == 0) { //you don't want to use groups
682 newNamesOutput.close();
684 for (int i=0; i<Groups.size(); i++) {
685 (*(filehandles[Groups[i]])).close();
686 delete filehandles[Groups[i]];
693 catch(exception& e) {
694 m->errorOut(e, "GetOTURepCommand", "process");
698 //**********************************************************************************************************************
699 int GetOTURepCommand::processNames(string filename, string label) {
703 if (outputDir == "") { outputDir += hasPath(listfile); }
704 string outputFileName = outputDir + getRootName(getSimpleName(listfile)) + label + ".rep.fasta";
705 openOutputFile(outputFileName, out);
706 vector<repStruct> reps;
707 outputNames.push_back(outputFileName);
710 string tempNameFile = filename + ".temp";
711 openOutputFile(tempNameFile, out2);
714 openInputFile(filename, in);
718 string rep, binnames;
719 in >> i >> rep >> binnames; gobble(in);
720 out2 << rep << '\t' << binnames << endl;
722 vector<string> names;
723 splitAtComma(binnames, names);
724 int binsize = names.size();
726 //if you have a groupfile
728 if (groupfile != "") {
729 map<string, string> groups;
730 map<string, string>::iterator groupIt;
732 //find the groups that are in this bin
733 for (size_t i = 0; i < names.size(); i++) {
734 string groupName = groupMap->getGroup(names[i]);
735 if (groupName == "not found") {
736 m->mothurOut(names[i] + " is missing from your group file. Please correct. "); m->mothurOutEndLine();
739 groups[groupName] = groupName;
743 //turn the groups into a string
744 for (groupIt = groups.begin(); groupIt != groups.end(); groupIt++) {
745 group += groupIt->first + "-";
748 group = group.substr(0, group.length()-1);
752 //print out name and sequence for that bin
753 string sequence = fasta->getSequence(rep);
755 if (sequence != "not found") {
756 if (sorted == "") { //print them out
757 rep = rep + "|" + toString(i+1);
758 rep = rep + "|" + toString(binsize);
759 if (groupfile != "") {
760 rep = rep + "|" + group;
762 out << ">" << rep << endl;
763 out << sequence << endl;
765 repStruct newRep(rep, i+1, binsize, group);
766 reps.push_back(newRep);
769 m->mothurOut(rep + " is missing from your fasta or name file, ignoring. Please correct."); m->mothurOutEndLine();
774 if (sorted != "") { //then sort them and print them
775 if (sorted == "name") { sort(reps.begin(), reps.end(), compareName); }
776 else if (sorted == "bin") { sort(reps.begin(), reps.end(), compareBin); }
777 else if (sorted == "size") { sort(reps.begin(), reps.end(), compareSize); }
778 else if (sorted == "group") { sort(reps.begin(), reps.end(), compareGroup); }
781 for (int i = 0; i < reps.size(); i++) {
782 string sequence = fasta->getSequence(reps[i].name);
783 string outputName = reps[i].name + "|" + toString(reps[i].bin);
784 outputName = outputName + "|" + toString(reps[i].size);
785 if (groupfile != "") {
786 outputName = outputName + "|" + reps[i].group;
788 out << ">" << outputName << endl;
789 out << sequence << endl;
796 rename(tempNameFile.c_str(), filename.c_str());
801 catch(exception& e) {
802 m->errorOut(e, "GetOTURepCommand", "processNames");
806 //**********************************************************************************************************************
807 SeqMap GetOTURepCommand::getMap(int row) {
811 //make sure this row exists in the file, it may not if the seq did not have any distances below the cutoff
812 if (rowPositions[row] != -1){
814 inRow.seekg(rowPositions[row]);
816 int rowNum, numDists, colNum;
819 inRow >> rowNum >> numDists;
821 for(int i = 0; i < numDists; i++) {
822 inRow >> colNum >> dist;
823 rowMap[colNum] = dist;
830 catch(exception& e) {
831 m->errorOut(e, "GetOTURepCommand", "getMap");
835 //**********************************************************************************************************************