2 * unifracweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracweightedcommand.h"
12 //**********************************************************************************************************************
13 vector<string> UnifracWeightedCommand::setParameters(){
15 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
16 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
17 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
18 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
19 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
20 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
21 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
22 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
23 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
24 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27 vector<string> myArray;
28 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
32 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
36 //**********************************************************************************************************************
37 string UnifracWeightedCommand::getHelpString(){
39 string helpString = "";
40 helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root and random. tree parameter is required unless you have valid current tree file.\n";
41 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
42 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
43 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
44 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
45 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
46 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
47 helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
48 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
49 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
50 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
51 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
55 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
59 //**********************************************************************************************************************
60 UnifracWeightedCommand::UnifracWeightedCommand(){
62 abort = true; calledHelp = true;
64 vector<string> tempOutNames;
65 outputTypes["weighted"] = tempOutNames;
66 outputTypes["wsummary"] = tempOutNames;
67 outputTypes["phylip"] = tempOutNames;
68 outputTypes["column"] = tempOutNames;
71 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
76 /***********************************************************/
77 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
79 abort = false; calledHelp = false;
81 //allow user to run help
82 if(option == "help") { help(); abort = true; calledHelp = true; }
83 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
86 vector<string> myArray = setParameters();
88 OptionParser parser(option);
89 map<string,string> parameters=parser.getParameters();
90 map<string,string>::iterator it;
92 ValidParameters validParameter;
94 //check to make sure all parameters are valid for command
95 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
96 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
99 //initialize outputTypes
100 vector<string> tempOutNames;
101 outputTypes["weighted"] = tempOutNames;
102 outputTypes["wsummary"] = tempOutNames;
103 outputTypes["phylip"] = tempOutNames;
104 outputTypes["column"] = tempOutNames;
106 //if the user changes the input directory command factory will send this info to us in the output parameter
107 string inputDir = validParameter.validFile(parameters, "inputdir", false);
108 if (inputDir == "not found"){ inputDir = ""; }
111 it = parameters.find("tree");
112 //user has given a template file
113 if(it != parameters.end()){
114 path = m->hasPath(it->second);
115 //if the user has not given a path then, add inputdir. else leave path alone.
116 if (path == "") { parameters["tree"] = inputDir + it->second; }
119 it = parameters.find("group");
120 //user has given a template file
121 if(it != parameters.end()){
122 path = m->hasPath(it->second);
123 //if the user has not given a path then, add inputdir. else leave path alone.
124 if (path == "") { parameters["group"] = inputDir + it->second; }
127 it = parameters.find("name");
128 //user has given a template file
129 if(it != parameters.end()){
130 path = m->hasPath(it->second);
131 //if the user has not given a path then, add inputdir. else leave path alone.
132 if (path == "") { parameters["name"] = inputDir + it->second; }
139 m->Treenames.clear();
142 //check for required parameters
143 treefile = validParameter.validFile(parameters, "tree", true);
144 if (treefile == "not open") { abort = true; }
145 else if (treefile == "not found") { //if there is a current design file, use it
146 treefile = m->getTreeFile();
147 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
148 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
149 }else { m->setTreeFile(treefile); }
151 //check for required parameters
152 groupfile = validParameter.validFile(parameters, "group", true);
153 if (groupfile == "not open") { abort = true; }
154 else if (groupfile == "not found") { groupfile = ""; }
155 else { m->setGroupFile(groupfile); }
157 namefile = validParameter.validFile(parameters, "name", true);
158 if (namefile == "not open") { abort = true; }
159 else if (namefile == "not found") { namefile = ""; }
160 else { m->setNameFile(namefile); }
162 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
165 //check for optional parameter and set defaults
166 // ...at some point should added some additional type checking...
167 groups = validParameter.validFile(parameters, "groups", false);
168 if (groups == "not found") { groups = ""; }
170 m->splitAtDash(groups, Groups);
171 m->setGroups(Groups);
174 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
175 convert(itersString, iters);
177 string temp = validParameter.validFile(parameters, "distance", false);
178 if (temp == "not found") { phylip = false; outputForm = ""; }
180 if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
181 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
184 temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "F"; }
185 random = m->isTrue(temp);
187 temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
188 includeRoot = m->isTrue(temp);
190 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
191 m->setProcessors(temp);
192 convert(temp, processors);
194 if (!random) { iters = 0; } //turn off random calcs
199 catch(exception& e) {
200 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
204 /***********************************************************/
205 int UnifracWeightedCommand::execute() {
208 if (abort == true) { if (calledHelp) { return 0; } return 2; }
210 m->setTreeFile(treefile);
212 if (groupfile != "") {
213 //read in group map info.
214 tmap = new TreeMap(groupfile);
216 }else{ //fake out by putting everyone in one group
217 Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap
218 tmap = new TreeMap();
220 for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
223 if (namefile != "") { readNamesFile(); }
225 read = new ReadNewickTree(treefile);
226 int readOk = read->read(tmap);
228 if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
230 read->AssembleTrees();
231 T = read->getTrees();
234 //make sure all files match
235 //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
237 if (namefile != "") {
238 if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); }
239 else { numNamesInTree = m->Treenames.size(); }
240 }else { numNamesInTree = m->Treenames.size(); }
243 //output any names that are in group file but not in tree
244 if (numNamesInTree < tmap->getNumSeqs()) {
245 for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
246 //is that name in the tree?
248 for (int j = 0; j < m->Treenames.size(); j++) {
249 if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
253 if (m->control_pressed) {
254 delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
255 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
260 //then you did not find it so report it
261 if (count == m->Treenames.size()) {
262 //if it is in your namefile then don't remove
263 map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
265 if (it == nameMap.end()) {
266 m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
267 tmap->removeSeq(tmap->namesOfSeqs[i]);
268 i--; //need this because removeSeq removes name from namesOfSeqs
274 sumFile = outputDir + m->getSimpleName(treefile) + ".wsummary";
275 m->openOutputFile(sumFile, outSum);
276 outputNames.push_back(sumFile); outputTypes["wsummary"].push_back(sumFile);
278 util = new SharedUtil();
279 string s; //to make work with setgroups
280 vector<string> Groups = m->getGroups();
281 vector<string> nameGroups = tmap->getNamesOfGroups();
282 util->setGroups(Groups, nameGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze
283 util->getCombos(groupComb, Groups, numComp);
286 weighted = new Weighted(tmap, includeRoot);
288 int start = time(NULL);
290 //get weighted for users tree
291 userData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
292 randomData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
294 if (numComp < processors) { processors = numComp; }
296 //get weighted scores for users trees
297 for (int i = 0; i < T.size(); i++) {
299 if (m->control_pressed) { delete tmap; delete weighted;
300 for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
303 rScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
304 uScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
307 output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted", itersString);
308 outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted");
309 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted");
312 userData = weighted->getValues(T[i], processors, outputDir); //userData[0] = weightedscore
314 if (m->control_pressed) { delete tmap; delete weighted;
315 for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
318 for (int s=0; s<numComp; s++) {
319 //add users score to vector of user scores
320 uScores[s].push_back(userData[s]);
322 //save users tree score for summary file
323 utreeScores.push_back(userData[s]);
328 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
329 vector< vector<string> > namesOfGroupCombos;
330 for (int a=0; a<numGroups; a++) {
331 for (int l = 0; l < a; l++) {
332 vector<string> groups; groups.push_back((m->getGroups())[a]); groups.push_back((m->getGroups())[l]);
333 namesOfGroupCombos.push_back(groups);
339 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
341 int numPairs = namesOfGroupCombos.size();
342 int numPairsPerProcessor = numPairs / processors;
344 for (int i = 0; i < processors; i++) {
345 int startPos = i * numPairsPerProcessor;
346 if(i == processors - 1){
347 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
349 lines.push_back(linePair(startPos, numPairsPerProcessor));
355 //get scores for random trees
356 for (int j = 0; j < iters; j++) {
358 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
360 driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
362 createProcesses(T[i], namesOfGroupCombos, rScores);
365 driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
368 if (m->control_pressed) { delete tmap; delete weighted;
369 for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
372 // m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();
376 //find the signifigance of the score for summary file
377 for (int f = 0; f < numComp; f++) {
379 sort(rScores[f].begin(), rScores[f].end());
381 //the index of the score higher than yours is returned
382 //so if you have 1000 random trees the index returned is 100
383 //then there are 900 trees with a score greater then you.
384 //giving you a signifigance of 0.900
385 int index = findIndex(userData[f], f); if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
387 //the signifigance is the number of trees with the users score or higher
388 WScoreSig.push_back((iters-index)/(float)iters);
391 //out << "Tree# " << i << endl;
392 calculateFreqsCumuls();
406 if (m->control_pressed) { delete tmap; delete weighted;
407 for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
411 if (phylip) { createPhylipFile(); }
413 //clear out users groups
415 delete tmap; delete weighted;
416 for (int i = 0; i < T.size(); i++) { delete T[i]; }
419 if (m->control_pressed) {
420 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
424 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
426 //set phylip file as new current phylipfile
428 itTypes = outputTypes.find("phylip");
429 if (itTypes != outputTypes.end()) {
430 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
433 //set column file as new current columnfile
434 itTypes = outputTypes.find("column");
435 if (itTypes != outputTypes.end()) {
436 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
439 m->mothurOutEndLine();
440 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
441 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
442 m->mothurOutEndLine();
447 catch(exception& e) {
448 m->errorOut(e, "UnifracWeightedCommand", "execute");
452 /**************************************************************************************************/
454 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
456 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
458 vector<int> processIDS;
462 //loop through and create all the processes you want
463 while (process != processors) {
467 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
470 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
472 //pass numSeqs to parent
474 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
475 m->openOutputFile(tempFile, out);
476 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t'; } out << endl;
481 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
482 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
487 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
489 //force parent to wait until all the processes are done
490 for (int i=0;i<(processors-1);i++) {
491 int temp = processIDS[i];
495 //get data created by processes
496 for (int i=0;i<(processors-1);i++) {
499 string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
500 m->openInputFile(s, in);
503 for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
511 catch(exception& e) {
512 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
517 /**************************************************************************************************/
518 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) {
520 Tree* randT = new Tree(tmap);
522 for (int h = start; h < (start+num); h++) {
524 if (m->control_pressed) { return 0; }
526 //initialize weighted score
527 string groupA = namesOfGroupCombos[h][0];
528 string groupB = namesOfGroupCombos[h][1];
533 //create a random tree with same topology as T[i], but different labels
534 randT->assembleRandomUnifracTree(groupA, groupB);
536 if (m->control_pressed) { delete randT; return 0; }
538 //get wscore of random tree
539 EstOutput randomData = weighted->getValues(randT, groupA, groupB);
541 if (m->control_pressed) { delete randT; return 0; }
544 scores[h].push_back(randomData[0]);
552 catch(exception& e) {
553 m->errorOut(e, "UnifracWeightedCommand", "driver");
557 /***********************************************************/
558 void UnifracWeightedCommand::printWeightedFile() {
562 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
564 for(int a = 0; a < numComp; a++) {
565 output->initFile(groupComb[a], tags);
567 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
568 data.push_back(it->first); data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
569 output->output(data);
575 catch(exception& e) {
576 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
582 /***********************************************************/
583 void UnifracWeightedCommand::printWSummaryFile() {
586 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t';
587 m->mothurOut("Tree#\tGroups\tWScore\t");
588 if (random) { outSum << "WSig"; m->mothurOut("WSig"); }
589 outSum << endl; m->mothurOutEndLine();
592 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
596 for (int i = 0; i < T.size(); i++) {
597 for (int j = 0; j < numComp; j++) {
599 if (WScoreSig[count] > (1/(float)iters)) {
600 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
601 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
602 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" + toString(WScoreSig[count]) + "\n");
604 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
605 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
606 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" + toString((1/float(iters))) + "\n");
609 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl;
610 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl;
611 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\n");
618 catch(exception& e) {
619 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
623 /***********************************************************/
624 void UnifracWeightedCommand::createPhylipFile() {
628 for (int i = 0; i < T.size(); i++) {
630 string phylipFileName;
631 if ((outputForm == "lt") || (outputForm == "square")) {
632 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.phylip.dist";
633 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
635 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.column.dist";
636 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
640 m->openOutputFile(phylipFileName, out);
642 if ((outputForm == "lt") || (outputForm == "square")) {
644 out << m->getNumGroups() << endl;
647 //make matrix with scores in it
648 vector< vector<float> > dists; dists.resize(m->getNumGroups());
649 for (int i = 0; i < m->getNumGroups(); i++) {
650 dists[i].resize(m->getNumGroups(), 0.0);
653 //flip it so you can print it
654 for (int r=0; r<m->getNumGroups(); r++) {
655 for (int l = 0; l < r; l++) {
656 dists[r][l] = utreeScores[count];
657 dists[l][r] = utreeScores[count];
663 for (int r=0; r<m->getNumGroups(); r++) {
665 string name = (m->getGroups())[r];
666 if (name.length() < 10) { //pad with spaces to make compatible
667 while (name.length() < 10) { name += " "; }
670 if (outputForm == "lt") {
674 for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
676 }else if (outputForm == "square") {
680 for (int l = 0; l < m->getNumGroups(); l++) { out << dists[r][l] << '\t'; }
684 for (int l = 0; l < r; l++) {
685 string otherName = (m->getGroups())[l];
686 if (otherName.length() < 10) { //pad with spaces to make compatible
687 while (otherName.length() < 10) { otherName += " "; }
690 out << name << '\t' << otherName << dists[r][l] << endl;
697 catch(exception& e) {
698 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
702 /***********************************************************/
703 int UnifracWeightedCommand::findIndex(float score, int index) {
705 for (int i = 0; i < rScores[index].size(); i++) {
706 if (rScores[index][i] >= score) { return i; }
708 return rScores[index].size();
710 catch(exception& e) {
711 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
716 /***********************************************************/
718 void UnifracWeightedCommand::calculateFreqsCumuls() {
720 //clear out old tree values
722 rScoreFreq.resize(numComp);
724 rCumul.resize(numComp);
727 //calculate frequency
728 for (int f = 0; f < numComp; f++) {
729 for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7... you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
730 validScores[rScores[f][i]] = rScores[f][i];
731 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
732 if (it != rScoreFreq[f].end()) {
733 rScoreFreq[f][rScores[f][i]]++;
735 rScoreFreq[f][rScores[f][i]] = 1;
741 for(int a = 0; a < numComp; a++) {
742 float rcumul = 1.0000;
743 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
744 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
745 //make rscoreFreq map and rCumul
746 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
747 rCumul[a][it->first] = rcumul;
748 //get percentage of random trees with that info
749 if (it2 != rScoreFreq[a].end()) { rScoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
750 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
755 catch(exception& e) {
756 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
760 /*****************************************************************/
761 int UnifracWeightedCommand::readNamesFile() {
764 numUniquesInName = 0;
767 m->openInputFile(namefile, in);
769 string first, second;
770 map<string, string>::iterator itNames;
773 in >> first >> second; m->gobble(in);
777 itNames = m->names.find(first);
778 if (itNames == m->names.end()) {
779 m->names[first] = second;
781 //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
782 vector<string> dupNames;
783 m->splitAtComma(second, dupNames);
785 for (int i = 0; i < dupNames.size(); i++) {
786 nameMap[dupNames[i]] = dupNames[i];
787 if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); }
789 }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }
795 catch(exception& e) {
796 m->errorOut(e, "UnifracWeightedCommand", "readNamesFile");
800 /***********************************************************/