2 * unifracweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracweightedcommand.h"
12 //**********************************************************************************************************************
13 vector<string> UnifracWeightedCommand::setParameters(){
15 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
16 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
17 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
18 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
19 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
20 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
21 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
22 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
23 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
24 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27 vector<string> myArray;
28 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
32 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
36 //**********************************************************************************************************************
37 string UnifracWeightedCommand::getHelpString(){
39 string helpString = "";
40 helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root and random. tree parameter is required unless you have valid current tree file.\n";
41 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
42 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
43 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
44 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
45 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
46 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
47 helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
48 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
49 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
50 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
51 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
55 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
59 //**********************************************************************************************************************
60 UnifracWeightedCommand::UnifracWeightedCommand(){
62 abort = true; calledHelp = true;
64 vector<string> tempOutNames;
65 outputTypes["weighted"] = tempOutNames;
66 outputTypes["wsummary"] = tempOutNames;
67 outputTypes["phylip"] = tempOutNames;
68 outputTypes["column"] = tempOutNames;
71 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
76 /***********************************************************/
77 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
79 abort = false; calledHelp = false;
81 //allow user to run help
82 if(option == "help") { help(); abort = true; calledHelp = true; }
83 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
86 vector<string> myArray = setParameters();
88 OptionParser parser(option);
89 map<string,string> parameters=parser.getParameters();
90 map<string,string>::iterator it;
92 ValidParameters validParameter;
94 //check to make sure all parameters are valid for command
95 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
96 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
99 //initialize outputTypes
100 vector<string> tempOutNames;
101 outputTypes["weighted"] = tempOutNames;
102 outputTypes["wsummary"] = tempOutNames;
103 outputTypes["phylip"] = tempOutNames;
104 outputTypes["column"] = tempOutNames;
106 //if the user changes the input directory command factory will send this info to us in the output parameter
107 string inputDir = validParameter.validFile(parameters, "inputdir", false);
108 if (inputDir == "not found"){ inputDir = ""; }
111 it = parameters.find("tree");
112 //user has given a template file
113 if(it != parameters.end()){
114 path = m->hasPath(it->second);
115 //if the user has not given a path then, add inputdir. else leave path alone.
116 if (path == "") { parameters["tree"] = inputDir + it->second; }
119 it = parameters.find("group");
120 //user has given a template file
121 if(it != parameters.end()){
122 path = m->hasPath(it->second);
123 //if the user has not given a path then, add inputdir. else leave path alone.
124 if (path == "") { parameters["group"] = inputDir + it->second; }
127 it = parameters.find("name");
128 //user has given a template file
129 if(it != parameters.end()){
130 path = m->hasPath(it->second);
131 //if the user has not given a path then, add inputdir. else leave path alone.
132 if (path == "") { parameters["name"] = inputDir + it->second; }
139 m->Treenames.clear();
142 //check for required parameters
143 treefile = validParameter.validFile(parameters, "tree", true);
144 if (treefile == "not open") { abort = true; }
145 else if (treefile == "not found") { //if there is a current design file, use it
146 treefile = m->getTreeFile();
147 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
148 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
149 }else { m->setTreeFile(treefile); }
151 //check for required parameters
152 groupfile = validParameter.validFile(parameters, "group", true);
153 if (groupfile == "not open") { abort = true; }
154 else if (groupfile == "not found") { groupfile = ""; }
155 else { m->setGroupFile(groupfile); }
157 namefile = validParameter.validFile(parameters, "name", true);
158 if (namefile == "not open") { abort = true; }
159 else if (namefile == "not found") { namefile = ""; }
160 else { m->setNameFile(namefile); }
162 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
165 //check for optional parameter and set defaults
166 // ...at some point should added some additional type checking...
167 groups = validParameter.validFile(parameters, "groups", false);
168 if (groups == "not found") { groups = ""; }
170 m->splitAtDash(groups, Groups);
171 m->setGroups(Groups);
174 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
175 convert(itersString, iters);
177 string temp = validParameter.validFile(parameters, "distance", false);
178 if (temp == "not found") { phylip = false; outputForm = ""; }
180 if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
181 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
184 temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "F"; }
185 random = m->isTrue(temp);
187 temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
188 includeRoot = m->isTrue(temp);
190 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
191 m->setProcessors(temp);
192 convert(temp, processors);
194 if (!random) { iters = 0; } //turn off random calcs
199 catch(exception& e) {
200 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
204 /***********************************************************/
205 int UnifracWeightedCommand::execute() {
208 if (abort == true) { if (calledHelp) { return 0; } return 2; }
210 m->setTreeFile(treefile);
212 if (groupfile != "") {
213 //read in group map info.
214 tmap = new TreeMap(groupfile);
216 }else{ //fake out by putting everyone in one group
217 Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap
218 tmap = new TreeMap();
220 for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
223 if (namefile != "") { readNamesFile(); }
225 read = new ReadNewickTree(treefile);
226 int readOk = read->read(tmap);
228 if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
230 read->AssembleTrees();
231 T = read->getTrees();
234 //make sure all files match
235 //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
237 if (namefile != "") {
238 if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); }
239 else { numNamesInTree = m->Treenames.size(); }
240 }else { numNamesInTree = m->Treenames.size(); }
243 //output any names that are in group file but not in tree
244 if (numNamesInTree < tmap->getNumSeqs()) {
245 for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
246 //is that name in the tree?
248 for (int j = 0; j < m->Treenames.size(); j++) {
249 if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
253 if (m->control_pressed) {
254 delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
255 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
260 //then you did not find it so report it
261 if (count == m->Treenames.size()) {
262 //if it is in your namefile then don't remove
263 map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
265 if (it == nameMap.end()) {
266 m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
267 tmap->removeSeq(tmap->namesOfSeqs[i]);
268 i--; //need this because removeSeq removes name from namesOfSeqs
274 sumFile = outputDir + m->getSimpleName(treefile) + ".wsummary";
275 m->openOutputFile(sumFile, outSum);
276 outputNames.push_back(sumFile); outputTypes["wsummary"].push_back(sumFile);
278 util = new SharedUtil();
279 string s; //to make work with setgroups
280 Groups = m->getGroups();
281 vector<string> nameGroups = tmap->getNamesOfGroups();
282 util->setGroups(Groups, nameGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze
283 util->getCombos(groupComb, Groups, numComp);
284 m->setGroups(Groups);
287 weighted = new Weighted(tmap, includeRoot);
289 int start = time(NULL);
291 //get weighted for users tree
292 userData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
293 randomData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
295 if (numComp < processors) { processors = numComp; }
297 //get weighted scores for users trees
298 for (int i = 0; i < T.size(); i++) {
300 if (m->control_pressed) { delete tmap; delete weighted;
301 for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
304 rScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
305 uScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
308 output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted", itersString);
309 outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted");
310 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted");
313 userData = weighted->getValues(T[i], processors, outputDir); //userData[0] = weightedscore
315 if (m->control_pressed) { delete tmap; delete weighted;
316 for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
319 for (int s=0; s<numComp; s++) {
320 //add users score to vector of user scores
321 uScores[s].push_back(userData[s]);
323 //save users tree score for summary file
324 utreeScores.push_back(userData[s]);
329 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
330 vector< vector<string> > namesOfGroupCombos;
331 for (int a=0; a<numGroups; a++) {
332 for (int l = 0; l < a; l++) {
333 vector<string> groups; groups.push_back((m->getGroups())[a]); groups.push_back((m->getGroups())[l]);
334 namesOfGroupCombos.push_back(groups);
340 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
342 int numPairs = namesOfGroupCombos.size();
343 int numPairsPerProcessor = numPairs / processors;
345 for (int i = 0; i < processors; i++) {
346 int startPos = i * numPairsPerProcessor;
347 if(i == processors - 1){
348 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
350 lines.push_back(linePair(startPos, numPairsPerProcessor));
356 //get scores for random trees
357 for (int j = 0; j < iters; j++) {
359 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
361 driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
363 createProcesses(T[i], namesOfGroupCombos, rScores);
366 driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
369 if (m->control_pressed) { delete tmap; delete weighted;
370 for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
373 // m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();
377 //find the signifigance of the score for summary file
378 for (int f = 0; f < numComp; f++) {
380 sort(rScores[f].begin(), rScores[f].end());
382 //the index of the score higher than yours is returned
383 //so if you have 1000 random trees the index returned is 100
384 //then there are 900 trees with a score greater then you.
385 //giving you a signifigance of 0.900
386 int index = findIndex(userData[f], f); if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
388 //the signifigance is the number of trees with the users score or higher
389 WScoreSig.push_back((iters-index)/(float)iters);
392 //out << "Tree# " << i << endl;
393 calculateFreqsCumuls();
407 if (m->control_pressed) { delete tmap; delete weighted;
408 for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
412 if (phylip) { createPhylipFile(); }
414 //clear out users groups
416 delete tmap; delete weighted;
417 for (int i = 0; i < T.size(); i++) { delete T[i]; }
420 if (m->control_pressed) {
421 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
425 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
427 //set phylip file as new current phylipfile
429 itTypes = outputTypes.find("phylip");
430 if (itTypes != outputTypes.end()) {
431 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
434 //set column file as new current columnfile
435 itTypes = outputTypes.find("column");
436 if (itTypes != outputTypes.end()) {
437 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
440 m->mothurOutEndLine();
441 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
442 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
443 m->mothurOutEndLine();
448 catch(exception& e) {
449 m->errorOut(e, "UnifracWeightedCommand", "execute");
453 /**************************************************************************************************/
455 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
457 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
459 vector<int> processIDS;
463 //loop through and create all the processes you want
464 while (process != processors) {
468 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
471 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
473 //pass numSeqs to parent
475 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
476 m->openOutputFile(tempFile, out);
477 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t'; } out << endl;
482 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
483 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
488 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
490 //force parent to wait until all the processes are done
491 for (int i=0;i<(processors-1);i++) {
492 int temp = processIDS[i];
496 //get data created by processes
497 for (int i=0;i<(processors-1);i++) {
500 string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
501 m->openInputFile(s, in);
504 for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
512 catch(exception& e) {
513 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
518 /**************************************************************************************************/
519 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) {
521 Tree* randT = new Tree(tmap);
523 for (int h = start; h < (start+num); h++) {
525 if (m->control_pressed) { return 0; }
527 //initialize weighted score
528 string groupA = namesOfGroupCombos[h][0];
529 string groupB = namesOfGroupCombos[h][1];
534 //create a random tree with same topology as T[i], but different labels
535 randT->assembleRandomUnifracTree(groupA, groupB);
537 if (m->control_pressed) { delete randT; return 0; }
539 //get wscore of random tree
540 EstOutput randomData = weighted->getValues(randT, groupA, groupB);
542 if (m->control_pressed) { delete randT; return 0; }
545 scores[h].push_back(randomData[0]);
553 catch(exception& e) {
554 m->errorOut(e, "UnifracWeightedCommand", "driver");
558 /***********************************************************/
559 void UnifracWeightedCommand::printWeightedFile() {
563 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
565 for(int a = 0; a < numComp; a++) {
566 output->initFile(groupComb[a], tags);
568 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
569 data.push_back(it->first); data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
570 output->output(data);
576 catch(exception& e) {
577 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
583 /***********************************************************/
584 void UnifracWeightedCommand::printWSummaryFile() {
587 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t';
588 m->mothurOut("Tree#\tGroups\tWScore\t");
589 if (random) { outSum << "WSig"; m->mothurOut("WSig"); }
590 outSum << endl; m->mothurOutEndLine();
593 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
597 for (int i = 0; i < T.size(); i++) {
598 for (int j = 0; j < numComp; j++) {
600 if (WScoreSig[count] > (1/(float)iters)) {
601 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
602 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
603 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" + toString(WScoreSig[count]) + "\n");
605 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
606 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
607 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" + toString((1/float(iters))) + "\n");
610 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl;
611 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl;
612 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\n");
619 catch(exception& e) {
620 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
624 /***********************************************************/
625 void UnifracWeightedCommand::createPhylipFile() {
629 for (int i = 0; i < T.size(); i++) {
631 string phylipFileName;
632 if ((outputForm == "lt") || (outputForm == "square")) {
633 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.phylip.dist";
634 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
636 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.column.dist";
637 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
641 m->openOutputFile(phylipFileName, out);
643 if ((outputForm == "lt") || (outputForm == "square")) {
645 out << m->getNumGroups() << endl;
648 //make matrix with scores in it
649 vector< vector<float> > dists; dists.resize(m->getNumGroups());
650 for (int i = 0; i < m->getNumGroups(); i++) {
651 dists[i].resize(m->getNumGroups(), 0.0);
654 //flip it so you can print it
655 for (int r=0; r<m->getNumGroups(); r++) {
656 for (int l = 0; l < r; l++) {
657 dists[r][l] = utreeScores[count];
658 dists[l][r] = utreeScores[count];
664 for (int r=0; r<m->getNumGroups(); r++) {
666 string name = (m->getGroups())[r];
667 if (name.length() < 10) { //pad with spaces to make compatible
668 while (name.length() < 10) { name += " "; }
671 if (outputForm == "lt") {
675 for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
677 }else if (outputForm == "square") {
681 for (int l = 0; l < m->getNumGroups(); l++) { out << dists[r][l] << '\t'; }
685 for (int l = 0; l < r; l++) {
686 string otherName = (m->getGroups())[l];
687 if (otherName.length() < 10) { //pad with spaces to make compatible
688 while (otherName.length() < 10) { otherName += " "; }
691 out << name << '\t' << otherName << dists[r][l] << endl;
698 catch(exception& e) {
699 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
703 /***********************************************************/
704 int UnifracWeightedCommand::findIndex(float score, int index) {
706 for (int i = 0; i < rScores[index].size(); i++) {
707 if (rScores[index][i] >= score) { return i; }
709 return rScores[index].size();
711 catch(exception& e) {
712 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
717 /***********************************************************/
719 void UnifracWeightedCommand::calculateFreqsCumuls() {
721 //clear out old tree values
723 rScoreFreq.resize(numComp);
725 rCumul.resize(numComp);
728 //calculate frequency
729 for (int f = 0; f < numComp; f++) {
730 for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7... you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
731 validScores[rScores[f][i]] = rScores[f][i];
732 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
733 if (it != rScoreFreq[f].end()) {
734 rScoreFreq[f][rScores[f][i]]++;
736 rScoreFreq[f][rScores[f][i]] = 1;
742 for(int a = 0; a < numComp; a++) {
743 float rcumul = 1.0000;
744 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
745 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
746 //make rscoreFreq map and rCumul
747 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
748 rCumul[a][it->first] = rcumul;
749 //get percentage of random trees with that info
750 if (it2 != rScoreFreq[a].end()) { rScoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
751 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
756 catch(exception& e) {
757 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
761 /*****************************************************************/
762 int UnifracWeightedCommand::readNamesFile() {
765 numUniquesInName = 0;
768 m->openInputFile(namefile, in);
770 string first, second;
771 map<string, string>::iterator itNames;
774 in >> first >> second; m->gobble(in);
778 itNames = m->names.find(first);
779 if (itNames == m->names.end()) {
780 m->names[first] = second;
782 //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
783 vector<string> dupNames;
784 m->splitAtComma(second, dupNames);
786 for (int i = 0; i < dupNames.size(); i++) {
787 nameMap[dupNames[i]] = dupNames[i];
788 if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); }
790 }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }
796 catch(exception& e) {
797 m->errorOut(e, "UnifracWeightedCommand", "readNamesFile");
801 /***********************************************************/