2 * unifracweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracweightedcommand.h"
12 //**********************************************************************************************************************
13 vector<string> UnifracWeightedCommand::setParameters(){
15 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
16 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
17 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
18 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
19 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
20 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
21 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
22 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
23 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
24 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27 vector<string> myArray;
28 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
32 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
36 //**********************************************************************************************************************
37 string UnifracWeightedCommand::getHelpString(){
39 string helpString = "";
40 helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root and random. tree parameter is required unless you have valid current tree file.\n";
41 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
42 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
43 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
44 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
45 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
46 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
47 helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
48 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
49 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
50 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
51 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
55 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
59 //**********************************************************************************************************************
60 UnifracWeightedCommand::UnifracWeightedCommand(){
62 abort = true; calledHelp = true;
64 vector<string> tempOutNames;
65 outputTypes["weighted"] = tempOutNames;
66 outputTypes["wsummary"] = tempOutNames;
67 outputTypes["phylip"] = tempOutNames;
68 outputTypes["column"] = tempOutNames;
71 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
76 /***********************************************************/
77 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
79 abort = false; calledHelp = false;
81 //allow user to run help
82 if(option == "help") { help(); abort = true; calledHelp = true; }
83 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
86 vector<string> myArray = setParameters();
88 OptionParser parser(option);
89 map<string,string> parameters=parser.getParameters();
90 map<string,string>::iterator it;
92 ValidParameters validParameter;
94 //check to make sure all parameters are valid for command
95 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
96 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
99 //initialize outputTypes
100 vector<string> tempOutNames;
101 outputTypes["weighted"] = tempOutNames;
102 outputTypes["wsummary"] = tempOutNames;
103 outputTypes["phylip"] = tempOutNames;
104 outputTypes["column"] = tempOutNames;
106 //if the user changes the input directory command factory will send this info to us in the output parameter
107 string inputDir = validParameter.validFile(parameters, "inputdir", false);
108 if (inputDir == "not found"){ inputDir = ""; }
111 it = parameters.find("tree");
112 //user has given a template file
113 if(it != parameters.end()){
114 path = m->hasPath(it->second);
115 //if the user has not given a path then, add inputdir. else leave path alone.
116 if (path == "") { parameters["tree"] = inputDir + it->second; }
119 it = parameters.find("group");
120 //user has given a template file
121 if(it != parameters.end()){
122 path = m->hasPath(it->second);
123 //if the user has not given a path then, add inputdir. else leave path alone.
124 if (path == "") { parameters["group"] = inputDir + it->second; }
127 it = parameters.find("name");
128 //user has given a template file
129 if(it != parameters.end()){
130 path = m->hasPath(it->second);
131 //if the user has not given a path then, add inputdir. else leave path alone.
132 if (path == "") { parameters["name"] = inputDir + it->second; }
138 m->namesOfGroups.clear();
139 m->Treenames.clear();
142 //check for required parameters
143 treefile = validParameter.validFile(parameters, "tree", true);
144 if (treefile == "not open") { abort = true; }
145 else if (treefile == "not found") { //if there is a current design file, use it
146 treefile = m->getTreeFile();
147 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
148 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
151 //check for required parameters
152 groupfile = validParameter.validFile(parameters, "group", true);
153 if (groupfile == "not open") { abort = true; }
154 else if (groupfile == "not found") { groupfile = ""; }
156 namefile = validParameter.validFile(parameters, "name", true);
157 if (namefile == "not open") { abort = true; }
158 else if (namefile == "not found") { namefile = ""; }
160 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
163 //check for optional parameter and set defaults
164 // ...at some point should added some additional type checking...
165 groups = validParameter.validFile(parameters, "groups", false);
166 if (groups == "not found") { groups = ""; }
168 m->splitAtDash(groups, Groups);
172 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
173 convert(itersString, iters);
175 string temp = validParameter.validFile(parameters, "distance", false);
176 if (temp == "not found") { phylip = false; outputForm = ""; }
178 if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
179 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
182 temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "F"; }
183 random = m->isTrue(temp);
185 temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
186 includeRoot = m->isTrue(temp);
188 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
189 m->setProcessors(temp);
190 convert(temp, processors);
192 if (!random) { iters = 0; } //turn off random calcs
197 catch(exception& e) {
198 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
202 /***********************************************************/
203 int UnifracWeightedCommand::execute() {
206 if (abort == true) { if (calledHelp) { return 0; } return 2; }
208 m->setTreeFile(treefile);
210 if (groupfile != "") {
211 //read in group map info.
212 tmap = new TreeMap(groupfile);
214 }else{ //fake out by putting everyone in one group
215 Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap
216 tmap = new TreeMap();
218 for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
221 if (namefile != "") { readNamesFile(); }
223 read = new ReadNewickTree(treefile);
224 int readOk = read->read(tmap);
226 if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
228 read->AssembleTrees();
229 T = read->getTrees();
232 //make sure all files match
233 //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
235 if (namefile != "") {
236 if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); }
237 else { numNamesInTree = m->Treenames.size(); }
238 }else { numNamesInTree = m->Treenames.size(); }
241 //output any names that are in group file but not in tree
242 if (numNamesInTree < tmap->getNumSeqs()) {
243 for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
244 //is that name in the tree?
246 for (int j = 0; j < m->Treenames.size(); j++) {
247 if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
251 if (m->control_pressed) {
252 delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
253 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear();
258 //then you did not find it so report it
259 if (count == m->Treenames.size()) {
260 //if it is in your namefile then don't remove
261 map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
263 if (it == nameMap.end()) {
264 m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
265 tmap->removeSeq(tmap->namesOfSeqs[i]);
266 i--; //need this because removeSeq removes name from namesOfSeqs
272 sumFile = outputDir + m->getSimpleName(treefile) + ".wsummary";
273 m->openOutputFile(sumFile, outSum);
274 outputNames.push_back(sumFile); outputTypes["wsummary"].push_back(sumFile);
276 util = new SharedUtil();
277 string s; //to make work with setgroups
278 util->setGroups(m->Groups, tmap->namesOfGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze
279 util->getCombos(groupComb, m->Groups, numComp);
282 weighted = new Weighted(tmap, includeRoot);
284 int start = time(NULL);
286 //get weighted for users tree
287 userData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
288 randomData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
290 if (numComp < processors) { processors = numComp; }
292 //get weighted scores for users trees
293 for (int i = 0; i < T.size(); i++) {
295 if (m->control_pressed) { delete tmap; delete weighted;
296 for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
299 rScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
300 uScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
303 output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted", itersString);
304 outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted");
305 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted");
308 userData = weighted->getValues(T[i], processors, outputDir); //userData[0] = weightedscore
310 if (m->control_pressed) { delete tmap; delete weighted;
311 for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
314 for (int s=0; s<numComp; s++) {
315 //add users score to vector of user scores
316 uScores[s].push_back(userData[s]);
318 //save users tree score for summary file
319 utreeScores.push_back(userData[s]);
324 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
325 vector< vector<string> > namesOfGroupCombos;
326 for (int a=0; a<numGroups; a++) {
327 for (int l = 0; l < a; l++) {
328 vector<string> groups; groups.push_back(m->Groups[a]); groups.push_back(m->Groups[l]);
329 namesOfGroupCombos.push_back(groups);
335 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
337 int numPairs = namesOfGroupCombos.size();
338 int numPairsPerProcessor = numPairs / processors;
340 for (int i = 0; i < processors; i++) {
341 int startPos = i * numPairsPerProcessor;
342 if(i == processors - 1){
343 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
345 lines.push_back(linePair(startPos, numPairsPerProcessor));
351 //get scores for random trees
352 for (int j = 0; j < iters; j++) {
354 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
356 driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
358 createProcesses(T[i], namesOfGroupCombos, rScores);
361 driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
364 if (m->control_pressed) { delete tmap; delete weighted;
365 for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
368 // m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();
372 //find the signifigance of the score for summary file
373 for (int f = 0; f < numComp; f++) {
375 sort(rScores[f].begin(), rScores[f].end());
377 //the index of the score higher than yours is returned
378 //so if you have 1000 random trees the index returned is 100
379 //then there are 900 trees with a score greater then you.
380 //giving you a signifigance of 0.900
381 int index = findIndex(userData[f], f); if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
383 //the signifigance is the number of trees with the users score or higher
384 WScoreSig.push_back((iters-index)/(float)iters);
387 //out << "Tree# " << i << endl;
388 calculateFreqsCumuls();
402 if (m->control_pressed) { delete tmap; delete weighted;
403 for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
407 if (phylip) { createPhylipFile(); }
409 //clear out users groups
411 delete tmap; delete weighted;
412 for (int i = 0; i < T.size(); i++) { delete T[i]; }
415 if (m->control_pressed) {
416 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
420 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
422 //set phylip file as new current phylipfile
424 itTypes = outputTypes.find("phylip");
425 if (itTypes != outputTypes.end()) {
426 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
429 //set column file as new current columnfile
430 itTypes = outputTypes.find("column");
431 if (itTypes != outputTypes.end()) {
432 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
435 m->mothurOutEndLine();
436 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
437 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
438 m->mothurOutEndLine();
443 catch(exception& e) {
444 m->errorOut(e, "UnifracWeightedCommand", "execute");
448 /**************************************************************************************************/
450 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
452 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
454 vector<int> processIDS;
458 //loop through and create all the processes you want
459 while (process != processors) {
463 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
466 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
468 //pass numSeqs to parent
470 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
471 m->openOutputFile(tempFile, out);
472 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t'; } out << endl;
477 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
478 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
483 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
485 //force parent to wait until all the processes are done
486 for (int i=0;i<(processors-1);i++) {
487 int temp = processIDS[i];
491 //get data created by processes
492 for (int i=0;i<(processors-1);i++) {
495 string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
496 m->openInputFile(s, in);
499 for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
507 catch(exception& e) {
508 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
513 /**************************************************************************************************/
514 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) {
516 Tree* randT = new Tree(tmap);
518 for (int h = start; h < (start+num); h++) {
520 if (m->control_pressed) { return 0; }
522 //initialize weighted score
523 string groupA = namesOfGroupCombos[h][0];
524 string groupB = namesOfGroupCombos[h][1];
529 //create a random tree with same topology as T[i], but different labels
530 randT->assembleRandomUnifracTree(groupA, groupB);
532 if (m->control_pressed) { delete randT; return 0; }
534 //get wscore of random tree
535 EstOutput randomData = weighted->getValues(randT, groupA, groupB);
537 if (m->control_pressed) { delete randT; return 0; }
540 scores[h].push_back(randomData[0]);
548 catch(exception& e) {
549 m->errorOut(e, "UnifracWeightedCommand", "driver");
553 /***********************************************************/
554 void UnifracWeightedCommand::printWeightedFile() {
558 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
560 for(int a = 0; a < numComp; a++) {
561 output->initFile(groupComb[a], tags);
563 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
564 data.push_back(it->first); data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
565 output->output(data);
571 catch(exception& e) {
572 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
578 /***********************************************************/
579 void UnifracWeightedCommand::printWSummaryFile() {
582 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t' << "WSig" << endl;
583 m->mothurOut("Tree#\tGroups\tWScore\tWSig"); m->mothurOutEndLine();
586 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
590 for (int i = 0; i < T.size(); i++) {
591 for (int j = 0; j < numComp; j++) {
593 if (WScoreSig[count] > (1/(float)iters)) {
594 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
595 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
596 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" + toString(WScoreSig[count]) + "\n");
598 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
599 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
600 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" + toString((1/float(iters))) + "\n");
603 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << "0.00" << endl;
604 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << "0.00" << endl;
605 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t0.00\n");
612 catch(exception& e) {
613 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
617 /***********************************************************/
618 void UnifracWeightedCommand::createPhylipFile() {
622 for (int i = 0; i < T.size(); i++) {
624 string phylipFileName;
625 if ((outputForm == "lt") || (outputForm == "square")) {
626 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.phylip.dist";
627 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
629 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.column.dist";
630 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
634 m->openOutputFile(phylipFileName, out);
636 if ((outputForm == "lt") || (outputForm == "square")) {
638 out << m->Groups.size() << endl;
641 //make matrix with scores in it
642 vector< vector<float> > dists; dists.resize(m->Groups.size());
643 for (int i = 0; i < m->Groups.size(); i++) {
644 dists[i].resize(m->Groups.size(), 0.0);
647 //flip it so you can print it
648 for (int r=0; r<m->Groups.size(); r++) {
649 for (int l = 0; l < r; l++) {
650 dists[r][l] = utreeScores[count];
651 dists[l][r] = utreeScores[count];
657 for (int r=0; r<m->Groups.size(); r++) {
659 string name = m->Groups[r];
660 if (name.length() < 10) { //pad with spaces to make compatible
661 while (name.length() < 10) { name += " "; }
664 if (outputForm == "lt") {
668 for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
670 }else if (outputForm == "square") {
674 for (int l = 0; l < m->Groups.size(); l++) { out << dists[r][l] << '\t'; }
678 for (int l = 0; l < r; l++) {
679 string otherName = m->Groups[l];
680 if (otherName.length() < 10) { //pad with spaces to make compatible
681 while (otherName.length() < 10) { otherName += " "; }
684 out << name << '\t' << otherName << dists[r][l] << endl;
691 catch(exception& e) {
692 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
696 /***********************************************************/
697 int UnifracWeightedCommand::findIndex(float score, int index) {
699 for (int i = 0; i < rScores[index].size(); i++) {
700 if (rScores[index][i] >= score) { return i; }
702 return rScores[index].size();
704 catch(exception& e) {
705 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
710 /***********************************************************/
712 void UnifracWeightedCommand::calculateFreqsCumuls() {
714 //clear out old tree values
716 rScoreFreq.resize(numComp);
718 rCumul.resize(numComp);
721 //calculate frequency
722 for (int f = 0; f < numComp; f++) {
723 for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7... you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
724 validScores[rScores[f][i]] = rScores[f][i];
725 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
726 if (it != rScoreFreq[f].end()) {
727 rScoreFreq[f][rScores[f][i]]++;
729 rScoreFreq[f][rScores[f][i]] = 1;
735 for(int a = 0; a < numComp; a++) {
736 float rcumul = 1.0000;
737 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
738 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
739 //make rscoreFreq map and rCumul
740 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
741 rCumul[a][it->first] = rcumul;
742 //get percentage of random trees with that info
743 if (it2 != rScoreFreq[a].end()) { rScoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
744 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
749 catch(exception& e) {
750 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
754 /*****************************************************************/
755 int UnifracWeightedCommand::readNamesFile() {
758 numUniquesInName = 0;
761 m->openInputFile(namefile, in);
763 string first, second;
764 map<string, string>::iterator itNames;
767 in >> first >> second; m->gobble(in);
771 itNames = m->names.find(first);
772 if (itNames == m->names.end()) {
773 m->names[first] = second;
775 //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
776 vector<string> dupNames;
777 m->splitAtComma(second, dupNames);
779 for (int i = 0; i < dupNames.size(); i++) {
780 nameMap[dupNames[i]] = dupNames[i];
781 if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); }
783 }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }
789 catch(exception& e) {
790 m->errorOut(e, "UnifracWeightedCommand", "readNamesFile");
794 /***********************************************************/