2 * unifracweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracweightedcommand.h"
12 //**********************************************************************************************************************
13 vector<string> UnifracWeightedCommand::setParameters(){
15 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
16 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
17 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
18 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
19 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
20 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
21 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
22 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
23 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
24 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27 vector<string> myArray;
28 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
32 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
36 //**********************************************************************************************************************
37 string UnifracWeightedCommand::getHelpString(){
39 string helpString = "";
40 helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root and random. tree parameter is required unless you have valid current tree file.\n";
41 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
42 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
43 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
44 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
45 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
46 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
47 helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
48 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
49 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
50 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
51 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n";
55 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
59 //**********************************************************************************************************************
60 UnifracWeightedCommand::UnifracWeightedCommand(){
62 abort = true; calledHelp = true;
64 vector<string> tempOutNames;
65 outputTypes["weighted"] = tempOutNames;
66 outputTypes["wsummary"] = tempOutNames;
67 outputTypes["phylip"] = tempOutNames;
68 outputTypes["column"] = tempOutNames;
71 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
76 /***********************************************************/
77 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
79 abort = false; calledHelp = false;
81 //allow user to run help
82 if(option == "help") { help(); abort = true; calledHelp = true; }
85 vector<string> myArray = setParameters();
87 OptionParser parser(option);
88 map<string,string> parameters=parser.getParameters();
89 map<string,string>::iterator it;
91 ValidParameters validParameter;
93 //check to make sure all parameters are valid for command
94 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
95 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
98 //initialize outputTypes
99 vector<string> tempOutNames;
100 outputTypes["weighted"] = tempOutNames;
101 outputTypes["wsummary"] = tempOutNames;
102 outputTypes["phylip"] = tempOutNames;
103 outputTypes["column"] = tempOutNames;
105 //if the user changes the input directory command factory will send this info to us in the output parameter
106 string inputDir = validParameter.validFile(parameters, "inputdir", false);
107 if (inputDir == "not found"){ inputDir = ""; }
110 it = parameters.find("tree");
111 //user has given a template file
112 if(it != parameters.end()){
113 path = m->hasPath(it->second);
114 //if the user has not given a path then, add inputdir. else leave path alone.
115 if (path == "") { parameters["tree"] = inputDir + it->second; }
118 it = parameters.find("group");
119 //user has given a template file
120 if(it != parameters.end()){
121 path = m->hasPath(it->second);
122 //if the user has not given a path then, add inputdir. else leave path alone.
123 if (path == "") { parameters["group"] = inputDir + it->second; }
126 it = parameters.find("name");
127 //user has given a template file
128 if(it != parameters.end()){
129 path = m->hasPath(it->second);
130 //if the user has not given a path then, add inputdir. else leave path alone.
131 if (path == "") { parameters["name"] = inputDir + it->second; }
137 //check for required parameters
138 treefile = validParameter.validFile(parameters, "tree", true);
139 if (treefile == "not open") { abort = true; }
140 else if (treefile == "not found") { //if there is a current design file, use it
141 treefile = m->getTreeFile();
142 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
143 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
146 //check for required parameters
147 groupfile = validParameter.validFile(parameters, "group", true);
148 if (groupfile == "not open") { abort = true; }
149 else if (groupfile == "not found") { groupfile = ""; }
151 namefile = validParameter.validFile(parameters, "name", true);
152 if (namefile == "not open") { abort = true; }
153 else if (namefile == "not found") { namefile = ""; }
155 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
158 //check for optional parameter and set defaults
159 // ...at some point should added some additional type checking...
160 groups = validParameter.validFile(parameters, "groups", false);
161 if (groups == "not found") { groups = ""; }
163 m->splitAtDash(groups, Groups);
167 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
168 convert(itersString, iters);
170 string temp = validParameter.validFile(parameters, "distance", false);
171 if (temp == "not found") { phylip = false; outputForm = ""; }
173 if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
174 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
177 temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "F"; }
178 random = m->isTrue(temp);
180 temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
181 includeRoot = m->isTrue(temp);
183 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
184 m->setProcessors(temp);
185 convert(temp, processors);
187 if (!random) { iters = 0; } //turn off random calcs
192 catch(exception& e) {
193 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
197 /***********************************************************/
198 int UnifracWeightedCommand::execute() {
201 if (abort == true) { if (calledHelp) { return 0; } return 2; }
203 if (groupfile != "") {
204 //read in group map info.
205 tmap = new TreeMap(groupfile);
207 }else{ //fake out by putting everyone in one group
208 Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap
209 tmap = new TreeMap();
211 for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
214 if (namefile != "") { readNamesFile(); }
216 read = new ReadNewickTree(treefile);
217 int readOk = read->read(tmap);
219 if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
221 read->AssembleTrees();
222 T = read->getTrees();
225 //make sure all files match
226 //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
228 if (namefile != "") {
229 if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); }
230 else { numNamesInTree = m->Treenames.size(); }
231 }else { numNamesInTree = m->Treenames.size(); }
234 //output any names that are in group file but not in tree
235 if (numNamesInTree < tmap->getNumSeqs()) {
236 for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
237 //is that name in the tree?
239 for (int j = 0; j < m->Treenames.size(); j++) {
240 if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
244 if (m->control_pressed) {
245 delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
246 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear();
251 //then you did not find it so report it
252 if (count == m->Treenames.size()) {
253 //if it is in your namefile then don't remove
254 map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
256 if (it == nameMap.end()) {
257 m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
258 tmap->removeSeq(tmap->namesOfSeqs[i]);
259 i--; //need this because removeSeq removes name from namesOfSeqs
265 sumFile = outputDir + m->getSimpleName(treefile) + ".wsummary";
266 m->openOutputFile(sumFile, outSum);
267 outputNames.push_back(sumFile); outputTypes["wsummary"].push_back(sumFile);
269 util = new SharedUtil();
270 string s; //to make work with setgroups
271 util->setGroups(m->Groups, tmap->namesOfGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze
272 util->getCombos(groupComb, m->Groups, numComp);
275 weighted = new Weighted(tmap, includeRoot);
277 int start = time(NULL);
279 //get weighted for users tree
280 userData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
281 randomData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
283 if (numComp < processors) { processors = numComp; }
285 //get weighted scores for users trees
286 for (int i = 0; i < T.size(); i++) {
288 if (m->control_pressed) { delete tmap; delete weighted;
289 for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
292 rScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
293 uScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
296 output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted", itersString);
297 outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted");
298 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted");
301 userData = weighted->getValues(T[i], processors, outputDir); //userData[0] = weightedscore
303 if (m->control_pressed) { delete tmap; delete weighted;
304 for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
307 for (int s=0; s<numComp; s++) {
308 //add users score to vector of user scores
309 uScores[s].push_back(userData[s]);
311 //save users tree score for summary file
312 utreeScores.push_back(userData[s]);
317 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
318 vector< vector<string> > namesOfGroupCombos;
319 for (int a=0; a<numGroups; a++) {
320 for (int l = 0; l < a; l++) {
321 vector<string> groups; groups.push_back(m->Groups[a]); groups.push_back(m->Groups[l]);
322 namesOfGroupCombos.push_back(groups);
328 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
330 int numPairs = namesOfGroupCombos.size();
331 int numPairsPerProcessor = numPairs / processors;
333 for (int i = 0; i < processors; i++) {
334 int startPos = i * numPairsPerProcessor;
335 if(i == processors - 1){
336 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
338 lines.push_back(linePair(startPos, numPairsPerProcessor));
344 //get scores for random trees
345 for (int j = 0; j < iters; j++) {
347 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
349 driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
351 createProcesses(T[i], namesOfGroupCombos, rScores);
354 driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
357 if (m->control_pressed) { delete tmap; delete weighted;
358 for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
361 // m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();
365 //find the signifigance of the score for summary file
366 for (int f = 0; f < numComp; f++) {
368 sort(rScores[f].begin(), rScores[f].end());
370 //the index of the score higher than yours is returned
371 //so if you have 1000 random trees the index returned is 100
372 //then there are 900 trees with a score greater then you.
373 //giving you a signifigance of 0.900
374 int index = findIndex(userData[f], f); if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
376 //the signifigance is the number of trees with the users score or higher
377 WScoreSig.push_back((iters-index)/(float)iters);
380 //out << "Tree# " << i << endl;
381 calculateFreqsCumuls();
395 if (m->control_pressed) { delete tmap; delete weighted;
396 for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
400 if (phylip) { createPhylipFile(); }
402 //clear out users groups
404 delete tmap; delete weighted;
405 for (int i = 0; i < T.size(); i++) { delete T[i]; }
408 if (m->control_pressed) {
409 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
413 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
415 //set phylip file as new current phylipfile
417 itTypes = outputTypes.find("phylip");
418 if (itTypes != outputTypes.end()) {
419 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
422 //set column file as new current columnfile
423 itTypes = outputTypes.find("column");
424 if (itTypes != outputTypes.end()) {
425 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
428 m->mothurOutEndLine();
429 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
430 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
431 m->mothurOutEndLine();
436 catch(exception& e) {
437 m->errorOut(e, "UnifracWeightedCommand", "execute");
441 /**************************************************************************************************/
443 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
445 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
447 vector<int> processIDS;
451 //loop through and create all the processes you want
452 while (process != processors) {
456 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
459 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
461 //pass numSeqs to parent
463 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
464 m->openOutputFile(tempFile, out);
465 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t'; } out << endl;
470 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
471 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
476 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
478 //force parent to wait until all the processes are done
479 for (int i=0;i<(processors-1);i++) {
480 int temp = processIDS[i];
484 //get data created by processes
485 for (int i=0;i<(processors-1);i++) {
488 string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
489 m->openInputFile(s, in);
492 for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
500 catch(exception& e) {
501 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
506 /**************************************************************************************************/
507 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) {
509 Tree* randT = new Tree(tmap);
511 for (int h = start; h < (start+num); h++) {
513 if (m->control_pressed) { return 0; }
515 //initialize weighted score
516 string groupA = namesOfGroupCombos[h][0];
517 string groupB = namesOfGroupCombos[h][1];
522 //create a random tree with same topology as T[i], but different labels
523 randT->assembleRandomUnifracTree(groupA, groupB);
525 if (m->control_pressed) { delete randT; return 0; }
527 //get wscore of random tree
528 EstOutput randomData = weighted->getValues(randT, groupA, groupB);
530 if (m->control_pressed) { delete randT; return 0; }
533 scores[h].push_back(randomData[0]);
541 catch(exception& e) {
542 m->errorOut(e, "UnifracWeightedCommand", "driver");
546 /***********************************************************/
547 void UnifracWeightedCommand::printWeightedFile() {
551 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
553 for(int a = 0; a < numComp; a++) {
554 output->initFile(groupComb[a], tags);
556 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
557 data.push_back(it->first); data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
558 output->output(data);
564 catch(exception& e) {
565 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
571 /***********************************************************/
572 void UnifracWeightedCommand::printWSummaryFile() {
575 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t' << "WSig" << endl;
576 m->mothurOut("Tree#\tGroups\tWScore\tWSig"); m->mothurOutEndLine();
579 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
583 for (int i = 0; i < T.size(); i++) {
584 for (int j = 0; j < numComp; j++) {
586 if (WScoreSig[count] > (1/(float)iters)) {
587 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
588 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
589 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" + toString(WScoreSig[count]) + "\n");
591 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
592 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
593 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" + toString((1/float(iters))) + "\n");
596 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << "0.00" << endl;
597 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << "0.00" << endl;
598 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t0.00\n");
605 catch(exception& e) {
606 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
610 /***********************************************************/
611 void UnifracWeightedCommand::createPhylipFile() {
615 for (int i = 0; i < T.size(); i++) {
617 string phylipFileName;
618 if ((outputForm == "lt") || (outputForm == "square")) {
619 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.phylip.dist";
620 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
622 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.column.dist";
623 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
627 m->openOutputFile(phylipFileName, out);
629 if ((outputForm == "lt") || (outputForm == "square")) {
631 out << m->Groups.size() << endl;
634 //make matrix with scores in it
635 vector< vector<float> > dists; dists.resize(m->Groups.size());
636 for (int i = 0; i < m->Groups.size(); i++) {
637 dists[i].resize(m->Groups.size(), 0.0);
640 //flip it so you can print it
641 for (int r=0; r<m->Groups.size(); r++) {
642 for (int l = 0; l < r; l++) {
643 dists[r][l] = utreeScores[count];
644 dists[l][r] = utreeScores[count];
650 for (int r=0; r<m->Groups.size(); r++) {
652 string name = m->Groups[r];
653 if (name.length() < 10) { //pad with spaces to make compatible
654 while (name.length() < 10) { name += " "; }
657 if (outputForm == "lt") {
661 for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
663 }else if (outputForm == "square") {
667 for (int l = 0; l < m->Groups.size(); l++) { out << dists[r][l] << '\t'; }
671 for (int l = 0; l < r; l++) {
672 string otherName = m->Groups[l];
673 if (otherName.length() < 10) { //pad with spaces to make compatible
674 while (otherName.length() < 10) { otherName += " "; }
677 out << name << '\t' << otherName << dists[r][l] << endl;
684 catch(exception& e) {
685 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
689 /***********************************************************/
690 int UnifracWeightedCommand::findIndex(float score, int index) {
692 for (int i = 0; i < rScores[index].size(); i++) {
693 if (rScores[index][i] >= score) { return i; }
695 return rScores[index].size();
697 catch(exception& e) {
698 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
703 /***********************************************************/
705 void UnifracWeightedCommand::calculateFreqsCumuls() {
707 //clear out old tree values
709 rScoreFreq.resize(numComp);
711 rCumul.resize(numComp);
714 //calculate frequency
715 for (int f = 0; f < numComp; f++) {
716 for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7... you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
717 validScores[rScores[f][i]] = rScores[f][i];
718 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
719 if (it != rScoreFreq[f].end()) {
720 rScoreFreq[f][rScores[f][i]]++;
722 rScoreFreq[f][rScores[f][i]] = 1;
728 for(int a = 0; a < numComp; a++) {
729 float rcumul = 1.0000;
730 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
731 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
732 //make rscoreFreq map and rCumul
733 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
734 rCumul[a][it->first] = rcumul;
735 //get percentage of random trees with that info
736 if (it2 != rScoreFreq[a].end()) { rScoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
737 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
742 catch(exception& e) {
743 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
747 /*****************************************************************/
748 int UnifracWeightedCommand::readNamesFile() {
751 numUniquesInName = 0;
754 m->openInputFile(namefile, in);
756 string first, second;
757 map<string, string>::iterator itNames;
760 in >> first >> second; m->gobble(in);
764 itNames = m->names.find(first);
765 if (itNames == m->names.end()) {
766 m->names[first] = second;
768 //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
769 vector<string> dupNames;
770 m->splitAtComma(second, dupNames);
772 for (int i = 0; i < dupNames.size(); i++) {
773 nameMap[dupNames[i]] = dupNames[i];
774 if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); }
776 }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }
782 catch(exception& e) {
783 m->errorOut(e, "UnifracWeightedCommand", "readNamesFile");
787 /***********************************************************/