2 * unifracweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracweightedcommand.h"
12 //**********************************************************************************************************************
13 vector<string> UnifracWeightedCommand::setParameters(){
15 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
16 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
17 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
18 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
19 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
20 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
21 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
22 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
23 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
24 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27 vector<string> myArray;
28 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
32 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
36 //**********************************************************************************************************************
37 string UnifracWeightedCommand::getHelpString(){
39 string helpString = "";
40 helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root and random. tree parameter is required unless you have valid current tree file.\n";
41 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
42 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
43 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
44 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
45 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
46 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
47 helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
48 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
49 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
50 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
51 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
55 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
59 //**********************************************************************************************************************
60 UnifracWeightedCommand::UnifracWeightedCommand(){
62 abort = true; calledHelp = true;
64 vector<string> tempOutNames;
65 outputTypes["weighted"] = tempOutNames;
66 outputTypes["wsummary"] = tempOutNames;
67 outputTypes["phylip"] = tempOutNames;
68 outputTypes["column"] = tempOutNames;
71 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
76 /***********************************************************/
77 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
79 abort = false; calledHelp = false;
81 //allow user to run help
82 if(option == "help") { help(); abort = true; calledHelp = true; }
83 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
86 vector<string> myArray = setParameters();
88 OptionParser parser(option);
89 map<string,string> parameters=parser.getParameters();
90 map<string,string>::iterator it;
92 ValidParameters validParameter;
94 //check to make sure all parameters are valid for command
95 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
96 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
99 //initialize outputTypes
100 vector<string> tempOutNames;
101 outputTypes["weighted"] = tempOutNames;
102 outputTypes["wsummary"] = tempOutNames;
103 outputTypes["phylip"] = tempOutNames;
104 outputTypes["column"] = tempOutNames;
106 //if the user changes the input directory command factory will send this info to us in the output parameter
107 string inputDir = validParameter.validFile(parameters, "inputdir", false);
108 if (inputDir == "not found"){ inputDir = ""; }
111 it = parameters.find("tree");
112 //user has given a template file
113 if(it != parameters.end()){
114 path = m->hasPath(it->second);
115 //if the user has not given a path then, add inputdir. else leave path alone.
116 if (path == "") { parameters["tree"] = inputDir + it->second; }
119 it = parameters.find("group");
120 //user has given a template file
121 if(it != parameters.end()){
122 path = m->hasPath(it->second);
123 //if the user has not given a path then, add inputdir. else leave path alone.
124 if (path == "") { parameters["group"] = inputDir + it->second; }
127 it = parameters.find("name");
128 //user has given a template file
129 if(it != parameters.end()){
130 path = m->hasPath(it->second);
131 //if the user has not given a path then, add inputdir. else leave path alone.
132 if (path == "") { parameters["name"] = inputDir + it->second; }
138 m->namesOfGroups.clear();
139 m->Treenames.clear();
142 //check for required parameters
143 treefile = validParameter.validFile(parameters, "tree", true);
144 if (treefile == "not open") { abort = true; }
145 else if (treefile == "not found") { //if there is a current design file, use it
146 treefile = m->getTreeFile();
147 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
148 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
149 }else { m->setTreeFile(treefile); }
151 //check for required parameters
152 groupfile = validParameter.validFile(parameters, "group", true);
153 if (groupfile == "not open") { abort = true; }
154 else if (groupfile == "not found") { groupfile = ""; }
155 else { m->setGroupFile(groupfile); }
157 namefile = validParameter.validFile(parameters, "name", true);
158 if (namefile == "not open") { abort = true; }
159 else if (namefile == "not found") { namefile = ""; }
160 else { m->setNameFile(namefile); }
162 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
165 //check for optional parameter and set defaults
166 // ...at some point should added some additional type checking...
167 groups = validParameter.validFile(parameters, "groups", false);
168 if (groups == "not found") { groups = ""; }
170 m->splitAtDash(groups, Groups);
174 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
175 convert(itersString, iters);
177 string temp = validParameter.validFile(parameters, "distance", false);
178 if (temp == "not found") { phylip = false; outputForm = ""; }
180 if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
181 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
184 temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "F"; }
185 random = m->isTrue(temp);
187 temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
188 includeRoot = m->isTrue(temp);
190 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
191 m->setProcessors(temp);
192 convert(temp, processors);
194 if (!random) { iters = 0; } //turn off random calcs
199 catch(exception& e) {
200 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
204 /***********************************************************/
205 int UnifracWeightedCommand::execute() {
208 if (abort == true) { if (calledHelp) { return 0; } return 2; }
210 m->setTreeFile(treefile);
212 if (groupfile != "") {
213 //read in group map info.
214 tmap = new TreeMap(groupfile);
216 }else{ //fake out by putting everyone in one group
217 Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap
218 tmap = new TreeMap();
220 for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
223 if (namefile != "") { readNamesFile(); }
225 read = new ReadNewickTree(treefile);
226 int readOk = read->read(tmap);
228 if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
230 read->AssembleTrees();
231 T = read->getTrees();
234 //make sure all files match
235 //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
237 if (namefile != "") {
238 if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); }
239 else { numNamesInTree = m->Treenames.size(); }
240 }else { numNamesInTree = m->Treenames.size(); }
243 //output any names that are in group file but not in tree
244 if (numNamesInTree < tmap->getNumSeqs()) {
245 for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
246 //is that name in the tree?
248 for (int j = 0; j < m->Treenames.size(); j++) {
249 if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
253 if (m->control_pressed) {
254 delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
255 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
260 //then you did not find it so report it
261 if (count == m->Treenames.size()) {
262 //if it is in your namefile then don't remove
263 map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
265 if (it == nameMap.end()) {
266 m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
267 tmap->removeSeq(tmap->namesOfSeqs[i]);
268 i--; //need this because removeSeq removes name from namesOfSeqs
274 sumFile = outputDir + m->getSimpleName(treefile) + ".wsummary";
275 m->openOutputFile(sumFile, outSum);
276 outputNames.push_back(sumFile); outputTypes["wsummary"].push_back(sumFile);
278 util = new SharedUtil();
279 string s; //to make work with setgroups
280 util->setGroups(m->Groups, tmap->namesOfGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze
281 util->getCombos(groupComb, m->Groups, numComp);
284 weighted = new Weighted(tmap, includeRoot);
286 int start = time(NULL);
288 //get weighted for users tree
289 userData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
290 randomData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
292 if (numComp < processors) { processors = numComp; }
294 //get weighted scores for users trees
295 for (int i = 0; i < T.size(); i++) {
297 if (m->control_pressed) { delete tmap; delete weighted;
298 for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
301 rScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
302 uScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
305 output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted", itersString);
306 outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted");
307 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted");
310 userData = weighted->getValues(T[i], processors, outputDir); //userData[0] = weightedscore
312 if (m->control_pressed) { delete tmap; delete weighted;
313 for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
316 for (int s=0; s<numComp; s++) {
317 //add users score to vector of user scores
318 uScores[s].push_back(userData[s]);
320 //save users tree score for summary file
321 utreeScores.push_back(userData[s]);
326 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
327 vector< vector<string> > namesOfGroupCombos;
328 for (int a=0; a<numGroups; a++) {
329 for (int l = 0; l < a; l++) {
330 vector<string> groups; groups.push_back(m->Groups[a]); groups.push_back(m->Groups[l]);
331 namesOfGroupCombos.push_back(groups);
337 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
339 int numPairs = namesOfGroupCombos.size();
340 int numPairsPerProcessor = numPairs / processors;
342 for (int i = 0; i < processors; i++) {
343 int startPos = i * numPairsPerProcessor;
344 if(i == processors - 1){
345 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
347 lines.push_back(linePair(startPos, numPairsPerProcessor));
353 //get scores for random trees
354 for (int j = 0; j < iters; j++) {
356 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
358 driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
360 createProcesses(T[i], namesOfGroupCombos, rScores);
363 driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
366 if (m->control_pressed) { delete tmap; delete weighted;
367 for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
370 // m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();
374 //find the signifigance of the score for summary file
375 for (int f = 0; f < numComp; f++) {
377 sort(rScores[f].begin(), rScores[f].end());
379 //the index of the score higher than yours is returned
380 //so if you have 1000 random trees the index returned is 100
381 //then there are 900 trees with a score greater then you.
382 //giving you a signifigance of 0.900
383 int index = findIndex(userData[f], f); if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
385 //the signifigance is the number of trees with the users score or higher
386 WScoreSig.push_back((iters-index)/(float)iters);
389 //out << "Tree# " << i << endl;
390 calculateFreqsCumuls();
404 if (m->control_pressed) { delete tmap; delete weighted;
405 for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
409 if (phylip) { createPhylipFile(); }
411 //clear out users groups
413 delete tmap; delete weighted;
414 for (int i = 0; i < T.size(); i++) { delete T[i]; }
417 if (m->control_pressed) {
418 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
422 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
424 //set phylip file as new current phylipfile
426 itTypes = outputTypes.find("phylip");
427 if (itTypes != outputTypes.end()) {
428 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
431 //set column file as new current columnfile
432 itTypes = outputTypes.find("column");
433 if (itTypes != outputTypes.end()) {
434 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
437 m->mothurOutEndLine();
438 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
439 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
440 m->mothurOutEndLine();
445 catch(exception& e) {
446 m->errorOut(e, "UnifracWeightedCommand", "execute");
450 /**************************************************************************************************/
452 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
454 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
456 vector<int> processIDS;
460 //loop through and create all the processes you want
461 while (process != processors) {
465 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
468 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
470 //pass numSeqs to parent
472 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
473 m->openOutputFile(tempFile, out);
474 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t'; } out << endl;
479 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
480 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
485 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
487 //force parent to wait until all the processes are done
488 for (int i=0;i<(processors-1);i++) {
489 int temp = processIDS[i];
493 //get data created by processes
494 for (int i=0;i<(processors-1);i++) {
497 string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
498 m->openInputFile(s, in);
501 for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
509 catch(exception& e) {
510 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
515 /**************************************************************************************************/
516 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) {
518 Tree* randT = new Tree(tmap);
520 for (int h = start; h < (start+num); h++) {
522 if (m->control_pressed) { return 0; }
524 //initialize weighted score
525 string groupA = namesOfGroupCombos[h][0];
526 string groupB = namesOfGroupCombos[h][1];
531 //create a random tree with same topology as T[i], but different labels
532 randT->assembleRandomUnifracTree(groupA, groupB);
534 if (m->control_pressed) { delete randT; return 0; }
536 //get wscore of random tree
537 EstOutput randomData = weighted->getValues(randT, groupA, groupB);
539 if (m->control_pressed) { delete randT; return 0; }
542 scores[h].push_back(randomData[0]);
550 catch(exception& e) {
551 m->errorOut(e, "UnifracWeightedCommand", "driver");
555 /***********************************************************/
556 void UnifracWeightedCommand::printWeightedFile() {
560 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
562 for(int a = 0; a < numComp; a++) {
563 output->initFile(groupComb[a], tags);
565 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
566 data.push_back(it->first); data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
567 output->output(data);
573 catch(exception& e) {
574 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
580 /***********************************************************/
581 void UnifracWeightedCommand::printWSummaryFile() {
584 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t';
585 m->mothurOut("Tree#\tGroups\tWScore\t");
586 if (random) { outSum << "WSig"; m->mothurOut("WSig"); }
587 outSum << endl; m->mothurOutEndLine();
590 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
594 for (int i = 0; i < T.size(); i++) {
595 for (int j = 0; j < numComp; j++) {
597 if (WScoreSig[count] > (1/(float)iters)) {
598 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
599 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
600 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" + toString(WScoreSig[count]) + "\n");
602 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
603 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
604 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" + toString((1/float(iters))) + "\n");
607 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl;
608 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl;
609 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\n");
616 catch(exception& e) {
617 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
621 /***********************************************************/
622 void UnifracWeightedCommand::createPhylipFile() {
626 for (int i = 0; i < T.size(); i++) {
628 string phylipFileName;
629 if ((outputForm == "lt") || (outputForm == "square")) {
630 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.phylip.dist";
631 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
633 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.column.dist";
634 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
638 m->openOutputFile(phylipFileName, out);
640 if ((outputForm == "lt") || (outputForm == "square")) {
642 out << m->Groups.size() << endl;
645 //make matrix with scores in it
646 vector< vector<float> > dists; dists.resize(m->Groups.size());
647 for (int i = 0; i < m->Groups.size(); i++) {
648 dists[i].resize(m->Groups.size(), 0.0);
651 //flip it so you can print it
652 for (int r=0; r<m->Groups.size(); r++) {
653 for (int l = 0; l < r; l++) {
654 dists[r][l] = utreeScores[count];
655 dists[l][r] = utreeScores[count];
661 for (int r=0; r<m->Groups.size(); r++) {
663 string name = m->Groups[r];
664 if (name.length() < 10) { //pad with spaces to make compatible
665 while (name.length() < 10) { name += " "; }
668 if (outputForm == "lt") {
672 for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
674 }else if (outputForm == "square") {
678 for (int l = 0; l < m->Groups.size(); l++) { out << dists[r][l] << '\t'; }
682 for (int l = 0; l < r; l++) {
683 string otherName = m->Groups[l];
684 if (otherName.length() < 10) { //pad with spaces to make compatible
685 while (otherName.length() < 10) { otherName += " "; }
688 out << name << '\t' << otherName << dists[r][l] << endl;
695 catch(exception& e) {
696 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
700 /***********************************************************/
701 int UnifracWeightedCommand::findIndex(float score, int index) {
703 for (int i = 0; i < rScores[index].size(); i++) {
704 if (rScores[index][i] >= score) { return i; }
706 return rScores[index].size();
708 catch(exception& e) {
709 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
714 /***********************************************************/
716 void UnifracWeightedCommand::calculateFreqsCumuls() {
718 //clear out old tree values
720 rScoreFreq.resize(numComp);
722 rCumul.resize(numComp);
725 //calculate frequency
726 for (int f = 0; f < numComp; f++) {
727 for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7... you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
728 validScores[rScores[f][i]] = rScores[f][i];
729 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
730 if (it != rScoreFreq[f].end()) {
731 rScoreFreq[f][rScores[f][i]]++;
733 rScoreFreq[f][rScores[f][i]] = 1;
739 for(int a = 0; a < numComp; a++) {
740 float rcumul = 1.0000;
741 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
742 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
743 //make rscoreFreq map and rCumul
744 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
745 rCumul[a][it->first] = rcumul;
746 //get percentage of random trees with that info
747 if (it2 != rScoreFreq[a].end()) { rScoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
748 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
753 catch(exception& e) {
754 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
758 /*****************************************************************/
759 int UnifracWeightedCommand::readNamesFile() {
762 numUniquesInName = 0;
765 m->openInputFile(namefile, in);
767 string first, second;
768 map<string, string>::iterator itNames;
771 in >> first >> second; m->gobble(in);
775 itNames = m->names.find(first);
776 if (itNames == m->names.end()) {
777 m->names[first] = second;
779 //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
780 vector<string> dupNames;
781 m->splitAtComma(second, dupNames);
783 for (int i = 0; i < dupNames.size(); i++) {
784 nameMap[dupNames[i]] = dupNames[i];
785 if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); }
787 }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }
793 catch(exception& e) {
794 m->errorOut(e, "UnifracWeightedCommand", "readNamesFile");
798 /***********************************************************/