2 * unifracweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracweightedcommand.h"
12 //**********************************************************************************************************************
13 vector<string> UnifracWeightedCommand::setParameters(){
15 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
16 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
17 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
18 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
19 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
20 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
21 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
22 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
23 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
24 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27 vector<string> myArray;
28 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
32 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
36 //**********************************************************************************************************************
37 string UnifracWeightedCommand::getHelpString(){
39 string helpString = "";
40 helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root and random. tree parameter is required unless you have valid current tree file.\n";
41 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
42 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
43 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
44 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
45 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
46 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
47 helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
48 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
49 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
50 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
51 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
55 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
59 //**********************************************************************************************************************
60 UnifracWeightedCommand::UnifracWeightedCommand(){
62 abort = true; calledHelp = true;
64 vector<string> tempOutNames;
65 outputTypes["weighted"] = tempOutNames;
66 outputTypes["wsummary"] = tempOutNames;
67 outputTypes["phylip"] = tempOutNames;
68 outputTypes["column"] = tempOutNames;
71 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
76 /***********************************************************/
77 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
79 abort = false; calledHelp = false;
81 //allow user to run help
82 if(option == "help") { help(); abort = true; calledHelp = true; }
83 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
86 vector<string> myArray = setParameters();
88 OptionParser parser(option);
89 map<string,string> parameters=parser.getParameters();
90 map<string,string>::iterator it;
92 ValidParameters validParameter;
94 //check to make sure all parameters are valid for command
95 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
96 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
99 //initialize outputTypes
100 vector<string> tempOutNames;
101 outputTypes["weighted"] = tempOutNames;
102 outputTypes["wsummary"] = tempOutNames;
103 outputTypes["phylip"] = tempOutNames;
104 outputTypes["column"] = tempOutNames;
106 //if the user changes the input directory command factory will send this info to us in the output parameter
107 string inputDir = validParameter.validFile(parameters, "inputdir", false);
108 if (inputDir == "not found"){ inputDir = ""; }
111 it = parameters.find("tree");
112 //user has given a template file
113 if(it != parameters.end()){
114 path = m->hasPath(it->second);
115 //if the user has not given a path then, add inputdir. else leave path alone.
116 if (path == "") { parameters["tree"] = inputDir + it->second; }
119 it = parameters.find("group");
120 //user has given a template file
121 if(it != parameters.end()){
122 path = m->hasPath(it->second);
123 //if the user has not given a path then, add inputdir. else leave path alone.
124 if (path == "") { parameters["group"] = inputDir + it->second; }
127 it = parameters.find("name");
128 //user has given a template file
129 if(it != parameters.end()){
130 path = m->hasPath(it->second);
131 //if the user has not given a path then, add inputdir. else leave path alone.
132 if (path == "") { parameters["name"] = inputDir + it->second; }
138 m->namesOfGroups.clear();
139 m->Treenames.clear();
142 //check for required parameters
143 treefile = validParameter.validFile(parameters, "tree", true);
144 if (treefile == "not open") { abort = true; }
145 else if (treefile == "not found") { //if there is a current design file, use it
146 treefile = m->getTreeFile();
147 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
148 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
149 }else { m->setTreeFile(treefile); }
151 //check for required parameters
152 groupfile = validParameter.validFile(parameters, "group", true);
153 if (groupfile == "not open") { abort = true; }
154 else if (groupfile == "not found") { groupfile = ""; }
155 else { m->setGroupFile(groupfile); }
157 namefile = validParameter.validFile(parameters, "name", true);
158 if (namefile == "not open") { abort = true; }
159 else if (namefile == "not found") { namefile = ""; }
160 else { m->setNameFile(namefile); }
162 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
165 //check for optional parameter and set defaults
166 // ...at some point should added some additional type checking...
167 groups = validParameter.validFile(parameters, "groups", false);
168 if (groups == "not found") { groups = ""; }
170 m->splitAtDash(groups, Groups);
174 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
175 convert(itersString, iters);
177 string temp = validParameter.validFile(parameters, "distance", false);
178 if (temp == "not found") { phylip = false; outputForm = ""; }
180 if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
181 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
184 temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "F"; }
185 random = m->isTrue(temp);
187 temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
188 includeRoot = m->isTrue(temp);
190 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
191 m->setProcessors(temp);
192 convert(temp, processors);
194 if (!random) { iters = 0; } //turn off random calcs
199 catch(exception& e) {
200 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
204 /***********************************************************/
205 int UnifracWeightedCommand::execute() {
208 if (abort == true) { if (calledHelp) { return 0; } return 2; }
210 m->setTreeFile(treefile);
212 if (groupfile != "") {
213 //read in group map info.
214 tmap = new TreeMap(groupfile);
216 }else{ //fake out by putting everyone in one group
217 Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap
218 tmap = new TreeMap();
220 for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
223 if (namefile != "") { readNamesFile(); }
225 read = new ReadNewickTree(treefile);
226 int readOk = read->read(tmap);
228 if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
230 read->AssembleTrees();
231 T = read->getTrees();
234 //make sure all files match
235 //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
237 if (namefile != "") {
238 if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); }
239 else { numNamesInTree = m->Treenames.size(); }
240 }else { numNamesInTree = m->Treenames.size(); }
243 //output any names that are in group file but not in tree
244 if (numNamesInTree < tmap->getNumSeqs()) {
245 for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
246 //is that name in the tree?
248 for (int j = 0; j < m->Treenames.size(); j++) {
249 if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
253 if (m->control_pressed) {
254 delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
255 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
260 //then you did not find it so report it
261 if (count == m->Treenames.size()) {
262 //if it is in your namefile then don't remove
263 map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
265 if (it == nameMap.end()) {
266 m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
267 tmap->removeSeq(tmap->namesOfSeqs[i]);
268 i--; //need this because removeSeq removes name from namesOfSeqs
274 sumFile = outputDir + m->getSimpleName(treefile) + ".wsummary";
275 m->openOutputFile(sumFile, outSum);
276 outputNames.push_back(sumFile); outputTypes["wsummary"].push_back(sumFile);
278 util = new SharedUtil();
279 string s; //to make work with setgroups
280 util->setGroups(m->Groups, tmap->namesOfGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze
281 util->getCombos(groupComb, m->Groups, numComp);
284 weighted = new Weighted(tmap, includeRoot);
286 int start = time(NULL);
288 //get weighted for users tree
289 userData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
290 randomData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
292 if (numComp < processors) { processors = numComp; }
294 //get weighted scores for users trees
295 for (int i = 0; i < T.size(); i++) {
297 if (m->control_pressed) { delete tmap; delete weighted;
298 for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
301 rScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
302 uScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
305 output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted", itersString);
306 outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted");
307 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted");
310 userData = weighted->getValues(T[i], processors, outputDir); //userData[0] = weightedscore
312 if (m->control_pressed) { delete tmap; delete weighted;
313 for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
316 for (int s=0; s<numComp; s++) {
317 //add users score to vector of user scores
318 uScores[s].push_back(userData[s]);
320 //save users tree score for summary file
321 utreeScores.push_back(userData[s]);
326 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
327 vector< vector<string> > namesOfGroupCombos;
328 for (int a=0; a<numGroups; a++) {
329 for (int l = 0; l < a; l++) {
330 vector<string> groups; groups.push_back(m->Groups[a]); groups.push_back(m->Groups[l]);
331 namesOfGroupCombos.push_back(groups);
337 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
339 int numPairs = namesOfGroupCombos.size();
340 int numPairsPerProcessor = numPairs / processors;
342 for (int i = 0; i < processors; i++) {
343 int startPos = i * numPairsPerProcessor;
344 if(i == processors - 1){
345 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
347 lines.push_back(linePair(startPos, numPairsPerProcessor));
353 //get scores for random trees
354 for (int j = 0; j < iters; j++) {
356 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
358 driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
360 createProcesses(T[i], namesOfGroupCombos, rScores);
363 driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
366 if (m->control_pressed) { delete tmap; delete weighted;
367 for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
370 // m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();
374 //find the signifigance of the score for summary file
375 for (int f = 0; f < numComp; f++) {
377 sort(rScores[f].begin(), rScores[f].end());
379 //the index of the score higher than yours is returned
380 //so if you have 1000 random trees the index returned is 100
381 //then there are 900 trees with a score greater then you.
382 //giving you a signifigance of 0.900
383 int index = findIndex(userData[f], f); if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
385 //the signifigance is the number of trees with the users score or higher
386 WScoreSig.push_back((iters-index)/(float)iters);
389 //out << "Tree# " << i << endl;
390 calculateFreqsCumuls();
404 if (m->control_pressed) { delete tmap; delete weighted;
405 for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
409 if (phylip) { createPhylipFile(); }
411 //clear out users groups
413 delete tmap; delete weighted;
414 for (int i = 0; i < T.size(); i++) { delete T[i]; }
417 if (m->control_pressed) {
418 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
422 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
424 //set phylip file as new current phylipfile
426 itTypes = outputTypes.find("phylip");
427 if (itTypes != outputTypes.end()) {
428 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
431 //set column file as new current columnfile
432 itTypes = outputTypes.find("column");
433 if (itTypes != outputTypes.end()) {
434 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
437 m->mothurOutEndLine();
438 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
439 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
440 m->mothurOutEndLine();
445 catch(exception& e) {
446 m->errorOut(e, "UnifracWeightedCommand", "execute");
450 /**************************************************************************************************/
452 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
454 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
456 vector<int> processIDS;
460 //loop through and create all the processes you want
461 while (process != processors) {
465 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
468 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
470 //pass numSeqs to parent
472 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
473 m->openOutputFile(tempFile, out);
474 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t'; } out << endl;
479 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
480 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
485 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
487 //force parent to wait until all the processes are done
488 for (int i=0;i<(processors-1);i++) {
489 int temp = processIDS[i];
493 //get data created by processes
494 for (int i=0;i<(processors-1);i++) {
497 string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
498 m->openInputFile(s, in);
501 for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
509 catch(exception& e) {
510 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
515 /**************************************************************************************************/
516 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) {
518 Tree* randT = new Tree(tmap);
520 for (int h = start; h < (start+num); h++) {
522 if (m->control_pressed) { return 0; }
524 //initialize weighted score
525 string groupA = namesOfGroupCombos[h][0];
526 string groupB = namesOfGroupCombos[h][1];
531 //create a random tree with same topology as T[i], but different labels
532 randT->assembleRandomUnifracTree(groupA, groupB);
534 if (m->control_pressed) { delete randT; return 0; }
536 //get wscore of random tree
537 EstOutput randomData = weighted->getValues(randT, groupA, groupB);
539 if (m->control_pressed) { delete randT; return 0; }
542 scores[h].push_back(randomData[0]);
550 catch(exception& e) {
551 m->errorOut(e, "UnifracWeightedCommand", "driver");
555 /***********************************************************/
556 void UnifracWeightedCommand::printWeightedFile() {
560 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
562 for(int a = 0; a < numComp; a++) {
563 output->initFile(groupComb[a], tags);
565 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
566 data.push_back(it->first); data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
567 output->output(data);
573 catch(exception& e) {
574 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
580 /***********************************************************/
581 void UnifracWeightedCommand::printWSummaryFile() {
584 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t' << "WSig" << endl;
585 m->mothurOut("Tree#\tGroups\tWScore\tWSig"); m->mothurOutEndLine();
588 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
592 for (int i = 0; i < T.size(); i++) {
593 for (int j = 0; j < numComp; j++) {
595 if (WScoreSig[count] > (1/(float)iters)) {
596 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
597 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
598 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" + toString(WScoreSig[count]) + "\n");
600 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
601 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
602 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" + toString((1/float(iters))) + "\n");
605 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << "0.00" << endl;
606 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << "0.00" << endl;
607 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t0.00\n");
614 catch(exception& e) {
615 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
619 /***********************************************************/
620 void UnifracWeightedCommand::createPhylipFile() {
624 for (int i = 0; i < T.size(); i++) {
626 string phylipFileName;
627 if ((outputForm == "lt") || (outputForm == "square")) {
628 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.phylip.dist";
629 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
631 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.column.dist";
632 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
636 m->openOutputFile(phylipFileName, out);
638 if ((outputForm == "lt") || (outputForm == "square")) {
640 out << m->Groups.size() << endl;
643 //make matrix with scores in it
644 vector< vector<float> > dists; dists.resize(m->Groups.size());
645 for (int i = 0; i < m->Groups.size(); i++) {
646 dists[i].resize(m->Groups.size(), 0.0);
649 //flip it so you can print it
650 for (int r=0; r<m->Groups.size(); r++) {
651 for (int l = 0; l < r; l++) {
652 dists[r][l] = utreeScores[count];
653 dists[l][r] = utreeScores[count];
659 for (int r=0; r<m->Groups.size(); r++) {
661 string name = m->Groups[r];
662 if (name.length() < 10) { //pad with spaces to make compatible
663 while (name.length() < 10) { name += " "; }
666 if (outputForm == "lt") {
670 for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
672 }else if (outputForm == "square") {
676 for (int l = 0; l < m->Groups.size(); l++) { out << dists[r][l] << '\t'; }
680 for (int l = 0; l < r; l++) {
681 string otherName = m->Groups[l];
682 if (otherName.length() < 10) { //pad with spaces to make compatible
683 while (otherName.length() < 10) { otherName += " "; }
686 out << name << '\t' << otherName << dists[r][l] << endl;
693 catch(exception& e) {
694 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
698 /***********************************************************/
699 int UnifracWeightedCommand::findIndex(float score, int index) {
701 for (int i = 0; i < rScores[index].size(); i++) {
702 if (rScores[index][i] >= score) { return i; }
704 return rScores[index].size();
706 catch(exception& e) {
707 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
712 /***********************************************************/
714 void UnifracWeightedCommand::calculateFreqsCumuls() {
716 //clear out old tree values
718 rScoreFreq.resize(numComp);
720 rCumul.resize(numComp);
723 //calculate frequency
724 for (int f = 0; f < numComp; f++) {
725 for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7... you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
726 validScores[rScores[f][i]] = rScores[f][i];
727 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
728 if (it != rScoreFreq[f].end()) {
729 rScoreFreq[f][rScores[f][i]]++;
731 rScoreFreq[f][rScores[f][i]] = 1;
737 for(int a = 0; a < numComp; a++) {
738 float rcumul = 1.0000;
739 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
740 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
741 //make rscoreFreq map and rCumul
742 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
743 rCumul[a][it->first] = rcumul;
744 //get percentage of random trees with that info
745 if (it2 != rScoreFreq[a].end()) { rScoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
746 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
751 catch(exception& e) {
752 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
756 /*****************************************************************/
757 int UnifracWeightedCommand::readNamesFile() {
760 numUniquesInName = 0;
763 m->openInputFile(namefile, in);
765 string first, second;
766 map<string, string>::iterator itNames;
769 in >> first >> second; m->gobble(in);
773 itNames = m->names.find(first);
774 if (itNames == m->names.end()) {
775 m->names[first] = second;
777 //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
778 vector<string> dupNames;
779 m->splitAtComma(second, dupNames);
781 for (int i = 0; i < dupNames.size(); i++) {
782 nameMap[dupNames[i]] = dupNames[i];
783 if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); }
785 }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }
791 catch(exception& e) {
792 m->errorOut(e, "UnifracWeightedCommand", "readNamesFile");
796 /***********************************************************/