2 * unifracweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracweightedcommand.h"
12 //**********************************************************************************************************************
13 vector<string> UnifracWeightedCommand::setParameters(){
15 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
16 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
17 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
18 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
19 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
20 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
21 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
22 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
23 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
24 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27 vector<string> myArray;
28 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
32 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
36 //**********************************************************************************************************************
37 string UnifracWeightedCommand::getHelpString(){
39 string helpString = "";
40 helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root and random. tree parameter is required unless you have valid current tree file.\n";
41 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
42 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
43 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
44 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
45 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
46 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
47 helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
48 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
49 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
50 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
51 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
55 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
59 //**********************************************************************************************************************
60 UnifracWeightedCommand::UnifracWeightedCommand(){
62 abort = true; calledHelp = true;
64 vector<string> tempOutNames;
65 outputTypes["weighted"] = tempOutNames;
66 outputTypes["wsummary"] = tempOutNames;
67 outputTypes["phylip"] = tempOutNames;
68 outputTypes["column"] = tempOutNames;
71 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
76 /***********************************************************/
77 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
79 abort = false; calledHelp = false;
81 //allow user to run help
82 if(option == "help") { help(); abort = true; calledHelp = true; }
83 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
86 vector<string> myArray = setParameters();
88 OptionParser parser(option);
89 map<string,string> parameters=parser.getParameters();
90 map<string,string>::iterator it;
92 ValidParameters validParameter;
94 //check to make sure all parameters are valid for command
95 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
96 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
99 //initialize outputTypes
100 vector<string> tempOutNames;
101 outputTypes["weighted"] = tempOutNames;
102 outputTypes["wsummary"] = tempOutNames;
103 outputTypes["phylip"] = tempOutNames;
104 outputTypes["column"] = tempOutNames;
106 //if the user changes the input directory command factory will send this info to us in the output parameter
107 string inputDir = validParameter.validFile(parameters, "inputdir", false);
108 if (inputDir == "not found"){ inputDir = ""; }
111 it = parameters.find("tree");
112 //user has given a template file
113 if(it != parameters.end()){
114 path = m->hasPath(it->second);
115 //if the user has not given a path then, add inputdir. else leave path alone.
116 if (path == "") { parameters["tree"] = inputDir + it->second; }
119 it = parameters.find("group");
120 //user has given a template file
121 if(it != parameters.end()){
122 path = m->hasPath(it->second);
123 //if the user has not given a path then, add inputdir. else leave path alone.
124 if (path == "") { parameters["group"] = inputDir + it->second; }
127 it = parameters.find("name");
128 //user has given a template file
129 if(it != parameters.end()){
130 path = m->hasPath(it->second);
131 //if the user has not given a path then, add inputdir. else leave path alone.
132 if (path == "") { parameters["name"] = inputDir + it->second; }
139 m->Treenames.clear();
142 //check for required parameters
143 treefile = validParameter.validFile(parameters, "tree", true);
144 if (treefile == "not open") { treefile = ""; abort = true; }
145 else if (treefile == "not found") { //if there is a current design file, use it
146 treefile = m->getTreeFile();
147 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
148 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
149 }else { m->setTreeFile(treefile); }
151 //check for required parameters
152 groupfile = validParameter.validFile(parameters, "group", true);
153 if (groupfile == "not open") { abort = true; }
154 else if (groupfile == "not found") { groupfile = ""; }
155 else { m->setGroupFile(groupfile); }
157 namefile = validParameter.validFile(parameters, "name", true);
158 if (namefile == "not open") { namefile = ""; abort = true; }
159 else if (namefile == "not found") { namefile = ""; }
160 else { m->setNameFile(namefile); }
162 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
165 //check for optional parameter and set defaults
166 // ...at some point should added some additional type checking...
167 groups = validParameter.validFile(parameters, "groups", false);
168 if (groups == "not found") { groups = ""; }
170 m->splitAtDash(groups, Groups);
171 m->setGroups(Groups);
174 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
175 m->mothurConvert(itersString, iters);
177 string temp = validParameter.validFile(parameters, "distance", false);
178 if (temp == "not found") { phylip = false; outputForm = ""; }
180 if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
181 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
184 temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "F"; }
185 random = m->isTrue(temp);
187 temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
188 includeRoot = m->isTrue(temp);
190 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
191 m->setProcessors(temp);
192 m->mothurConvert(temp, processors);
194 if (!random) { iters = 0; } //turn off random calcs
196 if (namefile == "") {
197 vector<string> files; files.push_back(treefile);
198 parser.getNameFile(files);
204 catch(exception& e) {
205 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
209 /***********************************************************/
210 int UnifracWeightedCommand::execute() {
213 if (abort == true) { if (calledHelp) { return 0; } return 2; }
215 m->setTreeFile(treefile);
217 if (groupfile != "") {
218 //read in group map info.
219 tmap = new TreeMap(groupfile);
221 }else{ //fake out by putting everyone in one group
222 Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap
223 tmap = new TreeMap();
225 for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
228 if (namefile != "") { readNamesFile(); }
230 read = new ReadNewickTree(treefile);
231 int readOk = read->read(tmap);
233 if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
235 read->AssembleTrees();
236 T = read->getTrees();
239 //make sure all files match
240 //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
242 if (namefile != "") {
243 if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); }
244 else { numNamesInTree = m->Treenames.size(); }
245 }else { numNamesInTree = m->Treenames.size(); }
248 //output any names that are in group file but not in tree
249 if (numNamesInTree < tmap->getNumSeqs()) {
250 for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
251 //is that name in the tree?
253 for (int j = 0; j < m->Treenames.size(); j++) {
254 if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
258 if (m->control_pressed) {
259 delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
260 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
265 //then you did not find it so report it
266 if (count == m->Treenames.size()) {
267 //if it is in your namefile then don't remove
268 map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
270 if (it == nameMap.end()) {
271 m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
272 tmap->removeSeq(tmap->namesOfSeqs[i]);
273 i--; //need this because removeSeq removes name from namesOfSeqs
279 sumFile = outputDir + m->getSimpleName(treefile) + ".wsummary";
280 m->openOutputFile(sumFile, outSum);
281 outputNames.push_back(sumFile); outputTypes["wsummary"].push_back(sumFile);
283 util = new SharedUtil();
284 string s; //to make work with setgroups
285 Groups = m->getGroups();
286 vector<string> nameGroups = tmap->getNamesOfGroups();
287 util->setGroups(Groups, nameGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze
288 util->getCombos(groupComb, Groups, numComp);
289 m->setGroups(Groups);
292 weighted = new Weighted(tmap, includeRoot);
294 int start = time(NULL);
296 //get weighted for users tree
297 userData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
298 randomData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
300 if (numComp < processors) { processors = numComp; }
302 //get weighted scores for users trees
303 for (int i = 0; i < T.size(); i++) {
305 if (m->control_pressed) { delete tmap; delete weighted;
306 for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
309 rScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
310 uScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
313 output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted", itersString);
314 outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted");
315 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted");
318 userData = weighted->getValues(T[i], processors, outputDir); //userData[0] = weightedscore
320 if (m->control_pressed) { delete tmap; delete weighted;
321 for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
324 for (int s=0; s<numComp; s++) {
325 //add users score to vector of user scores
326 uScores[s].push_back(userData[s]);
328 //save users tree score for summary file
329 utreeScores.push_back(userData[s]);
334 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
335 vector< vector<string> > namesOfGroupCombos;
336 for (int a=0; a<numGroups; a++) {
337 for (int l = 0; l < a; l++) {
338 vector<string> groups; groups.push_back((m->getGroups())[a]); groups.push_back((m->getGroups())[l]);
339 namesOfGroupCombos.push_back(groups);
345 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
347 int numPairs = namesOfGroupCombos.size();
348 int numPairsPerProcessor = numPairs / processors;
350 for (int i = 0; i < processors; i++) {
351 int startPos = i * numPairsPerProcessor;
352 if(i == processors - 1){
353 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
355 lines.push_back(linePair(startPos, numPairsPerProcessor));
361 //get scores for random trees
362 for (int j = 0; j < iters; j++) {
364 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
366 driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
368 createProcesses(T[i], namesOfGroupCombos, rScores);
371 driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
374 if (m->control_pressed) { delete tmap; delete weighted;
375 for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
378 // m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();
382 //find the signifigance of the score for summary file
383 for (int f = 0; f < numComp; f++) {
385 sort(rScores[f].begin(), rScores[f].end());
387 //the index of the score higher than yours is returned
388 //so if you have 1000 random trees the index returned is 100
389 //then there are 900 trees with a score greater then you.
390 //giving you a signifigance of 0.900
391 int index = findIndex(userData[f], f); if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
393 //the signifigance is the number of trees with the users score or higher
394 WScoreSig.push_back((iters-index)/(float)iters);
397 //out << "Tree# " << i << endl;
398 calculateFreqsCumuls();
412 if (m->control_pressed) { delete tmap; delete weighted;
413 for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
417 if (phylip) { createPhylipFile(); }
419 //clear out users groups
421 delete tmap; delete weighted;
422 for (int i = 0; i < T.size(); i++) { delete T[i]; }
425 if (m->control_pressed) {
426 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
430 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
432 //set phylip file as new current phylipfile
434 itTypes = outputTypes.find("phylip");
435 if (itTypes != outputTypes.end()) {
436 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
439 //set column file as new current columnfile
440 itTypes = outputTypes.find("column");
441 if (itTypes != outputTypes.end()) {
442 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
445 m->mothurOutEndLine();
446 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
447 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
448 m->mothurOutEndLine();
453 catch(exception& e) {
454 m->errorOut(e, "UnifracWeightedCommand", "execute");
458 /**************************************************************************************************/
460 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
462 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
464 vector<int> processIDS;
468 //loop through and create all the processes you want
469 while (process != processors) {
473 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
476 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
478 //pass numSeqs to parent
480 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
481 m->openOutputFile(tempFile, out);
482 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t'; } out << endl;
487 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
488 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
493 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
495 //force parent to wait until all the processes are done
496 for (int i=0;i<(processors-1);i++) {
497 int temp = processIDS[i];
501 //get data created by processes
502 for (int i=0;i<(processors-1);i++) {
505 string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
506 m->openInputFile(s, in);
509 for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
517 catch(exception& e) {
518 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
523 /**************************************************************************************************/
524 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) {
526 Tree* randT = new Tree(tmap);
528 for (int h = start; h < (start+num); h++) {
530 if (m->control_pressed) { return 0; }
532 //initialize weighted score
533 string groupA = namesOfGroupCombos[h][0];
534 string groupB = namesOfGroupCombos[h][1];
539 //create a random tree with same topology as T[i], but different labels
540 randT->assembleRandomUnifracTree(groupA, groupB);
542 if (m->control_pressed) { delete randT; return 0; }
544 //get wscore of random tree
545 EstOutput randomData = weighted->getValues(randT, groupA, groupB);
547 if (m->control_pressed) { delete randT; return 0; }
550 scores[h].push_back(randomData[0]);
558 catch(exception& e) {
559 m->errorOut(e, "UnifracWeightedCommand", "driver");
563 /***********************************************************/
564 void UnifracWeightedCommand::printWeightedFile() {
568 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
570 for(int a = 0; a < numComp; a++) {
571 output->initFile(groupComb[a], tags);
573 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
574 data.push_back(it->first); data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
575 output->output(data);
581 catch(exception& e) {
582 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
588 /***********************************************************/
589 void UnifracWeightedCommand::printWSummaryFile() {
592 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t';
593 m->mothurOut("Tree#\tGroups\tWScore\t");
594 if (random) { outSum << "WSig"; m->mothurOut("WSig"); }
595 outSum << endl; m->mothurOutEndLine();
598 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
602 for (int i = 0; i < T.size(); i++) {
603 for (int j = 0; j < numComp; j++) {
605 if (WScoreSig[count] > (1/(float)iters)) {
606 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
607 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
608 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" + toString(WScoreSig[count]) + "\n");
610 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
611 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
612 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" + toString((1/float(iters))) + "\n");
615 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl;
616 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl;
617 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\n");
624 catch(exception& e) {
625 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
629 /***********************************************************/
630 void UnifracWeightedCommand::createPhylipFile() {
634 for (int i = 0; i < T.size(); i++) {
636 string phylipFileName;
637 if ((outputForm == "lt") || (outputForm == "square")) {
638 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.phylip.dist";
639 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
641 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.column.dist";
642 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
646 m->openOutputFile(phylipFileName, out);
648 if ((outputForm == "lt") || (outputForm == "square")) {
650 out << m->getNumGroups() << endl;
653 //make matrix with scores in it
654 vector< vector<float> > dists; dists.resize(m->getNumGroups());
655 for (int i = 0; i < m->getNumGroups(); i++) {
656 dists[i].resize(m->getNumGroups(), 0.0);
659 //flip it so you can print it
660 for (int r=0; r<m->getNumGroups(); r++) {
661 for (int l = 0; l < r; l++) {
662 dists[r][l] = utreeScores[count];
663 dists[l][r] = utreeScores[count];
669 for (int r=0; r<m->getNumGroups(); r++) {
671 string name = (m->getGroups())[r];
672 if (name.length() < 10) { //pad with spaces to make compatible
673 while (name.length() < 10) { name += " "; }
676 if (outputForm == "lt") {
680 for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
682 }else if (outputForm == "square") {
686 for (int l = 0; l < m->getNumGroups(); l++) { out << dists[r][l] << '\t'; }
690 for (int l = 0; l < r; l++) {
691 string otherName = (m->getGroups())[l];
692 if (otherName.length() < 10) { //pad with spaces to make compatible
693 while (otherName.length() < 10) { otherName += " "; }
696 out << name << '\t' << otherName << dists[r][l] << endl;
703 catch(exception& e) {
704 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
708 /***********************************************************/
709 int UnifracWeightedCommand::findIndex(float score, int index) {
711 for (int i = 0; i < rScores[index].size(); i++) {
712 if (rScores[index][i] >= score) { return i; }
714 return rScores[index].size();
716 catch(exception& e) {
717 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
722 /***********************************************************/
724 void UnifracWeightedCommand::calculateFreqsCumuls() {
726 //clear out old tree values
728 rScoreFreq.resize(numComp);
730 rCumul.resize(numComp);
733 //calculate frequency
734 for (int f = 0; f < numComp; f++) {
735 for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7... you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
736 validScores[rScores[f][i]] = rScores[f][i];
737 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
738 if (it != rScoreFreq[f].end()) {
739 rScoreFreq[f][rScores[f][i]]++;
741 rScoreFreq[f][rScores[f][i]] = 1;
747 for(int a = 0; a < numComp; a++) {
748 float rcumul = 1.0000;
749 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
750 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
751 //make rscoreFreq map and rCumul
752 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
753 rCumul[a][it->first] = rcumul;
754 //get percentage of random trees with that info
755 if (it2 != rScoreFreq[a].end()) { rScoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
756 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
761 catch(exception& e) {
762 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
766 /*****************************************************************/
767 int UnifracWeightedCommand::readNamesFile() {
770 numUniquesInName = 0;
773 m->openInputFile(namefile, in);
775 string first, second;
776 map<string, string>::iterator itNames;
779 in >> first >> second; m->gobble(in);
783 itNames = m->names.find(first);
784 if (itNames == m->names.end()) {
785 m->names[first] = second;
787 //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
788 vector<string> dupNames;
789 m->splitAtComma(second, dupNames);
791 for (int i = 0; i < dupNames.size(); i++) {
792 nameMap[dupNames[i]] = dupNames[i];
793 if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); }
795 }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }
801 catch(exception& e) {
802 m->errorOut(e, "UnifracWeightedCommand", "readNamesFile");
806 /***********************************************************/