6 * Created by westcott on 1/4/10.
7 * Copyright 2010 Schloss Lab. All rights reserved.
11 #include "pcoacommand.h"
13 //**********************************************************************************************************************
14 vector<string> PCOACommand::getValidParameters(){
16 string Array[] = {"phylip", "metric","outputdir","inputdir"};
17 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
21 m->errorOut(e, "PCOACommand", "getValidParameters");
25 //**********************************************************************************************************************
26 PCOACommand::PCOACommand(){
29 //initialize outputTypes
30 vector<string> tempOutNames;
31 outputTypes["pcoa"] = tempOutNames;
32 outputTypes["loadings"] = tempOutNames;
33 outputTypes["corr"] = tempOutNames;
36 m->errorOut(e, "PCOACommand", "PCOACommand");
40 //**********************************************************************************************************************
41 vector<string> PCOACommand::getRequiredParameters(){
43 string Array[] = {"phylip"};
44 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
48 m->errorOut(e, "PCOACommand", "getRequiredParameters");
52 //**********************************************************************************************************************
53 vector<string> PCOACommand::getRequiredFiles(){
55 vector<string> myArray;
59 m->errorOut(e, "PCOACommand", "getRequiredFiles");
63 //**********************************************************************************************************************
65 PCOACommand::PCOACommand(string option) {
69 //allow user to run help
70 if(option == "help") { help(); abort = true; }
73 //valid paramters for this command
74 string Array[] = {"phylip","metric","outputdir", "inputdir"};
75 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
77 OptionParser parser(option);
78 map<string, string> parameters = parser. getParameters();
80 ValidParameters validParameter;
81 map<string, string>::iterator it;
83 //check to make sure all parameters are valid for command
84 for (it = parameters.begin(); it != parameters.end(); it++) {
85 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
87 //if the user changes the input directory command factory will send this info to us in the output parameter
88 string inputDir = validParameter.validFile(parameters, "inputdir", false);
89 if (inputDir == "not found"){ inputDir = ""; }
92 it = parameters.find("phylip");
93 //user has given a template file
94 if(it != parameters.end()){
95 path = m->hasPath(it->second);
96 //if the user has not given a path then, add inputdir. else leave path alone.
97 if (path == "") { parameters["phylip"] = inputDir + it->second; }
101 //initialize outputTypes
102 vector<string> tempOutNames;
103 outputTypes["pcoa"] = tempOutNames;
104 outputTypes["loadings"] = tempOutNames;
105 outputTypes["corr"] = tempOutNames;
107 //required parameters
108 phylipfile = validParameter.validFile(parameters, "phylip", true);
109 if (phylipfile == "not open") { abort = true; }
110 else if (phylipfile == "not found") { phylipfile = ""; abort = true; }
111 else { filename = phylipfile; }
113 //if the user changes the output directory command factory will send this info to us in the output parameter
114 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
116 outputDir += m->hasPath(phylipfile); //if user entered a file with a path then preserve it
119 //error checking on files
120 if (phylipfile == "") { m->mothurOut("You must provide a distance file before running the pcoa command."); m->mothurOutEndLine(); abort = true; }
122 string temp = validParameter.validFile(parameters, "metric", false); if (temp == "not found"){ temp = "T"; }
123 metric = m->isTrue(temp);
127 catch(exception& e) {
128 m->errorOut(e, "PCOACommand", "PCOACommand");
132 //**********************************************************************************************************************
133 void PCOACommand::help(){
136 m->mothurOut("The pcoa command parameters are phylip and metric"); m->mothurOutEndLine();
137 m->mothurOut("The phylip parameter allows you to enter your distance file."); m->mothurOutEndLine();
138 m->mothurOut("The metric parameter allows indicate you if would like the pearson correlation coefficient calculated. Default=True"); m->mothurOutEndLine();
139 m->mothurOut("Example pcoa(phylip=yourDistanceFile).\n");
140 m->mothurOut("Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourDistanceFile).\n\n");
142 catch(exception& e) {
143 m->errorOut(e, "PCOACommand", "help");
147 //**********************************************************************************************************************
148 PCOACommand::~PCOACommand(){}
149 //**********************************************************************************************************************
150 int PCOACommand::execute(){
153 if (abort == true) { return 0; }
155 cout.setf(ios::fixed, ios::floatfield);
156 cout.setf(ios::showpoint);
157 cerr.setf(ios::fixed, ios::floatfield);
158 cerr.setf(ios::showpoint);
160 vector<string> names;
161 vector<vector<double> > D;
163 fbase = outputDir + m->getRootName(m->getSimpleName(filename));
165 read(filename, names, D);
167 if (m->control_pressed) { return 0; }
169 double offset = 0.0000;
172 vector<vector<double> > G = D;
173 vector<vector<double> > copy_G;
174 //int rank = D.size();
176 m->mothurOut("\nProcessing...\n\n");
178 for(int count=0;count<2;count++){
179 recenter(offset, D, G); if (m->control_pressed) { return 0; }
180 tred2(G, d, e); if (m->control_pressed) { return 0; }
181 qtli(d, e, G); if (m->control_pressed) { return 0; }
182 offset = d[d.size()-1];
183 if(offset > 0.0) break;
186 if (m->control_pressed) { return 0; }
188 output(fbase, names, G, d);
190 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
194 for (int i = 1; i < 4; i++) {
196 vector< vector<double> > EuclidDists = calculateEuclidianDistance(G, i); //G is the pcoa file
198 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
200 double corr = calcPearson(EuclidDists, D); //G is the pcoa file, D is the users distance matrix
202 m->mothurOut("Pearson's coefficient using " + toString(i) + " axis: " + toString(corr)); m->mothurOutEndLine();
204 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
208 m->mothurOutEndLine();
209 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
210 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
211 m->mothurOutEndLine();
215 catch(exception& e) {
216 m->errorOut(e, "PCOACommand", "execute");
220 /*********************************************************************************************************************************/
221 vector< vector<double> > PCOACommand::calculateEuclidianDistance(vector< vector<double> >& axes, int dimensions){
224 vector< vector<double> > dists; dists.resize(axes.size());
225 for (int i = 0; i < dists.size(); i++) { dists[i].resize(axes.size(), 0.0); }
227 if (dimensions == 1) { //one dimension calc = abs(x-y)
229 for (int i = 0; i < dists.size(); i++) {
231 if (m->control_pressed) { return dists; }
233 for (int j = 0; j < i; j++) {
234 dists[i][j] = abs(axes[i][0] - axes[j][0]);
235 dists[j][i] = dists[i][j];
239 }else if (dimensions == 2) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)
241 for (int i = 0; i < dists.size(); i++) {
243 if (m->control_pressed) { return dists; }
245 for (int j = 0; j < i; j++) {
246 double firstDim = ((axes[i][0] - axes[j][0]) * (axes[i][0] - axes[j][0]));
247 double secondDim = ((axes[i][1] - axes[j][1]) * (axes[i][1] - axes[j][1]));
249 dists[i][j] = sqrt((firstDim + secondDim));
250 dists[j][i] = dists[i][j];
254 }else if (dimensions == 3) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2 + (x3 - y3)^2)
256 for (int i = 0; i < dists.size(); i++) {
258 if (m->control_pressed) { return dists; }
260 for (int j = 0; j < i; j++) {
261 double firstDim = ((axes[i][0] - axes[j][0]) * (axes[i][0] - axes[j][0]));
262 double secondDim = ((axes[i][1] - axes[j][1]) * (axes[i][1] - axes[j][1]));
263 double thirdDim = ((axes[i][2] - axes[j][2]) * (axes[i][2] - axes[j][2]));
265 dists[i][j] = sqrt((firstDim + secondDim + thirdDim));
266 dists[j][i] = dists[i][j];
270 }else { m->mothurOut("[ERROR]: too many dimensions, aborting."); m->mothurOutEndLine(); m->control_pressed = true; }
274 catch(exception& e) {
275 m->errorOut(e, "PCOACommand", "calculateEuclidianDistance");
279 /*********************************************************************************************************************************/
280 double PCOACommand::calcPearson(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
283 //find average for - X
284 vector<float> averageEuclid; averageEuclid.resize(euclidDists.size(), 0.0);
285 for (int i = 0; i < euclidDists.size(); i++) {
286 for (int j = 0; j < euclidDists[i].size(); j++) {
287 averageEuclid[i] += euclidDists[i][j];
290 for (int i = 0; i < averageEuclid.size(); i++) { averageEuclid[i] = averageEuclid[i] / (float) euclidDists.size(); }
292 //find average for - Y
293 vector<float> averageUser; averageUser.resize(userDists.size(), 0.0);
294 for (int i = 0; i < userDists.size(); i++) {
295 for (int j = 0; j < userDists[i].size(); j++) {
296 averageUser[i] += userDists[i][j];
299 for (int i = 0; i < averageUser.size(); i++) { averageUser[i] = averageUser[i] / (float) userDists.size(); }
301 double numerator = 0.0;
302 double denomTerm1 = 0.0;
303 double denomTerm2 = 0.0;
305 for (int i = 0; i < euclidDists.size(); i++) {
307 for (int k = 0; k < i; k++) {
309 float Yi = userDists[i][k];
310 float Xi = euclidDists[i][k];
312 numerator += ((Xi - averageEuclid[k]) * (Yi - averageUser[k]));
313 denomTerm1 += ((Xi - averageEuclid[k]) * (Xi - averageEuclid[k]));
314 denomTerm2 += ((Yi - averageUser[k]) * (Yi - averageUser[k]));
318 double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
319 double r = numerator / denom;
323 catch(exception& e) {
324 m->errorOut(e, "PCOACommand", "calculateEuclidianDistance");
328 /*********************************************************************************************************************************/
330 inline double SIGN(const double a, const double b)
332 return b>=0 ? (a>=0 ? a:-a) : (a>=0 ? -a:a);
335 /*********************************************************************************************************************************/
337 void PCOACommand::get_comment(istream& f, char begin, char end){
340 while(d != end){ d = f.get(); }
343 catch(exception& e) {
344 m->errorOut(e, "PCOACommand", "get_comment");
349 /*********************************************************************************************************************************/
351 int PCOACommand::read_phylip(istream& f, int square_m, vector<string>& name_list, vector<vector<double> >& d){
359 name_list.resize(rank);
362 for(int i=0;i<rank;i++)
364 for(int i=0;i<rank;i++) {
366 // cout << i << "\t" << name_list[i] << endl;
367 for(int j=0;j<rank;j++) {
368 if (m->control_pressed) { return 0; }
371 if (d[i][j] == -0.0000)
376 else if(square_m == 2){
377 for(int i=0;i<rank;i++){
382 for(int i=1;i<rank;i++){
385 for(int j=0;j<i;j++){
386 if (m->control_pressed) { return 0; }
388 if (d[i][j] == -0.0000)
397 catch(exception& e) {
398 m->errorOut(e, "PCOACommand", "read_phylip");
404 /*********************************************************************************************************************************/
406 void PCOACommand::read(string fname, vector<string>& names, vector<vector<double> >& D){
409 m->openInputFile(fname, f);
411 //check whether matrix is square
417 f >> numSeqs >> name;
419 while((d=f.get()) != EOF){
421 //is d a number meaning its square
427 //is d a line return meaning its lower triangle
435 //reopen to get back to beginning
436 m->openInputFile(fname, f);
437 read_phylip(f, q, names, D);
439 catch(exception& e) {
440 m->errorOut(e, "PCOACommand", "read");
445 /*********************************************************************************************************************************/
447 double PCOACommand::pythag(double a, double b) { return(pow(a*a+b*b,0.5)); }
449 /*********************************************************************************************************************************/
451 void PCOACommand::matrix_mult(vector<vector<double> > first, vector<vector<double> > second, vector<vector<double> >& product){
453 int first_rows = first.size();
454 int first_cols = first[0].size();
455 int second_cols = second[0].size();
457 product.resize(first_rows);
458 for(int i=0;i<first_rows;i++){
459 product[i].resize(second_cols);
462 for(int i=0;i<first_rows;i++){
463 for(int j=0;j<second_cols;j++){
465 for(int k=0;k<first_cols;k++){
466 product[i][j] += first[i][k] * second[k][j];
471 catch(exception& e) {
472 m->errorOut(e, "PCOACommand", "matrix_mult");
478 /*********************************************************************************************************************************/
480 void PCOACommand::recenter(double offset, vector<vector<double> > D, vector<vector<double> >& G){
484 vector<vector<double> > A(rank);
485 vector<vector<double> > C(rank);
486 for(int i=0;i<rank;i++){
491 double scale = -1.0000 / (double) rank;
493 for(int i=0;i<rank;i++){
495 C[i][i] = 1.0000 + scale;
496 for(int j=i+1;j<rank;j++){
497 A[i][j] = A[j][i] = -0.5 * D[i][j] * D[i][j] + offset;
498 C[i][j] = C[j][i] = scale;
505 catch(exception& e) {
506 m->errorOut(e, "PCOACommand", "recenter");
512 /*********************************************************************************************************************************/
514 // This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
516 void PCOACommand::tred2(vector<vector<double> >& a, vector<double>& d, vector<double>& e){
518 double scale, hh, h, g, f;
525 for(int i=n-1;i>0;i--){
529 for(int k=0;k<l+1;k++){
530 scale += fabs(a[i][k]);
536 for(int k=0;k<l+1;k++){
538 h += a[i][k] * a[i][k];
541 g = (f >= 0.0 ? -sqrt(h) : sqrt(h));
546 for(int j=0;j<l+1;j++){
547 a[j][i] = a[i][j] / h;
549 for(int k=0;k<j+1;k++){
550 g += a[j][k] * a[i][k];
552 for(int k=j+1;k<l+1;k++){
553 g += a[k][j] * a[i][k];
559 for(int j=0;j<l+1;j++){
561 e[j] = g = e[j] - hh * f;
562 for(int k=0;k<j+1;k++){
563 a[j][k] -= (f * e[k] + g * a[i][k]);
578 for(int i=0;i<n;i++){
581 for(int j=0;j<l;j++){
583 for(int k=0;k<l;k++){
584 g += a[i][k] * a[k][j];
586 for(int k=0;k<l;k++){
587 a[k][j] -= g * a[k][i];
593 for(int j=0;j<l;j++){
594 a[j][i] = a[i][j] = 0.0;
598 catch(exception& e) {
599 m->errorOut(e, "PCOACommand", "tred2");
605 /*********************************************************************************************************************************/
607 // This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
609 void PCOACommand::qtli(vector<double>& d, vector<double>& e, vector<vector<double> >& z) {
612 double s, r, p, g, f, dd, c, b;
615 for(int i=1;i<=n;i++){
620 for(int l=0;l<n;l++){
624 dd = fabs(d[m]) + fabs(d[m+1]);
625 if(fabs(e[m])+dd == dd) break;
628 if(iter++ == 30) cerr << "Too many iterations in tqli\n";
629 g = (d[l+1]-d[l]) / (2.0 * e[l]);
631 g = d[m] - d[l] + e[l] / (g + SIGN(r,g));
637 e[i+1] = (r=pythag(f,g));
646 r = (d[i] - g) * s + 2.0 * c * b;
647 d[i+1] = g + ( p = s * r);
649 for(int k=0;k<n;k++){
651 z[k][i+1] = s * z[k][i] + c * f;
652 z[k][i] = c * z[k][i] - s * f;
655 if(r == 0.00 && i >= l) continue;
664 for(int i=0;i<n;i++){
666 for(int j=i;j<n;j++){
674 for(int j=0;j<n;j++){
682 catch(exception& e) {
683 m->errorOut(e, "PCOACommand", "qtli");
688 /*********************************************************************************************************************************/
690 void PCOACommand::output(string fnameRoot, vector<string> name_list, vector<vector<double> >& G, vector<double> d) {
692 int rank = name_list.size();
693 double dsum = 0.0000;
694 for(int i=0;i<rank;i++){
696 for(int j=0;j<rank;j++){
697 if(d[j] >= 0) { G[i][j] *= pow(d[j],0.5); }
698 else { G[i][j] = 0.00000; }
702 ofstream pcaData((fnameRoot+"pcoa").c_str(), ios::trunc);
703 pcaData.setf(ios::fixed, ios::floatfield);
704 pcaData.setf(ios::showpoint);
705 outputNames.push_back(fnameRoot+"pcoa");
706 outputTypes["pcoa"].push_back(fnameRoot+"pcoa");
708 ofstream pcaLoadings((fnameRoot+"pcoa.loadings").c_str(), ios::trunc);
709 pcaLoadings.setf(ios::fixed, ios::floatfield);
710 pcaLoadings.setf(ios::showpoint);
711 outputNames.push_back(fnameRoot+"pcoa.loadings");
712 outputTypes["loadings"].push_back(fnameRoot+"pcoa.loadings");
714 pcaLoadings << "axis\tloading\n";
715 for(int i=0;i<rank;i++){
716 pcaLoadings << i+1 << '\t' << d[i] * 100.0 / dsum << endl;
720 for(int i=0;i<rank;i++){
721 pcaData << '\t' << "axis" << i+1;
725 for(int i=0;i<rank;i++){
726 pcaData << name_list[i] << '\t';
727 for(int j=0;j<rank;j++){
728 pcaData << G[i][j] << '\t';
733 catch(exception& e) {
734 m->errorOut(e, "PCOACommand", "output");
739 /*********************************************************************************************************************************/