6 * Created by westcott on 1/4/10.
7 * Copyright 2010 Schloss Lab. All rights reserved.
11 #include "pcoacommand.h"
13 //**********************************************************************************************************************
// Builds the list of parameter names the pcoa command accepts:
// "phylip", "metric", "outputdir" and "inputdir".
// NOTE(review): this listing omits some lines of the function (the numbering
// jumps), so the return statement and the try/catch scaffolding are not
// visible here; presumably myArray is what gets returned - confirm.
14 vector<string> PCOACommand::getValidParameters(){
16 string Array[] = {"phylip", "metric","outputdir","inputdir"};
// Copy the C array into a vector; sizeof(Array)/sizeof(string) is the element count.
17 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
// Error report tagged with class and method name; `e` is presumably the
// exception bound by a catch clause that is not visible in this listing.
21 m->errorOut(e, "PCOACommand", "getValidParameters");
25 //**********************************************************************************************************************
// Default constructor: registers the three output categories used by this
// command ("pcoa", "loadings", "corr"), each mapped to an empty list of names.
// NOTE(review): some lines of this function are missing from the listing; any
// other initialisation done there cannot be confirmed from here.
26 PCOACommand::PCOACommand(){
29 //initialize outputTypes
// One empty vector is reused as the initial value for every category.
30 vector<string> tempOutNames;
31 outputTypes["pcoa"] = tempOutNames;
32 outputTypes["loadings"] = tempOutNames;
33 outputTypes["corr"] = tempOutNames;
// Error report tagged with class and method name; the enclosing catch clause
// is not visible in this listing.
36 m->errorOut(e, "PCOACommand", "PCOACommand");
40 //**********************************************************************************************************************
// Builds the list of parameters that must be supplied: only "phylip".
// NOTE(review): the return statement is not visible in this listing;
// presumably myArray is returned - confirm.
41 vector<string> PCOACommand::getRequiredParameters(){
43 string Array[] = {"phylip"};
// Copy the one-element C array into a vector.
44 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
// Error report tagged with class and method name; the enclosing catch clause
// is not visible in this listing.
48 m->errorOut(e, "PCOACommand", "getRequiredParameters");
52 //**********************************************************************************************************************
// Declares an empty list of required files; no visible line adds an entry.
// NOTE(review): the return statement is not visible in this listing;
// presumably the empty myArray is returned - confirm.
53 vector<string> PCOACommand::getRequiredFiles(){
55 vector<string> myArray;
// Error report tagged with class and method name; the enclosing catch clause
// is not visible in this listing.
59 m->errorOut(e, "PCOACommand", "getRequiredFiles");
63 //**********************************************************************************************************************
// Constructs the command from a raw option string: validates parameter names,
// resolves the phylip distance file and output directory, and reads the
// "metric" flag. Problems are signalled by setting `abort` to true, which
// execute() checks before doing any work.
// NOTE(review): several lines of this function are missing from the listing
// (for example the declaration of `path`, the initial value of `abort`, and
// the body of the `outputDir == "not found"` branch); the comments below
// describe only what the visible lines do.
65 PCOACommand::PCOACommand(string option) {
69 //allow user to run help
// "help" prints the usage text and marks the command as aborted.
70 if(option == "help") { help(); abort = true; }
73 //valid parameters for this command
74 string Array[] = {"phylip","metric","outputdir", "inputdir"};
75 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
// Split the option string into a name -> value map.
77 OptionParser parser(option);
78 map<string, string> parameters = parser. getParameters();
80 ValidParameters validParameter;
81 map<string, string>::iterator it;
83 //check to make sure all parameters are valid for command
// Every entry is checked; one invalid entry sets abort but the loop continues.
84 for (it = parameters.begin(); it != parameters.end(); it++) {
85 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
87 //if the user changes the input directory command factory will send this info to us in the output parameter
88 string inputDir = validParameter.validFile(parameters, "inputdir", false);
// "not found" is the sentinel for an absent parameter; treat it as no input directory.
89 if (inputDir == "not found"){ inputDir = ""; }
92 it = parameters.find("phylip");
93 //user has given a phylip file
94 if(it != parameters.end()){
95 path = m->hasPath(it->second);
96 //if the user has not given a path then, add inputdir. else leave path alone.
97 if (path == "") { parameters["phylip"] = inputDir + it->second; }
101 //initialize outputTypes
102 vector<string> tempOutNames;
103 outputTypes["pcoa"] = tempOutNames;
104 outputTypes["loadings"] = tempOutNames;
105 outputTypes["corr"] = tempOutNames;
107 //required parameters
// validFile returns the sentinel strings "not open" / "not found" on failure;
// both abort the command, otherwise the resolved name becomes `filename`.
108 phylipfile = validParameter.validFile(parameters, "phylip", true);
109 if (phylipfile == "not open") { abort = true; }
110 else if (phylipfile == "not found") { phylipfile = ""; abort = true; }
111 else { filename = phylipfile; }
113 //if the user changes the output directory command factory will send this info to us in the output parameter
114 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
116 outputDir += m->hasPath(phylipfile); //if user entered a file with a path then preserve it
119 //error checking on files
// NOTE(review): phylipfile is only "" here when the "not found" branch above
// ran, which has already set abort; this line adds the user-facing message.
120 if (phylipfile == "") { m->mothurOut("You must provide a distance file before running the pcoa command."); m->mothurOutEndLine(); abort = true; }
// "metric" defaults to "T" when absent; isTrue converts the text to a bool.
122 string temp = validParameter.validFile(parameters, "metric", false); if (temp == "not found"){ temp = "T"; }
123 metric = m->isTrue(temp);
// Any exception raised above is reported with class and method name.
127 catch(exception& e) {
128 m->errorOut(e, "PCOACommand", "PCOACommand");
132 //**********************************************************************************************************************
// Prints the usage text for the pcoa command through m->mothurOut: the
// available parameters (phylip, metric), what each one means, and an example.
// NOTE(review): the sentence printed for the metric parameter ("allows
// indicate you if would like") reads as garbled; it is a runtime string and is
// left untouched here.
133 void PCOACommand::help(){
136 m->mothurOut("The pcoa command parameters are phylip and metric"); m->mothurOutEndLine();
137 m->mothurOut("The phylip parameter allows you to enter your distance file."); m->mothurOutEndLine();
138 m->mothurOut("The metric parameter allows indicate you if would like the pearson correlation coefficient calculated. Default=True"); m->mothurOutEndLine();
// The last two messages embed their own newlines instead of calling mothurOutEndLine().
139 m->mothurOut("Example pcoa(phylip=yourDistanceFile).\n");
140 m->mothurOut("Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourDistanceFile).\n\n");
// Any exception raised while printing is reported with class and method name.
142 catch(exception& e) {
143 m->errorOut(e, "PCOACommand", "help");
147 //**********************************************************************************************************************
// Destructor: the body is empty, so no explicit cleanup is performed here.
148 PCOACommand::~PCOACommand(){}
149 //**********************************************************************************************************************
// Runs the command: reads the distance matrix, recenters it, passes it through
// linearCalc.tred2 / linearCalc.qtli, writes the result files via output(),
// reports Pearson coefficients for 1 to 3 axes, and lists the output files.
// Returns 0 on every visible return path (abort, user interrupt).
// NOTE(review): lines are missing from this listing, including the
// declarations of `d` and `e`, the closing of the loops, whatever guards the
// Pearson section, and the final return.
150 int PCOACommand::execute(){
// Nothing to do if the constructor flagged a problem (or help was requested).
153 if (abort == true) { return 0; }
// Switch cout and cerr to fixed-point notation with the decimal point always shown.
155 cout.setf(ios::fixed, ios::floatfield);
156 cout.setf(ios::showpoint);
157 cerr.setf(ios::fixed, ios::floatfield);
158 cerr.setf(ios::showpoint);
160 vector<string> names;
161 vector<vector<double> > D;
// Output files are named after the input file's root name, placed in outputDir.
163 fbase = outputDir + m->getRootName(m->getSimpleName(filename));
// Fill `names` and `D` from the distance file.
165 read(filename, names, D);
167 if (m->control_pressed) { return 0; }
169 double offset = 0.0000;
172 vector<vector<double> > G = D;
173 vector<vector<double> > copy_G;
175 m->mothurOut("\nProcessing...\n\n");
// At most two passes: the first with offset 0; a second pass runs only when
// the last element of d is not positive, reusing that value as the offset.
177 for(int count=0;count<2;count++){
178 recenter(offset, D, G); if (m->control_pressed) { return 0; }
179 linearCalc.tred2(G, d, e); if (m->control_pressed) { return 0; }
180 linearCalc.qtli(d, e, G); if (m->control_pressed) { return 0; }
// NOTE(review): d[d.size()-1] assumes d is non-empty; an empty input matrix
// would index out of range - confirm that read() rejects that case.
181 offset = d[d.size()-1];
182 if(offset > 0.0) break;
185 if (m->control_pressed) { return 0; }
// Writes the .pcoa and .pcoa.loadings files; G is passed by reference and is
// rescaled in place by output(), so the code below sees the scaled values.
187 output(fbase, names, G, d);
// On interrupt, delete every file recorded so far before returning.
189 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
// For 1, 2 and 3 axes: rebuild pairwise distances from the leading columns of
// G and correlate them with the user's distance matrix D.
// NOTE(review): presumably this section is guarded by `metric` on a line not
// shown in this listing - confirm.
193 for (int i = 1; i < 4; i++) {
195 vector< vector<double> > EuclidDists = calculateEuclidianDistance(G, i); //G is the pcoa file
// NOTE(review): the cleanup loops below declare their own `int i`, shadowing
// the axis counter; harmless because each is followed by return, but easy to misread.
197 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
199 double corr = calcPearson(EuclidDists, D); //G is the pcoa file, D is the users distance matrix
201 m->mothurOut("Pearson's coefficient using " + toString(i) + " axis: " + toString(corr)); m->mothurOutEndLine();
203 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
// Final summary: list every output file that was produced.
207 m->mothurOutEndLine();
208 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
209 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
210 m->mothurOutEndLine();
// Any exception raised above is reported with class and method name.
214 catch(exception& e) {
215 m->errorOut(e, "PCOACommand", "execute");
219 /*********************************************************************************************************************************/
// Builds a symmetric axes.size() x axes.size() matrix of Euclidean distances
// between the rows of `axes`, using only the first `dimensions` columns
// (1, 2 or 3). The diagonal stays 0.0. Any other value of `dimensions` logs an
// error and sets m->control_pressed. If m->control_pressed is seen during the
// computation the partially filled matrix is returned immediately.
// NOTE(review): assumes every row of `axes` has at least `dimensions` columns;
// nothing visible checks this. The normal-path return statement is not visible
// in this listing; presumably `dists` is returned.
220 vector< vector<double> > PCOACommand::calculateEuclidianDistance(vector< vector<double> >& axes, int dimensions){
// Square result matrix, zero-initialised.
223 vector< vector<double> > dists; dists.resize(axes.size());
224 for (int i = 0; i < dists.size(); i++) { dists[i].resize(axes.size(), 0.0); }
226 if (dimensions == 1) { //one dimension calc = abs(x-y)
228 for (int i = 0; i < dists.size(); i++) {
230 if (m->control_pressed) { return dists; }
// Only the lower triangle is computed; each value is mirrored to the upper one.
232 for (int j = 0; j < i; j++) {
// NOTE(review): unqualified abs() on doubles - which overload is chosen
// depends on the headers in scope; if only the integer abs is visible the
// difference is truncated. std::fabs would be unambiguous - confirm.
233 dists[i][j] = abs(axes[i][0] - axes[j][0]);
234 dists[j][i] = dists[i][j];
238 }else if (dimensions == 2) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)
240 for (int i = 0; i < dists.size(); i++) {
242 if (m->control_pressed) { return dists; }
244 for (int j = 0; j < i; j++) {
245 double firstDim = ((axes[i][0] - axes[j][0]) * (axes[i][0] - axes[j][0]));
246 double secondDim = ((axes[i][1] - axes[j][1]) * (axes[i][1] - axes[j][1]));
248 dists[i][j] = sqrt((firstDim + secondDim));
249 dists[j][i] = dists[i][j];
253 }else if (dimensions == 3) { //three dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2 + (x3 - y3)^2)
255 for (int i = 0; i < dists.size(); i++) {
257 if (m->control_pressed) { return dists; }
259 for (int j = 0; j < i; j++) {
260 double firstDim = ((axes[i][0] - axes[j][0]) * (axes[i][0] - axes[j][0]));
261 double secondDim = ((axes[i][1] - axes[j][1]) * (axes[i][1] - axes[j][1]));
262 double thirdDim = ((axes[i][2] - axes[j][2]) * (axes[i][2] - axes[j][2]));
264 dists[i][j] = sqrt((firstDim + secondDim + thirdDim));
265 dists[j][i] = dists[i][j];
// NOTE(review): this branch is taken for every value other than 1, 2 or 3,
// including 0 and negatives, so "too many dimensions" can be misleading.
269 }else { m->mothurOut("[ERROR]: too many dimensions, aborting."); m->mothurOutEndLine(); m->control_pressed = true; }
// Any exception raised above is reported with class and method name.
273 catch(exception& e) {
274 m->errorOut(e, "PCOACommand", "calculateEuclidianDistance");
278 /*********************************************************************************************************************************/
// Computes a Pearson-style correlation coefficient between two distance
// matrices: `euclidDists` (X) and `userDists` (Y). Only entries below the
// diagonal (k < i) contribute to the sums.
// r = sum((X - meanX)(Y - meanY)) / (sqrt(sum((X - meanX)^2)) * sqrt(sum((Y - meanY)^2)))
// NOTE(review): the mean subtracted from entry [i][k] is the mean of row k,
// not a single overall mean - confirm this is the intended statistic.
// NOTE(review): the return statement is not visible in this listing;
// presumably `r` is returned. Indexing userDists[i][k] assumes userDists is at
// least as large as euclidDists.
279 double PCOACommand::calcPearson(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
282 //find average for - X
// Row sums first; they are turned into means by the division that follows.
// NOTE(review): accumulating doubles into vector<float> loses precision.
283 vector<float> averageEuclid; averageEuclid.resize(euclidDists.size(), 0.0);
284 for (int i = 0; i < euclidDists.size(); i++) {
285 for (int j = 0; j < euclidDists[i].size(); j++) {
286 averageEuclid[i] += euclidDists[i][j];
// Divide each row sum by the number of rows (the matrix is presumably square).
289 for (int i = 0; i < averageEuclid.size(); i++) { averageEuclid[i] = averageEuclid[i] / (float) euclidDists.size(); }
291 //find average for - Y
292 vector<float> averageUser; averageUser.resize(userDists.size(), 0.0);
293 for (int i = 0; i < userDists.size(); i++) {
294 for (int j = 0; j < userDists[i].size(); j++) {
295 averageUser[i] += userDists[i][j];
298 for (int i = 0; i < averageUser.size(); i++) { averageUser[i] = averageUser[i] / (float) userDists.size(); }
300 double numerator = 0.0;
301 double denomTerm1 = 0.0;
302 double denomTerm2 = 0.0;
// Accumulate the cross-product and the two sums of squares over the lower triangle.
304 for (int i = 0; i < euclidDists.size(); i++) {
306 for (int k = 0; k < i; k++) {
// The double entries are narrowed to float here.
308 float Yi = userDists[i][k];
309 float Xi = euclidDists[i][k];
311 numerator += ((Xi - averageEuclid[k]) * (Yi - averageUser[k]));
312 denomTerm1 += ((Xi - averageEuclid[k]) * (Xi - averageEuclid[k]));
313 denomTerm2 += ((Yi - averageUser[k]) * (Yi - averageUser[k]));
// NOTE(review): denom is 0 when either matrix has no variation (or fewer than
// two rows); the division then yields a non-finite value rather than an error.
317 double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
318 double r = numerator / denom;
322 catch(exception& e) {
// NOTE(review): the method name in this error tag is "calculateEuclidianDistance"
// although this is calcPearson - looks like a copy/paste slip; it will
// misattribute errors in the log.
323 m->errorOut(e, "PCOACommand", "calculateEuclidianDistance");
327 /*********************************************************************************************************************************/
// Consumes characters from stream `f` until one equal to `end` has been read.
// NOTE(review): the declaration and initial value of `d` are not visible in
// this listing, and no visible line uses `begin`.
// NOTE(review): there is no visible end-of-file check; if `end` never appears
// in the stream this loop looks like it would not terminate - confirm.
329 void PCOACommand::get_comment(istream& f, char begin, char end){
332 while(d != end){ d = f.get(); }
// Any exception raised above is reported with class and method name.
335 catch(exception& e) {
336 m->errorOut(e, "PCOACommand", "get_comment");
341 /*********************************************************************************************************************************/
// Reads a phylip-style distance matrix from `f` into `name_list` and `d`.
// `square_m` selects the layout: the branch for square_m == 2 walks only
// j < i (a lower triangle), while the other visible branch walks every
// j in [0, rank) (a full square matrix).
// Returns 0 when m->control_pressed is observed while reading.
// NOTE(review): most of this function is missing from the listing (reading of
// `rank`, sizing of `d`, the stream extractions, the statements controlled by
// the `if` lines, and the normal return value); only the visible lines are
// described here.
343 int PCOACommand::read_phylip(istream& f, int square_m, vector<string>& name_list, vector<vector<double> >& d){
// One name slot per matrix row.
351 name_list.resize(rank);
354 for(int i=0;i<rank;i++)
// Full-matrix layout: every row carries `rank` values.
356 for(int i=0;i<rank;i++) {
358 // cout << i << "\t" << name_list[i] << endl;
359 for(int j=0;j<rank;j++) {
360 if (m->control_pressed) { return 0; }
// -0.0 compares equal to 0.0, so this test is true for any zero value;
// presumably the hidden statement normalises negative zero - confirm.
363 if (d[i][j] == -0.0000)
368 else if(square_m == 2){
369 for(int i=0;i<rank;i++){
// Lower-triangle layout: row i carries i values, so row 0 has none.
374 for(int i=1;i<rank;i++){
377 for(int j=0;j<i;j++){
378 if (m->control_pressed) { return 0; }
380 if (d[i][j] == -0.0000)
// Any exception raised above is reported with class and method name.
389 catch(exception& e) {
390 m->errorOut(e, "PCOACommand", "read_phylip");
396 /*********************************************************************************************************************************/
// Loads the distance file `fname` into `names` and `D`. The file is opened
// once to detect whether the matrix is square or lower-triangular, then
// reopened and handed to read_phylip() with the detected layout code `q`.
// NOTE(review): the declarations of `f`, `numSeqs`, `name`, `d` and `q`, and
// the bodies of the layout tests, are not visible in this listing.
398 void PCOACommand::read(string fname, vector<string>& names, vector<vector<double> >& D){
401 m->openInputFile(fname, f);
403 //check whether matrix is square
// Read the sequence count and the first row's name, then inspect what follows.
409 f >> numSeqs >> name;
// Scan character by character until the layout can be decided or EOF is reached.
411 while((d=f.get()) != EOF){
413 //is d a number meaning its square
419 //is d a line return meaning its lower triangle
427 //reopen to get back to beginning
// NOTE(review): no close of `f` is visible between the two opens; presumably
// it happens on a line not shown - confirm.
428 m->openInputFile(fname, f);
// The return value of read_phylip is ignored here.
429 read_phylip(f, q, names, D);
// Any exception raised above is reported with class and method name.
431 catch(exception& e) {
432 m->errorOut(e, "PCOACommand", "read");
437 /*********************************************************************************************************************************/
// Computes G = C * A * C, where the off-diagonal entries of A are
// -0.5 * D[i][j]^2 + offset and C has 1 - 1/rank on the diagonal and -1/rank
// elsewhere. The result is written to `G`; `D` is read only.
// NOTE(review): `D` is taken by value, so the whole matrix is copied on every
// call; changing that would also require a header change.
// NOTE(review): the definition of `rank`, the sizing of the rows of A and C,
// and the diagonal of A are on lines not visible in this listing.
439 void PCOACommand::recenter(double offset, vector<vector<double> > D, vector<vector<double> >& G){
443 vector<vector<double> > A(rank);
444 vector<vector<double> > C(rank);
445 for(int i=0;i<rank;i++){
// Off-diagonal value of C.
450 double scale = -1.0000 / (double) rank;
452 for(int i=0;i<rank;i++){
454 C[i][i] = 1.0000 + scale;
// Fill both triangles at once; A and C are symmetric.
455 for(int j=i+1;j<rank;j++){
456 A[i][j] = A[j][i] = -0.5 * D[i][j] * D[i][j] + offset;
457 C[i][j] = C[j][i] = scale;
// Left-multiply, then right-multiply, by C.
461 A = linearCalc.matrix_mult(C,A);
462 G = linearCalc.matrix_mult(A,C);
// Any exception raised above is reported with class and method name.
464 catch(exception& e) {
465 m->errorOut(e, "PCOACommand", "recenter");
471 /*********************************************************************************************************************************/
// Scales the columns of `G` by the square root of the matching entry of `d`
// and writes two tab-separated files:
//   <fnameRoot>pcoa          - one row per name with its value on every axis
//   <fnameRoot>pcoa.loadings - each axis with d[i] as a percentage of dsum
// Both file names are appended to outputNames and to the matching outputTypes
// entry. `G` is modified in place, so the caller sees the scaled values.
// NOTE(review): assumes G is at least rank x rank and d has at least `rank`
// entries, where rank = name_list.size(); nothing visible checks this.
// NOTE(review): neither stream is checked for a successful open, and no
// explicit close is visible in this listing.
473 void PCOACommand::output(string fnameRoot, vector<string> name_list, vector<vector<double> >& G, vector<double> d) {
475 int rank = name_list.size();
// NOTE(review): dsum starts at 0 and no visible line adds to it; presumably a
// line missing from this listing accumulates d[i] - confirm, since dsum is
// used as a divisor below.
476 double dsum = 0.0000;
477 for(int i=0;i<rank;i++){
479 for(int j=0;j<rank;j++){
// A negative d[j] has no real square root, so that column is zeroed instead.
480 if(d[j] >= 0) { G[i][j] *= pow(d[j],0.5); }
481 else { G[i][j] = 0.00000; }
// Both files are truncated if they already exist and use fixed-point output.
485 ofstream pcaData((fnameRoot+"pcoa").c_str(), ios::trunc);
486 pcaData.setf(ios::fixed, ios::floatfield);
487 pcaData.setf(ios::showpoint);
488 outputNames.push_back(fnameRoot+"pcoa");
489 outputTypes["pcoa"].push_back(fnameRoot+"pcoa");
491 ofstream pcaLoadings((fnameRoot+"pcoa.loadings").c_str(), ios::trunc);
492 pcaLoadings.setf(ios::fixed, ios::floatfield);
493 pcaLoadings.setf(ios::showpoint);
494 outputNames.push_back(fnameRoot+"pcoa.loadings");
495 outputTypes["loadings"].push_back(fnameRoot+"pcoa.loadings");
// Loadings file: header, then one line per axis (1-based).
497 pcaLoadings << "axis\tloading\n";
498 for(int i=0;i<rank;i++){
// A zero dsum would make this a division by zero.
499 pcaLoadings << i+1 << '\t' << d[i] * 100.0 / dsum << endl;
// Coordinates file header: a leading tab, then axis1 .. axis<rank>.
503 for(int i=0;i<rank;i++){
504 pcaData << '\t' << "axis" << i+1;
// One row per name; every value is followed by a tab, including the last.
508 for(int i=0;i<rank;i++){
509 pcaData << name_list[i] << '\t';
510 for(int j=0;j<rank;j++){
511 pcaData << G[i][j] << '\t';
// Any exception raised above is reported with class and method name.
516 catch(exception& e) {
517 m->errorOut(e, "PCOACommand", "output");
522 /*********************************************************************************************************************************/