]> git.donarmstrong.com Git - mothur.git/blob - pcoacommand.cpp
finished chimera.slayer adding trim parameter, added persample parameter to sub.sampl...
[mothur.git] / pcoacommand.cpp
1
2 /*
3  *  pcacommand.cpp
4  *  Mothur
5  *
6  *  Created by westcott on 1/4/10.
7  *  Copyright 2010 Schloss Lab. All rights reserved.
8  *
9  */
10
11 #include "pcoacommand.h"
12
13 //**********************************************************************************************************************
14 vector<string> PCOACommand::getValidParameters(){       
15         try {
16                 string Array[] =  {"phylip", "metric","outputdir","inputdir"};
17                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
18                 return myArray;
19         }
20         catch(exception& e) {
21                 m->errorOut(e, "PCOACommand", "getValidParameters");
22                 exit(1);
23         }
24 }
25 //**********************************************************************************************************************
26 PCOACommand::PCOACommand(){     
27         try {
28                 abort = true;
29                 //initialize outputTypes
30                 vector<string> tempOutNames;
31                 outputTypes["pcoa"] = tempOutNames;
32                 outputTypes["loadings"] = tempOutNames;
33                 outputTypes["corr"] = tempOutNames;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "PCOACommand", "PCOACommand");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 vector<string> PCOACommand::getRequiredParameters(){    
42         try {
43                 string Array[] =  {"phylip"};
44                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
45                 return myArray;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "PCOACommand", "getRequiredParameters");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 vector<string> PCOACommand::getRequiredFiles(){ 
54         try {
55                 vector<string> myArray;
56                 return myArray;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "PCOACommand", "getRequiredFiles");
60                 exit(1);
61         }
62 }
63 //**********************************************************************************************************************
64
65 PCOACommand::PCOACommand(string option)  {
66         try {
67                 abort = false;
68                 
69                 //allow user to run help
70                 if(option == "help") { help(); abort = true; }
71                 
72                 else {
73                         //valid paramters for this command
74                         string Array[] =  {"phylip","metric","outputdir", "inputdir"};
75                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
76                         
77                         OptionParser parser(option);
78                         map<string, string> parameters = parser. getParameters();
79                         
80                         ValidParameters validParameter;
81                         map<string, string>::iterator it;
82                 
83                         //check to make sure all parameters are valid for command
84                         for (it = parameters.begin(); it != parameters.end(); it++) { 
85                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
86                         }
87                         //if the user changes the input directory command factory will send this info to us in the output parameter 
88                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
89                         if (inputDir == "not found"){   inputDir = "";          }
90                         else {
91                                 string path;
92                                 it = parameters.find("phylip");
93                                 //user has given a template file
94                                 if(it != parameters.end()){ 
95                                         path = m->hasPath(it->second);
96                                         //if the user has not given a path then, add inputdir. else leave path alone.
97                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
98                                 }
99                         }
100                         
101                         //initialize outputTypes
102                         vector<string> tempOutNames;
103                         outputTypes["pcoa"] = tempOutNames;
104                         outputTypes["loadings"] = tempOutNames;
105                         outputTypes["corr"] = tempOutNames;
106                         
107                         //required parameters
108                         phylipfile = validParameter.validFile(parameters, "phylip", true);
109                         if (phylipfile == "not open") { abort = true; }
110                         else if (phylipfile == "not found") { phylipfile = ""; abort = true; }  
111                         else {  filename = phylipfile;  }
112                         
113                         //if the user changes the output directory command factory will send this info to us in the output parameter 
114                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
115                                 outputDir = ""; 
116                                 outputDir += m->hasPath(phylipfile); //if user entered a file with a path then preserve it      
117                         }
118                         
119                         //error checking on files       
120                         if (phylipfile == "")   { m->mothurOut("You must provide a distance file before running the pcoa command."); m->mothurOutEndLine(); abort = true; }             
121                 
122                         string temp = validParameter.validFile(parameters, "metric", false);    if (temp == "not found"){       temp = "T";                             }
123                         metric = m->isTrue(temp); 
124                 }
125
126         }
127         catch(exception& e) {
128                 m->errorOut(e, "PCOACommand", "PCOACommand");
129                 exit(1);
130         }
131 }
132 //**********************************************************************************************************************
133 void PCOACommand::help(){
134         try {
135         
136                 m->mothurOut("The pcoa command parameters are phylip and metric"); m->mothurOutEndLine();
137                 m->mothurOut("The phylip parameter allows you to enter your distance file."); m->mothurOutEndLine();
138                 m->mothurOut("The metric parameter allows indicate you if would like the pearson correlation coefficient calculated. Default=True"); m->mothurOutEndLine();
139                 m->mothurOut("Example pcoa(phylip=yourDistanceFile).\n");
140                 m->mothurOut("Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourDistanceFile).\n\n");
141         }
142         catch(exception& e) {
143                 m->errorOut(e, "PCOACommand", "help");
144                 exit(1);
145         }
146 }
147 //**********************************************************************************************************************
148 PCOACommand::~PCOACommand(){}
149 //**********************************************************************************************************************
150 int PCOACommand::execute(){
151         try {
152         
153                 if (abort == true) { return 0; }
154                 
155                 cout.setf(ios::fixed, ios::floatfield);
156                 cout.setf(ios::showpoint);
157                 cerr.setf(ios::fixed, ios::floatfield);
158                 cerr.setf(ios::showpoint);
159                 
160                 vector<string> names;
161                 vector<vector<double> > D;
162         
163                 fbase = outputDir + m->getRootName(m->getSimpleName(filename));
164                 
165                 read(filename, names, D);
166                 
167                 if (m->control_pressed) { return 0; }
168         
169                 double offset = 0.0000;
170                 vector<double> d;
171                 vector<double> e;
172                 vector<vector<double> > G = D;
173                 vector<vector<double> > copy_G;
174                                 
175                 m->mothurOut("\nProcessing...\n\n");
176                 
177                 for(int count=0;count<2;count++){
178                         recenter(offset, D, G);                                 if (m->control_pressed) { return 0; }
179                         linearCalc.tred2(G, d, e);                              if (m->control_pressed) { return 0; }
180                         linearCalc.qtli(d, e, G);                               if (m->control_pressed) { return 0; }
181                         offset = d[d.size()-1];
182                         if(offset > 0.0) break;
183                 } 
184                 
185                 if (m->control_pressed) { return 0; }
186                 
187                 output(fbase, names, G, d);
188                 
189                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } return 0; }
190                 
191                 if (metric) {   
192                         
193                         for (int i = 1; i < 4; i++) {
194                                                         
195                                 vector< vector<double> > EuclidDists = calculateEuclidianDistance(G, i); //G is the pcoa file
196                                 
197                                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } return 0; }
198                                 
199                                 double corr = calcPearson(EuclidDists, D); //G is the pcoa file, D is the users distance matrix
200                                 
201                                 m->mothurOut("Pearson's coefficient using " + toString(i) + " axis: " + toString(corr)); m->mothurOutEndLine();
202                                 
203                                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } return 0; }
204                         }
205                 }
206                 
207                 m->mothurOutEndLine();
208                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
209                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
210                 m->mothurOutEndLine();
211                 
212                 return 0;
213         }
214         catch(exception& e) {
215                 m->errorOut(e, "PCOACommand", "execute");
216                 exit(1);
217         }
218 }
219 /*********************************************************************************************************************************/
220 vector< vector<double> > PCOACommand::calculateEuclidianDistance(vector< vector<double> >& axes, int dimensions){
221         try {
222                 //make square matrix
223                 vector< vector<double> > dists; dists.resize(axes.size());
224                 for (int i = 0; i < dists.size(); i++) {  dists[i].resize(axes.size(), 0.0); }
225                         
226                 if (dimensions == 1) { //one dimension calc = abs(x-y)
227                         
228                         for (int i = 0; i < dists.size(); i++) {
229                                 
230                                 if (m->control_pressed) { return dists; }
231                                 
232                                 for (int j = 0; j < i; j++) {
233                                         dists[i][j] = abs(axes[i][0] - axes[j][0]);
234                                         dists[j][i] = dists[i][j];
235                                 }
236                         }
237                         
238                 }else if (dimensions == 2) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2)
239                         
240                         for (int i = 0; i < dists.size(); i++) {
241                                 
242                                 if (m->control_pressed) { return dists; }
243                                 
244                                 for (int j = 0; j < i; j++) {
245                                         double firstDim = ((axes[i][0] - axes[j][0]) * (axes[i][0] - axes[j][0]));
246                                         double secondDim = ((axes[i][1] - axes[j][1]) * (axes[i][1] - axes[j][1]));
247
248                                         dists[i][j] = sqrt((firstDim + secondDim));
249                                         dists[j][i] = dists[i][j];
250                                 }
251                         }
252                         
253                 }else if (dimensions == 3) { //two dimension calc = sqrt ((x1 - y1)^2 + (x2 - y2)^2 + (x3 - y3)^2)
254                         
255                         for (int i = 0; i < dists.size(); i++) {
256                                 
257                                 if (m->control_pressed) { return dists; }
258                                 
259                                 for (int j = 0; j < i; j++) {
260                                         double firstDim = ((axes[i][0] - axes[j][0]) * (axes[i][0] - axes[j][0]));
261                                         double secondDim = ((axes[i][1] - axes[j][1]) * (axes[i][1] - axes[j][1]));
262                                         double thirdDim = ((axes[i][2] - axes[j][2]) * (axes[i][2] - axes[j][2]));
263                                         
264                                         dists[i][j] = sqrt((firstDim + secondDim + thirdDim));
265                                         dists[j][i] = dists[i][j];
266                                 }
267                         }
268                         
269                 }else { m->mothurOut("[ERROR]: too many dimensions, aborting."); m->mothurOutEndLine(); m->control_pressed = true; }
270                 
271                 return dists;
272         }
273         catch(exception& e) {
274                 m->errorOut(e, "PCOACommand", "calculateEuclidianDistance");
275                 exit(1);
276         }
277 }
278 /*********************************************************************************************************************************/
279 double PCOACommand::calcPearson(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
280         try {
281                 
282                 //find average for - X
283                 vector<float> averageEuclid; averageEuclid.resize(euclidDists.size(), 0.0);
284                 for (int i = 0; i < euclidDists.size(); i++) {
285                         for (int j = 0; j < euclidDists[i].size(); j++) {
286                                 averageEuclid[i] += euclidDists[i][j];  
287                         }
288                 }
289                 for (int i = 0; i < averageEuclid.size(); i++) {  averageEuclid[i] = averageEuclid[i] / (float) euclidDists.size();   }
290                 
291                 //find average for - Y
292                 vector<float> averageUser; averageUser.resize(userDists.size(), 0.0);
293                 for (int i = 0; i < userDists.size(); i++) {
294                         for (int j = 0; j < userDists[i].size(); j++) {
295                                 averageUser[i] += userDists[i][j];  
296                         }
297                 }
298                 for (int i = 0; i < averageUser.size(); i++) {  averageUser[i] = averageUser[i] / (float) userDists.size();  }
299                 
300                 double numerator = 0.0;
301                 double denomTerm1 = 0.0;
302                 double denomTerm2 = 0.0;
303                 
304                 for (int i = 0; i < euclidDists.size(); i++) {
305                         
306                         for (int k = 0; k < i; k++) {
307                                 
308                                 float Yi = userDists[i][k];
309                                 float Xi = euclidDists[i][k];
310                                         
311                                 numerator += ((Xi - averageEuclid[k]) * (Yi - averageUser[k]));
312                                 denomTerm1 += ((Xi - averageEuclid[k]) * (Xi - averageEuclid[k]));
313                                 denomTerm2 += ((Yi - averageUser[k]) * (Yi - averageUser[k]));
314                         }
315                 }
316                 
317                 double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
318                 double r = numerator / denom;
319                 
320                 return r;
321         }
322         catch(exception& e) {
323                 m->errorOut(e, "PCOACommand", "calculateEuclidianDistance");
324                 exit(1);
325         }
326 }
327 /*********************************************************************************************************************************/
328
329 void PCOACommand::get_comment(istream& f, char begin, char end){
330         try {
331                 char d=f.get();
332                 while(d != end){        d = f.get();    }
333                 d = f.peek();
334         }
335         catch(exception& e) {
336                 m->errorOut(e, "PCOACommand", "get_comment");
337                 exit(1);
338         }
339 }       
340
341 /*********************************************************************************************************************************/
342
343 int PCOACommand::read_phylip(istream& f, int square_m, vector<string>& name_list, vector<vector<double> >& d){
344         try {
345                 //     int count1=0;
346                 //     int count2=0;
347                 
348                 int rank;
349                 f >> rank;
350                 
351                 name_list.resize(rank);
352                 d.resize(rank);
353                 if(square_m == 1){
354                         for(int i=0;i<rank;i++)
355                                 d[i].resize(rank);
356                         for(int i=0;i<rank;i++) {
357                                 f >> name_list[i];
358                                 //                      cout << i << "\t" << name_list[i] << endl;
359                                 for(int j=0;j<rank;j++) {
360                                         if (m->control_pressed) { return 0; }
361                                         
362                                         f >> d[i][j];
363                                         if (d[i][j] == -0.0000)
364                                                 d[i][j] = 0.0000;
365                                 }
366                         }
367                 }
368                 else if(square_m == 2){
369                         for(int i=0;i<rank;i++){
370                                 d[i].resize(rank);
371                         }
372                         d[0][0] = 0.0000;
373                         f >> name_list[0];
374                         for(int i=1;i<rank;i++){
375                                 f >> name_list[i];
376                                 d[i][i]=0.0000;
377                                 for(int j=0;j<i;j++){
378                                         if (m->control_pressed) { return 0; }
379                                         f >> d[i][j];
380                                         if (d[i][j] == -0.0000)
381                                                 d[i][j] = 0.0000;
382                                         d[j][i]=d[i][j];
383                                 }
384                         }
385                 }
386                 
387                 return 0;
388         }
389         catch(exception& e) {
390                 m->errorOut(e, "PCOACommand", "read_phylip");
391                 exit(1);
392         }
393
394 }
395
396 /*********************************************************************************************************************************/
397
398 void PCOACommand::read(string fname, vector<string>& names, vector<vector<double> >& D){
399         try {
400                 ifstream f;
401                 m->openInputFile(fname, f);
402                         
403                 //check whether matrix is square
404                 char d;
405                 int q = 1;
406                 int numSeqs;
407                 string name;
408                 
409                 f >> numSeqs >> name; 
410                 
411                 while((d=f.get()) != EOF){
412                         
413                         //is d a number meaning its square
414                         if(isalnum(d)){ 
415                                 q = 1; 
416                                 break; 
417                         }
418                         
419                         //is d a line return meaning its lower triangle
420                         if(d == '\n'){
421                                 q = 2;
422                                 break;
423                         }
424                 }
425                 f.close();
426                 
427                 //reopen to get back to beginning
428                 m->openInputFile(fname, f);                     
429                 read_phylip(f, q, names, D);
430         }
431                 catch(exception& e) {
432                 m->errorOut(e, "PCOACommand", "read");
433                 exit(1);
434         }
435 }
436
437 /*********************************************************************************************************************************/
438
439 void PCOACommand::recenter(double offset, vector<vector<double> > D, vector<vector<double> >& G){
440         try {
441                 int rank = D.size();
442                 
443                 vector<vector<double> > A(rank);
444                 vector<vector<double> > C(rank);
445                 for(int i=0;i<rank;i++){
446                         A[i].resize(rank);
447                         C[i].resize(rank);
448                 }
449                 
450                 double scale = -1.0000 / (double) rank;
451                 
452                 for(int i=0;i<rank;i++){
453                         A[i][i] = 0.0000;
454                         C[i][i] = 1.0000 + scale;
455                         for(int j=i+1;j<rank;j++){
456                                 A[i][j] = A[j][i] = -0.5 * D[i][j] * D[i][j] + offset;
457                                 C[i][j] = C[j][i] = scale;
458                         }
459                 }
460                 
461                 A = linearCalc.matrix_mult(C,A);
462                 G = linearCalc.matrix_mult(A,C);
463         }
464         catch(exception& e) {
465                 m->errorOut(e, "PCOACommand", "recenter");
466                 exit(1);
467         }
468
469 }
470
471 /*********************************************************************************************************************************/
472
473 void PCOACommand::output(string fnameRoot, vector<string> name_list, vector<vector<double> >& G, vector<double> d) {
474         try {
475                 int rank = name_list.size();
476                 double dsum = 0.0000;
477                 for(int i=0;i<rank;i++){
478                         dsum += d[i];
479                         for(int j=0;j<rank;j++){
480                                 if(d[j] >= 0)   {       G[i][j] *= pow(d[j],0.5);       }
481                                 else                    {       G[i][j] = 0.00000;                      }
482                         }
483                 }
484                 
485                 ofstream pcaData((fnameRoot+"pcoa").c_str(), ios::trunc);
486                 pcaData.setf(ios::fixed, ios::floatfield);
487                 pcaData.setf(ios::showpoint);   
488                 outputNames.push_back(fnameRoot+"pcoa");
489                 outputTypes["pcoa"].push_back(fnameRoot+"pcoa");
490                 
491                 ofstream pcaLoadings((fnameRoot+"pcoa.loadings").c_str(), ios::trunc);
492                 pcaLoadings.setf(ios::fixed, ios::floatfield);
493                 pcaLoadings.setf(ios::showpoint);
494                 outputNames.push_back(fnameRoot+"pcoa.loadings");
495                 outputTypes["loadings"].push_back(fnameRoot+"pcoa.loadings");   
496                 
497                 pcaLoadings << "axis\tloading\n";
498                 for(int i=0;i<rank;i++){
499                         pcaLoadings << i+1 << '\t' << d[i] * 100.0 / dsum << endl;
500                 }
501                 
502                 pcaData << "group";
503                 for(int i=0;i<rank;i++){
504                         pcaData << '\t' << "axis" << i+1;
505                 }
506                 pcaData << endl;
507                 
508                 for(int i=0;i<rank;i++){
509                         pcaData << name_list[i] << '\t';
510                         for(int j=0;j<rank;j++){
511                                 pcaData << G[i][j] << '\t';
512                         }
513                         pcaData << endl;
514                 }
515         }
516         catch(exception& e) {
517                 m->errorOut(e, "PCOACommand", "output");
518                 exit(1);
519         }
520 }
521
522 /*********************************************************************************************************************************/
523