]> git.donarmstrong.com Git - mothur.git/blob - pcacommand.cpp
changing command name classify.shared to classifyrf.shared
[mothur.git] / pcacommand.cpp
1 /*
2  *  pcacommand.cpp
3  *  mothur
4  *
5  *  Created by westcott on 1/7/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "pcacommand.h"
11 #include "inputdata.h"
12
13 //**********************************************************************************************************************
14 vector<string> PCACommand::setParameters(){     
15         try {
16                 CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none","pca-loadings",false,false,true); parameters.push_back(pshared);        
17                 CommandParameter prelabund("relabund", "InputTypes", "", "", "LRSS", "LRSS", "none","pca-loadings",false,false,true); parameters.push_back(prelabund);
18                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
19                 CommandParameter pmetric("metric", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmetric);
20                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
21                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
22                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
23                 
24                 vector<string> myArray;
25                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
26                 return myArray;
27         }
28         catch(exception& e) {
29                 m->errorOut(e, "PCACommand", "setParameters");
30                 exit(1);
31         }
32 }
33 //**********************************************************************************************************************
34 string PCACommand::getHelpString(){     
35         try {
36                 string helpString = "";
37                 helpString += "The pca command parameters are shared, relabund, label, groups and metric.  shared or relabund is required unless you have a valid current file."; 
38                 helpString += "The label parameter is used to analyze specific labels in your input. Default is the first label in your shared or relabund file. Multiple labels may be separated by dashes.\n";
39                 helpString += "The groups parameter allows you to specify which groups you would like analyzed. Groupnames are separated by dashes.\n";
40                 helpString += "The metric parameter allows you to indicate if would like the pearson correlation coefficient calculated. Default=True";
41                 helpString += "Example pca(groups=yourGroups).\n";
42                 helpString += "Example pca(groups=A-B-C).\n";
43                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
44                 return helpString;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "PCACommand", "getHelpString");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 string PCACommand::getOutputPattern(string type) {
53     try {
54         string pattern = "";
55         
56         if (type == "pca") {  pattern = "[filename],[distance],pca.axes"; } 
57         else if (type == "loadings") {  pattern = "[filename],[distance],pca.loadings"; } 
58         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
59         
60         return pattern;
61     }
62     catch(exception& e) {
63         m->errorOut(e, "PCACommand", "getOutputPattern");
64         exit(1);
65     }
66 }
67
68 //**********************************************************************************************************************
69 PCACommand::PCACommand(){       
70         try {
71                 abort = true; calledHelp = true; 
72                 setParameters();
73                 vector<string> tempOutNames;
74                 outputTypes["pca"] = tempOutNames;
75                 outputTypes["loadings"] = tempOutNames;
76         }
77         catch(exception& e) {
78                 m->errorOut(e, "PCACommand", "PCACommand");
79                 exit(1);
80         }
81 }
82 //**********************************************************************************************************************
83
84 PCACommand::PCACommand(string option)  {
85         try {
86                 abort = false; calledHelp = false;   
87                 
88                 //allow user to run help
89                 if(option == "help") { help(); abort = true; calledHelp = true; }
90                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
91                 
92                 else {
93                         vector<string> myArray = setParameters();
94                         
95                         OptionParser parser(option);
96                         map<string, string> parameters = parser. getParameters();
97                         
98                         ValidParameters validParameter;
99                         map<string, string>::iterator it;
100                         
101                         //check to make sure all parameters are valid for command
102                         for (it = parameters.begin(); it != parameters.end(); it++) { 
103                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
104                         }
105         
106                         //initialize outputTypes
107                         vector<string> tempOutNames;
108                         outputTypes["pca"] = tempOutNames;
109                         outputTypes["loadings"] = tempOutNames;
110                         
111                         //if the user changes the input directory command factory will send this info to us in the output parameter 
112                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
113                         if (inputDir == "not found"){   inputDir = "";          }
114                         else {
115                                 string path;
116                                 it = parameters.find("shared");
117                                 //user has given a template file
118                                 if(it != parameters.end()){ 
119                                         path = m->hasPath(it->second);
120                                         //if the user has not given a path then, add inputdir. else leave path alone.
121                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
122                                 }
123                                 
124                                 it = parameters.find("relabund");
125                                 //user has given a template file
126                                 if(it != parameters.end()){ 
127                                         path = m->hasPath(it->second);
128                                         //if the user has not given a path then, add inputdir. else leave path alone.
129                                         if (path == "") {       parameters["relabund"] = inputDir + it->second;         }
130                                 }
131                         }
132                         
133                         //check for required parameters
134                         sharedfile = validParameter.validFile(parameters, "shared", true);
135                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
136                         else if (sharedfile == "not found") { sharedfile = ""; }
137                         else {  mode = "sharedfile"; inputFile = sharedfile; m->setSharedFile(sharedfile); }
138                         
139                         relabundfile = validParameter.validFile(parameters, "relabund", true);
140                         if (relabundfile == "not open") { relabundfile = ""; abort = true; }    
141                         else if (relabundfile == "not found") { relabundfile = ""; }
142                         else {  mode = "relabund"; inputFile = relabundfile; m->setRelAbundFile(relabundfile); }
143                         
144                         
145                         if ((sharedfile == "") && (relabundfile == "")) { 
146                                 //is there are current file available for any of these?
147                                 //give priority to shared, then list, then rabund, then sabund
148                                 //if there is a current shared file, use it
149                                 sharedfile = m->getSharedFile(); 
150                                 if (sharedfile != "") { inputFile = sharedfile; mode = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
151                                 else { 
152                                         relabundfile = m->getRelAbundFile(); 
153                                         if (relabundfile != "") { inputFile = relabundfile; mode = "relabund"; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
154                                         else { 
155                                                 m->mothurOut("No valid current files. You must provide a relabund or shared file."); m->mothurOutEndLine(); 
156                                                 abort = true;
157                                         }
158                                 }
159                         }
160                                 
161                         //if the user changes the output directory command factory will send this info to us in the output parameter 
162                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
163                                 outputDir = ""; 
164                                 outputDir += m->hasPath(inputFile); //if user entered a file with a path then preserve it       
165                         }
166                                                 
167                         string temp = validParameter.validFile(parameters, "metric", false);    if (temp == "not found"){       temp = "T";                             }
168                         metric = m->isTrue(temp); 
169                         
170                         label = validParameter.validFile(parameters, "label", false);                   
171                         if (label == "not found") { label = ""; if(labels.size() == 0) {  m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); } }
172                         else { m->splitAtDash(label, labels); }
173                         
174                         groups = validParameter.validFile(parameters, "groups", false);                 
175                         if (groups == "not found") { groups = "";  }
176                         else { m->splitAtDash(groups, Groups);  }                       
177                         m->setGroups(Groups);                   
178                         
179                 }
180                 
181         }
182         catch(exception& e) {
183                 m->errorOut(e, "PCACommand", "PCACommand");
184                 exit(1);
185         }
186 }
187 //**********************************************************************************************************************
188 int PCACommand::execute(){
189         try {
190                 
191                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
192                 
193                 cout.setf(ios::fixed, ios::floatfield);
194                 cout.setf(ios::showpoint);
195                 cerr.setf(ios::fixed, ios::floatfield);
196                 cerr.setf(ios::showpoint);
197                 
198                 //get first line of shared file
199                 vector< vector<double> > matrix;
200                 InputData* input;
201                 if (mode == "sharedfile")                       {  
202                         input = new InputData(inputFile, "sharedfile");
203                 }else if (mode == "relabund")   { 
204                         input = new InputData(inputFile, "relabund");
205                 }else {  m->mothurOut("[ERROR]: filetype not recognized."); m->mothurOutEndLine();  return 0; }
206                 
207                 vector<SharedRAbundFloatVector*> lookupFloat = input->getSharedRAbundFloatVectors();
208                 string lastLabel = lookupFloat[0]->getLabel();
209                         
210                 set<string> processedLabels;
211                 set<string> userLabels = labels;
212                 
213                 //if the user gave no labels, then use the first one read
214                 if (labels.size() == 0) { 
215                         label = lastLabel;  
216                         
217                         process(lookupFloat);
218                 }
219                 
220                 //as long as you are not at the end of the file or done wih the lines you want
221                 while((lookupFloat[0] != NULL) && (userLabels.size() != 0)) {
222                         
223                         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  } delete input; for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }  lookupFloat.clear(); return 0;  }
224                         
225                         if(labels.count(lookupFloat[0]->getLabel()) == 1){
226                                 processedLabels.insert(lookupFloat[0]->getLabel());
227                                 userLabels.erase(lookupFloat[0]->getLabel());
228                                 
229                                 process(lookupFloat);
230                         }
231                         
232                         if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
233                                 string saveLabel = lookupFloat[0]->getLabel();
234                                 
235                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }  lookupFloat.clear();
236                                 lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
237                                 
238                                 process(lookupFloat);
239                                 
240                                 processedLabels.insert(lookupFloat[0]->getLabel());
241                                 userLabels.erase(lookupFloat[0]->getLabel());
242                                 
243                                 //restore real lastlabel to save below
244                                 lookupFloat[0]->setLabel(saveLabel);
245                         }
246                         
247                         lastLabel = lookupFloat[0]->getLabel();                 
248                         
249                         //get next line to process
250                         //prevent memory leak
251                         for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } lookupFloat.clear();
252                         lookupFloat = input->getSharedRAbundFloatVectors();
253                 }
254                 
255                 
256                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } delete input; for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } lookupFloat.clear(); return 0;  }
257                 
258                 //output error messages about any remaining user labels
259                 set<string>::iterator it;
260                 bool needToRun = false;
261                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
262                         m->mothurOut("Your file does not include the label " + *it); 
263                         if (processedLabels.count(lastLabel) != 1) {
264                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
265                                 needToRun = true;
266                         }else {
267                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
268                         }
269                 }
270                 
271                 //run last label if you need to
272                 if (needToRun == true)  {
273                         for (int i = 0; i < lookupFloat.size(); i++) {  if (lookupFloat[i] != NULL) {   delete lookupFloat[i];  } }  lookupFloat.clear();
274                         lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
275                         
276                         process(lookupFloat);
277                         
278                         for (int i = 0; i < lookupFloat.size(); i++) {  if (lookupFloat[i] != NULL) {   delete lookupFloat[i];  } } lookupFloat.clear();
279                 }       
280                 
281                 for (int i = 0; i < lookupFloat.size(); i++) {  if (lookupFloat[i] != NULL) {   delete lookupFloat[i];  } } lookupFloat.clear();
282                 delete input;
283                 
284                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
285                 
286                 m->mothurOutEndLine();
287                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
288                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
289                 m->mothurOutEndLine();
290                 
291                 return 0;
292         }
293         catch(exception& e) {
294                 m->errorOut(e, "PCACommand", "execute");
295                 exit(1);
296         }
297 }
298
299 /**********************************************************************************************************************
300 vector< vector<double> > PCACommand::createMatrix(vector<SharedRAbundFloatVector*> lookupFloat){
301         try {
302                 vector< vector<double> > matrix; matrix.resize(lookupFloat.size());
303                 
304                 //fill matrix with shared files relative abundances
305                 for (int i = 0; i < lookupFloat.size(); i++) {
306                         for (int j = 0; j < lookupFloat[i]->getNumBins(); j++) {
307                                 matrix[i].push_back(lookupFloat[i]->getAbundance(j));
308                         }
309                 }
310                 
311                 vector< vector<double> > transposeMatrix; transposeMatrix.resize(matrix[0].size());
312                 for (int i = 0; i < transposeMatrix.size(); i++) {
313                         for (int j = 0; j < matrix.size(); j++) {
314                                 transposeMatrix[i].push_back(matrix[j][i]);
315                         }
316                 }
317                 
318                 matrix = linearCalc.matrix_mult(matrix, transposeMatrix);
319                 
320                 return matrix;
321         }
322         catch(exception& e) {
323                 m->errorOut(e, "PCACommand", "createMatrix");   
324                 exit(1);
325         }
326 }*/
327 //**********************************************************************************************************************
328
329 int PCACommand::process(vector<SharedRAbundFloatVector*>& lookupFloat){
330         try {
331                 m->mothurOut("\nProcessing " + lookupFloat[0]->getLabel()); m->mothurOutEndLine();
332         
333                 int numOTUs = lookupFloat[0]->getNumBins();
334                 int numSamples = lookupFloat.size();
335                 
336                 vector< vector<double> > matrix(numSamples);
337                 vector<double> colMeans(numOTUs);
338                 
339                 //fill matrix with shared relative abundances, re-center
340                 for (int i = 0; i < lookupFloat.size(); i++) {
341                         matrix[i].resize(numOTUs, 0);
342                         
343                         for (int j = 0; j < numOTUs; j++) {
344                                 matrix[i][j] = lookupFloat[i]->getAbundance(j);
345                                 colMeans[j] += matrix[i][j];
346                         }
347                 }
348                 
349
350                 for(int j=0;j<numOTUs;j++){
351                         colMeans[j] = colMeans[j] / (double)numSamples;
352                 }
353                 
354                 vector<vector<double> > centered = matrix;
355                 for(int i=0;i<numSamples;i++){
356                         for(int j=0;j<numOTUs;j++){
357                                 centered[i][j] = centered[i][j] - colMeans[j];                          
358                         }
359                 }
360
361                 
362                 vector< vector<double> > transpose(numOTUs);
363                 for (int i = 0; i < numOTUs; i++) {
364                         transpose[i].resize(numSamples, 0);
365                         
366                         for (int j = 0; j < numSamples; j++) {
367                                 transpose[i][j] = centered[j][i];
368                         }
369                 }
370
371                 vector<vector<double> > crossProduct = linearCalc.matrix_mult(transpose, centered);     
372                 
373                 vector<double> d;
374                 vector<double> e;
375
376                 linearCalc.tred2(crossProduct, d, e);           if (m->control_pressed) { return 0; }
377                 linearCalc.qtli(d, e, crossProduct);            if (m->control_pressed) { return 0; }
378                 
379                 vector<vector<double> > X = linearCalc.matrix_mult(centered, crossProduct);
380                 
381                 if (m->control_pressed) { return 0; }
382                 
383                 string fbase = outputDir + m->getRootName(m->getSimpleName(inputFile));
384                 //string outputFileName = fbase + lookupFloat[0]->getLabel();
385                 output(fbase, lookupFloat[0]->getLabel(), m->getGroups(), X, d);
386                 
387                 if (metric) {   
388                         
389                         vector<vector<double> > observedEuclideanDistance = linearCalc.getObservedEuclideanDistance(centered);
390                         
391                         for (int i = 1; i < 4; i++) {
392                                 
393                                 vector< vector<double> > PCAEuclidDists = linearCalc.calculateEuclidianDistance(X, i); //G is the pca file
394                                 
395                                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
396
397                                 double corr = linearCalc.calcPearson(PCAEuclidDists, observedEuclideanDistance);
398                                                                 
399                                 m->mothurOut("Rsq " + toString(i) + " axis: " + toString(corr * corr)); m->mothurOutEndLine();
400                                 
401                                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
402                         }
403                 }
404                 
405                 return 0;
406         }
407         catch(exception& e) {
408                 m->errorOut(e, "PCACommand", "process");        
409                 exit(1);
410         }
411 }
412 /*********************************************************************************************************************************/
413
414 void PCACommand::output(string fbase, string label, vector<string> name_list, vector<vector<double> >& G, vector<double> d) {
415         try {
416
417                 int numEigenValues = d.size();
418                 double dsum = 0.0000;
419                 for(int i=0;i<numEigenValues;i++){
420                         dsum += d[i];
421                 }
422                 
423                 ofstream pcaData;
424         map<string, string> variables; 
425         variables["[filename]"] = fbase;
426         variables["[distance]"] = label;
427         string pcaFileName = getOutputFileName("pca",variables);
428         m->openOutputFile(pcaFileName, pcaData);
429                 pcaData.setf(ios::fixed, ios::floatfield);
430                 pcaData.setf(ios::showpoint);   
431                 outputNames.push_back(pcaFileName);
432                 outputTypes["pca"].push_back(pcaFileName);
433                 
434                 ofstream pcaLoadings;
435         string loadingsFilename = getOutputFileName("loadings",variables);
436          m->openOutputFile(loadingsFilename, pcaLoadings);
437                 pcaLoadings.setf(ios::fixed, ios::floatfield);
438                 pcaLoadings.setf(ios::showpoint);
439                 outputNames.push_back(loadingsFilename);
440                 outputTypes["loadings"].push_back(loadingsFilename);    
441                 
442                 pcaLoadings << "axis\tloading\n";
443                 for(int i=0;i<numEigenValues;i++){
444                         pcaLoadings << i+1 << '\t' << d[i] * 100.0 / dsum << endl;
445                 }
446                 
447                 pcaData << "group";
448                 for(int i=0;i<numEigenValues;i++){
449                         pcaData << '\t' << "axis" << i+1;
450                 }
451                 pcaData << endl;
452                 
453                 for(int i=0;i<name_list.size();i++){
454                         pcaData << name_list[i] << '\t';
455                         for(int j=0;j<numEigenValues;j++){
456                                 pcaData << G[i][j] << '\t';
457                         }
458                         pcaData << endl;
459                 }
460         }
461         catch(exception& e) {
462                 m->errorOut(e, "PCACommand", "output");
463                 exit(1);
464         }
465 }
466 /*********************************************************************************************************************************/
467
468