]> git.donarmstrong.com Git - mothur.git/blob - pcacommand.cpp
pca command
[mothur.git] / pcacommand.cpp
1 /*
2  *  pcacommand.cpp
3  *  mothur
4  *
5  *  Created by westcott on 1/7/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "pcacommand.h"
11 #include "inputdata.h"
12
13 //**********************************************************************************************************************
14 vector<string> PCACommand::getValidParameters(){        
15         try {
16                 string Array[] =  {"label", "groups","metric","outputdir","inputdir"};
17                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
18                 return myArray;
19         }
20         catch(exception& e) {
21                 m->errorOut(e, "PCACommand", "getValidParameters");
22                 exit(1);
23         }
24 }
25 //**********************************************************************************************************************
26 PCACommand::PCACommand(){       
27         try {
28                 abort = true;
29                 //initialize outputTypes
30                 vector<string> tempOutNames;
31                 outputTypes["pca"] = tempOutNames;
32                 outputTypes["loadings"] = tempOutNames;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "PCACommand", "PCACommand");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 vector<string> PCACommand::getRequiredParameters(){     
41         try {
42                 vector<string> myArray;
43                 return myArray;
44         }
45         catch(exception& e) {
46                 m->errorOut(e, "PCACommand", "getRequiredParameters");
47                 exit(1);
48         }
49 }
50 //**********************************************************************************************************************
51 vector<string> PCACommand::getRequiredFiles(){  
52         try {
53                 string Array[] =  {"shared","relabund","or"};
54                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
55                 return myArray;
56         }
57         catch(exception& e) {
58                 m->errorOut(e, "PCACommand", "getRequiredFiles");
59                 exit(1);
60         }
61 }
62 //**********************************************************************************************************************
63
64 PCACommand::PCACommand(string option)  {
65         try {
66                 abort = false;
67                 
68                 globaldata = GlobalData::getInstance();
69                 
70                 //allow user to run help
71                 if(option == "help") { help(); abort = true; }
72                 
73                 else {
74                         //valid paramters for this command
75                         string Array[] =  {"label","groups","metric","outputdir", "inputdir"};
76                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
77                         
78                         OptionParser parser(option);
79                         map<string, string> parameters = parser. getParameters();
80                         
81                         ValidParameters validParameter;
82                         map<string, string>::iterator it;
83                         
84                         //check to make sure all parameters are valid for command
85                         for (it = parameters.begin(); it != parameters.end(); it++) { 
86                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
87                         }
88                         //if the user changes the input directory command factory will send this info to us in the output parameter 
89                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              if (inputDir == "not found"){   inputDir = "";          }
90                         
91                         //initialize outputTypes
92                         vector<string> tempOutNames;
93                         outputTypes["pca"] = tempOutNames;
94                         outputTypes["loadings"] = tempOutNames;
95                         
96                         //make sure the user has already run the read.otu command
97                         if ((globaldata->getSharedFile() == "") && (globaldata->getRelAbundFile() == "")) {
98                                 m->mothurOut("You must read a list and a group, shared or relabund file before you can use the pca command."); m->mothurOutEndLine(); abort = true; 
99                         }
100                         
101                         if (globaldata->getSharedFile() != "")          { mode = "shared"; inputFile = globaldata->getSharedFile();             }
102                         if (globaldata->getRelAbundFile() != "")        { mode = "relabund"; inputFile = globaldata->getRelAbundFile(); }
103                         
104                         //if the user changes the output directory command factory will send this info to us in the output parameter 
105                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
106                                 outputDir = ""; 
107                                 outputDir += m->hasPath(inputFile); //if user entered a file with a path then preserve it       
108                         }
109                                                 
110                         string temp = validParameter.validFile(parameters, "metric", false);    if (temp == "not found"){       temp = "T";                             }
111                         metric = m->isTrue(temp); 
112                         
113                         label = validParameter.validFile(parameters, "label", false);                   
114                         if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine();  }   
115                         else { m->splitAtDash(label, labels); }
116                         
117                         groups = validParameter.validFile(parameters, "groups", false);                 
118                         if (groups == "not found") { groups = "";  }
119                         else { m->splitAtDash(groups, Groups);  }                       
120                         globaldata->Groups = Groups;                    
121                         
122                 }
123                 
124         }
125         catch(exception& e) {
126                 m->errorOut(e, "PCACommand", "PCACommand");
127                 exit(1);
128         }
129 }
130 //**********************************************************************************************************************
131 void PCACommand::help(){
132         try {
133                 m->mothurOut("The pca command can only be run after a successful read.otu command."); m->mothurOutEndLine();
134                 m->mothurOut("The pca command parameters are label, groups and metric. No parameters are required."); m->mothurOutEndLine();
135                 m->mothurOut("The label parameter is used to analyze specific labels in your input. Default is the first label in your shared or relabund file. Multpile labels may be separated by dashes.\n");
136                 m->mothurOut("The groups parameter allows you to specify which groups you would like analyzed. Groupnames are separated by dashes.\n");
137                 m->mothurOut("The metric parameter allows indicate you if would like the pearson correlation coefficient calculated. Default=True"); m->mothurOutEndLine();
138                 m->mothurOut("Example pca(groups=yourGroups).\n");
139                 m->mothurOut("Example pca(groups=A-B-C).\n");
140                 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
141         }
142         catch(exception& e) {
143                 m->errorOut(e, "PCACommand", "help");
144                 exit(1);
145         }
146 }
147 //**********************************************************************************************************************
148 PCACommand::~PCACommand(){}
149 //**********************************************************************************************************************
150 int PCACommand::execute(){
151         try {
152                 
153                 if (abort == true) { return 0; }
154                 
155                 cout.setf(ios::fixed, ios::floatfield);
156                 cout.setf(ios::showpoint);
157                 cerr.setf(ios::fixed, ios::floatfield);
158                 cerr.setf(ios::showpoint);
159                 
160                 //get first line of shared file
161                 vector< vector<double> > matrix;
162                 InputData* input;
163                 if (mode == "shared")                   {  
164                         input = new InputData(inputFile, "sharedfile");
165                 }else if (mode == "relabund")   { 
166                         input = new InputData(inputFile, "relabund");
167                 }else {  m->mothurOut("[ERROR]: filetype not recognized."); m->mothurOutEndLine();  return 0; }
168                 
169                 vector<SharedRAbundFloatVector*> lookupFloat = input->getSharedRAbundFloatVectors();
170                 string lastLabel = lookupFloat[0]->getLabel();
171                         
172                 set<string> processedLabels;
173                 set<string> userLabels = labels;
174                 
175                 //if the user gave no labels, then use the first one read
176                 if (labels.size() == 0) { 
177                         label = lastLabel;  
178                         
179                         process(lookupFloat);
180                 }
181                 
182                 //as long as you are not at the end of the file or done wih the lines you want
183                 while((lookupFloat[0] != NULL) && (userLabels.size() != 0)) {
184                         
185                         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());  } delete input; for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }  lookupFloat.clear(); return 0;  }
186                         
187                         if(labels.count(lookupFloat[0]->getLabel()) == 1){
188                                 processedLabels.insert(lookupFloat[0]->getLabel());
189                                 userLabels.erase(lookupFloat[0]->getLabel());
190                                 
191                                 process(lookupFloat);
192                         }
193                         
194                         if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
195                                 string saveLabel = lookupFloat[0]->getLabel();
196                                 
197                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }  lookupFloat.clear();
198                                 lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
199                                 
200                                 process(lookupFloat);
201                                 
202                                 processedLabels.insert(lookupFloat[0]->getLabel());
203                                 userLabels.erase(lookupFloat[0]->getLabel());
204                                 
205                                 //restore real lastlabel to save below
206                                 lookupFloat[0]->setLabel(saveLabel);
207                         }
208                         
209                         lastLabel = lookupFloat[0]->getLabel();                 
210                         
211                         //get next line to process
212                         //prevent memory leak
213                         for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } lookupFloat.clear();
214                         lookupFloat = input->getSharedRAbundFloatVectors();
215                 }
216                 
217                 
218                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } delete input; for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } lookupFloat.clear(); return 0;  }
219                 
220                 //output error messages about any remaining user labels
221                 set<string>::iterator it;
222                 bool needToRun = false;
223                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
224                         m->mothurOut("Your file does not include the label " + *it); 
225                         if (processedLabels.count(lastLabel) != 1) {
226                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
227                                 needToRun = true;
228                         }else {
229                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
230                         }
231                 }
232                 
233                 //run last label if you need to
234                 if (needToRun == true)  {
235                         for (int i = 0; i < lookupFloat.size(); i++) {  if (lookupFloat[i] != NULL) {   delete lookupFloat[i];  } }  lookupFloat.clear();
236                         lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
237                         
238                         process(lookupFloat);
239                         
240                         for (int i = 0; i < lookupFloat.size(); i++) {  if (lookupFloat[i] != NULL) {   delete lookupFloat[i];  } } lookupFloat.clear();
241                 }       
242                 
243                 for (int i = 0; i < lookupFloat.size(); i++) {  if (lookupFloat[i] != NULL) {   delete lookupFloat[i];  } } lookupFloat.clear();
244                 delete input;
245                 
246                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } return 0; }
247                 
248                 m->mothurOutEndLine();
249                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
250                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
251                 m->mothurOutEndLine();
252                 
253                 return 0;
254         }
255         catch(exception& e) {
256                 m->errorOut(e, "PCACommand", "execute");
257                 exit(1);
258         }
259 }
260 //**********************************************************************************************************************
261 vector< vector<double> > PCACommand::createMatrix(vector<SharedRAbundFloatVector*> lookupFloat){
262         try {
263                 vector< vector<double> > matrix; matrix.resize(lookupFloat.size());
264                 
265                 //fill matrix with shared files relative abundances
266                 for (int i = 0; i < lookupFloat.size(); i++) {
267                         for (int j = 0; j < lookupFloat[i]->getNumBins(); j++) {
268                                 matrix[i].push_back(lookupFloat[i]->getAbundance(j));
269                         }
270                 }
271                 
272                 vector< vector<double> > transposeMatrix; transposeMatrix.resize(matrix[0].size());
273                 for (int i = 0; i < transposeMatrix.size(); i++) {
274                         for (int j = 0; j < matrix.size(); j++) {
275                                 transposeMatrix[i].push_back(matrix[j][i]);
276                         }
277                 }
278                 
279                 matrix = linearCalc.matrix_mult(matrix, transposeMatrix);
280                 
281                 return matrix;
282         }
283         catch(exception& e) {
284                 m->errorOut(e, "PCACommand", "createMatrix");   
285                 exit(1);
286         }
287 }
288 //**********************************************************************************************************************
289 int PCACommand::process(vector<SharedRAbundFloatVector*>& lookupFloat){
290         try {
291                 m->mothurOut("\nProcessing " + lookupFloat[0]->getLabel()); m->mothurOutEndLine();
292                 
293                 vector< vector<double> > matrix; matrix.resize(lookupFloat.size());
294                 
295                 //fill matrix with shared files relative abundances
296                 for (int i = 0; i < lookupFloat.size(); i++) {
297                         for (int j = 0; j < lookupFloat[i]->getNumBins(); j++) {
298                                 matrix[i].push_back(lookupFloat[i]->getAbundance(j));
299                         }
300                 }
301                 
302                 vector< vector<double> > transposeMatrix; transposeMatrix.resize(matrix[0].size());
303                 for (int i = 0; i < transposeMatrix.size(); i++) {
304                         for (int j = 0; j < matrix.size(); j++) {
305                                 transposeMatrix[i].push_back(matrix[j][i]);
306                         }
307                 }
308                 
309                 matrix = linearCalc.matrix_mult(matrix, transposeMatrix);               
310                 
311                 double offset = 0.0000;
312                 vector<double> d;
313                 vector<double> e;
314                 vector<vector<double> > G = matrix;
315                 vector<vector<double> > copy_G;
316                         
317                 for(int count=0;count<2;count++){
318                         linearCalc.tred2(G, d, e);                              if (m->control_pressed) { return 0; }
319                         linearCalc.qtli(d, e, G);                               if (m->control_pressed) { return 0; }
320                         offset = d[d.size()-1];
321                         if(offset > 0.0) break;
322                 } 
323                 
324                 if (m->control_pressed) { return 0; }
325                 
326                 string fbase = outputDir + m->getRootName(m->getSimpleName(inputFile));
327                 string outputFileName = fbase + lookupFloat[0]->getLabel();
328                 output(outputFileName, globaldata->Groups, G, d);
329                 
330                 if (metric) {   
331                         
332                         for (int i = 1; i < 4; i++) {
333                                 
334                                 vector< vector<double> > EuclidDists = linearCalc.calculateEuclidianDistance(G, i); //G is the pcoa file
335                                 
336                                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } return 0; }
337                                 
338                                 double corr = linearCalc.calcPearson(EuclidDists, matrix); //G is the pcoa file, D is the users distance matrix
339                                 
340                                 m->mothurOut("Pearson's coefficient using " + toString(i) + " axis: " + toString(corr)); m->mothurOutEndLine();
341                                 
342                                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } return 0; }
343                         }
344                 }
345                 
346                 return 0;
347         }
348         catch(exception& e) {
349                 m->errorOut(e, "PCACommand", "process");        
350                 exit(1);
351         }
352 }
353 /*********************************************************************************************************************************/
354
355 void PCACommand::output(string fnameRoot, vector<string> name_list, vector<vector<double> >& G, vector<double> d) {
356         try {
357                 int rank = name_list.size();
358                 double dsum = 0.0000;
359                 for(int i=0;i<rank;i++){
360                         dsum += d[i];
361                         for(int j=0;j<rank;j++){
362                                 if(d[j] >= 0)   {       G[i][j] *= pow(d[j],0.5);       }
363                                 else                    {       G[i][j] = 0.00000;                      }
364                         }
365                 }
366                 
367                 ofstream pcaData((fnameRoot+".pca").c_str(), ios::trunc);
368                 pcaData.setf(ios::fixed, ios::floatfield);
369                 pcaData.setf(ios::showpoint);   
370                 outputNames.push_back(fnameRoot+".pca");
371                 outputTypes["pca"].push_back(fnameRoot+".pca");
372                 
373                 ofstream pcaLoadings((fnameRoot+".pca.loadings").c_str(), ios::trunc);
374                 pcaLoadings.setf(ios::fixed, ios::floatfield);
375                 pcaLoadings.setf(ios::showpoint);
376                 outputNames.push_back(fnameRoot+".pca.loadings");
377                 outputTypes["loadings"].push_back(fnameRoot+".pca.loadings");   
378                 
379                 pcaLoadings << "axis\tloading\n";
380                 for(int i=0;i<rank;i++){
381                         pcaLoadings << i+1 << '\t' << d[i] * 100.0 / dsum << endl;
382                 }
383                 
384                 pcaData << "group";
385                 for(int i=0;i<rank;i++){
386                         pcaData << '\t' << "axis" << i+1;
387                 }
388                 pcaData << endl;
389                 
390                 for(int i=0;i<rank;i++){
391                         pcaData << name_list[i] << '\t';
392                         for(int j=0;j<rank;j++){
393                                 pcaData << G[i][j] << '\t';
394                         }
395                         pcaData << endl;
396                 }
397         }
398         catch(exception& e) {
399                 m->errorOut(e, "PCACommand", "output");
400                 exit(1);
401         }
402 }
403 /*********************************************************************************************************************************/
404
405