6 * Created by westcott on 1/4/10.
7 * Copyright 2010 Schloss Lab. All rights reserved.
11 #include "pcoacommand.h"
13 //**********************************************************************************************************************
// Builds the list of parameter names the pcoa command accepts:
// "phylip", "metric", "outputdir" and "inputdir".
// Declared to return vector<string>; the return statement is not visible here
// (presumably myArray is returned - TODO confirm).
14 vector<string> PCOACommand::getValidParameters(){
16 string Array[] = {"phylip", "metric","outputdir","inputdir"};
// sizeof(Array)/sizeof(string) is the element count, so the vector is
// constructed from the whole array [Array, Array + count).
17 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
// Error path: the caught exception is passed to m->errorOut together with
// the class name and this method's name.
21 m->errorOut(e, "PCOACommand", "getValidParameters");
25 //**********************************************************************************************************************
// Default constructor: registers the two output categories this command
// produces ("pcoa" and "loadings"), each starting with an empty file list.
26 PCOACommand::PCOACommand(){
29 //initialize outputTypes
30 vector<string> tempOutNames;
// tempOutNames is empty, so both entries start out as empty vectors.
31 outputTypes["pcoa"] = tempOutNames;
32 outputTypes["loadings"] = tempOutNames;
// Error path: the exception is passed to m->errorOut with the class/method name.
35 m->errorOut(e, "PCOACommand", "PCOACommand");
39 //**********************************************************************************************************************
// Builds the list of required parameter names; the only entry is "phylip".
// Declared to return vector<string>; the return statement is not visible here
// (presumably myArray is returned - TODO confirm).
40 vector<string> PCOACommand::getRequiredParameters(){
42 string Array[] = {"phylip"};
// Same array-to-vector idiom as elsewhere in this file: count = sizeof(Array)/sizeof(string).
43 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
// Error path: the exception is passed to m->errorOut with the class/method name.
47 m->errorOut(e, "PCOACommand", "getRequiredParameters");
51 //**********************************************************************************************************************
// Declares an empty vector of required file names; nothing is added to it in
// the visible lines, so the command appears to require no previously created
// files (the return statement is not visible - TODO confirm).
52 vector<string> PCOACommand::getRequiredFiles(){
54 vector<string> myArray;
// Error path: the exception is passed to m->errorOut with the class/method name.
58 m->errorOut(e, "PCOACommand", "getRequiredFiles");
62 //**********************************************************************************************************************
// Constructs the command from its option string: parses the parameters,
// validates them, resolves the phylip distance file and output directory,
// and reads the "metric" flag. Every failed check sets `abort` to true.
// NOTE(review): several structural lines (try/else/braces, the declaration of
// `path`) are not visible here, so the exact nesting of the steps below should
// be confirmed against the full file.
64 PCOACommand::PCOACommand(string option) {
68 //allow user to run help
// "help" prints the usage text and marks the command as aborted.
69 if(option == "help") { help(); abort = true; }
72 //valid parameters for this command
73 string Array[] = {"phylip","metric","outputdir", "inputdir"};
74 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
// Split the option string into a name -> value map.
76 OptionParser parser(option);
77 map<string, string> parameters = parser. getParameters();
79 ValidParameters validParameter;
80 map<string, string>::iterator it;
82 //check to make sure all parameters are valid for command
// The loop keeps going after a failure, so every supplied parameter is checked.
83 for (it = parameters.begin(); it != parameters.end(); it++) {
84 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
86 //if the user changes the input directory command factory will send this info to us in the output parameter
87 string inputDir = validParameter.validFile(parameters, "inputdir", false);
// The string "not found" is treated as "no input directory supplied".
88 if (inputDir == "not found"){ inputDir = ""; }
91 it = parameters.find("phylip");
92 //user has given a phylip distance file
93 if(it != parameters.end()){
94 path = m->hasPath(it->second);
95 //if the user has not given a path then, add inputdir. else leave path alone.
96 if (path == "") { parameters["phylip"] = inputDir + it->second; }
100 //initialize outputTypes
101 vector<string> tempOutNames;
102 outputTypes["pcoa"] = tempOutNames;
103 outputTypes["loadings"] = tempOutNames;
105 //required parameters
106 phylipfile = validParameter.validFile(parameters, "phylip", true);
// "not open" and "not found" both abort; "not found" also clears phylipfile so
// the explicit "You must provide a distance file" message below is printed.
107 if (phylipfile == "not open") { abort = true; }
108 else if (phylipfile == "not found") { phylipfile = ""; abort = true; }
109 else { filename = phylipfile; }
111 //if the user changes the output directory command factory will send this info to us in the output parameter
112 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
// When no output directory was given, the directory part of the phylip file
// is appended (what outputDir holds just before this line is not visible here).
114 outputDir += m->hasPath(phylipfile); //if user entered a file with a path then preserve it
117 //error checking on files
118 if (phylipfile == "") { m->mothurOut("You must provide a distance file before running the pcoa command."); m->mothurOutEndLine(); abort = true; }
// "metric" defaults to "T" when absent; m->isTrue converts the text to the boolean member.
120 string temp = validParameter.validFile(parameters, "metric", false); if (temp == "not found"){ temp = "T"; }
121 metric = m->isTrue(temp);
// Error path: the exception is passed to m->errorOut with the class/method name.
125 catch(exception& e) {
126 m->errorOut(e, "PCOACommand", "PCOACommand");
130 //**********************************************************************************************************************
// Prints the usage text for the pcoa command through m->mothurOut:
// the parameter list, a description of each parameter, and an example call.
131 void PCOACommand::help(){
134 m->mothurOut("The pcoa command parameters are phylip and metric"); m->mothurOutEndLine();
135 m->mothurOut("The phylip parameter allows you to enter your distance file."); m->mothurOutEndLine();
// NOTE(review): the sentence below is garbled ("allows indicate you if would like");
// it presumably should read "allows you to indicate if you would like". Left
// unchanged here because it is user-facing runtime text.
136 m->mothurOut("The metric parameter allows indicate you if would like the pearson correlation coefficient calculated. Default=True"); m->mothurOutEndLine();
// The last two messages embed their own "\n" instead of calling mothurOutEndLine().
137 m->mothurOut("Example pcoa(phylip=yourDistanceFile).\n");
138 m->mothurOut("Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourDistanceFile).\n\n");
// Error path: the exception is passed to m->errorOut with the class/method name.
140 catch(exception& e) {
141 m->errorOut(e, "PCOACommand", "help");
145 //**********************************************************************************************************************
// Destructor: empty body, so no explicit cleanup is performed here.
146 PCOACommand::~PCOACommand(){}
147 //**********************************************************************************************************************
// Runs the command: reads the distance matrix, recenters it, decomposes it via
// linearCalc, writes the output files, then reports Pearson coefficients for
// 1, 2 and 3 axes. Returns 0 when aborted or when m->control_pressed is set;
// on the later interruption checks the files listed in outputNames are removed first.
// NOTE(review): the declarations of `d` and `e`, and any condition guarding the
// Pearson loop, are not visible here - confirm against the full file.
148 int PCOACommand::execute(){
151 if (abort == true) { return 0; }
// Switch cout and cerr to fixed-point notation with the decimal point always shown.
153 cout.setf(ios::fixed, ios::floatfield);
154 cout.setf(ios::showpoint);
155 cerr.setf(ios::fixed, ios::floatfield);
156 cerr.setf(ios::showpoint);
158 vector<string> names;
159 vector<vector<double> > D;
// Output file prefix: output directory + root name of the input file with its path removed.
161 fbase = outputDir + m->getRootName(m->getSimpleName(filename));
// Fills `names` and the distance matrix `D` from the input file.
163 read(filename, names, D);
165 if (m->control_pressed) { return 0; }
167 double offset = 0.0000;
// G starts as a copy of D and is overwritten by recenter(); copy_G is not used
// in any visible line.
170 vector<vector<double> > G = D;
171 vector<vector<double> > copy_G;
173 m->mothurOut("\nProcessing...\n\n");
// At most two passes: recenter with the current offset, then run tred2 and qtli
// (presumably tridiagonalisation followed by a QL eigen-solve - confirm in
// linearCalc). The last element of d becomes the new offset; a positive value
// ends the loop, otherwise the second pass recenters with that offset.
175 for(int count=0;count<2;count++){
176 recenter(offset, D, G); if (m->control_pressed) { return 0; }
177 linearCalc.tred2(G, d, e); if (m->control_pressed) { return 0; }
178 linearCalc.qtli(d, e, G); if (m->control_pressed) { return 0; }
179 offset = d[d.size()-1];
180 if(offset > 0.0) break;
183 if (m->control_pressed) { return 0; }
// Writes the two output files and records their names.
185 output(fbase, names, G, d);
// NOTE(review): `int i` is compared with outputNames.size() (signed vs unsigned).
187 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
// For 1, 2 and 3 axes: Euclidean distances from the coordinates in G are
// correlated with the original distances in D.
191 for (int i = 1; i < 4; i++) {
193 vector< vector<double> > EuclidDists = linearCalc.calculateEuclidianDistance(G, i); //G is the pcoa file
// NOTE(review): the cleanup loops below declare their own `int i`, shadowing the
// axis counter; harmless because each one returns immediately afterwards.
195 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
197 double corr = linearCalc.calcPearson(EuclidDists, D); //G is the pcoa file, D is the users distance matrix
199 m->mothurOut("Pearson's coefficient using " + toString(i) + " axis: " + toString(corr)); m->mothurOutEndLine();
201 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
// Final report: list every file recorded in outputNames.
205 m->mothurOutEndLine();
206 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
207 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
208 m->mothurOutEndLine();
// Error path: the exception is passed to m->errorOut with the class/method name.
212 catch(exception& e) {
213 m->errorOut(e, "PCOACommand", "execute");
217 /*********************************************************************************************************************************/
// Consumes characters from stream `f` until the character `end` has been read.
// `begin` is not used in any visible line, and the declaration/initial value of
// `d` is not visible here.
219 void PCOACommand::get_comment(istream& f, char begin, char end){
// NOTE(review): no end-of-stream check is visible in this loop; confirm it
// cannot spin forever when `end` never appears in the input.
222 while(d != end){ d = f.get(); }
// Error path: the exception is passed to m->errorOut with the class/method name.
225 catch(exception& e) {
226 m->errorOut(e, "PCOACommand", "get_comment");
231 /*********************************************************************************************************************************/
// Reads a distance matrix from stream `f` into `d` and the row names into
// `name_list`. `square_m` selects the layout: the branch for square_m == 2
// walks only j < i (lower triangle), while the earlier branch (its condition is
// not visible here) walks the full rank x rank matrix.
// Returns 0 when m->control_pressed is set part-way through.
// NOTE(review): most of the body (reading `rank`, sizing `d`, the stream
// extractions and the final return) is not visible here.
233 int PCOACommand::read_phylip(istream& f, int square_m, vector<string>& name_list, vector<vector<double> >& d){
241 name_list.resize(rank);
244 for(int i=0;i<rank;i++)
// Full-matrix branch: every (i, j) pair is visited.
246 for(int i=0;i<rank;i++) {
248 // cout << i << "\t" << name_list[i] << endl;
249 for(int j=0;j<rank;j++) {
250 if (m->control_pressed) { return 0; }
// Comparing against -0.0000 is true for both +0.0 and -0.0; the statement this
// guards is not visible (presumably it normalises the stored zero - TODO confirm).
253 if (d[i][j] == -0.0000)
258 else if(square_m == 2){
259 for(int i=0;i<rank;i++){
// Lower-triangle branch: row i supplies the entries for columns 0 .. i-1.
264 for(int i=1;i<rank;i++){
267 for(int j=0;j<i;j++){
268 if (m->control_pressed) { return 0; }
270 if (d[i][j] == -0.0000)
// Error path: the exception is passed to m->errorOut with the class/method name.
279 catch(exception& e) {
280 m->errorOut(e, "PCOACommand", "read_phylip");
286 /*********************************************************************************************************************************/
// Opens the distance file `fname`, inspects its start to decide whether the
// matrix is square or lower-triangular, then reopens the file and hands it to
// read_phylip(), which fills `names` and `D`.
// NOTE(review): the declarations of `f`, `d`, `q`, `numSeqs` and `name`, and the
// body of the detection loop, are not visible here.
288 void PCOACommand::read(string fname, vector<string>& names, vector<vector<double> >& D){
291 m->openInputFile(fname, f);
293 //check whether matrix is square
// Skip the leading count and the first sequence name before scanning characters.
299 f >> numSeqs >> name;
// Scan character by character until a deciding character or end of file.
301 while((d=f.get()) != EOF){
303 //is d a number meaning its square
309 //is d a line return meaning its lower triangle
317 //reopen to get back to beginning
// NOTE(review): whether `f` is closed before being reopened is not visible here.
318 m->openInputFile(fname, f);
// `q` carries the detected layout to read_phylip as its square_m argument.
319 read_phylip(f, q, names, D);
// Error path: the exception is passed to m->errorOut with the class/method name.
321 catch(exception& e) {
322 m->errorOut(e, "PCOACommand", "read");
327 /*********************************************************************************************************************************/
// Computes G = C * A * C, where for i != j A[i][j] = -0.5 * D[i][j]^2 + offset
// and C has 1 - 1/rank on the diagonal and -1/rank everywhere else.
// `offset` is added to every off-diagonal entry of A; the result is written to G.
// NOTE(review): D is taken by value, so the whole matrix is copied on each call;
// a const reference would avoid that but needs a matching header change.
// The definition of `rank`, the sizing of the rows of A and C, and the value
// given to A's diagonal are not visible here.
329 void PCOACommand::recenter(double offset, vector<vector<double> > D, vector<vector<double> >& G){
333 vector<vector<double> > A(rank);
334 vector<vector<double> > C(rank);
335 for(int i=0;i<rank;i++){
340 double scale = -1.0000 / (double) rank;
342 for(int i=0;i<rank;i++){
344 C[i][i] = 1.0000 + scale;
// Only j > i is visited; both symmetric entries are assigned together.
345 for(int j=i+1;j<rank;j++){
346 A[i][j] = A[j][i] = -0.5 * D[i][j] * D[i][j] + offset;
347 C[i][j] = C[j][i] = scale;
// Two products: first C*A (stored back into A), then (C*A)*C into G.
351 A = linearCalc.matrix_mult(C,A);
352 G = linearCalc.matrix_mult(A,C);
// Error path: the exception is passed to m->errorOut with the class/method name.
354 catch(exception& e) {
355 m->errorOut(e, "PCOACommand", "recenter");
361 /*********************************************************************************************************************************/
// Scales the columns of G by the square root of the matching value in d, then
// writes two files: <fnameRoot>pcoa (one row per name, one column per axis) and
// <fnameRoot>pcoa.loadings (each axis' share of the total as a percentage).
// Both file names are appended to outputNames and to the matching outputTypes entry.
// G is modified in place; name_list and d are received by value.
// NOTE(review): the statement that accumulates `dsum` is not visible here
// (presumably it sums d) - TODO confirm.
363 void PCOACommand::output(string fnameRoot, vector<string> name_list, vector<vector<double> >& G, vector<double> d) {
365 int rank = name_list.size();
366 double dsum = 0.0000;
367 for(int i=0;i<rank;i++){
369 for(int j=0;j<rank;j++){
// Non-negative d[j]: multiply by pow(d[j], 0.5), i.e. its square root.
// Negative d[j]: the coordinate is set to zero instead.
370 if(d[j] >= 0) { G[i][j] *= pow(d[j],0.5); }
371 else { G[i][j] = 0.00000; }
// Coordinates file: truncated on open, fixed-point output with the decimal point shown.
// NOTE(review): no check that either stream opened successfully is visible here.
375 ofstream pcaData((fnameRoot+"pcoa").c_str(), ios::trunc);
376 pcaData.setf(ios::fixed, ios::floatfield);
377 pcaData.setf(ios::showpoint);
378 outputNames.push_back(fnameRoot+"pcoa");
379 outputTypes["pcoa"].push_back(fnameRoot+"pcoa");
// Loadings file, opened and formatted the same way.
381 ofstream pcaLoadings((fnameRoot+"pcoa.loadings").c_str(), ios::trunc);
382 pcaLoadings.setf(ios::fixed, ios::floatfield);
383 pcaLoadings.setf(ios::showpoint);
384 outputNames.push_back(fnameRoot+"pcoa.loadings");
385 outputTypes["loadings"].push_back(fnameRoot+"pcoa.loadings");
387 pcaLoadings << "axis\tloading\n";
// One line per axis: 1-based axis number and d[i] as a percentage of dsum.
// NOTE(review): there is no guard for dsum == 0 in the visible lines.
388 for(int i=0;i<rank;i++){
389 pcaLoadings << i+1 << '\t' << d[i] * 100.0 / dsum << endl;
// Header row of the coordinates file: a leading tab, then axis1 .. axis<rank>.
393 for(int i=0;i<rank;i++){
394 pcaData << '\t' << "axis" << i+1;
// One row per name: the name, then every coordinate, each followed by a tab.
398 for(int i=0;i<rank;i++){
399 pcaData << name_list[i] << '\t';
400 for(int j=0;j<rank;j++){
401 pcaData << G[i][j] << '\t';
// Error path: the exception is passed to m->errorOut with the class/method name.
406 catch(exception& e) {
407 m->errorOut(e, "PCOACommand", "output");
412 /*********************************************************************************************************************************/