git.donarmstrong.com Git - mothur.git/commitdiff
working on nmds
author westcott <westcott>
Fri, 21 Jan 2011 12:54:26 +0000 (12:54 +0000)
committer westcott <westcott>
Fri, 21 Jan 2011 12:54:26 +0000 (12:54 +0000)
linearalgebra.cpp
nmdscommand.cpp
pcacommand.cpp
pcoacommand.cpp

index 6b56597f10501b4720d77b28ca08961d2c35554e..39e0848003245351d86e9ed1fe1b1c238ff77213 100644 (file)
@@ -329,45 +329,79 @@ vector< vector<double> > LinearAlgebra::calculateEuclidianDistance(vector< vecto
 double LinearAlgebra::calcPearson(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
        try {
                
+       /*      euclidDists.clear();
+               userDists.clear();
+               
+               euclidDists.resize(1);
+               userDists.resize(1);
+               
+               userDists[0].push_back(0.3070833);
+               userDists[0].push_back(0.3244475);
+               userDists[0].push_back(0.6055993);
+               userDists[0].push_back(0.3372481);
+               userDists[0].push_back(0.9151715);
+               userDists[0].push_back(0.6182255);
+               userDists[0].push_back(0.7748142);
+               userDists[0].push_back(0.08554735);
+               userDists[0].push_back(0.6343481);
+               userDists[0].push_back(0.4049274);
+               
+               euclidDists[0].push_back(0.3342815);
+               euclidDists[0].push_back(0.3173829);
+               euclidDists[0].push_back(0.6852404);
+               euclidDists[0].push_back(0.7819186);
+               euclidDists[0].push_back(0.5705242);
+               euclidDists[0].push_back(0.8007263);
+               euclidDists[0].push_back(0.8561724);
+               euclidDists[0].push_back(0.4901089);
+               euclidDists[0].push_back(0.7027247);
+               euclidDists[0].push_back(0.7669696);*/
+               
+               
                //find average for - X
+               int count = 0;
                vector<float> averageEuclid; averageEuclid.resize(euclidDists.size(), 0.0);
                for (int i = 0; i < euclidDists.size(); i++) {
                        for (int j = 0; j < euclidDists[i].size(); j++) {
                                averageEuclid[i] += euclidDists[i][j];  
+                               count++;
                        }
                }
-               for (int i = 0; i < averageEuclid.size(); i++) {  averageEuclid[i] = averageEuclid[i] / (float) euclidDists.size();   }
-               
+               for (int i = 0; i < averageEuclid.size(); i++) {  averageEuclid[i] = averageEuclid[i] / (float) count;   }
+                       
                //find average for - Y
+               count = 0;
                vector<float> averageUser; averageUser.resize(userDists.size(), 0.0);
                for (int i = 0; i < userDists.size(); i++) {
                        for (int j = 0; j < userDists[i].size(); j++) {
-                               averageUser[i] += userDists[i][j];  
+                               averageUser[i] += userDists[i][j]; 
+                               count++;
                        }
                }
-               for (int i = 0; i < averageUser.size(); i++) {  averageUser[i] = averageUser[i] / (float) userDists.size();  }
-               
+               for (int i = 0; i < averageUser.size(); i++) {  averageUser[i] = averageUser[i] / (float) count;  }
+
                double numerator = 0.0;
                double denomTerm1 = 0.0;
                double denomTerm2 = 0.0;
                
                for (int i = 0; i < euclidDists.size(); i++) {
                        
-                       for (int k = 0; k < i; k++) {
+                       for (int k = 0; k < euclidDists[i].size(); k++) {
                                
                                float Yi = userDists[i][k];
                                float Xi = euclidDists[i][k];
                                
-                               numerator += ((Xi - averageEuclid[k]) * (Yi - averageUser[k]));
-                               denomTerm1 += ((Xi - averageEuclid[k]) * (Xi - averageEuclid[k]));
-                               denomTerm2 += ((Yi - averageUser[k]) * (Yi - averageUser[k]));
+                               numerator += ((Xi - averageEuclid[i]) * (Yi - averageUser[i]));
+                               denomTerm1 += ((Xi - averageEuclid[i]) * (Xi - averageEuclid[i]));
+                               denomTerm2 += ((Yi - averageUser[i]) * (Yi - averageUser[i]));
                        }
                }
                
                double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
                double r = numerator / denom;
-               
+
                return r;
+               
        }
        catch(exception& e) {
                m->errorOut(e, "LinearAlgebra", "calculateEuclidianDistance");
index cfd70c2b246beb62da1d651c0b23f2a6005f2ae6..4adb57c6147c544ab9284996d85a8bf4db9e55e2 100644 (file)
@@ -30,6 +30,7 @@ NMDSCommand::NMDSCommand(){
                vector<string> tempOutNames;
                outputTypes["nmds"] = tempOutNames;
                outputTypes["stress"] = tempOutNames;
+               outputTypes["iters"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "NMDSCommand", "NMDSCommand");
@@ -108,6 +109,7 @@ NMDSCommand::NMDSCommand(string option)  {
                        //initialize outputTypes
                        vector<string> tempOutNames;
                        outputTypes["nmds"] = tempOutNames;
+                       outputTypes["iters"] = tempOutNames;
                        outputTypes["stress"] = tempOutNames;
                        
                        //required parameters
@@ -193,25 +195,28 @@ int NMDSCommand::execute(){
                vector< vector<double> > axes;
                if (axesfile != "") {  axes = readAxes(names);          }
                
+               string outputFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "nmds.iters";
+               string stressFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "stress.nmds";
+               outputNames.push_back(outputFileName); outputTypes["iters"].push_back(outputFileName);
+               outputNames.push_back(stressFileName); outputTypes["stress"].push_back(stressFileName);
+               
+               ofstream out, out2;
+               m->openOutputFile(outputFileName, out);
+               m->openOutputFile(stressFileName, out2);
+               
+               out2.setf(ios::fixed, ios::floatfield);
+               out2.setf(ios::showpoint);
+               out.setf(ios::fixed, ios::floatfield);
+               out.setf(ios::showpoint);
+               
+               out2 << "Dimension\tIter\tStress\tCorr" << endl;
+               
+               double bestStress = 10000000;
+               vector< vector<double> > bestConfig;
+               
                for (int i = mindim; i <= maxdim; i++) {
                        m->mothurOut("Processing Dimension: " + toString(i)); m->mothurOutEndLine();
                        
-                       string outputFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "dim" + toString(i) + ".nmds";
-                       string stressFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "dim" + toString(i) + ".stress.nmds";
-                       outputNames.push_back(outputFileName); outputTypes["nmds"].push_back(outputFileName);
-                       outputNames.push_back(stressFileName); outputTypes["stress"].push_back(stressFileName);
-                       
-                       ofstream out, out2;
-                       m->openOutputFile(outputFileName, out);
-                       m->openOutputFile(stressFileName, out2);
-                       
-                       out2.setf(ios::fixed, ios::floatfield);
-                       out2.setf(ios::showpoint);
-                       out.setf(ios::fixed, ios::floatfield);
-                       out.setf(ios::showpoint);
-                       
-                       out2 << "Iter\tStress\tCorr" << endl;
-                       
                        for (int j = 0; j < iters; j++) {
                                m->mothurOut(toString(j+1)); m->mothurOutEndLine(); 
                                
@@ -231,25 +236,47 @@ int NMDSCommand::execute(){
                                if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) {     remove(outputNames[k].c_str()); } return 0; }
                                
                                //calc correlation between original distances and euclidean distances from this config
-                               double corr = linearCalc.calcPearson(matrix, newEuclid);
+                               double corr = linearCalc.calcPearson(newEuclid, matrix);
                                corr *= corr;
                                if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) {     remove(outputNames[k].c_str()); } return 0; }
                                
                                //output results
                                out << "Config" << (j+1) << '\t';
-                               for (int k = 0; k < i; k++) { out << "X" << (k+1) << '\t'; }
+                               for (int k = 0; k < i; k++) { out << "axis" << (k+1) << '\t'; }
                                out << endl;
-                               out2 << (j+1) << '\t' << stress << '\t' << corr << endl;
+                               out2 << i << '\t' << (j+1) << '\t' << stress << '\t' << corr << endl;
                                
                                output(endConfig, names, out);
                                
+                               //save best
+                               if (stress < bestStress) {
+                                       bestStress = stress;
+                                       bestConfig = endConfig;
+                               }
+                               
                                if (m->control_pressed) { out.close(); out2.close(); for (int k = 0; k < outputNames.size(); k++) {     remove(outputNames[k].c_str()); } return 0; }
-
                        }
-                       
-                       out.close(); out2.close();
                }
                
+               out.close(); out2.close();
+               
+               //output best config
+               string BestFileName = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "nmds.axes";
+               outputNames.push_back(BestFileName); outputTypes["nmds"].push_back(BestFileName);
+               
+               ofstream outBest;
+               m->openOutputFile(BestFileName, outBest);
+               outBest.setf(ios::fixed, ios::floatfield);
+               outBest.setf(ios::showpoint);
+               
+               outBest << '\t';
+               for (int k = 0; k < bestConfig.size(); k++) { outBest << "axis" << (k+1) << '\t'; }
+               outBest << endl;
+               
+               output(bestConfig, names, outBest);
+               
+               outBest.close();
+               
                if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); } return 0; }
                
                m->mothurOutEndLine();
index 0842c53163a51da38800598281cde5c30d3e6778..89e1cb71f282aa2f2606b84ae3aba03da5c74f6b 100644 (file)
@@ -364,11 +364,11 @@ void PCACommand::output(string fnameRoot, vector<string> name_list, vector<vecto
                        }
                }
                
-               ofstream pcaData((fnameRoot+".pca").c_str(), ios::trunc);
+               ofstream pcaData((fnameRoot+".pca.axes").c_str(), ios::trunc);
                pcaData.setf(ios::fixed, ios::floatfield);
                pcaData.setf(ios::showpoint);   
-               outputNames.push_back(fnameRoot+".pca");
-               outputTypes["pca"].push_back(fnameRoot+".pca");
+               outputNames.push_back(fnameRoot+".pca.axes");
+               outputTypes["pca"].push_back(fnameRoot+".pca.axes");
                
                ofstream pcaLoadings((fnameRoot+".pca.loadings").c_str(), ios::trunc);
                pcaLoadings.setf(ios::fixed, ios::floatfield);
index 6777b5a56cb1182654c66bf6d065f9f973a10525..fbcb7e08779d14f28277d8b5e8d0a8ce3b088174 100644 (file)
@@ -277,11 +277,11 @@ void PCOACommand::output(string fnameRoot, vector<string> name_list, vector<vect
                        }
                }
                
-               ofstream pcaData((fnameRoot+"pcoa").c_str(), ios::trunc);
+               ofstream pcaData((fnameRoot+"pcoa.axes").c_str(), ios::trunc);
                pcaData.setf(ios::fixed, ios::floatfield);
                pcaData.setf(ios::showpoint);   
-               outputNames.push_back(fnameRoot+"pcoa");
-               outputTypes["pcoa"].push_back(fnameRoot+"pcoa");
+               outputNames.push_back(fnameRoot+"pcoa.axes");
+               outputTypes["pcoa"].push_back(fnameRoot+"pcoa.axes");
                
                ofstream pcaLoadings((fnameRoot+"pcoa.loadings").c_str(), ios::trunc);
                pcaLoadings.setf(ios::fixed, ios::floatfield);