git.donarmstrong.com Git - mothur.git/blobdiff - kruskalwalliscommand.cpp
changes while testing
[mothur.git] / kruskalwalliscommand.cpp
index 263b1c55c1517171f02b70a6d5232277d4ee9861..bf4fafa950f2f180227f61ab41e1377008927c82 100644 (file)
@@ -4,20 +4,16 @@
  *
  * Created on June 26, 2012, 11:06 AM
  */
+
 #include "kruskalwalliscommand.h"
 
-//**********************************************************************************************************************
-class groupRank {
-public:
-    string group;
-    double value;
-    double rank;
-};
 //**********************************************************************************************************************
 vector<string> KruskalWallisCommand::setParameters(){  
        try {
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+        CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false,true); parameters.push_back(pgroups);
+        CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);     
                
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -42,24 +38,19 @@ string KruskalWallisCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
-string KruskalWallisCommand::getOutputFileNameTag(string type, string inputName=""){   
-       try {
-        string outputFileName = "";
-               map<string, vector<string> >::iterator it;
-        
-        //is this a type this command creates
-        it = outputTypes.find(type);
-        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
-        else {
-            if (type == "summary") {  outputFileName =  "cooccurence.summary"; }
-            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
-        }
-        return outputFileName;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "KruskalWallisCommand", "getOutputFileNameTag");
-               exit(1);
-       }
+string KruskalWallisCommand::getOutputPattern(string type) {
+    // Returns the output file-name pattern for `type`; only "summary" has one here.
+    try {
+        if (type == "summary") { return "[filename],cooccurence.summary"; }
+        // Any other type: log the error, set control_pressed, return an empty pattern.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "KruskalWallisCommand", "getOutputPattern");
+        exit(1);
+    }
 }
 //**********************************************************************************************************************
 KruskalWallisCommand::KruskalWallisCommand(){  
@@ -97,8 +88,27 @@ KruskalWallisCommand::KruskalWallisCommand(string option) {
                        for (it = parameters.begin(); it != parameters.end(); it++) { 
                                if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
                        }
-
-                       
+            
+            //get shared file
+                       sharedfile = validParameter.validFile(parameters, "shared", true);
+                       if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
+                       else if (sharedfile == "not found") { 
+                               //if there is a current shared file, use it
+                               sharedfile = m->getSharedFile(); 
+                               if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+                               else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
+                       }else { m->setSharedFile(sharedfile); }
+            
+            //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);             }
+                    
+            groups = validParameter.validFile(parameters, "groups", false);   
+            if (groups == "not found") { groups = "";   }
+            else { 
+            m->splitAtDash(groups, Groups); 
+            }   
+            m->setGroups(Groups);
+                               
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
                        if (inputDir == "not found"){   inputDir = "";          }
@@ -130,14 +140,56 @@ int KruskalWallisCommand::execute(){
        try {
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
         
+        InputData* input = new InputData(sharedfile, "sharedfile");
+        vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
+               string lastLabel = lookup[0]->getLabel();
+        
+       
+               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+               set<string> processedLabels;
+               set<string> userLabels = labels;
+
+        ofstream out;
+        map<string,string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+               string outputFileName = getOutputFileName("summary",variables);
+        m->openOutputFile(outputFileName, out);
+        outputNames.push_back(outputFileName);  outputTypes["summary"].push_back(outputFileName);
+        out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
+        out << "H\tpvalue\n";
+        
         //math goes here
         
-        int N;
-        double ss, H;
+        int N = m->getNumGroups();
+        double H;
         double tmp = 0.0;
+        vector<groupRank> vec;
+        vector<string> groups = m->getGroups();
+        string group;
+        int count;
+        double sum;
                 
         //merge all groups into a vector
+        
+        
+        
         //rank function here
+        assignRank(vec);
+        
+        //populate counts and ranSums vectors
+        for (int i=0;i<N;i++) {
+            count = 0;
+            sum = 0;
+            group = groups[i];
+            for(int j;j<vec.size();j++) {
+                if (vec[j].group == group) {
+                    count++;
+                    sum = sum + vec[j].rank;
+                }
+            }
+            counts[i] = count;
+            rankSums[i] = sum;
+        }
         
         //test statistic
         for (int i=0;i<N;i++) { tmp = tmp + (pow(rankSums[i],2) / counts[i]); }
@@ -152,8 +204,6 @@ int KruskalWallisCommand::execute(){
         
         //p-value calculation
         
-        
-        
                return 0;
        }
        catch(exception& e) {
@@ -162,22 +212,40 @@ int KruskalWallisCommand::execute(){
        }
 }
 //**********************************************************************************************************************
-multimap<double,double> KruskalWallisCommand::getRank(vector<groupRank> vec) {
+void KruskalWallisCommand::assignRank(vector<groupRank> &vec) {
     try {
-        multimap<double,double> rankMap;
         double rank = 1;
-        double previous;
-        double tie = 0.0;
-        int tiecount = 0;
+        double numRanks, avgRank, j;
+        vector<groupRank>::iterator it, oldit;
 
-        sort (vec.begin(), vec.end());
+        sort (vec.begin(), vec.end(), comparevalue);
+
+        it = vec.begin();
+
+        while ( it != vec.end() ) {
+            j = rank;
+            oldit = it;
+            if (!equalvalue(*it, *(it+1))) {
+                (*it).rank = rank; 
+                rank = rank+1; 
+                it++; }
+            else {
+                while(equalrank(*it, *(it+1))) {
+                    j = j + (j+1);
+                    rank++;
+                    it++;
+                }
+                numRanks = double (distance(oldit, it));
+                avgRank = j / numRanks;
+                while(oldit != it) {
+                    (*oldit).rank = avgRank;
+                    oldit++;
+                }
+            }
 
-        for (int i=0;i<vec.size();i++) {
-            if (vec[i] != previous) { rankMap[rank] = vec[i]; }
-            else {tie = tie + rank; tiecount++;}
-            rank++;
-            previous = vec[i];
         }
+        
+
     }
     catch(exception& e) {
                m->errorOut(e, "KruskalWallisCommand", "getRank");
@@ -186,7 +254,10 @@ multimap<double,double> KruskalWallisCommand::getRank(vector<groupRank> vec) {
     
 }
 //**********************************************************************************************************************
-
+void KruskalWallisCommand::assignValue(vector<groupRank> &vec) {
+    // Empty stub: no statements yet, so vec is left unmodified.
+}
 //**********************************************************************************************************************
 //**********************************************************************************************************************
-//**********************************************************************************************************************
\ No newline at end of file
+//**********************************************************************************************************************
+