]> git.donarmstrong.com Git - mothur.git/commitdiff
added kruskal.wallis command. added worked on make.lefse. working of lefse command...
authorSarah Westcott <mothur.westcott@gmail.com>
Mon, 24 Jun 2013 18:31:29 +0000 (14:31 -0400)
committerSarah Westcott <mothur.westcott@gmail.com>
Mon, 24 Jun 2013 18:31:29 +0000 (14:31 -0400)
17 files changed:
Mothur.xcodeproj/project.pbxproj
commandfactory.cpp
designmap.cpp [new file with mode: 0644]
designmap.h [new file with mode: 0644]
kruskalwalliscommand.cpp
kruskalwalliscommand.h
lefsecommand.cpp
lefsecommand.h
linearalgebra.cpp
linearalgebra.h
makecontigscommand.cpp
makelefsecommand.cpp
mothur.h
mothurout.cpp
mothurout.h
setlogfilecommand.cpp
trimoligos.cpp

index 60a929d4ac7ce6ab268e31344e38bedcc20b65f8..b09bab447e3307bc279189d5e5b5dae30a677e7a 100644 (file)
@@ -54,6 +54,7 @@
                A774104814696F320098E6AC /* myseqdist.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A774104614696F320098E6AC /* myseqdist.cpp */; };
                A77410F614697C300098E6AC /* seqnoise.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77410F414697C300098E6AC /* seqnoise.cpp */; };
                A778FE6B134CA6CA00C0BA33 /* getcommandinfocommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */; };
+               A77916E8176F7F7600EEFE18 /* designmap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77916E6176F7F7600EEFE18 /* designmap.cpp */; };
                A77A221F139001B600B0BE70 /* deuniquetreecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */; };
                A77B7185173D2240002163C2 /* sparcccommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77B7184173D2240002163C2 /* sparcccommand.cpp */; };
                A77B7188173D4042002163C2 /* randomnumber.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77B7186173D4041002163C2 /* randomnumber.cpp */; };
                A77410F514697C300098E6AC /* seqnoise.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = seqnoise.h; sourceTree = "<group>"; };
                A778FE69134CA6CA00C0BA33 /* getcommandinfocommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getcommandinfocommand.h; sourceTree = "<group>"; };
                A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getcommandinfocommand.cpp; sourceTree = "<group>"; };
+               A77916E6176F7F7600EEFE18 /* designmap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = designmap.cpp; sourceTree = "<group>"; };
+               A77916E7176F7F7600EEFE18 /* designmap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = designmap.h; sourceTree = "<group>"; };
                A77A221D139001B600B0BE70 /* deuniquetreecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deuniquetreecommand.h; sourceTree = "<group>"; };
                A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deuniquetreecommand.cpp; sourceTree = "<group>"; };
                A77B7183173D222F002163C2 /* sparcccommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = sparcccommand.h; sourceTree = "<group>"; };
                                A7E9B6BD12D37EC400DA6239 /* database.cpp */,
                                A7E9B6BE12D37EC400DA6239 /* database.hpp */,
                                A7E9B6BF12D37EC400DA6239 /* datavector.hpp */,
+                               A77916E7176F7F7600EEFE18 /* designmap.h */,
+                               A77916E6176F7F7600EEFE18 /* designmap.cpp */,
                                A7E9B6CE12D37EC400DA6239 /* distancedb.hpp */,
                                A7E9B6CD12D37EC400DA6239 /* distancedb.cpp */,
                                A7E9B6DE12D37EC400DA6239 /* fastamap.cpp */,
                                A7CFA4311755401800D9ED4D /* renameseqscommand.cpp in Sources */,
                                A741744C175CD9B1007DF49B /* makelefsecommand.cpp in Sources */,
                                A7190B221768E0DF00A9AFA6 /* lefsecommand.cpp in Sources */,
+                               A77916E8176F7F7600EEFE18 /* designmap.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
index 407b67473566d880e951e87dabb35982ec367885..41a67297c4199ab819ef7f0498dfd27e6debd515 100644 (file)
 #include "makelookupcommand.h"
 #include "renameseqscommand.h"
 #include "makelefsecommand.h"
+#include "lefsecommand.h"
+#include "kruskalwalliscommand.h"
 
 /*******************************************************/
 
@@ -315,6 +317,8 @@ CommandFactory::CommandFactory(){
     commands["make.lookup"]         = "make.lookup";
     commands["rename.seqs"]         = "rename.seqs";
     commands["make.lefse"]          = "make.lefse";
+    commands["lefse"]               = "lefse";
+    commands["kruskal.wallis"]      = "kruskal.wallis";
     
 
 }
@@ -540,6 +544,8 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
         else if(commandName == "make.lookup")                  {       command = new MakeLookupCommand(optionString);                          }
         else if(commandName == "rename.seqs")                  {       command = new RenameSeqsCommand(optionString);                          }
         else if(commandName == "make.lefse")                   {       command = new MakeLefseCommand(optionString);                           }
+        else if(commandName == "lefse")                 {      command = new LefseCommand(optionString);                   }
+        else if(commandName == "kruskal.wallis")        {      command = new KruskalWallisCommand(optionString);           }
                else                                                                                    {       command = new NoCommand(optionString);                                          }
 
                return command;
@@ -706,6 +712,8 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
         else if(commandName == "make.lookup")                  {       pipecommand = new MakeLookupCommand(optionString);                              }
         else if(commandName == "rename.seqs")                  {       pipecommand = new RenameSeqsCommand(optionString);                              }
         else if(commandName == "make.lefse")                   {       pipecommand = new MakeLefseCommand(optionString);                               }
+        else if(commandName == "lefse")                 {      pipecommand = new LefseCommand(optionString);                   }
+        else if(commandName == "kruskal.wallis")        {      pipecommand = new KruskalWallisCommand(optionString);           }
                else                                                                                    {       pipecommand = new NoCommand(optionString);                                              }
 
                return pipecommand;
@@ -858,6 +866,8 @@ Command* CommandFactory::getCommand(string commandName){
         else if(commandName == "make.lookup")                  {       shellcommand = new MakeLookupCommand();                         }
         else if(commandName == "rename.seqs")                  {       shellcommand = new RenameSeqsCommand();                         }
         else if(commandName == "make.lefse")                   {       shellcommand = new MakeLefseCommand();                          }
+        else if(commandName == "lefse")                 {      shellcommand = new LefseCommand();                  }
+        else if(commandName == "kruskal.wallis")        {      shellcommand = new KruskalWallisCommand();          }
                else                                                                                    {       shellcommand = new NoCommand();                                         }
 
                return shellcommand;
diff --git a/designmap.cpp b/designmap.cpp
new file mode 100644 (file)
index 0000000..dff9700
--- /dev/null
@@ -0,0 +1,502 @@
+//
+//  designmap.cpp
+//  Mothur
+//
+//  Created by SarahsWork on 6/17/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "designmap.h"
+
+/************************************************************/
+DesignMap::DesignMap(string file) {
+    try {
+        m = MothurOut::getInstance();
+        defaultClass = "not found";
+        read(file);
+    }
+       catch(exception& e) {
+               m->errorOut(e, "DesignMap", "DesignMap");
+               exit(1);
+       }
+}
+/************************************************************/
+int DesignMap::read(string file) {
+    try {
+        ifstream in;
+        m->openInputFile(file, in);
+        
+        string headers = m->getline(in); m->gobble(in);
+        vector<string> columnHeaders = m->splitWhiteSpace(headers);
+        
+        namesOfCategories.clear();
+        indexCategoryMap.clear();
+        indexNameMap.clear();
+        designMap.clear();
+        map<int, string> originalGroupIndexes;
+        for (int i = 1; i < columnHeaders.size(); i++) {  namesOfCategories.push_back(columnHeaders[i]);  originalGroupIndexes[i-1] = columnHeaders[i]; }
+        if (columnHeaders.size() > 1) { defaultClass = columnHeaders[1]; }
+        
+        //sort groups to keep consistent with how we store the groups in groupmap
+        sort(namesOfCategories.begin(), namesOfCategories.end());
+        for (int i = 0; i < namesOfCategories.size(); i++) {  indexCategoryMap[namesOfCategories[i]] = i; }
+        int numCategories = namesOfCategories.size();
+        
+        bool error = false;
+        string group;
+        totalCategories.resize(numCategories);
+        int count = 0;
+        while (!in.eof()) {
+            
+            if (m->control_pressed) { break; }
+            
+            in >> group; m->gobble(in); 
+            if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + "\n"); }
+            
+            //if group info, then read it
+            vector<string> categoryValues; categoryValues.resize(numCategories, "not found");
+            for (int i = 0; i < numCategories; i++) {
+                int thisIndex = indexCategoryMap[originalGroupIndexes[i]]; //find index of this category because we sort the values.
+                string temp = "not found";
+                in >> temp; categoryValues[thisIndex] = temp; m->gobble(in);
+                
+                if (m->debug) { m->mothurOut("[DEBUG]: value = " + temp + "\n"); }
+                
+                //do we have this value for this category already
+                map<string, int>::iterator it = totalCategories[thisIndex].find(temp);
+                if (it == totalCategories[thisIndex].end()) { totalCategories[thisIndex][temp] = 1; }
+                else {  totalCategories[thisIndex][temp]++; }
+            }
+                           
+            
+            map<string, int>::iterator it = indexNameMap.find(group);
+            if (it == indexNameMap.end()) {
+                indexNameMap[group] = count;
+                designMap.push_back(categoryValues);
+                count++;
+            }else {
+                error = true;
+                m->mothurOut("[ERROR]: Your design file contains more than 1 group named " + group + ", group names must be unique. Please correct."); m->mothurOutEndLine();
+            }
+        }
+        in.close();
+        
+        //sanity check
+        for (int i = 0; i < totalCategories.size(); i++) {
+            map<string, int>::iterator it = totalCategories[i].find(namesOfCategories[i]);
+            if (it != totalCategories[i].end()) { //we may have an old style design file since category name matches a value
+                m->mothurOut("\n[WARNING]: Your design file has a category and value for that category named " + namesOfCategories[i] + ". Perhaps you are using an old style design file without headers? If so, please correct."); m->mothurOutEndLine();
+            }
+        }
+        
+        if (error) { m->control_pressed = true; }
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "DesignMap", "read");
+               exit(1);
+       }
+}
+/************************************************************/
+////groupName, returns first categories value. 
+string DesignMap::get(string groupName) {
+    try {
+        string value = "not found";
+        
+        map<string, int>::iterator it2 = indexNameMap.find(groupName);
+        if (it2 == indexNameMap.end()) {
+            m->mothurOut("[ERROR]: group " + groupName + " is not in your design file. Please correct.\n"); m->control_pressed = true;
+        }else {
+            return designMap[it2->second][0];
+        }
+       
+        return value;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "DesignMap", "get");
+               exit(1);
+       }
+}
+/************************************************************/
+////categoryName, returns category values.
+vector<string> DesignMap::getValues(string catName) {
+    try {
+        vector<string> values;
+        
+        map<string, int>::iterator it2 = indexCategoryMap.find(catName);
+        if (it2 == indexCategoryMap.end()) {
+            m->mothurOut("[ERROR]: category " + catName + " is not in your design file. Please correct.\n"); m->control_pressed = true;
+        }else {
+            for (map<string, int>::iterator it = totalCategories[it2->second].begin(); it != totalCategories[it2->second].end(); it++) {
+                values.push_back(it->first);
+            }
+        }
+        
+        return values;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "DesignMap", "getValues");
+               exit(1);
+       }
+}
+
+/************************************************************/
+////groupName, category returns value. example F000132, sex -> male
+string DesignMap::get(string groupName, string categoryName) {
+    try {
+        string value = "not found";
+        map<string, int>::iterator it = indexCategoryMap.find(categoryName);
+        if (it == indexCategoryMap.end()) {
+            m->mothurOut("[ERROR]: category " + categoryName + " is not in your design file. Please correct.\n"); m->control_pressed = true;
+        }else {
+            map<string, int>::iterator it2 = indexNameMap.find(groupName);
+            if (it2 == indexNameMap.end()) {
+                m->mothurOut("[ERROR]: group " + groupName + " is not in your design file. Please correct.\n"); m->control_pressed = true;
+            }else {
+                return designMap[it2->second][it->second];
+            }
+        }
+        return value;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "DesignMap", "get");
+               exit(1);
+       }
+}
+/************************************************************/
+//add group, assumes order is correct
+int DesignMap::push_back(string group, vector<string> values) {
+    try {
+        map<string, int>::iterator it = indexNameMap.find(group);
+        if (it == indexNameMap.end()) {
+            if (values.size() != getNumCategories()) {  m->mothurOut("[ERROR]: Your design file has a " + toString(getNumCategories()) + " categories and " + group + " has " + toString(values.size()) + ", please correct."); m->mothurOutEndLine(); m->control_pressed = true;  return 0; }
+            
+            for (int i = 0; i < values.size(); i++) {
+                //do we have this value for this category already
+                map<string, int>::iterator it = totalCategories[i].find(values[i]);
+                if (it == totalCategories[i].end()) { totalCategories[i][values[i]] = 1; }
+                else {  totalCategories[i][values[i]]++; }
+            }
+            int count = indexNameMap.size();
+            indexNameMap[group] = count;
+            designMap.push_back(values);
+        }else {
+            m->mothurOut("[ERROR]: Your design file contains more than 1 group named " + group + ", group names must be unique. Please correct."); m->mothurOutEndLine(); m->control_pressed = true;
+        }
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "DesignMap", "push_back");
+               exit(1);
+       }
+}
+/************************************************************/
+//set values for group, does not need to set all values. assumes group is in table already
+int DesignMap::set(string group, map<string, string> values) {
+    try {
+        map<string, int>::iterator it = indexNameMap.find(group);
+        if (it != indexNameMap.end()) {
+            for (map<string, string>::iterator it2 = values.begin(); it2 != values.end(); it2++) {
+                
+                map<string, int>::iterator it3 = indexCategoryMap.find(it2->first); //do we have this category
+                if (it3 == indexCategoryMap.end()) {
+                     m->mothurOut("[ERROR]: Your design file does not contain a category called " + it2->first + ". Please correct."); m->mothurOutEndLine(); m->control_pressed = true;
+                }else {
+                    string oldCategory = designMap[it->second][it3->second];
+                    //adjust totals for old category
+                    int oldCount = totalCategories[it3->second][oldCategory];
+                    if (oldCount == 1) { totalCategories[it3->second].erase(oldCategory); }
+                    else {  totalCategories[it3->second][oldCategory]--; }
+                    
+                    designMap[it->second][it3->second] = it2->second; //reset value
+                    
+                    //adjust totals for new category
+                    map<string, int>::iterator it4 = totalCategories[it3->second].find(it2->second);
+                    if (it4 == totalCategories[it3->second].end()) { totalCategories[it3->second][it2->second] = 1; }
+                    else {  totalCategories[it3->second][it2->second]++; }
+                }
+            }
+        }else {
+            m->mothurOut("[ERROR]: Your design file does not contain a group named " + group + ". Please correct."); m->mothurOutEndLine(); m->control_pressed = true;
+        }
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "DesignMap", "set");
+               exit(1);
+       }
+}
+/************************************************************/
+//get number of groups belonging to a category or set of categories, with value or a set of values. Must have all categories and values. Example:
+//  map<treatment - > early, late>, <sex -> male> would return 1. Only one group is male and from early or late.
+int DesignMap::getNumUnique(map<string, vector<string> > selected) {
+    try {
+        int num = 0;
+        
+        //get indexes of categories
+        vector<int> indexes;
+        for (map<string, vector<string> >::iterator it = selected.begin(); it != selected.end(); it++) {
+            map<string, int>::iterator it2 = indexCategoryMap.find(it->first);
+            if (it2 == indexCategoryMap.end()) {
+                m->mothurOut("[ERROR]: Your design file does not contain a category named " + it->first + ". Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return 0;
+            }else { indexes.push_back(it2->second); }
+        }
+        
+        for (int i = 0; i < designMap.size(); i++) {
+            bool hasAll = true; //innocent til proven guilty
+            int count = 0;
+            for (map<string, vector<string> >::iterator it = selected.begin(); it != selected.end(); it++) { //loop through each
+                //check category to see if this group meets the requirements
+                if (!m->inUsersGroups(designMap[i][indexes[count]], it->second)) { hasAll = false; it = selected.end(); }
+                count++;
+            }
+            if (hasAll) { num++; }
+        }
+        
+        return num;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "DesignMap", "getNumUnique");
+               exit(1);
+       }
+}
+/************************************************************/
+//get number of groups belonging to a category or set of categories, with value or a set of values. Must have at least one categories and values. Example:
+//  map<treatment - > early, late>, <sex -> male> would return 3. All three group have are either male or from early or late.
+int DesignMap::getNumShared(map<string, vector<string> > selected) {
+    try {
+        int num = 0;
+        
+        //get indexes of categories
+        vector<int> indexes;
+        for (map<string, vector<string> >::iterator it = selected.begin(); it != selected.end(); it++) {
+            map<string, int>::iterator it2 = indexCategoryMap.find(it->first);
+            if (it2 == indexCategoryMap.end()) {
+                m->mothurOut("[ERROR]: Your design file does not contain a category named " + it->first + ". Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return 0;
+            }else { indexes.push_back(it2->second); }
+        }
+        
+        for (int i = 0; i < designMap.size(); i++) {
+            bool hasAny = false; //guilty til proven innocent
+            int count = 0;
+            for (map<string, vector<string> >::iterator it = selected.begin(); it != selected.end(); it++) { //loop through each
+                //check category to see if this group meets the requirements
+                if (m->inUsersGroups(designMap[i][indexes[count]], it->second)) { hasAny = true; it = selected.end(); }
+                count++;
+            }
+            if (hasAny) { num++; }
+        }
+
+        
+        return num;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "DesignMap", "getNumShared");
+               exit(1);
+       }
+}
+
+/************************************************************/
+//get names of groups belonging to a category or set of categories, with value or a set of values. Must have all categories and values. Example:
+//  map<treatment - > early, late>, <sex -> male> would return F000132. F000132 is the only group which is male and from early or late.
+vector<string> DesignMap::getNamesUnique(map<string, vector<string> > selected) {
+    try {
+        vector<string> names;
+        
+        //get indexes of categories
+        vector<int> indexes;
+        for (map<string, vector<string> >::iterator it = selected.begin(); it != selected.end(); it++) {
+            map<string, int>::iterator it2 = indexCategoryMap.find(it->first);
+            if (it2 == indexCategoryMap.end()) {
+                m->mothurOut("[ERROR]: Your design file does not contain a category named " + it->first + ". Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return names;
+            }else { indexes.push_back(it2->second); }
+        }
+        
+        //map int to name
+        map<int, string> reverse;
+        for (map<string, int>::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) {
+            reverse[it->second] = it->first;
+        }
+        
+        for (int i = 0; i < designMap.size(); i++) {
+            bool hasAll = true; //innocent til proven guilty
+            int count = 0;
+            for (map<string, vector<string> >::iterator it = selected.begin(); it != selected.end(); it++) { //loop through each
+                //check category to see if this group meets the requirements
+                if (!m->inUsersGroups(designMap[i][indexes[count]], it->second)) { hasAll = false; it = selected.end(); }
+                count++;
+            }
+            if (hasAll) {
+                map<int, string>::iterator it = reverse.find(i);
+                if (it == reverse.end()) {
+                    m->mothurOut("[ERROR]: should never get here, oops. Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return names;
+                }else { names.push_back(it->second); }
+            }
+        }
+
+        
+        return names;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "DesignMap", "getNamesUnique");
+               exit(1);
+       }
+}
+/************************************************************/
+//get names of groups belonging to a category or set of categories, with value or a set of values. Must have all categories and values. Example:
+//  map<treatment - > early, late>, <sex -> male> would return F000132. F000132 is the only group which is male and from early or late.
+vector<string> DesignMap::getNamesShared(map<string, vector<string> > selected) {
+    try {
+        vector<string> names;
+        
+        //get indexes of categories
+        vector<int> indexes;
+        for (map<string, vector<string> >::iterator it = selected.begin(); it != selected.end(); it++) {
+            map<string, int>::iterator it2 = indexCategoryMap.find(it->first);
+            if (it2 == indexCategoryMap.end()) {
+                m->mothurOut("[ERROR]: Your design file does not contain a category named " + it->first + ". Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return names;
+            }else { indexes.push_back(it2->second); }
+        }
+        
+        //map int to name
+        map<int, string> reverse;
+        for (map<string, int>::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) {
+            reverse[it->second] = it->first;
+        }
+        
+        for (int i = 0; i < designMap.size(); i++) {
+            bool hasAny = false; 
+            int count = 0;
+            for (map<string, vector<string> >::iterator it = selected.begin(); it != selected.end(); it++) { //loop through each
+                //check category to see if this group meets the requirements
+                if (m->inUsersGroups(designMap[i][indexes[count]], it->second)) { hasAny = true; it = selected.end(); }
+                count++;
+            }
+            if (hasAny) {
+                map<int, string>::iterator it = reverse.find(i);
+                if (it == reverse.end()) {
+                    m->mothurOut("[ERROR]: should never get here, oops. Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return names;
+                }else { names.push_back(it->second); }
+            }
+        }
+        
+        
+        return names;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "DesignMap", "getNamesShared");
+               exit(1);
+       }
+}
+
+/************************************************************/
+//get names of groups belonging to a category or set of categories, with value or a set of values. Must have at least one categories and values. Example:
+//  map<treatment - > early, late>, <sex -> male> would return F000132, F000142, F000138. All three group have are either male or from early or late.
+
+vector<string> DesignMap::getNames(string category, string value) {
+    try {
+        vector<string> names;
+        
+        map<string, int>::iterator it = indexCategoryMap.find(category);
+        if (it == indexCategoryMap.end()) {
+            m->mothurOut("[ERROR]: category " + category + " is not in your design file. Please correct.\n"); m->control_pressed = true;
+        }else {
+            int column = it->second;
+            
+            //map int to name
+            map<int, string> reverse;
+            for (map<string, int>::iterator it2 = indexNameMap.begin(); it2 != indexNameMap.end(); it2++) {
+                reverse[it2->second] = it2->first;
+            }
+            
+            for (int i = 0; i < designMap.size(); i++) {
+                if (designMap[i][column] == value) {
+                    map<int, string>::iterator it2 = reverse.find(i);
+                    if (it2 == reverse.end()) {
+                        m->mothurOut("[ERROR]: should never get here, oops. Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return names;
+                    }else { names.push_back(it2->second); }
+                }
+            }
+        }
+        return names;
+        
+    }
+       catch(exception& e) {
+               m->errorOut(e, "DesignMap", "getNames");
+               exit(1);
+       }
+}
+
+/************************************************************/
+int DesignMap::print(ofstream& out) {
+    try {
+       
+               out << "group\t";
+        for (int i = 0; i < namesOfCategories.size(); i++) { out << namesOfCategories[i] << '\t'; }
+        out << endl;
+        
+        map<int, string> reverse; //use this to preserve order
+        for (map<string, int>::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first;  }
+        
+        for (int i = 0; i < designMap.size(); i++) {
+            map<int, string>::iterator itR = reverse.find(i);
+            
+            if (itR != reverse.end()) { //will equal end if seqs were removed because remove just removes from indexNameMap
+                out << itR->second  << '\t';
+                
+                for (int j = 0; j < namesOfCategories.size(); j++) {
+                    out << designMap[i][j] << '\t';
+                }
+                out << endl;
+            }
+        }
+        out.close();
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "DesignMap", "print");
+               exit(1);
+       }
+}
+/************************************************************/
+//print specific categories
+int DesignMap::print(ofstream& out, vector<string> cats) {
+    try {
+        
+               out << "group\t";
+        for (int i = 0; i < namesOfCategories.size(); i++) { if (m->inUsersGroups(namesOfCategories[i], cats)) { out << namesOfCategories[i] << '\t'; } }
+        out << endl;
+        
+        map<int, string> reverse; //use this to preserve order
+        for (map<string, int>::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first;  }
+        
+        for (int i = 0; i < designMap.size(); i++) {
+            map<int, string>::iterator itR = reverse.find(i);
+            
+            if (itR != reverse.end()) { //will equal end if seqs were removed because remove just removes from indexNameMap
+                out << itR->second  << '\t';
+                
+                for (int j = 0; j < namesOfCategories.size(); j++) {
+                    if (m->inUsersGroups(namesOfCategories[i], cats)) {
+                        out << designMap[i][j] << '\t';
+                    }
+                }
+                out << endl;
+            }
+        }
+        out.close();
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "DesignMap", "print");
+               exit(1);
+       }
+}
+
+/************************************************************/
+
diff --git a/designmap.h b/designmap.h
new file mode 100644 (file)
index 0000000..b6dec3b
--- /dev/null
@@ -0,0 +1,70 @@
+//
+//  designmap.h
+//  Mothur
+//
+//  Created by SarahsWork on 6/17/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef __Mothur__designmap__
+#define __Mothur__designmap__
+
+#include "mothurout.h"
+
+/* This class is a representation of the design file.  
+ group    treatment    sex        age
+ F000142    Early       female    young
+ F000132    Late        male        old
+ F000138    Mid         male        old
+ */
+
+class DesignMap {
+public:
+       DesignMap() { m = MothurOut::getInstance(); defaultClass = "not found"; }
+       DesignMap(string); 
+       ~DesignMap() {}
+    
+    int read(string);
+    string get(string, string); //groupName, category returns value. example F000132, sex -> male
+    string get(string); //groupName, returns first categories value. example F000132, -> late
+    vector<string> getValues(string); //categoryName, returns values. example treatment, -> early,late,mid
+       int set(string, map<string, string>); //groupName, map<category, value>
+    int push_back(string, vector<string>); //groupName, vector<value> - assumes you put values in order of getNamesOfCategories
+       vector<string> getNamesOfCategories() {
+               sort(namesOfCategories.begin(), namesOfCategories.end());
+        return namesOfCategories;
+       }
+    int getNumCategories() { return namesOfCategories.size(); }
+       int getNum()  {  return designMap.size();  }
+    int getNumUnique(map<string, vector<string> >); //get number of groups belonging to a category or set of categories, with value or a set of values. Must have all categories and values. Example:
+    //  map<treatment - > early, late>, <sex -> male> would return 1. Only one group is male and from early or late.
+    int getNumShared(map<string, vector<string> >); //get number of groups belonging to a category or set of categories, with value or a set of values. Must have at least one categories and values. Example:
+    //  map<treatment - > early, late>, <sex -> male> would return 3. All three group have are either male or from early or late.
+
+    vector<string> getNames();
+    vector<string> getNames(string, string); //get names group from category and value.
+       vector<string> getNamesUnique(map<string, vector<string> >); //get names of groups belonging to a category or set of categories, with value or a set of values. Must have all categories and values. Example:
+    //  map<treatment - > early, late>, <sex -> male> would return F000132. F000132 is the only group which is male and from early or late.
+    vector<string> getNamesShared(map<string, vector<string> >); //get names of groups belonging to a category or set of categories, with value or a set of values. Must have at least one categories and values. Example:
+    //  map<treatment - > early, late>, <sex -> male> would return F000132, F000142, F000138. All three group have are either male or from early or late.
+
+    int print(ofstream&);
+    int print(ofstream&, vector<string>); //print certain categories
+    
+    string getDefaultClass() { return defaultClass; }
+    
+private:
+    string defaultClass;
+       vector<string> namesOfCategories;
+       MothurOut* m;
+       vector< vector<string> > designMap;
+    vector< map<string, int> > totalCategories;  //for each category, total groups assigned to it.   vector[0] early -> 1, vector[1] male -> 2
+    map<string, int> indexNameMap;
+    map<string, int> indexCategoryMap;
+};
+
+
+#endif /* defined(__Mothur__designmap__) */
index bf4fafa950f2f180227f61ab41e1377008927c82..805d21e51d5331576185f6d28128af8dad7cbae3 100644 (file)
@@ -6,14 +6,19 @@
  */
 
 #include "kruskalwalliscommand.h"
+#include "linearalgebra.h"
 
 //**********************************************************************************************************************
-vector<string> KruskalWallisCommand::setParameters(){  
+vector<string> KruskalWallisCommand::setParameters(){
        try {
+        CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign);
+        CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
+        CommandParameter pclass("class", "String", "", "", "", "", "","",false,false); parameters.push_back(pclass);
+               CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+               CommandParameter pclasses("classes", "String", "", "", "", "", "","",false,false); parameters.push_back(pclasses);
+        //every command must have inputdir and outputdir.  This allows mothur users to redirect input and output files.
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
-        CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false,true); parameters.push_back(pgroups);
-        CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);     
                
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -25,12 +30,16 @@ vector<string> KruskalWallisCommand::setParameters(){
        }
 }
 //**********************************************************************************************************************
-string KruskalWallisCommand::getHelpString(){  
+string KruskalWallisCommand::getHelpString(){
        try {
                string helpString = "";
-               helpString += "The kruskalwallis command parameter options are \n";
-        helpString += "Kruskal–Wallis one-way analysis of variance is a non-parametric method for testing whether samples originate from the same distribution.";
-               return helpString;
+               helpString += "The kruskal.wallis command allows you to ....\n";
+               helpString += "The kruskal.wallis command parameters are: shared, design, class,  label and classes.\n";
+               helpString += "The class parameter is used to indicate the which category you would like used for the Kruskal Wallis analysis. If none is provided first category is used.\n";
+        helpString += "The classes parameter is used to indicate the classes you would like to use. Clases should be inputted in the following format: classes=label<value1|value2|value3>-label<value1|value2>. For example to include groups from treatment with value early or late and age= young or old.  class=treatment<Early|Late>-age<young|old>.\n";
+        helpString += "The label parameter is used to indicate which distances in the shared file you would like to use. labels are separated by dashes.\n";
+               helpString += "The kruskal.wallis command should be in the following format: kruskal.wallis(shared=final.an.shared, design=final.design, class=treatment).\n";
+        return helpString;
        }
        catch(exception& e) {
                m->errorOut(e, "KruskalWallisCommand", "getHelpString");
@@ -42,7 +51,7 @@ string KruskalWallisCommand::getOutputPattern(string type) {
     try {
         string pattern = "";
         
-        if (type == "summary") {  pattern = "[filename],cooccurence.summary"; } 
+        if (type == "kruskall-wallis") {  pattern = "[filename],[distance],kruskall_wallis"; }
         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
         
         return pattern;
@@ -53,13 +62,12 @@ string KruskalWallisCommand::getOutputPattern(string type) {
     }
 }
 //**********************************************************************************************************************
-KruskalWallisCommand::KruskalWallisCommand(){  
+KruskalWallisCommand::KruskalWallisCommand(){
        try {
-               abort = true; calledHelp = true; 
+               abort = true; calledHelp = true;
                setParameters();
         vector<string> tempOutNames;
-               outputTypes["summary"] = tempOutNames;
-
+        outputTypes["kruskall-wallis"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "KruskalWallisCommand", "KruskalWallisCommand");
@@ -67,68 +75,95 @@ KruskalWallisCommand::KruskalWallisCommand(){
        }
 }
 //**********************************************************************************************************************
-KruskalWallisCommand::KruskalWallisCommand(string option) {
+KruskalWallisCommand::KruskalWallisCommand(string option)  {
        try {
-               abort = false; calledHelp = false;   
-                               
+               abort = false; calledHelp = false;
+        allLines = 1;
+               
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true; }
                else if(option == "citation") { citation(); abort = true; calledHelp = true;}
                
                else {
+                       //valid paramters for this command
                        vector<string> myArray = setParameters();
                        
                        OptionParser parser(option);
                        map<string,string> parameters = parser.getParameters();
-                       map<string,string>::iterator it;
                        
                        ValidParameters validParameter;
-                       
+                       map<string,string>::iterator it;
                        //check to make sure all parameters are valid for command
-                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                       for (it = parameters.begin(); it != parameters.end(); it++) {
                                if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
                        }
+                       
+                       vector<string> tempOutNames;
+            outputTypes["kruskall-wallis"] = tempOutNames;
             
-            //get shared file
-                       sharedfile = validParameter.validFile(parameters, "shared", true);
-                       if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
-                       else if (sharedfile == "not found") { 
-                               //if there is a current shared file, use it
-                               sharedfile = m->getSharedFile(); 
-                               if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
-                               else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
-                       }else { m->setSharedFile(sharedfile); }
-            
-            //if the user changes the output directory command factory will send this info to us in the output parameter 
-                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);             }
-                    
-            groups = validParameter.validFile(parameters, "groups", false);   
-            if (groups == "not found") { groups = "";   }
-            else { 
-            m->splitAtDash(groups, Groups); 
-            }   
-            m->setGroups(Groups);
-                               
-                       //if the user changes the input directory command factory will send this info to us in the output parameter 
-                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       //if the user changes the input directory command factory will send this info to us in the output parameter
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);
                        if (inputDir == "not found"){   inputDir = "";          }
                        else {
-                               string path;
-                               it = parameters.find("shared");
+                
+                string path;
+                               it = parameters.find("design");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["desing"] = inputDir + it->second;           }
+                               }
+                               
+                it = parameters.find("shared");
                                //user has given a template file
-                               if(it != parameters.end()){ 
+                               if(it != parameters.end()){
                                        path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["shared"] = inputDir + it->second;           }
                                }
+            }
+            
+            //get shared file, it is required
+                       sharedfile = validParameter.validFile(parameters, "shared", true);
+                       if (sharedfile == "not open") { sharedfile = ""; abort = true; }
+                       else if (sharedfile == "not found") {
+                               //if there is a current shared file, use it
+                               sharedfile = m->getSharedFile();
+                               if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+                               else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
+                       }else { m->setSharedFile(sharedfile); }
+            
+            //get shared file, it is required
+                       designfile = validParameter.validFile(parameters, "design", true);
+                       if (designfile == "not open") { designfile = ""; abort = true; }
+                       else if (designfile == "not found") {
+                               //if there is a current shared file, use it
+                               designfile = m->getDesignFile();
+                               if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
+                               else {  m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
+                       }else { m->setDesignFile(designfile); }
+            
+            //if the user changes the output directory command factory will send this info to us in the output parameter
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){
+                               outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it
                        }
-               
-            vector<string> tempOutNames;
-            outputTypes["summary"] = tempOutNames;
-
-
+            
+            string label = validParameter.validFile(parameters, "label", false);
+                       if (label == "not found") { label = ""; }
+                       else {
+                               if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
+                               else { allLines = 1;  }
+                       }
+            
+            mclass = validParameter.validFile(parameters, "class", false);
+                       if (mclass == "not found") { mclass = ""; }
+            
+            classes = validParameter.validFile(parameters, "classes", false);
+                       if (classes == "not found") { classes = ""; }
+            
                }
-
+               
        }
        catch(exception& e) {
                m->errorOut(e, "KruskalWallisCommand", "KruskalWallisCommand");
@@ -136,128 +171,165 @@ KruskalWallisCommand::KruskalWallisCommand(string option) {
        }
 }
 //**********************************************************************************************************************
+
 int KruskalWallisCommand::execute(){
        try {
+               
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
         
-        InputData* input = new InputData(sharedfile, "sharedfile");
-        vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
-               string lastLabel = lookup[0]->getLabel();
-        
-       
-               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
-               set<string> processedLabels;
-               set<string> userLabels = labels;
-
-        ofstream out;
-        map<string,string> variables;
-        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
-               string outputFileName = getOutputFileName("summary",variables);
-        m->openOutputFile(outputFileName, out);
-        outputNames.push_back(outputFileName);  outputTypes["summary"].push_back(outputFileName);
-        out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
-        out << "H\tpvalue\n";
-        
-        //math goes here
+        DesignMap designMap(designfile);
+        //if user set classes set groups=those classes
+        if (classes != "") {
+            map<string, vector<string> > thisClasses = m->parseClasses(classes);
+            vector<string> groups = designMap.getNamesUnique(thisClasses);
+            if (groups.size() != 0) { m->setGroups(groups); }
+            else { m->mothurOut("[ERROR]: no groups meet your classes requirement, quitting.\n"); return 0; }
+        }
         
-        int N = m->getNumGroups();
-        double H;
-        double tmp = 0.0;
-        vector<groupRank> vec;
-        vector<string> groups = m->getGroups();
-        string group;
-        int count;
-        double sum;
-                
-        //merge all groups into a vector
+        //if user did not select class use first column
+        if (mclass == "") {  mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); }
         
+        InputData input(sharedfile, "sharedfile");
+        vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
+        string lastLabel = lookup[0]->getLabel();
         
+        //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+        set<string> processedLabels;
+        set<string> userLabels = labels;
         
-        //rank function here
-        assignRank(vec);
         
-        //populate counts and ranSums vectors
-        for (int i=0;i<N;i++) {
-            count = 0;
-            sum = 0;
-            group = groups[i];
-            for(int j;j<vec.size();j++) {
-                if (vec[j].group == group) {
-                    count++;
-                    sum = sum + vec[j].rank;
-                }
+        //as long as you are not at the end of the file or done wih the lines you want
+        while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+            
+            if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  return 0; }
+            
+            if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
+                
+                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                
+                process(lookup, designMap);
+                
+                processedLabels.insert(lookup[0]->getLabel());
+                userLabels.erase(lookup[0]->getLabel());
             }
-            counts[i] = count;
-            rankSums[i] = sum;
+            
+            if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                string saveLabel = lookup[0]->getLabel();
+                
+                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+                lookup = input.getSharedRAbundVectors(lastLabel);
+                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                
+                process(lookup, designMap);
+                
+                processedLabels.insert(lookup[0]->getLabel());
+                userLabels.erase(lookup[0]->getLabel());
+                
+                //restore real lastlabel to save below
+                lookup[0]->setLabel(saveLabel);
+            }
+            
+            lastLabel = lookup[0]->getLabel();
+            //prevent memory leak
+            for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
+            
+            if (m->control_pressed) { return 0; }
+            
+            //get next line to process
+            lookup = input.getSharedRAbundVectors();
         }
         
-        //test statistic
-        for (int i=0;i<N;i++) { tmp = tmp + (pow(rankSums[i],2) / counts[i]); }
-        
-        H = (12 / (N*(N+1))) * tmp - (3*(N+1));
-        
-        //ss = tmp - pow(accumulate(rankSums.begin(), rankSums.end(), 0), 2);
+        if (m->control_pressed) {  return 0; }
         
-        //H = ss / ( (N * (N + 1))/12 );
-        
-        //correction for ties?
+        //output error messages about any remaining user labels
+        set<string>::iterator it;
+        bool needToRun = false;
+        for (it = userLabels.begin(); it != userLabels.end(); it++) {
+            m->mothurOut("Your file does not include the label " + *it);
+            if (processedLabels.count(lastLabel) != 1) {
+                m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                needToRun = true;
+            }else {
+                m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+            }
+        }
         
-        //p-value calculation
+        //run last label if you need to
+        if (needToRun == true)  {
+            for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
+            lookup = input.getSharedRAbundVectors(lastLabel);
+            
+            m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+            process(lookup, designMap);
+            
+            for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+        }
         
-               return 0;
-       }
+               
+        //output files created by command
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+        return 0;
+               
+    }
        catch(exception& e) {
                m->errorOut(e, "KruskalWallisCommand", "execute");
                exit(1);
        }
 }
 //**********************************************************************************************************************
-void KruskalWallisCommand::assignRank(vector<groupRank> &vec) {
-    try {
-        double rank = 1;
-        double numRanks, avgRank, j;
-        vector<groupRank>::iterator it, oldit;
-
-        sort (vec.begin(), vec.end(), comparevalue);
-
-        it = vec.begin();
-
-        while ( it != vec.end() ) {
-            j = rank;
-            oldit = it;
-            if (!equalvalue(*it, *(it+1))) {
-                (*it).rank = rank; 
-                rank = rank+1; 
-                it++; }
-            else {
-                while(equalrank(*it, *(it+1))) {
-                    j = j + (j+1);
-                    rank++;
-                    it++;
-                }
-                numRanks = double (distance(oldit, it));
-                avgRank = j / numRanks;
-                while(oldit != it) {
-                    (*oldit).rank = avgRank;
-                    oldit++;
-                }
-            }
 
+int KruskalWallisCommand::process(vector<SharedRAbundVector*>& lookup, DesignMap& designMap) {
+       try {
+        map<string, string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+        variables["[distance]"] = lookup[0]->getLabel();
+               string outputFileName = getOutputFileName("kruskall-wallis",variables);
+        
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               outputNames.push_back(outputFileName); outputTypes["kruskall-wallis"].push_back(outputFileName);
+        out << "OTULabel\tKW\tPvalue\n";
+        
+        int numBins = lookup[0]->getNumBins();
+        //sanity check to make sure each treatment has a group in the shared file
+        set<string> treatments;
+        for (int j = 0; j < lookup.size(); j++) {
+            string group = lookup[j]->getGroup();
+            string treatment = designMap.get(group, mclass); //get value for this group in this category
+            treatments.insert(treatment);
         }
+        if (treatments.size() < 2) { m->mothurOut("[ERROR]: need at least 2 things to classes to compare, quitting.\n"); m->control_pressed = true; }
         
-
+        LinearAlgebra linear;
+        for (int i = 0; i < numBins; i++) {
+            if (m->control_pressed) { break; }
+            
+            vector<spearmanRank> values;
+            for (int j = 0; j < lookup.size(); j++) {
+                string group = lookup[j]->getGroup();
+                string treatment = designMap.get(group, mclass); //get value for this group in this category
+                spearmanRank temp(treatment, lookup[j]->getAbundance(i));
+                values.push_back(temp);
+            }
+            
+            double pValue = 0.0;
+            double H = linear.calcKruskalWallis(values, pValue);
+            
+            //output H and signifigance
+            out << m->currentBinLabels[i] << '\t' << H << '\t' << pValue << endl;
+        }
+        out.close();
+                
+        return 0;
     }
-    catch(exception& e) {
-               m->errorOut(e, "KruskalWallisCommand", "getRank");
+       catch(exception& e) {
+               m->errorOut(e, "KruskalWallisCommand", "process");
                exit(1);
        }
-    
-}
-//**********************************************************************************************************************
-void KruskalWallisCommand::assignValue(vector<groupRank> &vec) {
-    
 }
 //**********************************************************************************************************************
-//**********************************************************************************************************************
-//**********************************************************************************************************************
+
 
index feefebd58fdb5650f41e9642a63e6dc68d011a24..a82d208cf15ea424413fdae4eee7c666868f4b35 100644 (file)
@@ -10,8 +10,7 @@
 
 #include "command.hpp"
 #include "inputdata.h"
-#include "sharedrabundvector.h"
-
+#include "designmap.h"
 
 class KruskalWallisCommand : public Command {
    
@@ -22,11 +21,11 @@ public:
        ~KruskalWallisCommand(){}
        
        vector<string> setParameters();
-       string getCommandName()                 { return "kruskalwallis";                       }
+       string getCommandName()                 { return "kruskal.wallis";                      }
        string getCommandCategory()             { return "Hypothesis Testing";  }
     string getOutputPattern(string);   
        string getHelpString(); 
-       string getCitation() { return "http://www.mothur.org/wiki/kruskalwallis"; }
+       string getCitation() { return "http://www.mothur.org/wiki/Kruskal.wallis"; }
        string getDescription()         { return "Non-parametric method for testing whether samples originate from the same distribution."; }
     
     struct groupRank {
@@ -42,22 +41,12 @@ public:
     
     
 private:
-    string outputDir, sharedfile, groups;
-    bool abort;
+    bool abort, allLines;
+    string outputDir, sharedfile, designfile, mclass, classes;
+    vector<string> outputNames;
     set<string> labels;
-    vector<string> outputNames, Groups;
-    vector<int> counts;
-    vector<double> rankSums;
-    vector<double> rankMeans;
-    
-  
-        
-    static bool comparevalue(const groupRank &a, const groupRank &b) { return a.value < b.value; }
-    static bool equalvalue(const groupRank &a, const groupRank &b) { return a.value == b.value; }
-    static bool comparerank(const groupRank &a, const groupRank &b) { return a.rank < b.rank; }
-    static bool equalrank(const groupRank &a, const groupRank &b) { return a.rank == b.rank; }
-    static bool equalgroup(const groupRank &a, const groupRank &b) { return a.group == b.group; }
     
+    int process(vector<SharedRAbundVector*>&, DesignMap&);    
 };
 
 #endif /* KRUSKALWALLISCOMMAND_H */
index 455019b4e456d7996ee2471eabf28a08f96ac87c..e894d5781346802cc5250f0fb956a9d853b0017e 100644 (file)
@@ -7,3 +7,483 @@
 //
 
 #include "lefsecommand.h"
+#include "linearalgebra.h"
+
+//**********************************************************************************************************************
+vector<string> LefseCommand::setParameters(){
+       try {
+        CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign);
+        CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
+        CommandParameter pclass("class", "String", "", "", "", "", "","",false,false); parameters.push_back(pclass);
+        CommandParameter psubclass("subclass", "String", "", "", "", "", "","",false,false); parameters.push_back(psubclass);
+               CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+               CommandParameter pclasses("classes", "String", "", "", "", "", "","",false,false); parameters.push_back(pclasses);
+        CommandParameter palpha("anova_alpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(palpha);
+        CommandParameter pwalpha("wilcoxon_alpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(pwalpha);
+        //every command must have inputdir and outputdir.  This allows mothur users to redirect input and output files.
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+               
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "LefseCommand", "setParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string LefseCommand::getHelpString(){
+       try {
+               string helpString = "";
+               helpString += "The lefse command allows you to ....\n";
+               helpString += "The lefse command parameters are: shared, design, class, subclass, label, classes, wilcoxon_alpha, anova_alpha.\n";
+               helpString += "The class parameter is used to indicate the which category you would like used for the Kruskal Wallis analysis. If none is provided first category is used.\n";
+        helpString += "The subclass parameter is used to indicate the .....If none is provided second category is used, or if only one category subclass is ignored. \n";
+        helpString += "The classes parameter is used to indicate the classes you would like to use. Clases should be inputted in the following format: classes=label<value1|value2|value3>-label<value1|value2>. For example to include groups from treatment with value early or late and age= young or old.  class=treatment<Early|Late>-age<young|old>.\n";
+        helpString += "The label parameter is used to indicate which distances in the shared file you would like to use. labels are separated by dashes.\n";
+               helpString += "The lefse command should be in the following format: lefse(shared=final.an.shared, design=final.design, class=treatment, subclass=age).\n";
+        return helpString;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "LefseCommand", "getHelpString");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string LefseCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
+        
+        if (type == "summary") {  pattern = "[filename],[distance],lefse_summary"; }
+        else if (type == "kruskall-wallis") {  pattern = "[filename],[distance],kruskall_wallis"; }
+        else if (type == "wilcoxon") {  pattern = "[filename],[distance],wilcoxon"; }
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "LefseCommand", "getOutputPattern");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+LefseCommand::LefseCommand(){
+       try {
+               abort = true; calledHelp = true;
+               setParameters();
+        vector<string> tempOutNames;
+               outputTypes["summary"] = tempOutNames;
+        outputTypes["kruskall-wallis"] = tempOutNames;
+        outputTypes["wilcoxon"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "LefseCommand", "LefseCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+LefseCommand::LefseCommand(string option)  {
+       try {
+               abort = false; calledHelp = false;
+        allLines = 1;
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+               
+               else {
+                       //valid paramters for this command
+                       vector<string> myArray = setParameters();
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string,string>::iterator it;
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) {
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       vector<string> tempOutNames;
+            outputTypes["summary"] = tempOutNames;
+            outputTypes["kruskall-wallis"] = tempOutNames;
+            outputTypes["wilcoxon"] = tempOutNames;
+            
+                       //if the user changes the input directory command factory will send this info to us in the output parameter
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                
+                string path;
+                               it = parameters.find("design");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["desing"] = inputDir + it->second;           }
+                               }
+                               
+                it = parameters.find("shared");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["shared"] = inputDir + it->second;           }
+                               }
+            }
+                    
+            //get shared file, it is required
+                       sharedfile = validParameter.validFile(parameters, "shared", true);
+                       if (sharedfile == "not open") { sharedfile = ""; abort = true; }
+                       else if (sharedfile == "not found") {
+                               //if there is a current shared file, use it
+                               sharedfile = m->getSharedFile();
+                               if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+                               else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
+                       }else { m->setSharedFile(sharedfile); }
+            
+            //get shared file, it is required
+                       designfile = validParameter.validFile(parameters, "design", true);
+                       if (designfile == "not open") { designfile = ""; abort = true; }
+                       else if (designfile == "not found") {
+                               //if there is a current shared file, use it
+                               designfile = m->getDesignFile();
+                               if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
+                               else {  m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
+                       }else { m->setDesignFile(designfile); }
+            
+            //if the user changes the output directory command factory will send this info to us in the output parameter
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){
+                               outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it
+                       }
+            
+            string label = validParameter.validFile(parameters, "label", false);
+                       if (label == "not found") { label = ""; }
+                       else {
+                               if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
+                               else { allLines = 1;  }
+                       }
+            
+            mclass = validParameter.validFile(parameters, "class", false);
+                       if (mclass == "not found") { mclass = ""; }
+                       
+            subclass = validParameter.validFile(parameters, "subclass", false);
+                       if (subclass == "not found") { subclass = ""; }
+            
+            classes = validParameter.validFile(parameters, "classes", false);
+                       if (classes == "not found") { classes = ""; }
+            
+            string temp = validParameter.validFile(parameters, "anova_alpha", false);
+                       if (temp == "not found") { temp = "0.05"; }
+                       m->mothurConvert(temp, anovaAlpha);
+            
+            temp = validParameter.validFile(parameters, "wilcoxon_alpha", false);
+                       if (temp == "not found") { temp = "0.05"; }
+                       m->mothurConvert(temp, wilcoxonAlpha);
+
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "LefseCommand", "LefseCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int LefseCommand::execute(){
+       try {
+               
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+        
+        DesignMap designMap(designfile);
+        //if user set classes set groups=those classes
+        if (classes != "") {
+            map<string, vector<string> > thisClasses = m->parseClasses(classes);
+            vector<string> groups = designMap.getNamesUnique(thisClasses);
+            if (groups.size() != 0) { m->setGroups(groups); }
+            else { m->mothurOut("[ERROR]: no groups meet your classes requirement, quitting.\n"); return 0; }
+        }
+        
+        //if user did not select class use first column
+        if (mclass == "") {  mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); }
+        
+        InputData input(sharedfile, "sharedfile");
+        vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
+        string lastLabel = lookup[0]->getLabel();
+        
+        //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+        set<string> processedLabels;
+        set<string> userLabels = labels;
+        
+        
+        //as long as you are not at the end of the file or done wih the lines you want
+        while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+            
+            if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  return 0; }
+            
+            if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
+                
+                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                
+                process(lookup, designMap);
+                
+                processedLabels.insert(lookup[0]->getLabel());
+                userLabels.erase(lookup[0]->getLabel());
+            }
+            
+            if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                string saveLabel = lookup[0]->getLabel();
+                
+                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+                lookup = input.getSharedRAbundVectors(lastLabel);
+                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                
+                process(lookup, designMap);
+                
+                processedLabels.insert(lookup[0]->getLabel());
+                userLabels.erase(lookup[0]->getLabel());
+                
+                //restore real lastlabel to save below
+                lookup[0]->setLabel(saveLabel);
+            }
+            
+            lastLabel = lookup[0]->getLabel();
+            //prevent memory leak
+            for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
+            
+            if (m->control_pressed) { return 0; }
+            
+            //get next line to process
+            lookup = input.getSharedRAbundVectors();
+        }
+        
+        if (m->control_pressed) {  return 0; }
+        
+        //output error messages about any remaining user labels
+        set<string>::iterator it;
+        bool needToRun = false;
+        for (it = userLabels.begin(); it != userLabels.end(); it++) {
+            m->mothurOut("Your file does not include the label " + *it);
+            if (processedLabels.count(lastLabel) != 1) {
+                m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                needToRun = true;
+            }else {
+                m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+            }
+        }
+        
+        //run last label if you need to
+        if (needToRun == true)  {
+            for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
+            lookup = input.getSharedRAbundVectors(lastLabel);
+            
+            m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+            process(lookup, designMap);
+            
+            for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+        }
+                
+               
+        //output files created by command
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+        return 0;
+               
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LefseCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int LefseCommand::process(vector<SharedRAbundVector*>& lookup, DesignMap& designMap) {
+       try {
+        //run kruskal wallis on each otu
+        vector<int> significantOtuLabels = runKruskalWallis(lookup, designMap);
+        
+        //check for subclass
+        if (subclass != "") {  significantOtuLabels = runWilcoxon(lookup, designMap, significantOtuLabels);  }
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LefseCommand", "process");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+vector<int> LefseCommand::runKruskalWallis(vector<SharedRAbundVector*>& lookup, DesignMap& designMap) {
+       try {
+        map<string, string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+        variables["[distance]"] = lookup[0]->getLabel();
+               string outputFileName = getOutputFileName("kruskall-wallis",variables);
+        
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               outputNames.push_back(outputFileName); outputTypes["kruskall-wallis"].push_back(outputFileName);
+        out << "OTULabel\tKW\tPvalue\n";
+        
+        vector<int> significantOtuLabels;
+        int numBins = lookup[0]->getNumBins();
+        //sanity check to make sure each treatment has a group in the shared file
+        set<string> treatments;
+        for (int j = 0; j < lookup.size(); j++) {
+            string group = lookup[j]->getGroup();
+            string treatment = designMap.get(group, mclass); //get value for this group in this category
+            treatments.insert(treatment);
+        }
+        if (treatments.size() < 2) { m->mothurOut("[ERROR]: need at least 2 things to classes to compare, quitting.\n"); m->control_pressed = true; }
+        
+        LinearAlgebra linear;
+        for (int i = 0; i < numBins; i++) {
+            if (m->control_pressed) { break; }
+            
+            vector<spearmanRank> values;
+            for (int j = 0; j < lookup.size(); j++) {
+                string group = lookup[j]->getGroup();
+                string treatment = designMap.get(group, mclass); //get value for this group in this category
+                spearmanRank temp(treatment, lookup[j]->getAbundance(i));
+                values.push_back(temp);
+            }
+            
+            double pValue = 0.0;
+            double H = linear.calcKruskalWallis(values, pValue);
+            
+            //output H and signifigance
+            out << m->currentBinLabels[i] << '\t' << H << '\t' << pValue << endl;
+            
+            if (pValue < anovaAlpha) {  significantOtuLabels.push_back(i);  }
+        }
+        out.close();
+        
+        return significantOtuLabels;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LefseCommand", "runKruskalWallis");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+vector<int> LefseCommand::runWilcoxon(vector<SharedRAbundVector*>& lookup, DesignMap& designMap, vector<int> bins) {
+    try {
+        vector<int> significantOtuLabels;
+        //if it exists and meets the following requirements run Wilcoxon
+        /*
+         1. Subclass members all belong to same main class
+         2. Number of groups in each subclass is the same
+         3. anything else??
+         
+         */
+        vector<string> subclasses;
+        map<string, string> subclass2Class;
+        map<string, int> subclassCounts;
+        map<string, vector<int> > subClass2GroupIndex; //maps subclass name to vector of indexes in lookup from that subclass. old -> 1,2,3 means groups in location 1,2,3 of lookup are from old.  Saves time below.
+        bool error = false;
+        for (int j = 0; j < lookup.size(); j++) {
+            string group = lookup[j]->getGroup();
+            string treatment = designMap.get(group, mclass); //get value for this group in this category
+            string thisSub = designMap.get(group, subclass);
+            map<string, string>::iterator it = subclass2Class.find(thisSub);
+            if (it == subclass2Class.end()) {
+                subclass2Class[thisSub] = treatment;
+                subclassCounts[thisSub] = 1;
+                vector<int> temp; temp.push_back(j);
+                subClass2GroupIndex[thisSub] = temp;
+            }
+            else {
+                subclassCounts[thisSub]++;
+                subClass2GroupIndex[thisSub].push_back(j);
+                if (it->second != treatment) {
+                    error = true;
+                    m->mothurOut("[ERROR]: subclass " + thisSub + " has members in " + it->second + " and " + treatment + ". Subclass members must be from the same class. Ignoring wilcoxon.\n");
+                }
+            }
+        }
+        
+        if (error) { return significantOtuLabels; }
+        else { //check counts to make sure subclasses are the same size
+            set<int> counts;
+            for (map<string, int>::iterator it = subclassCounts.begin(); it != subclassCounts.end(); it++) { counts.insert(it->second); }
+            if (counts.size() > 1) { m->mothurOut("[ERROR]: subclasses must be the same size. Ignoring wilcoxon.\n");
+                return significantOtuLabels;  }
+        }
+        
+        int numBins = lookup[0]->getNumBins();
+        vector<compGroup> comp;
+        //find comparisons and fill comp
+        map<string, int>::iterator itB;
+        for(map<string, int>::iterator it=subclassCounts.begin();it!=subclassCounts.end();it++){
+            itB = it;itB++;
+            for(itB;itB!=subclassCounts.end();itB++){
+                compGroup temp(it->first,itB->first);
+                comp.push_back(temp);
+            }                  
+        }
+
+        int numComp = comp.size();
+        if (numComp < 2) {  m->mothurOut("[ERROR]: Need at least 2 subclasses, Ignoring Wilcoxon.\n");
+            return significantOtuLabels;  }
+        
+        map<string, string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+        variables["[distance]"] = lookup[0]->getLabel();
+        string outputFileName = getOutputFileName("wilcoxon",variables);
+        
+        ofstream out;
+        m->openOutputFile(outputFileName, out);
+        outputNames.push_back(outputFileName); outputTypes["wilcoxon"].push_back(outputFileName);
+        out << "OTULabel\tComparision\tWilcoxon\tPvalue\n";
+        
+        LinearAlgebra linear;
+        for (int i = 0; i < numBins; i++) {
+            if (m->control_pressed) { break; }
+            
+            if (m->inUsersGroups(i, bins)) { //flagged in Kruskal Wallis
+                
+                bool sig = false;
+                //for each subclass comparision
+                for (int j = 0; j < numComp; j++) {
+                    //fill x and y with this comparisons data
+                    vector<double> x; vector<double> y;
+                    
+                    //fill x and y
+                    vector<int> xIndexes = subClass2GroupIndex[comp[j].group1]; //indexes in lookup for this subclass
+                    for (int k = 0; k < xIndexes.size(); k++) { x.push_back(lookup[xIndexes[k]]->getAbundance(i)); }
+                    
+                    vector<int> yIndexes = subClass2GroupIndex[comp[j].group2]; //indexes in lookup for this subclass
+                    for (int k = 0; k < yIndexes.size(); k++) { y.push_back(lookup[yIndexes[k]]->getAbundance(i)); }
+                    
+                    double pValue = 0.0;
+                    double H = linear.calcWilcoxon(x, y, pValue);
+            
+                    //output H and signifigance
+                    out << m->currentBinLabels[i] << '\t' << comp[j].getCombo() << '\t' << H << '\t' << pValue << endl;
+                    
+                    //set sig - not sure how yet
+                }
+                if (sig) {  significantOtuLabels.push_back(i);  }
+            }
+        }
+        out.close();
+        
+        return significantOtuLabels;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "LefseCommand", "runWilcoxon");
+        exit(1);
+    }
+}
+
+//**********************************************************************************************************************
+
+
index 095ad48da125fbfe83f30aaff41eb62ec18c610f..c9c4092e99f6bb402145f2e3432753ce642c7c6e 100644 (file)
@@ -12,7 +12,7 @@
 #include "command.hpp"
 
 /* 
- Columns = groups, rows are OTUs, class = design??
+ Columns = groups, rows are OTUs, class = design
  
  From http://huttenhower.sph.harvard.edu/galaxy/root?tool_id=lefse_upload
  Input data consist of a collection of m samples (columns) each made up of n numerical features (rows, typically normalized per-sample, red representing high values and green low). These samples are labeled with a class (taking two or more possible values) that represents the main biological hypothesis under investigation; they may also have one or more subclass labels reflecting within-class groupings.
  Step 3: the resulting subset of vectors is used to build a Linear Discriminant Analysis model from which the relative difference among classes is used to rank the features. The final output thus consists of a list of features that are discriminative with respect to the classes, consistent with the subclass grouping within classes, and ranked according to the effect size with which they differentiate classes.
 */
 
+
+#include "command.hpp"
+#include "inputdata.h"
+#include "designmap.h"
+
+/**************************************************************************************************/
+
+class LefseCommand : public Command {
+public:
+    LefseCommand(string);
+    LefseCommand();
+    ~LefseCommand(){}
+    
+    vector<string> setParameters();
+    string getCommandName()                    { return "lefse";                       }
+    string getCommandCategory()                { return "OTU-Based Approaches";                }
+    
+    string getOutputPattern(string);
+       string getHelpString();
+    string getCitation() { return "http://www.mothur.org/wiki/Lefse"; }
+    string getDescription()            { return "brief description"; }
+    
+    int execute();
+    void help() { m->mothurOut(getHelpString()); }
+    
+private:
+    bool abort, allLines;
+    string outputDir, sharedfile, designfile, mclass, subclass, classes;
+    vector<string> outputNames;
+    set<string> labels;
+    float anovaAlpha, wilcoxonAlpha;
+    
+    int process(vector<SharedRAbundVector*>&, DesignMap&);
+    vector<int> runKruskalWallis(vector<SharedRAbundVector*>&, DesignMap&);
+    vector<int> runWilcoxon(vector<SharedRAbundVector*>&, DesignMap&, vector<int>);
+
+};
+
+/**************************************************************************************************/
+
+
+
+
 #endif /* defined(__Mothur__lefsecommand__) */
index e1b9470fdb218d94fd9f5fe9f36ccb23ac31ea8a..e4eaf481b843bd0ef27c41e77bd726b8adac2397 100644 (file)
@@ -805,7 +805,80 @@ double LinearAlgebra::calcSpearman(vector< vector<double> >& euclidDists, vector
                exit(1);
        }
 }
-
+/*********************************************************************************************************************************/
+double LinearAlgebra::calcKruskalWallis(vector<spearmanRank>& values, double& pValue){
+       try {
+        double H;
+        set<string> treatments;
+        
+        //rank values
+        sort(values.begin(), values.end(), compareSpearman);
+        vector<spearmanRank*> ties;
+        int rankTotal = 0;
+        vector<int> TIES;
+        for (int j = 0; j < values.size(); j++) {
+            treatments.insert(values[j].name);
+            rankTotal += (j+1);
+            ties.push_back(&(values[j]));
+            
+            if (j != values.size()-1) { // you are not the last so you can look ahead
+                if (values[j].score != values[j+1].score) { // you are done with ties, rank them and continue
+                    if (ties.size() > 1) { TIES.push_back(ties.size()); }
+                    for (int k = 0; k < ties.size(); k++) {
+                        double thisrank = rankTotal / (double) ties.size();
+                        (*ties[k]).score = thisrank;
+                    }
+                    ties.clear();
+                    rankTotal = 0;
+                }
+            }else { // you are the last one
+                if (ties.size() > 1) { TIES.push_back(ties.size()); }
+                for (int k = 0; k < ties.size(); k++) {
+                    double thisrank = rankTotal / (double) ties.size();
+                    (*ties[k]).score = thisrank;
+                }
+            }
+        }
+        
+        
+        // H = 12/(N*(N+1)) * (sum Ti^2/n) - 3(N+1)
+        map<string, double> sums;
+        map<string, double> counts;
+        for (set<string>::iterator it = treatments.begin(); it != treatments.end(); it++) { sums[*it] = 0.0; counts[*it] = 0; }
+        
+        for (int j = 0; j < values.size(); j++) {
+            sums[values[j].name] += values[j].score;
+            counts[values[j].name]+= 1.0;
+        }
+        
+        double middleTerm = 0.0;
+        for (set<string>::iterator it = treatments.begin(); it != treatments.end(); it++) {
+            middleTerm += ((sums[*it]*sums[*it])/counts[*it]);
+        }
+        
+        double firstTerm = 12 / (double) (values.size()*(values.size()+1));
+        double lastTerm = 3 * (values.size()+1);
+        
+        H = firstTerm * middleTerm - lastTerm;
+        
+        //adjust for ties
+        if (TIES.size() != 0) {
+            double sum = 0.0;
+            for (int j = 0; j < TIES.size(); j++) { sum += ((TIES[j]*TIES[j]*TIES[j])-TIES[j]); }
+            double result = 1.0 - (sum / (double) ((values.size()*values.size()*values.size())-values.size()));
+            H /= result;
+        }
+        
+        //Numerical Recipes pg221
+        pValue = 1.0 - (gammp(((treatments.size()-1)/(double)2.0), H/2.0));
+        
+        return H;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "calcKruskalWallis");
+               exit(1);
+       }
+}
 /*********************************************************************************************************************************/
 //assumes both matrices are square and the same size
 double LinearAlgebra::calcKendall(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
@@ -1228,7 +1301,73 @@ double LinearAlgebra::calcKendallSig(double n, double r){
                exit(1);
        }
 }
+/*********************************************************************************************************************************/
+double LinearAlgebra::calcWilcoxon(vector<double>& x, vector<double>& y, double& sig){
+       try {
+               if (x.size() != y.size()) { m->mothurOut("[ERROR]: vector size mismatch."); m->mothurOutEndLine(); return 0.0; }
+               
+               double W = 0.0;
+        sig = 0.0;
+        
+        vector<double> signPairs;
+        vector<spearmanRank> absV;
+        for (int i = 0; i < x.size(); i++) {
+            if (m->control_pressed) { return W; }
+            double temp = y[i]-x[i];
+            double signV = sign(temp);
+            if (signV != 0) { // exclude zeros
+                spearmanRank member(toString(i), abs(temp));
+                absV.push_back(member);
+                signPairs.push_back(signV);
+            }
+        }
+        
+        //rank absV
+        //sort xscores
+               sort(absV.begin(), absV.end(), compareSpearman);
+               
+               //convert scores to ranks of x
+               vector<spearmanRank*> ties;
+               int rankTotal = 0;
+               for (int j = 0; j < absV.size(); j++) {
+            if (m->control_pressed) { return W; }
+                       rankTotal += (j+1);
+                       ties.push_back(&(absV[j]));
+            
+                       if (j != absV.size()-1) { // you are not the last so you can look ahead
+                               if (absV[j].score != absV[j+1].score) { // you are done with ties, rank them and continue
+                                       for (int k = 0; k < ties.size(); k++) {
+                                               float thisrank = rankTotal / (float) ties.size();
+                                               (*ties[k]).score = thisrank;
+                                       }
+                                       ties.clear();
+                                       rankTotal = 0;
+                               }
+                       }else { // you are the last one
+                               for (int k = 0; k < ties.size(); k++) {
+                                       float thisrank = rankTotal / (float) ties.size();
+                                       (*ties[k]).score = thisrank;
+                               }
+                       }
+               }
 
+        //sum ranks times sign
+        for (int i = 0; i < absV.size(); i++) {
+            if (m->control_pressed) { return W; }
+            W += (absV[i].score*signPairs[i]);
+        }
+        W = abs(W);
+        
+        //find zScore
+        cout << "still need to find sig!!" << endl;
+        
+               return W;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "calcWilcoxon");
+               exit(1);
+       }
+}
 /*********************************************************************************************************************************/
 double LinearAlgebra::calcSpearman(vector<double>& x, vector<double>& y, double& sig){
        try {
index e0933ee52564915d35bcf6e9f91f73c2cea80d2d..5819ea85d8deefba15ca1c45ccf03efd21c9321a 100644 (file)
@@ -29,6 +29,8 @@ public:
        double calcPearson(vector<vector<double> >&, vector<vector<double> >&);
        double calcSpearman(vector<vector<double> >&, vector<vector<double> >&);
        double calcKendall(vector<vector<double> >&, vector<vector<double> >&);
+    double calcKruskalWallis(vector<spearmanRank>&, double&);
+    double calcWilcoxon(vector<double>&, vector<double>&, double&);
        
        double calcPearson(vector<double>&, vector<double>&, double&);
        double calcSpearman(vector<double>&, vector<double>&, double&);
@@ -41,7 +43,7 @@ public:
     vector<double> solveEquations(vector<vector<double> >, vector<double>);
     vector<float> solveEquations(vector<vector<float> >, vector<float>);
     vector<vector<double> > getInverse(vector<vector<double> >);
-
+    
     
 private:
        MothurOut* m;
@@ -50,11 +52,11 @@ private:
     double betacf(const double, const double, const double);
     double betai(const double, const double, const double);
     double gammln(const double);
-    double gammp(const double, const double);
     double gammq(const double, const double);
     double gser(double&, const double, const double, double&);
     double gcf(double&, const double, const double, double&);
     double erfcc(double);
+    double gammp(const double, const double);
     
     double ran0(int&); //for testing 
     double ran1(int&); //for testing
index 9eb4b80a4d890d972c8085255bdb8300a0429c67..f888fbe4928a9526d34e9b8f25200fb32a329f6f 100644 (file)
@@ -1541,6 +1541,7 @@ vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
             }
             m->gobble(in);
             
+            if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", forward = " + forward + ", reverse = " + reverse + ".\n"); }
             
             //check to make sure both are able to be opened
             ifstream in2;
index 8f2857e5e302b0587555a03e6a705082c47f9d45..dd44249281fa90fa6d5c6dbefaa1380e5462a215 100644 (file)
@@ -7,6 +7,7 @@
 //
 
 #include "makelefsecommand.h"
+#include "designmap.h"
 
 //**********************************************************************************************************************
 vector<string> MakeLefseCommand::setParameters(){
@@ -258,19 +259,22 @@ int MakeLefseCommand::runRelabund(map<string, consTax2>& consTax, vector<SharedR
         ofstream out;
         m->openOutputFile(outputFile, out);
 
-        GroupMap* designMap = NULL;
+        DesignMap* designMap = NULL;
         if (designfile != "") {
-            designMap = new GroupMap(designfile);
-            designMap->readDesignMap();
+            designMap = new DesignMap(designfile);
+            vector<string> categories = designMap->getNamesOfCategories();
             
-            out << "treatment\t";
-            for (int i = 0; i < lookup.size(); i++) {
-                string treatment = designMap->getGroup(lookup[i]->getGroup());
-                if (treatment == "not found") {
-                    m->mothurOut("[ERROR]: " + lookup[i]->getGroup() + " is not in your design file, please correct.\n"); 
-                }else { out << treatment << '\t'; }
+            for (int j = 0; j < categories.size(); j++) {
+                out << categories[j] << "\t";
+                for (int i = 0; i < lookup.size(); i++) {
+                    if (m->control_pressed) { out.close(); delete designMap; return 0; }
+                    string value = designMap->get(lookup[i]->getGroup(), categories[j]);
+                    if (value == "not found") {
+                        m->mothurOut("[ERROR]: " + lookup[i]->getGroup() + " is not in your design file, please correct.\n"); m->control_pressed = true;
+                    }else { out << value << '\t'; }
+                }
+                out << endl;
             }
-            out << endl;
         }
         
         out << "group\t";
index dcdd76b21a883ca7a65b2780e9e395212464f925..32f4778a7c1ee19db25eeb4e1eafaf74fabcd617 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -173,6 +173,15 @@ struct seqPriorityNode {
        seqPriorityNode(int n, string s, string nm) : numIdentical(n), seq(s), name(nm) {}
        ~seqPriorityNode() {}
 };
+/************************************************************/
+struct compGroup {
+       string group1;
+       string group2;
+       compGroup() {}
+       compGroup(string s, string nm) : group1(s), group2(nm) {}
+    string getCombo() { return group1+"-"+group2; }
+       ~compGroup() {}
+};
 /***************************************************************/
 struct spearmanRank {
        string name;
@@ -209,7 +218,15 @@ inline bool compareDistLinePairs(distlinePair left, distlinePair right){
 //sorts lowest to highest
 inline bool compareSequenceDistance(seqDist left, seqDist right){
        return (left.dist < right.dist);        
-} 
+}
+//********************************************************************************************************************
+//returns sign of double
+inline double sign(double temp){
+       //find sign
+    if (temp > 0)       { return 1.0;   }
+    else if (temp < 0)  { return -1.0;  }
+    return 0;
+}
 /***********************************************************************/
 
 // snagged from http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.2
index 96c7305b6a1fb50f42f9c2a650d0c854ccbb6f2c..1f1c96b6f203e4f1fa8b1dca2e018c735caa6b9f 100644 (file)
@@ -796,6 +796,39 @@ bool MothurOut::dirCheck(string& dirName){
        }       
     
 }
+//**********************************************************************************************************************
+
+map<string, vector<string> > MothurOut::parseClasses(string classes){
+       try {
+        map<string, vector<string> > parts;
+        
+        //treatment<Early|Late>-age<young|old>
+        vector<string> pieces; splitAtDash(classes, pieces); // -> treatment<Early|Late>, age<young|old>
+        
+        for (int i = 0; i < pieces.size(); i++) {
+            string category = ""; string value = "";
+            bool foundOpen = false;
+            for (int j = 0; j < pieces[i].length(); j++) {
+                if (control_pressed) { return parts; }
+                
+                if (pieces[i][j] == '<')        { foundOpen = true;         }
+                else if (pieces[i][j] == '>')   { j += pieces[i].length();  }
+                else {
+                    if (!foundOpen) { category += pieces[i][j]; }
+                    else { value += pieces[i][j]; }
+                }
+            }
+            vector<string> values; splitAtChar(value, values, '|');
+            parts[category] = values;
+        }
+        
+        return parts;
+    }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "parseClasses");
+               exit(1);
+       }
+}
 /***********************************************************************/
 
 string MothurOut::hasPath(string longName){
index efb3823aa74fb162bc1e1d0090d3af958f89f777..77b1b6d4ec1efb758973d332aa8e4df00619f58b 100644 (file)
@@ -153,6 +153,7 @@ class MothurOut {
         string makeList(vector<string>&);
         bool isSubset(vector<string>, vector<string>); //bigSet, subset
         int checkName(string&);
+        map<string, vector<string> > parseClasses(string);
                
                //math operation
                int factorial(int num);
index 128c5671ed55712d2293e0b3c3959da17a7e9f05..41c8e743122399563b8367fda6a9d4a94f9bda86 100644 (file)
@@ -85,9 +85,14 @@ int SetLogFileCommand::execute(){
                
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
-               commandFactory = CommandFactory::getInstance();
-               
-               commandFactory->setLogfileName(name, append);
+        commandFactory = CommandFactory::getInstance();
+        
+        string directory = m->hasPath(name);
+        if (directory == "") {
+            commandFactory->setLogfileName(name, append);
+        }else if (m->dirCheck(directory)) {
+             commandFactory->setLogfileName(name, append);
+        }
                
                return 0;
        }
index 504a8b59e35b2e99567e7085ccd964b43d8d8361..ddc1053f50d3eba28064907e4c6f0a60a958b3ee 100644 (file)
@@ -855,6 +855,8 @@ int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& grou
                 break;
             }
             
+            if (rawSeq.length() < (foligo.length() + roligo.length())) {  success = pdiffs + 10; break; }
+            
             if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
                 group = it->first;
                 string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
@@ -1078,6 +1080,8 @@ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& grou
                 break;
             }
             
+            if (rawSeq.length() < (foligo.length() + roligo.length())) {  success = pdiffs + 10; break; }
+            
             if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
                 group = it->first;
                 if (!keepForward) {