A774104814696F320098E6AC /* myseqdist.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A774104614696F320098E6AC /* myseqdist.cpp */; };
A77410F614697C300098E6AC /* seqnoise.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77410F414697C300098E6AC /* seqnoise.cpp */; };
A778FE6B134CA6CA00C0BA33 /* getcommandinfocommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */; };
+ A77916E8176F7F7600EEFE18 /* designmap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77916E6176F7F7600EEFE18 /* designmap.cpp */; };
A77A221F139001B600B0BE70 /* deuniquetreecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */; };
A77B7185173D2240002163C2 /* sparcccommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77B7184173D2240002163C2 /* sparcccommand.cpp */; };
A77B7188173D4042002163C2 /* randomnumber.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77B7186173D4041002163C2 /* randomnumber.cpp */; };
A77410F514697C300098E6AC /* seqnoise.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = seqnoise.h; sourceTree = "<group>"; };
A778FE69134CA6CA00C0BA33 /* getcommandinfocommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getcommandinfocommand.h; sourceTree = "<group>"; };
A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getcommandinfocommand.cpp; sourceTree = "<group>"; };
+ A77916E6176F7F7600EEFE18 /* designmap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = designmap.cpp; sourceTree = "<group>"; };
+ A77916E7176F7F7600EEFE18 /* designmap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = designmap.h; sourceTree = "<group>"; };
A77A221D139001B600B0BE70 /* deuniquetreecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deuniquetreecommand.h; sourceTree = "<group>"; };
A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deuniquetreecommand.cpp; sourceTree = "<group>"; };
A77B7183173D222F002163C2 /* sparcccommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = sparcccommand.h; sourceTree = "<group>"; };
A7E9B6BD12D37EC400DA6239 /* database.cpp */,
A7E9B6BE12D37EC400DA6239 /* database.hpp */,
A7E9B6BF12D37EC400DA6239 /* datavector.hpp */,
+ A77916E7176F7F7600EEFE18 /* designmap.h */,
+ A77916E6176F7F7600EEFE18 /* designmap.cpp */,
A7E9B6CE12D37EC400DA6239 /* distancedb.hpp */,
A7E9B6CD12D37EC400DA6239 /* distancedb.cpp */,
A7E9B6DE12D37EC400DA6239 /* fastamap.cpp */,
A7CFA4311755401800D9ED4D /* renameseqscommand.cpp in Sources */,
A741744C175CD9B1007DF49B /* makelefsecommand.cpp in Sources */,
A7190B221768E0DF00A9AFA6 /* lefsecommand.cpp in Sources */,
+ A77916E8176F7F7600EEFE18 /* designmap.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
#include "makelookupcommand.h"
#include "renameseqscommand.h"
#include "makelefsecommand.h"
+#include "lefsecommand.h"
+#include "kruskalwalliscommand.h"
/*******************************************************/
commands["make.lookup"] = "make.lookup";
commands["rename.seqs"] = "rename.seqs";
commands["make.lefse"] = "make.lefse";
+ commands["lefse"] = "lefse";
+ commands["kruskal.wallis"] = "kruskal.wallis";
}
else if(commandName == "make.lookup") { command = new MakeLookupCommand(optionString); }
else if(commandName == "rename.seqs") { command = new RenameSeqsCommand(optionString); }
else if(commandName == "make.lefse") { command = new MakeLefseCommand(optionString); }
+ else if(commandName == "lefse") { command = new LefseCommand(optionString); }
+ else if(commandName == "kruskal.wallis") { command = new KruskalWallisCommand(optionString); }
else { command = new NoCommand(optionString); }
return command;
else if(commandName == "make.lookup") { pipecommand = new MakeLookupCommand(optionString); }
else if(commandName == "rename.seqs") { pipecommand = new RenameSeqsCommand(optionString); }
else if(commandName == "make.lefse") { pipecommand = new MakeLefseCommand(optionString); }
+ else if(commandName == "lefse") { pipecommand = new LefseCommand(optionString); }
+ else if(commandName == "kruskal.wallis") { pipecommand = new KruskalWallisCommand(optionString); }
else { pipecommand = new NoCommand(optionString); }
return pipecommand;
else if(commandName == "make.lookup") { shellcommand = new MakeLookupCommand(); }
else if(commandName == "rename.seqs") { shellcommand = new RenameSeqsCommand(); }
else if(commandName == "make.lefse") { shellcommand = new MakeLefseCommand(); }
+ else if(commandName == "lefse") { shellcommand = new LefseCommand(); }
+ else if(commandName == "kruskal.wallis") { shellcommand = new KruskalWallisCommand(); }
else { shellcommand = new NoCommand(); }
return shellcommand;
--- /dev/null
+//
+// designmap.cpp
+// Mothur
+//
+// Created by SarahsWork on 6/17/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "designmap.h"
+
+/************************************************************/
+DesignMap::DesignMap(string file) {
+ try {
+ m = MothurOut::getInstance();
+ defaultClass = "not found";
+ read(file);
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "DesignMap");
+ exit(1);
+ }
+}
+/************************************************************/
+int DesignMap::read(string file) {
+ try {
+ ifstream in;
+ m->openInputFile(file, in);
+
+ string headers = m->getline(in); m->gobble(in);
+ vector<string> columnHeaders = m->splitWhiteSpace(headers);
+
+ namesOfCategories.clear();
+ indexCategoryMap.clear();
+ indexNameMap.clear();
+ designMap.clear();
+ map<int, string> originalGroupIndexes;
+ for (int i = 1; i < columnHeaders.size(); i++) { namesOfCategories.push_back(columnHeaders[i]); originalGroupIndexes[i-1] = columnHeaders[i]; }
+ if (columnHeaders.size() > 1) { defaultClass = columnHeaders[1]; }
+
+ //sort groups to keep consistent with how we store the groups in groupmap
+ sort(namesOfCategories.begin(), namesOfCategories.end());
+ for (int i = 0; i < namesOfCategories.size(); i++) { indexCategoryMap[namesOfCategories[i]] = i; }
+ int numCategories = namesOfCategories.size();
+
+ bool error = false;
+ string group;
+ totalCategories.resize(numCategories);
+ int count = 0;
+ while (!in.eof()) {
+
+ if (m->control_pressed) { break; }
+
+ in >> group; m->gobble(in);
+ if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + "\n"); }
+
+ //if group info, then read it
+ vector<string> categoryValues; categoryValues.resize(numCategories, "not found");
+ for (int i = 0; i < numCategories; i++) {
+ int thisIndex = indexCategoryMap[originalGroupIndexes[i]]; //find index of this category because we sort the values.
+ string temp = "not found";
+ in >> temp; categoryValues[thisIndex] = temp; m->gobble(in);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: value = " + temp + "\n"); }
+
+ //do we have this value for this category already
+ map<string, int>::iterator it = totalCategories[thisIndex].find(temp);
+ if (it == totalCategories[thisIndex].end()) { totalCategories[thisIndex][temp] = 1; }
+ else { totalCategories[thisIndex][temp]++; }
+ }
+
+
+ map<string, int>::iterator it = indexNameMap.find(group);
+ if (it == indexNameMap.end()) {
+ indexNameMap[group] = count;
+ designMap.push_back(categoryValues);
+ count++;
+ }else {
+ error = true;
+ m->mothurOut("[ERROR]: Your design file contains more than 1 group named " + group + ", group names must be unique. Please correct."); m->mothurOutEndLine();
+ }
+ }
+ in.close();
+
+ //sanity check
+ for (int i = 0; i < totalCategories.size(); i++) {
+ map<string, int>::iterator it = totalCategories[i].find(namesOfCategories[i]);
+ if (it != totalCategories[i].end()) { //we may have an old style design file since category name matches a value
+ m->mothurOut("\n[WARNING]: Your design file has a category and value for that category named " + namesOfCategories[i] + ". Perhaps you are using an old style design file without headers? If so, please correct."); m->mothurOutEndLine();
+ }
+ }
+
+ if (error) { m->control_pressed = true; }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "read");
+ exit(1);
+ }
+}
+/************************************************************/
+////groupName, returns first categories value.
+string DesignMap::get(string groupName) {
+ try {
+ string value = "not found";
+
+ map<string, int>::iterator it2 = indexNameMap.find(groupName);
+ if (it2 == indexNameMap.end()) {
+ m->mothurOut("[ERROR]: group " + groupName + " is not in your design file. Please correct.\n"); m->control_pressed = true;
+ }else {
+ return designMap[it2->second][0];
+ }
+
+ return value;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "get");
+ exit(1);
+ }
+}
+/************************************************************/
+////categoryName, returns category values.
+vector<string> DesignMap::getValues(string catName) {
+ try {
+ vector<string> values;
+
+ map<string, int>::iterator it2 = indexCategoryMap.find(catName);
+ if (it2 == indexCategoryMap.end()) {
+ m->mothurOut("[ERROR]: category " + catName + " is not in your design file. Please correct.\n"); m->control_pressed = true;
+ }else {
+ for (map<string, int>::iterator it = totalCategories[it2->second].begin(); it != totalCategories[it2->second].end(); it++) {
+ values.push_back(it->first);
+ }
+ }
+
+ return values;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "getValues");
+ exit(1);
+ }
+}
+
+/************************************************************/
+////groupName, category returns value. example F000132, sex -> male
+string DesignMap::get(string groupName, string categoryName) {
+ try {
+ string value = "not found";
+ map<string, int>::iterator it = indexCategoryMap.find(categoryName);
+ if (it == indexCategoryMap.end()) {
+ m->mothurOut("[ERROR]: category " + categoryName + " is not in your design file. Please correct.\n"); m->control_pressed = true;
+ }else {
+ map<string, int>::iterator it2 = indexNameMap.find(groupName);
+ if (it2 == indexNameMap.end()) {
+ m->mothurOut("[ERROR]: group " + groupName + " is not in your design file. Please correct.\n"); m->control_pressed = true;
+ }else {
+ return designMap[it2->second][it->second];
+ }
+ }
+ return value;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "get");
+ exit(1);
+ }
+}
+/************************************************************/
+//add group, assumes order is correct
+int DesignMap::push_back(string group, vector<string> values) {
+ try {
+ map<string, int>::iterator it = indexNameMap.find(group);
+ if (it == indexNameMap.end()) {
+ if (values.size() != getNumCategories()) { m->mothurOut("[ERROR]: Your design file has a " + toString(getNumCategories()) + " categories and " + group + " has " + toString(values.size()) + ", please correct."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
+
+ for (int i = 0; i < values.size(); i++) {
+ //do we have this value for this category already
+ map<string, int>::iterator it = totalCategories[i].find(values[i]);
+ if (it == totalCategories[i].end()) { totalCategories[i][values[i]] = 1; }
+ else { totalCategories[i][values[i]]++; }
+ }
+ int count = indexNameMap.size();
+ indexNameMap[group] = count;
+ designMap.push_back(values);
+ }else {
+ m->mothurOut("[ERROR]: Your design file contains more than 1 group named " + group + ", group names must be unique. Please correct."); m->mothurOutEndLine(); m->control_pressed = true;
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "push_back");
+ exit(1);
+ }
+}
+/************************************************************/
+//set values for group, does not need to set all values. assumes group is in table already
+int DesignMap::set(string group, map<string, string> values) {
+ try {
+ map<string, int>::iterator it = indexNameMap.find(group);
+ if (it != indexNameMap.end()) {
+ for (map<string, string>::iterator it2 = values.begin(); it2 != values.end(); it2++) {
+
+ map<string, int>::iterator it3 = indexCategoryMap.find(it2->first); //do we have this category
+ if (it3 == indexCategoryMap.end()) {
+ m->mothurOut("[ERROR]: Your design file does not contain a category called " + it2->first + ". Please correct."); m->mothurOutEndLine(); m->control_pressed = true;
+ }else {
+ string oldCategory = designMap[it->second][it3->second];
+ //adjust totals for old category
+ int oldCount = totalCategories[it3->second][oldCategory];
+ if (oldCount == 1) { totalCategories[it3->second].erase(oldCategory); }
+ else { totalCategories[it3->second][oldCategory]--; }
+
+ designMap[it->second][it3->second] = it2->second; //reset value
+
+ //adjust totals for new category
+ map<string, int>::iterator it4 = totalCategories[it3->second].find(it2->second);
+ if (it4 == totalCategories[it3->second].end()) { totalCategories[it3->second][it2->second] = 1; }
+ else { totalCategories[it3->second][it2->second]++; }
+ }
+ }
+ }else {
+ m->mothurOut("[ERROR]: Your design file does not contain a group named " + group + ". Please correct."); m->mothurOutEndLine(); m->control_pressed = true;
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "set");
+ exit(1);
+ }
+}
+/************************************************************/
+//get number of groups belonging to a category or set of categories, with value or a set of values. Must have all categories and values. Example:
+// map<treatment - > early, late>, <sex -> male> would return 1. Only one group is male and from early or late.
+int DesignMap::getNumUnique(map<string, vector<string> > selected) {
+ try {
+ int num = 0;
+
+ //get indexes of categories
+ vector<int> indexes;
+ for (map<string, vector<string> >::iterator it = selected.begin(); it != selected.end(); it++) {
+ map<string, int>::iterator it2 = indexCategoryMap.find(it->first);
+ if (it2 == indexCategoryMap.end()) {
+ m->mothurOut("[ERROR]: Your design file does not contain a category named " + it->first + ". Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return 0;
+ }else { indexes.push_back(it2->second); }
+ }
+
+ for (int i = 0; i < designMap.size(); i++) {
+ bool hasAll = true; //innocent til proven guilty
+ int count = 0;
+ for (map<string, vector<string> >::iterator it = selected.begin(); it != selected.end(); it++) { //loop through each
+ //check category to see if this group meets the requirements
+ if (!m->inUsersGroups(designMap[i][indexes[count]], it->second)) { hasAll = false; it = selected.end(); }
+ count++;
+ }
+ if (hasAll) { num++; }
+ }
+
+ return num;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "getNumUnique");
+ exit(1);
+ }
+}
+/************************************************************/
+//get number of groups belonging to a category or set of categories, with value or a set of values. Must have at least one categories and values. Example:
+// map<treatment - > early, late>, <sex -> male> would return 3. All three group have are either male or from early or late.
+int DesignMap::getNumShared(map<string, vector<string> > selected) {
+ try {
+ int num = 0;
+
+ //get indexes of categories
+ vector<int> indexes;
+ for (map<string, vector<string> >::iterator it = selected.begin(); it != selected.end(); it++) {
+ map<string, int>::iterator it2 = indexCategoryMap.find(it->first);
+ if (it2 == indexCategoryMap.end()) {
+ m->mothurOut("[ERROR]: Your design file does not contain a category named " + it->first + ". Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return 0;
+ }else { indexes.push_back(it2->second); }
+ }
+
+ for (int i = 0; i < designMap.size(); i++) {
+ bool hasAny = false; //guilty til proven innocent
+ int count = 0;
+ for (map<string, vector<string> >::iterator it = selected.begin(); it != selected.end(); it++) { //loop through each
+ //check category to see if this group meets the requirements
+ if (m->inUsersGroups(designMap[i][indexes[count]], it->second)) { hasAny = true; it = selected.end(); }
+ count++;
+ }
+ if (hasAny) { num++; }
+ }
+
+
+ return num;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "getNumShared");
+ exit(1);
+ }
+}
+
+/************************************************************/
+//get names of groups belonging to a category or set of categories, with value or a set of values. Must have all categories and values. Example:
+// map<treatment - > early, late>, <sex -> male> would return F000132. F000132 is the only group which is male and from early or late.
+vector<string> DesignMap::getNamesUnique(map<string, vector<string> > selected) {
+ try {
+ vector<string> names;
+
+ //get indexes of categories
+ vector<int> indexes;
+ for (map<string, vector<string> >::iterator it = selected.begin(); it != selected.end(); it++) {
+ map<string, int>::iterator it2 = indexCategoryMap.find(it->first);
+ if (it2 == indexCategoryMap.end()) {
+ m->mothurOut("[ERROR]: Your design file does not contain a category named " + it->first + ". Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return names;
+ }else { indexes.push_back(it2->second); }
+ }
+
+ //map int to name
+ map<int, string> reverse;
+ for (map<string, int>::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) {
+ reverse[it->second] = it->first;
+ }
+
+ for (int i = 0; i < designMap.size(); i++) {
+ bool hasAll = true; //innocent til proven guilty
+ int count = 0;
+ for (map<string, vector<string> >::iterator it = selected.begin(); it != selected.end(); it++) { //loop through each
+ //check category to see if this group meets the requirements
+ if (!m->inUsersGroups(designMap[i][indexes[count]], it->second)) { hasAll = false; it = selected.end(); }
+ count++;
+ }
+ if (hasAll) {
+ map<int, string>::iterator it = reverse.find(i);
+ if (it == reverse.end()) {
+ m->mothurOut("[ERROR]: should never get here, oops. Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return names;
+ }else { names.push_back(it->second); }
+ }
+ }
+
+
+ return names;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "getNamesUnique");
+ exit(1);
+ }
+}
+/************************************************************/
+//get names of groups belonging to a category or set of categories, with value or a set of values. Must have all categories and values. Example:
+// map<treatment - > early, late>, <sex -> male> would return F000132. F000132 is the only group which is male and from early or late.
+vector<string> DesignMap::getNamesShared(map<string, vector<string> > selected) {
+ try {
+ vector<string> names;
+
+ //get indexes of categories
+ vector<int> indexes;
+ for (map<string, vector<string> >::iterator it = selected.begin(); it != selected.end(); it++) {
+ map<string, int>::iterator it2 = indexCategoryMap.find(it->first);
+ if (it2 == indexCategoryMap.end()) {
+ m->mothurOut("[ERROR]: Your design file does not contain a category named " + it->first + ". Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return names;
+ }else { indexes.push_back(it2->second); }
+ }
+
+ //map int to name
+ map<int, string> reverse;
+ for (map<string, int>::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) {
+ reverse[it->second] = it->first;
+ }
+
+ for (int i = 0; i < designMap.size(); i++) {
+ bool hasAny = false;
+ int count = 0;
+ for (map<string, vector<string> >::iterator it = selected.begin(); it != selected.end(); it++) { //loop through each
+ //check category to see if this group meets the requirements
+ if (m->inUsersGroups(designMap[i][indexes[count]], it->second)) { hasAny = true; it = selected.end(); }
+ count++;
+ }
+ if (hasAny) {
+ map<int, string>::iterator it = reverse.find(i);
+ if (it == reverse.end()) {
+ m->mothurOut("[ERROR]: should never get here, oops. Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return names;
+ }else { names.push_back(it->second); }
+ }
+ }
+
+
+ return names;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "getNamesShared");
+ exit(1);
+ }
+}
+
+/************************************************************/
+//get names of groups belonging to a category or set of categories, with value or a set of values. Must have at least one categories and values. Example:
+// map<treatment - > early, late>, <sex -> male> would return F000132, F000142, F000138. All three group have are either male or from early or late.
+
+vector<string> DesignMap::getNames(string category, string value) {
+ try {
+ vector<string> names;
+
+ map<string, int>::iterator it = indexCategoryMap.find(category);
+ if (it == indexCategoryMap.end()) {
+ m->mothurOut("[ERROR]: category " + category + " is not in your design file. Please correct.\n"); m->control_pressed = true;
+ }else {
+ int column = it->second;
+
+ //map int to name
+ map<int, string> reverse;
+ for (map<string, int>::iterator it2 = indexNameMap.begin(); it2 != indexNameMap.end(); it2++) {
+ reverse[it2->second] = it2->first;
+ }
+
+ for (int i = 0; i < designMap.size(); i++) {
+ if (designMap[i][column] == value) {
+ map<int, string>::iterator it2 = reverse.find(i);
+ if (it2 == reverse.end()) {
+ m->mothurOut("[ERROR]: should never get here, oops. Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return names;
+ }else { names.push_back(it2->second); }
+ }
+ }
+ }
+ return names;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "getNames");
+ exit(1);
+ }
+}
+
+/************************************************************/
+int DesignMap::print(ofstream& out) {
+ try {
+
+ out << "group\t";
+ for (int i = 0; i < namesOfCategories.size(); i++) { out << namesOfCategories[i] << '\t'; }
+ out << endl;
+
+ map<int, string> reverse; //use this to preserve order
+ for (map<string, int>::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first; }
+
+ for (int i = 0; i < designMap.size(); i++) {
+ map<int, string>::iterator itR = reverse.find(i);
+
+ if (itR != reverse.end()) { //will equal end if seqs were removed because remove just removes from indexNameMap
+ out << itR->second << '\t';
+
+ for (int j = 0; j < namesOfCategories.size(); j++) {
+ out << designMap[i][j] << '\t';
+ }
+ out << endl;
+ }
+ }
+ out.close();
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "print");
+ exit(1);
+ }
+}
+/************************************************************/
+//print specific categories
+int DesignMap::print(ofstream& out, vector<string> cats) {
+ try {
+
+ out << "group\t";
+ for (int i = 0; i < namesOfCategories.size(); i++) { if (m->inUsersGroups(namesOfCategories[i], cats)) { out << namesOfCategories[i] << '\t'; } }
+ out << endl;
+
+ map<int, string> reverse; //use this to preserve order
+ for (map<string, int>::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first; }
+
+ for (int i = 0; i < designMap.size(); i++) {
+ map<int, string>::iterator itR = reverse.find(i);
+
+ if (itR != reverse.end()) { //will equal end if seqs were removed because remove just removes from indexNameMap
+ out << itR->second << '\t';
+
+ for (int j = 0; j < namesOfCategories.size(); j++) {
+ if (m->inUsersGroups(namesOfCategories[i], cats)) {
+ out << designMap[i][j] << '\t';
+ }
+ }
+ out << endl;
+ }
+ }
+ out.close();
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DesignMap", "print");
+ exit(1);
+ }
+}
+
+/************************************************************/
+
--- /dev/null
+//
+// designmap.h
+// Mothur
+//
+// Created by SarahsWork on 6/17/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef __Mothur__designmap__
+#define __Mothur__designmap__
+
+#include "mothurout.h"
+
+/* This class is a representation of the design file.
+
+ group treatment sex age
+ F000142 Early female young
+ F000132 Late male old
+ F000138 Mid male old
+
+
+ */
+
+class DesignMap {
+public:
+ DesignMap() { m = MothurOut::getInstance(); defaultClass = "not found"; }
+ DesignMap(string);
+ ~DesignMap() {}
+
+ int read(string);
+ string get(string, string); //groupName, category returns value. example F000132, sex -> male
+ string get(string); //groupName, returns first categories value. example F000132, -> late
+ vector<string> getValues(string); //categoryName, returns values. example treatment, -> early,late,mid
+ int set(string, map<string, string>); //groupName, map<category, value>
+ int push_back(string, vector<string>); //groupName, vector<value> - assumes you put values in order of getNamesOfCategories
+ vector<string> getNamesOfCategories() {
+ sort(namesOfCategories.begin(), namesOfCategories.end());
+ return namesOfCategories;
+ }
+ int getNumCategories() { return namesOfCategories.size(); }
+ int getNum() { return designMap.size(); }
+ int getNumUnique(map<string, vector<string> >); //get number of groups belonging to a category or set of categories, with value or a set of values. Must have all categories and values. Example:
+ // map<treatment - > early, late>, <sex -> male> would return 1. Only one group is male and from early or late.
+ int getNumShared(map<string, vector<string> >); //get number of groups belonging to a category or set of categories, with value or a set of values. Must have at least one categories and values. Example:
+ // map<treatment - > early, late>, <sex -> male> would return 3. All three group have are either male or from early or late.
+
+ vector<string> getNames();
+ vector<string> getNames(string, string); //get names group from category and value.
+ vector<string> getNamesUnique(map<string, vector<string> >); //get names of groups belonging to a category or set of categories, with value or a set of values. Must have all categories and values. Example:
+ // map<treatment - > early, late>, <sex -> male> would return F000132. F000132 is the only group which is male and from early or late.
+ vector<string> getNamesShared(map<string, vector<string> >); //get names of groups belonging to a category or set of categories, with value or a set of values. Must have at least one categories and values. Example:
+ // map<treatment - > early, late>, <sex -> male> would return F000132, F000142, F000138. All three group have are either male or from early or late.
+
+ int print(ofstream&);
+ int print(ofstream&, vector<string>); //print certain categories
+
+ string getDefaultClass() { return defaultClass; }
+
+private:
+ string defaultClass;
+ vector<string> namesOfCategories;
+ MothurOut* m;
+ vector< vector<string> > designMap;
+ vector< map<string, int> > totalCategories; //for each category, total groups assigned to it. vector[0] early -> 1, vector[1] male -> 2
+ map<string, int> indexNameMap;
+ map<string, int> indexCategoryMap;
+};
+
+
+#endif /* defined(__Mothur__designmap__) */
*/
#include "kruskalwalliscommand.h"
+#include "linearalgebra.h"
//**********************************************************************************************************************
-vector<string> KruskalWallisCommand::setParameters(){
+vector<string> KruskalWallisCommand::setParameters(){
try {
+ CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign);
+ CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
+ CommandParameter pclass("class", "String", "", "", "", "", "","",false,false); parameters.push_back(pclass);
+ CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+ CommandParameter pclasses("classes", "String", "", "", "", "", "","",false,false); parameters.push_back(pclasses);
+ //every command must have inputdir and outputdir. This allows mothur users to redirect input and output files.
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
- CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false,true); parameters.push_back(pgroups);
- CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
vector<string> myArray;
for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
}
}
//**********************************************************************************************************************
-string KruskalWallisCommand::getHelpString(){
+string KruskalWallisCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The kruskalwallis command parameter options are \n";
- helpString += "Kruskal–Wallis one-way analysis of variance is a non-parametric method for testing whether samples originate from the same distribution.";
- return helpString;
+ helpString += "The kruskal.wallis command allows you to ....\n";
+ helpString += "The kruskal.wallis command parameters are: shared, design, class, label and classes.\n";
+ helpString += "The class parameter is used to indicate the which category you would like used for the Kruskal Wallis analysis. If none is provided first category is used.\n";
+ helpString += "The classes parameter is used to indicate the classes you would like to use. Clases should be inputted in the following format: classes=label<value1|value2|value3>-label<value1|value2>. For example to include groups from treatment with value early or late and age= young or old. class=treatment<Early|Late>-age<young|old>.\n";
+ helpString += "The label parameter is used to indicate which distances in the shared file you would like to use. labels are separated by dashes.\n";
+ helpString += "The kruskal.wallis command should be in the following format: kruskal.wallis(shared=final.an.shared, design=final.design, class=treatment).\n";
+ return helpString;
}
catch(exception& e) {
m->errorOut(e, "KruskalWallisCommand", "getHelpString");
try {
string pattern = "";
- if (type == "summary") { pattern = "[filename],cooccurence.summary"; }
+ if (type == "kruskall-wallis") { pattern = "[filename],[distance],kruskall_wallis"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
return pattern;
}
}
//**********************************************************************************************************************
-KruskalWallisCommand::KruskalWallisCommand(){
+KruskalWallisCommand::KruskalWallisCommand(){
try {
- abort = true; calledHelp = true;
+ abort = true; calledHelp = true;
setParameters();
vector<string> tempOutNames;
- outputTypes["summary"] = tempOutNames;
-
+ outputTypes["kruskall-wallis"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "KruskalWallisCommand", "KruskalWallisCommand");
}
}
//**********************************************************************************************************************
-KruskalWallisCommand::KruskalWallisCommand(string option) {
+KruskalWallisCommand::KruskalWallisCommand(string option) {
try {
- abort = false; calledHelp = false;
-
+ abort = false; calledHelp = false;
+ allLines = 1;
+
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true; }
else if(option == "citation") { citation(); abort = true; calledHelp = true;}
else {
+ //valid paramters for this command
vector<string> myArray = setParameters();
OptionParser parser(option);
map<string,string> parameters = parser.getParameters();
- map<string,string>::iterator it;
ValidParameters validParameter;
-
+ map<string,string>::iterator it;
//check to make sure all parameters are valid for command
- for (it = parameters.begin(); it != parameters.end(); it++) {
+ for (it = parameters.begin(); it != parameters.end(); it++) {
if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
}
+
+ vector<string> tempOutNames;
+ outputTypes["kruskall-wallis"] = tempOutNames;
- //get shared file
- sharedfile = validParameter.validFile(parameters, "shared", true);
- if (sharedfile == "not open") { sharedfile = ""; abort = true; }
- else if (sharedfile == "not found") {
- //if there is a current shared file, use it
- sharedfile = m->getSharedFile();
- if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
- else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
- }else { m->setSharedFile(sharedfile); }
-
- //if the user changes the output directory command factory will send this info to us in the output parameter
- outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(sharedfile); }
-
- groups = validParameter.validFile(parameters, "groups", false);
- if (groups == "not found") { groups = ""; }
- else {
- m->splitAtDash(groups, Groups);
- }
- m->setGroups(Groups);
-
- //if the user changes the input directory command factory will send this info to us in the output parameter
- string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ //if the user changes the input directory command factory will send this info to us in the output parameter
+ string inputDir = validParameter.validFile(parameters, "inputdir", false);
if (inputDir == "not found"){ inputDir = ""; }
else {
- string path;
- it = parameters.find("shared");
+
+ string path;
+ it = parameters.find("design");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["desing"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("shared");
//user has given a template file
- if(it != parameters.end()){
+ if(it != parameters.end()){
path = m->hasPath(it->second);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["shared"] = inputDir + it->second; }
}
+ }
+
+ //get shared file, it is required
+ sharedfile = validParameter.validFile(parameters, "shared", true);
+ if (sharedfile == "not open") { sharedfile = ""; abort = true; }
+ else if (sharedfile == "not found") {
+ //if there is a current shared file, use it
+ sharedfile = m->getSharedFile();
+ if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+ else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
+ }else { m->setSharedFile(sharedfile); }
+
+ //get shared file, it is required
+ designfile = validParameter.validFile(parameters, "design", true);
+ if (designfile == "not open") { designfile = ""; abort = true; }
+ else if (designfile == "not found") {
+ //if there is a current shared file, use it
+ designfile = m->getDesignFile();
+ if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
+ else { m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
+ }else { m->setDesignFile(designfile); }
+
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
+ outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it
}
-
- vector<string> tempOutNames;
- outputTypes["summary"] = tempOutNames;
-
-
+
+ string label = validParameter.validFile(parameters, "label", false);
+ if (label == "not found") { label = ""; }
+ else {
+ if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
+ else { allLines = 1; }
+ }
+
+ mclass = validParameter.validFile(parameters, "class", false);
+ if (mclass == "not found") { mclass = ""; }
+
+ classes = validParameter.validFile(parameters, "classes", false);
+ if (classes == "not found") { classes = ""; }
+
}
-
+
}
catch(exception& e) {
m->errorOut(e, "KruskalWallisCommand", "KruskalWallisCommand");
}
}
//**********************************************************************************************************************
+
int KruskalWallisCommand::execute(){
try {
+
if (abort == true) { if (calledHelp) { return 0; } return 2; }
- InputData* input = new InputData(sharedfile, "sharedfile");
- vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
- string lastLabel = lookup[0]->getLabel();
-
-
- //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
- set<string> processedLabels;
- set<string> userLabels = labels;
-
- ofstream out;
- map<string,string> variables;
- variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
- string outputFileName = getOutputFileName("summary",variables);
- m->openOutputFile(outputFileName, out);
- outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
- out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
- out << "H\tpvalue\n";
-
- //math goes here
+ DesignMap designMap(designfile);
+ //if user set classes set groups=those classes
+ if (classes != "") {
+ map<string, vector<string> > thisClasses = m->parseClasses(classes);
+ vector<string> groups = designMap.getNamesUnique(thisClasses);
+ if (groups.size() != 0) { m->setGroups(groups); }
+ else { m->mothurOut("[ERROR]: no groups meet your classes requirement, quitting.\n"); return 0; }
+ }
- int N = m->getNumGroups();
- double H;
- double tmp = 0.0;
- vector<groupRank> vec;
- vector<string> groups = m->getGroups();
- string group;
- int count;
- double sum;
-
- //merge all groups into a vector
+ //if user did not select class use first column
+ if (mclass == "") { mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); }
+ InputData input(sharedfile, "sharedfile");
+ vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
+ string lastLabel = lookup[0]->getLabel();
+ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+ set<string> processedLabels;
+ set<string> userLabels = labels;
- //rank function here
- assignRank(vec);
- //populate counts and ranSums vectors
- for (int i=0;i<N;i++) {
- count = 0;
- sum = 0;
- group = groups[i];
- for(int j;j<vec.size();j++) {
- if (vec[j].group == group) {
- count++;
- sum = sum + vec[j].rank;
- }
+ //as long as you are not at the end of the file or done wih the lines you want
+ while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+
+ if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
+
+ if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
+
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+
+ process(lookup, designMap);
+
+ processedLabels.insert(lookup[0]->getLabel());
+ userLabels.erase(lookup[0]->getLabel());
}
- counts[i] = count;
- rankSums[i] = sum;
+
+ if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+ string saveLabel = lookup[0]->getLabel();
+
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ lookup = input.getSharedRAbundVectors(lastLabel);
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+
+ process(lookup, designMap);
+
+ processedLabels.insert(lookup[0]->getLabel());
+ userLabels.erase(lookup[0]->getLabel());
+
+ //restore real lastlabel to save below
+ lookup[0]->setLabel(saveLabel);
+ }
+
+ lastLabel = lookup[0]->getLabel();
+ //prevent memory leak
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
+
+ if (m->control_pressed) { return 0; }
+
+ //get next line to process
+ lookup = input.getSharedRAbundVectors();
}
- //test statistic
- for (int i=0;i<N;i++) { tmp = tmp + (pow(rankSums[i],2) / counts[i]); }
-
- H = (12 / (N*(N+1))) * tmp - (3*(N+1));
-
- //ss = tmp - pow(accumulate(rankSums.begin(), rankSums.end(), 0), 2);
+ if (m->control_pressed) { return 0; }
- //H = ss / ( (N * (N + 1))/12 );
-
- //correction for ties?
+ //output error messages about any remaining user labels
+ set<string>::iterator it;
+ bool needToRun = false;
+ for (it = userLabels.begin(); it != userLabels.end(); it++) {
+ m->mothurOut("Your file does not include the label " + *it);
+ if (processedLabels.count(lastLabel) != 1) {
+ m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+ needToRun = true;
+ }else {
+ m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+ }
+ }
- //p-value calculation
+ //run last label if you need to
+ if (needToRun == true) {
+ for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
+ lookup = input.getSharedRAbundVectors(lastLabel);
+
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+ process(lookup, designMap);
+
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ }
- return 0;
- }
+
+ //output files created by command
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+ return 0;
+
+ }
catch(exception& e) {
m->errorOut(e, "KruskalWallisCommand", "execute");
exit(1);
}
}
//**********************************************************************************************************************
-void KruskalWallisCommand::assignRank(vector<groupRank> &vec) {
- try {
- double rank = 1;
- double numRanks, avgRank, j;
- vector<groupRank>::iterator it, oldit;
-
- sort (vec.begin(), vec.end(), comparevalue);
-
- it = vec.begin();
-
- while ( it != vec.end() ) {
- j = rank;
- oldit = it;
- if (!equalvalue(*it, *(it+1))) {
- (*it).rank = rank;
- rank = rank+1;
- it++; }
- else {
- while(equalrank(*it, *(it+1))) {
- j = j + (j+1);
- rank++;
- it++;
- }
- numRanks = double (distance(oldit, it));
- avgRank = j / numRanks;
- while(oldit != it) {
- (*oldit).rank = avgRank;
- oldit++;
- }
- }
+int KruskalWallisCommand::process(vector<SharedRAbundVector*>& lookup, DesignMap& designMap) {
+ try {
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+ variables["[distance]"] = lookup[0]->getLabel();
+ string outputFileName = getOutputFileName("kruskall-wallis",variables);
+
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+ outputNames.push_back(outputFileName); outputTypes["kruskall-wallis"].push_back(outputFileName);
+ out << "OTULabel\tKW\tPvalue\n";
+
+ int numBins = lookup[0]->getNumBins();
+ //sanity check to make sure each treatment has a group in the shared file
+ set<string> treatments;
+ for (int j = 0; j < lookup.size(); j++) {
+ string group = lookup[j]->getGroup();
+ string treatment = designMap.get(group, mclass); //get value for this group in this category
+ treatments.insert(treatment);
}
+ if (treatments.size() < 2) { m->mothurOut("[ERROR]: need at least 2 things to classes to compare, quitting.\n"); m->control_pressed = true; }
-
+ LinearAlgebra linear;
+ for (int i = 0; i < numBins; i++) {
+ if (m->control_pressed) { break; }
+
+ vector<spearmanRank> values;
+ for (int j = 0; j < lookup.size(); j++) {
+ string group = lookup[j]->getGroup();
+ string treatment = designMap.get(group, mclass); //get value for this group in this category
+ spearmanRank temp(treatment, lookup[j]->getAbundance(i));
+ values.push_back(temp);
+ }
+
+ double pValue = 0.0;
+ double H = linear.calcKruskalWallis(values, pValue);
+
+ //output H and signifigance
+ out << m->currentBinLabels[i] << '\t' << H << '\t' << pValue << endl;
+ }
+ out.close();
+
+ return 0;
}
- catch(exception& e) {
- m->errorOut(e, "KruskalWallisCommand", "getRank");
+ catch(exception& e) {
+ m->errorOut(e, "KruskalWallisCommand", "process");
exit(1);
}
-
-}
-//**********************************************************************************************************************
-void KruskalWallisCommand::assignValue(vector<groupRank> &vec) {
-
}
//**********************************************************************************************************************
-//**********************************************************************************************************************
-//**********************************************************************************************************************
+
#include "command.hpp"
#include "inputdata.h"
-#include "sharedrabundvector.h"
-
+#include "designmap.h"
class KruskalWallisCommand : public Command {
~KruskalWallisCommand(){}
vector<string> setParameters();
- string getCommandName() { return "kruskalwallis"; }
+ string getCommandName() { return "kruskal.wallis"; }
string getCommandCategory() { return "Hypothesis Testing"; }
string getOutputPattern(string);
string getHelpString();
- string getCitation() { return "http://www.mothur.org/wiki/kruskalwallis"; }
+ string getCitation() { return "http://www.mothur.org/wiki/Kruskal.wallis"; }
string getDescription() { return "Non-parametric method for testing whether samples originate from the same distribution."; }
struct groupRank {
private:
- string outputDir, sharedfile, groups;
- bool abort;
+ bool abort, allLines;
+ string outputDir, sharedfile, designfile, mclass, classes;
+ vector<string> outputNames;
set<string> labels;
- vector<string> outputNames, Groups;
- vector<int> counts;
- vector<double> rankSums;
- vector<double> rankMeans;
-
-
-
- static bool comparevalue(const groupRank &a, const groupRank &b) { return a.value < b.value; }
- static bool equalvalue(const groupRank &a, const groupRank &b) { return a.value == b.value; }
- static bool comparerank(const groupRank &a, const groupRank &b) { return a.rank < b.rank; }
- static bool equalrank(const groupRank &a, const groupRank &b) { return a.rank == b.rank; }
- static bool equalgroup(const groupRank &a, const groupRank &b) { return a.group == b.group; }
+ int process(vector<SharedRAbundVector*>&, DesignMap&);
};
#endif /* KRUSKALWALLISCOMMAND_H */
//
#include "lefsecommand.h"
+#include "linearalgebra.h"
+
+//**********************************************************************************************************************
+vector<string> LefseCommand::setParameters(){
+ try {
+ CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign);
+ CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
+ CommandParameter pclass("class", "String", "", "", "", "", "","",false,false); parameters.push_back(pclass);
+ CommandParameter psubclass("subclass", "String", "", "", "", "", "","",false,false); parameters.push_back(psubclass);
+ CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+ CommandParameter pclasses("classes", "String", "", "", "", "", "","",false,false); parameters.push_back(pclasses);
+ CommandParameter palpha("anova_alpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(palpha);
+ CommandParameter pwalpha("wilcoxon_alpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(pwalpha);
+ //every command must have inputdir and outputdir. This allows mothur users to redirect input and output files.
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+
+ vector<string> myArray;
+ for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LefseCommand", "setParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string LefseCommand::getHelpString(){
+ try {
+ string helpString = "";
+ helpString += "The lefse command allows you to ....\n";
+ helpString += "The lefse command parameters are: shared, design, class, subclass, label, classes, wilcoxon_alpha, anova_alpha.\n";
+ helpString += "The class parameter is used to indicate the which category you would like used for the Kruskal Wallis analysis. If none is provided first category is used.\n";
+ helpString += "The subclass parameter is used to indicate the .....If none is provided second category is used, or if only one category subclass is ignored. \n";
+ helpString += "The classes parameter is used to indicate the classes you would like to use. Clases should be inputted in the following format: classes=label<value1|value2|value3>-label<value1|value2>. For example to include groups from treatment with value early or late and age= young or old. class=treatment<Early|Late>-age<young|old>.\n";
+ helpString += "The label parameter is used to indicate which distances in the shared file you would like to use. labels are separated by dashes.\n";
+ helpString += "The lefse command should be in the following format: lefse(shared=final.an.shared, design=final.design, class=treatment, subclass=age).\n";
+ return helpString;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LefseCommand", "getHelpString");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string LefseCommand::getOutputPattern(string type) {
+ try {
+ string pattern = "";
+
+ if (type == "summary") { pattern = "[filename],[distance],lefse_summary"; }
+ else if (type == "kruskall-wallis") { pattern = "[filename],[distance],kruskall_wallis"; }
+ else if (type == "wilcoxon") { pattern = "[filename],[distance],wilcoxon"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
+
+ return pattern;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LefseCommand", "getOutputPattern");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+LefseCommand::LefseCommand(){
+ try {
+ abort = true; calledHelp = true;
+ setParameters();
+ vector<string> tempOutNames;
+ outputTypes["summary"] = tempOutNames;
+ outputTypes["kruskall-wallis"] = tempOutNames;
+ outputTypes["wilcoxon"] = tempOutNames;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LefseCommand", "LefseCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+LefseCommand::LefseCommand(string option) {
+ try {
+ abort = false; calledHelp = false;
+ allLines = 1;
+
+ //allow user to run help
+ if(option == "help") { help(); abort = true; calledHelp = true; }
+ else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+
+ else {
+ //valid paramters for this command
+ vector<string> myArray = setParameters();
+
+ OptionParser parser(option);
+ map<string,string> parameters = parser.getParameters();
+
+ ValidParameters validParameter;
+ map<string,string>::iterator it;
+ //check to make sure all parameters are valid for command
+ for (it = parameters.begin(); it != parameters.end(); it++) {
+ if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
+ }
+
+ vector<string> tempOutNames;
+ outputTypes["summary"] = tempOutNames;
+ outputTypes["kruskall-wallis"] = tempOutNames;
+ outputTypes["wilcoxon"] = tempOutNames;
+
+ //if the user changes the input directory command factory will send this info to us in the output parameter
+ string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ if (inputDir == "not found"){ inputDir = ""; }
+ else {
+
+ string path;
+ it = parameters.find("design");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["desing"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("shared");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["shared"] = inputDir + it->second; }
+ }
+ }
+
+ //get shared file, it is required
+ sharedfile = validParameter.validFile(parameters, "shared", true);
+ if (sharedfile == "not open") { sharedfile = ""; abort = true; }
+ else if (sharedfile == "not found") {
+ //if there is a current shared file, use it
+ sharedfile = m->getSharedFile();
+ if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+ else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
+ }else { m->setSharedFile(sharedfile); }
+
+ //get shared file, it is required
+ designfile = validParameter.validFile(parameters, "design", true);
+ if (designfile == "not open") { designfile = ""; abort = true; }
+ else if (designfile == "not found") {
+ //if there is a current shared file, use it
+ designfile = m->getDesignFile();
+ if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
+ else { m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
+ }else { m->setDesignFile(designfile); }
+
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
+ outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it
+ }
+
+ string label = validParameter.validFile(parameters, "label", false);
+ if (label == "not found") { label = ""; }
+ else {
+ if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
+ else { allLines = 1; }
+ }
+
+ mclass = validParameter.validFile(parameters, "class", false);
+ if (mclass == "not found") { mclass = ""; }
+
+ subclass = validParameter.validFile(parameters, "subclass", false);
+ if (subclass == "not found") { subclass = ""; }
+
+ classes = validParameter.validFile(parameters, "classes", false);
+ if (classes == "not found") { classes = ""; }
+
+ string temp = validParameter.validFile(parameters, "anova_alpha", false);
+ if (temp == "not found") { temp = "0.05"; }
+ m->mothurConvert(temp, anovaAlpha);
+
+ temp = validParameter.validFile(parameters, "wilcoxon_alpha", false);
+ if (temp == "not found") { temp = "0.05"; }
+ m->mothurConvert(temp, wilcoxonAlpha);
+
+ }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LefseCommand", "LefseCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+int LefseCommand::execute(){
+ try {
+
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
+ DesignMap designMap(designfile);
+ //if user set classes set groups=those classes
+ if (classes != "") {
+ map<string, vector<string> > thisClasses = m->parseClasses(classes);
+ vector<string> groups = designMap.getNamesUnique(thisClasses);
+ if (groups.size() != 0) { m->setGroups(groups); }
+ else { m->mothurOut("[ERROR]: no groups meet your classes requirement, quitting.\n"); return 0; }
+ }
+
+ //if user did not select class use first column
+ if (mclass == "") { mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); }
+
+ InputData input(sharedfile, "sharedfile");
+ vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
+ string lastLabel = lookup[0]->getLabel();
+
+ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+ set<string> processedLabels;
+ set<string> userLabels = labels;
+
+
+ //as long as you are not at the end of the file or done wih the lines you want
+ while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+
+ if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
+
+ if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
+
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+
+ process(lookup, designMap);
+
+ processedLabels.insert(lookup[0]->getLabel());
+ userLabels.erase(lookup[0]->getLabel());
+ }
+
+ if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+ string saveLabel = lookup[0]->getLabel();
+
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ lookup = input.getSharedRAbundVectors(lastLabel);
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+
+ process(lookup, designMap);
+
+ processedLabels.insert(lookup[0]->getLabel());
+ userLabels.erase(lookup[0]->getLabel());
+
+ //restore real lastlabel to save below
+ lookup[0]->setLabel(saveLabel);
+ }
+
+ lastLabel = lookup[0]->getLabel();
+ //prevent memory leak
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
+
+ if (m->control_pressed) { return 0; }
+
+ //get next line to process
+ lookup = input.getSharedRAbundVectors();
+ }
+
+ if (m->control_pressed) { return 0; }
+
+ //output error messages about any remaining user labels
+ set<string>::iterator it;
+ bool needToRun = false;
+ for (it = userLabels.begin(); it != userLabels.end(); it++) {
+ m->mothurOut("Your file does not include the label " + *it);
+ if (processedLabels.count(lastLabel) != 1) {
+ m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+ needToRun = true;
+ }else {
+ m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+ }
+ }
+
+ //run last label if you need to
+ if (needToRun == true) {
+ for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
+ lookup = input.getSharedRAbundVectors(lastLabel);
+
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+ process(lookup, designMap);
+
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ }
+
+
+ //output files created by command
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LefseCommand", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+int LefseCommand::process(vector<SharedRAbundVector*>& lookup, DesignMap& designMap) {
+ try {
+ //run kruskal wallis on each otu
+ vector<int> significantOtuLabels = runKruskalWallis(lookup, designMap);
+
+ //check for subclass
+ if (subclass != "") { significantOtuLabels = runWilcoxon(lookup, designMap, significantOtuLabels); }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LefseCommand", "process");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+vector<int> LefseCommand::runKruskalWallis(vector<SharedRAbundVector*>& lookup, DesignMap& designMap) {
+ try {
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+ variables["[distance]"] = lookup[0]->getLabel();
+ string outputFileName = getOutputFileName("kruskall-wallis",variables);
+
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+ outputNames.push_back(outputFileName); outputTypes["kruskall-wallis"].push_back(outputFileName);
+ out << "OTULabel\tKW\tPvalue\n";
+
+ vector<int> significantOtuLabels;
+ int numBins = lookup[0]->getNumBins();
+ //sanity check to make sure each treatment has a group in the shared file
+ set<string> treatments;
+ for (int j = 0; j < lookup.size(); j++) {
+ string group = lookup[j]->getGroup();
+ string treatment = designMap.get(group, mclass); //get value for this group in this category
+ treatments.insert(treatment);
+ }
+ if (treatments.size() < 2) { m->mothurOut("[ERROR]: need at least 2 things to classes to compare, quitting.\n"); m->control_pressed = true; }
+
+ LinearAlgebra linear;
+ for (int i = 0; i < numBins; i++) {
+ if (m->control_pressed) { break; }
+
+ vector<spearmanRank> values;
+ for (int j = 0; j < lookup.size(); j++) {
+ string group = lookup[j]->getGroup();
+ string treatment = designMap.get(group, mclass); //get value for this group in this category
+ spearmanRank temp(treatment, lookup[j]->getAbundance(i));
+ values.push_back(temp);
+ }
+
+ double pValue = 0.0;
+ double H = linear.calcKruskalWallis(values, pValue);
+
+ //output H and signifigance
+ out << m->currentBinLabels[i] << '\t' << H << '\t' << pValue << endl;
+
+ if (pValue < anovaAlpha) { significantOtuLabels.push_back(i); }
+ }
+ out.close();
+
+ return significantOtuLabels;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LefseCommand", "runKruskalWallis");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+vector<int> LefseCommand::runWilcoxon(vector<SharedRAbundVector*>& lookup, DesignMap& designMap, vector<int> bins) {
+ try {
+ vector<int> significantOtuLabels;
+ //if it exists and meets the following requirements run Wilcoxon
+ /*
+ 1. Subclass members all belong to same main class
+ 2. Number of groups in each subclass is the same
+ 3. anything else??
+
+ */
+ vector<string> subclasses;
+ map<string, string> subclass2Class;
+ map<string, int> subclassCounts;
+ map<string, vector<int> > subClass2GroupIndex; //maps subclass name to vector of indexes in lookup from that subclass. old -> 1,2,3 means groups in location 1,2,3 of lookup are from old. Saves time below.
+ bool error = false;
+ for (int j = 0; j < lookup.size(); j++) {
+ string group = lookup[j]->getGroup();
+ string treatment = designMap.get(group, mclass); //get value for this group in this category
+ string thisSub = designMap.get(group, subclass);
+ map<string, string>::iterator it = subclass2Class.find(thisSub);
+ if (it == subclass2Class.end()) {
+ subclass2Class[thisSub] = treatment;
+ subclassCounts[thisSub] = 1;
+ vector<int> temp; temp.push_back(j);
+ subClass2GroupIndex[thisSub] = temp;
+ }
+ else {
+ subclassCounts[thisSub]++;
+ subClass2GroupIndex[thisSub].push_back(j);
+ if (it->second != treatment) {
+ error = true;
+ m->mothurOut("[ERROR]: subclass " + thisSub + " has members in " + it->second + " and " + treatment + ". Subclass members must be from the same class. Ignoring wilcoxon.\n");
+ }
+ }
+ }
+
+ if (error) { return significantOtuLabels; }
+ else { //check counts to make sure subclasses are the same size
+ set<int> counts;
+ for (map<string, int>::iterator it = subclassCounts.begin(); it != subclassCounts.end(); it++) { counts.insert(it->second); }
+ if (counts.size() > 1) { m->mothurOut("[ERROR]: subclasses must be the same size. Ignoring wilcoxon.\n");
+ return significantOtuLabels; }
+ }
+
+ int numBins = lookup[0]->getNumBins();
+ vector<compGroup> comp;
+ //find comparisons and fill comp
+ map<string, int>::iterator itB;
+ for(map<string, int>::iterator it=subclassCounts.begin();it!=subclassCounts.end();it++){
+ itB = it;itB++;
+ for(itB;itB!=subclassCounts.end();itB++){
+ compGroup temp(it->first,itB->first);
+ comp.push_back(temp);
+ }
+ }
+
+ int numComp = comp.size();
+ if (numComp < 2) { m->mothurOut("[ERROR]: Need at least 2 subclasses, Ignoring Wilcoxon.\n");
+ return significantOtuLabels; }
+
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+ variables["[distance]"] = lookup[0]->getLabel();
+ string outputFileName = getOutputFileName("wilcoxon",variables);
+
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+ outputNames.push_back(outputFileName); outputTypes["wilcoxon"].push_back(outputFileName);
+ out << "OTULabel\tComparision\tWilcoxon\tPvalue\n";
+
+ LinearAlgebra linear;
+ for (int i = 0; i < numBins; i++) {
+ if (m->control_pressed) { break; }
+
+ if (m->inUsersGroups(i, bins)) { //flagged in Kruskal Wallis
+
+ bool sig = false;
+ //for each subclass comparision
+ for (int j = 0; j < numComp; j++) {
+ //fill x and y with this comparisons data
+ vector<double> x; vector<double> y;
+
+ //fill x and y
+ vector<int> xIndexes = subClass2GroupIndex[comp[j].group1]; //indexes in lookup for this subclass
+ for (int k = 0; k < xIndexes.size(); k++) { x.push_back(lookup[xIndexes[k]]->getAbundance(i)); }
+
+ vector<int> yIndexes = subClass2GroupIndex[comp[j].group2]; //indexes in lookup for this subclass
+ for (int k = 0; k < yIndexes.size(); k++) { y.push_back(lookup[yIndexes[k]]->getAbundance(i)); }
+
+ double pValue = 0.0;
+ double H = linear.calcWilcoxon(x, y, pValue);
+
+ //output H and signifigance
+ out << m->currentBinLabels[i] << '\t' << comp[j].getCombo() << '\t' << H << '\t' << pValue << endl;
+
+ //set sig - not sure how yet
+ }
+ if (sig) { significantOtuLabels.push_back(i); }
+ }
+ }
+ out.close();
+
+ return significantOtuLabels;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LefseCommand", "runWilcoxon");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+
+
#include "command.hpp"
/*
- Columns = groups, rows are OTUs, class = design??
+ Columns = groups, rows are OTUs, class = design
From http://huttenhower.sph.harvard.edu/galaxy/root?tool_id=lefse_upload
Input data consist of a collection of m samples (columns) each made up of n numerical features (rows, typically normalized per-sample, red representing high values and green low). These samples are labeled with a class (taking two or more possible values) that represents the main biological hypothesis under investigation; they may also have one or more subclass labels reflecting within-class groupings.
Step 3: the resulting subset of vectors is used to build a Linear Discriminant Analysis model from which the relative difference among classes is used to rank the features. The final output thus consists of a list of features that are discriminative with respect to the classes, consistent with the subclass grouping within classes, and ranked according to the effect size with which they differentiate classes.
*/
+
+#include "command.hpp"
+#include "inputdata.h"
+#include "designmap.h"
+
+/**************************************************************************************************/
+
+class LefseCommand : public Command {
+public:
+ LefseCommand(string);
+ LefseCommand();
+ ~LefseCommand(){}
+
+ vector<string> setParameters();
+ string getCommandName() { return "lefse"; }
+ string getCommandCategory() { return "OTU-Based Approaches"; }
+
+ string getOutputPattern(string);
+ string getHelpString();
+ string getCitation() { return "http://www.mothur.org/wiki/Lefse"; }
+ string getDescription() { return "brief description"; }
+
+ int execute();
+ void help() { m->mothurOut(getHelpString()); }
+
+private:
+ bool abort, allLines;
+ string outputDir, sharedfile, designfile, mclass, subclass, classes;
+ vector<string> outputNames;
+ set<string> labels;
+ float anovaAlpha, wilcoxonAlpha;
+
+ int process(vector<SharedRAbundVector*>&, DesignMap&);
+ vector<int> runKruskalWallis(vector<SharedRAbundVector*>&, DesignMap&);
+ vector<int> runWilcoxon(vector<SharedRAbundVector*>&, DesignMap&, vector<int>);
+
+};
+
+/**************************************************************************************************/
+
+
+
+
#endif /* defined(__Mothur__lefsecommand__) */
exit(1);
}
}
-
+/*********************************************************************************************************************************/
+double LinearAlgebra::calcKruskalWallis(vector<spearmanRank>& values, double& pValue){
+ try {
+ double H;
+ set<string> treatments;
+
+ //rank values
+ sort(values.begin(), values.end(), compareSpearman);
+ vector<spearmanRank*> ties;
+ int rankTotal = 0;
+ vector<int> TIES;
+ for (int j = 0; j < values.size(); j++) {
+ treatments.insert(values[j].name);
+ rankTotal += (j+1);
+ ties.push_back(&(values[j]));
+
+ if (j != values.size()-1) { // you are not the last so you can look ahead
+ if (values[j].score != values[j+1].score) { // you are done with ties, rank them and continue
+ if (ties.size() > 1) { TIES.push_back(ties.size()); }
+ for (int k = 0; k < ties.size(); k++) {
+ double thisrank = rankTotal / (double) ties.size();
+ (*ties[k]).score = thisrank;
+ }
+ ties.clear();
+ rankTotal = 0;
+ }
+ }else { // you are the last one
+ if (ties.size() > 1) { TIES.push_back(ties.size()); }
+ for (int k = 0; k < ties.size(); k++) {
+ double thisrank = rankTotal / (double) ties.size();
+ (*ties[k]).score = thisrank;
+ }
+ }
+ }
+
+
+ // H = 12/(N*(N+1)) * (sum Ti^2/n) - 3(N+1)
+ map<string, double> sums;
+ map<string, double> counts;
+ for (set<string>::iterator it = treatments.begin(); it != treatments.end(); it++) { sums[*it] = 0.0; counts[*it] = 0; }
+
+ for (int j = 0; j < values.size(); j++) {
+ sums[values[j].name] += values[j].score;
+ counts[values[j].name]+= 1.0;
+ }
+
+ double middleTerm = 0.0;
+ for (set<string>::iterator it = treatments.begin(); it != treatments.end(); it++) {
+ middleTerm += ((sums[*it]*sums[*it])/counts[*it]);
+ }
+
+ double firstTerm = 12 / (double) (values.size()*(values.size()+1));
+ double lastTerm = 3 * (values.size()+1);
+
+ H = firstTerm * middleTerm - lastTerm;
+
+ //adjust for ties
+ if (TIES.size() != 0) {
+ double sum = 0.0;
+ for (int j = 0; j < TIES.size(); j++) { sum += ((TIES[j]*TIES[j]*TIES[j])-TIES[j]); }
+ double result = 1.0 - (sum / (double) ((values.size()*values.size()*values.size())-values.size()));
+ H /= result;
+ }
+
+ //Numerical Recipes pg221
+ pValue = 1.0 - (gammp(((treatments.size()-1)/(double)2.0), H/2.0));
+
+ return H;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LinearAlgebra", "calcKruskalWallis");
+ exit(1);
+ }
+}
/*********************************************************************************************************************************/
//assumes both matrices are square and the same size
double LinearAlgebra::calcKendall(vector< vector<double> >& euclidDists, vector< vector<double> >& userDists){
exit(1);
}
}
+/*********************************************************************************************************************************/
+double LinearAlgebra::calcWilcoxon(vector<double>& x, vector<double>& y, double& sig){
+ try {
+ if (x.size() != y.size()) { m->mothurOut("[ERROR]: vector size mismatch."); m->mothurOutEndLine(); return 0.0; }
+
+ double W = 0.0;
+ sig = 0.0;
+
+ vector<double> signPairs;
+ vector<spearmanRank> absV;
+ for (int i = 0; i < x.size(); i++) {
+ if (m->control_pressed) { return W; }
+ double temp = y[i]-x[i];
+ double signV = sign(temp);
+ if (signV != 0) { // exclude zeros
+ spearmanRank member(toString(i), abs(temp));
+ absV.push_back(member);
+ signPairs.push_back(signV);
+ }
+ }
+
+ //rank absV
+ //sort xscores
+ sort(absV.begin(), absV.end(), compareSpearman);
+
+ //convert scores to ranks of x
+ vector<spearmanRank*> ties;
+ int rankTotal = 0;
+ for (int j = 0; j < absV.size(); j++) {
+ if (m->control_pressed) { return W; }
+ rankTotal += (j+1);
+ ties.push_back(&(absV[j]));
+
+ if (j != absV.size()-1) { // you are not the last so you can look ahead
+ if (absV[j].score != absV[j+1].score) { // you are done with ties, rank them and continue
+ for (int k = 0; k < ties.size(); k++) {
+ float thisrank = rankTotal / (float) ties.size();
+ (*ties[k]).score = thisrank;
+ }
+ ties.clear();
+ rankTotal = 0;
+ }
+ }else { // you are the last one
+ for (int k = 0; k < ties.size(); k++) {
+ float thisrank = rankTotal / (float) ties.size();
+ (*ties[k]).score = thisrank;
+ }
+ }
+ }
+ //sum ranks times sign
+ for (int i = 0; i < absV.size(); i++) {
+ if (m->control_pressed) { return W; }
+ W += (absV[i].score*signPairs[i]);
+ }
+ W = abs(W);
+
+ //find zScore
+ cout << "still need to find sig!!" << endl;
+
+ return W;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LinearAlgebra", "calcWilcoxon");
+ exit(1);
+ }
+}
/*********************************************************************************************************************************/
double LinearAlgebra::calcSpearman(vector<double>& x, vector<double>& y, double& sig){
try {
double calcPearson(vector<vector<double> >&, vector<vector<double> >&);
double calcSpearman(vector<vector<double> >&, vector<vector<double> >&);
double calcKendall(vector<vector<double> >&, vector<vector<double> >&);
+ double calcKruskalWallis(vector<spearmanRank>&, double&);
+ double calcWilcoxon(vector<double>&, vector<double>&, double&);
double calcPearson(vector<double>&, vector<double>&, double&);
double calcSpearman(vector<double>&, vector<double>&, double&);
vector<double> solveEquations(vector<vector<double> >, vector<double>);
vector<float> solveEquations(vector<vector<float> >, vector<float>);
vector<vector<double> > getInverse(vector<vector<double> >);
-
+
private:
MothurOut* m;
double betacf(const double, const double, const double);
double betai(const double, const double, const double);
double gammln(const double);
- double gammp(const double, const double);
double gammq(const double, const double);
double gser(double&, const double, const double, double&);
double gcf(double&, const double, const double, double&);
double erfcc(double);
+ double gammp(const double, const double);
double ran0(int&); //for testing
double ran1(int&); //for testing
}
m->gobble(in);
+ if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", forward = " + forward + ", reverse = " + reverse + ".\n"); }
//check to make sure both are able to be opened
ifstream in2;
//
#include "makelefsecommand.h"
+#include "designmap.h"
//**********************************************************************************************************************
vector<string> MakeLefseCommand::setParameters(){
ofstream out;
m->openOutputFile(outputFile, out);
- GroupMap* designMap = NULL;
+ DesignMap* designMap = NULL;
if (designfile != "") {
- designMap = new GroupMap(designfile);
- designMap->readDesignMap();
+ designMap = new DesignMap(designfile);
+ vector<string> categories = designMap->getNamesOfCategories();
- out << "treatment\t";
- for (int i = 0; i < lookup.size(); i++) {
- string treatment = designMap->getGroup(lookup[i]->getGroup());
- if (treatment == "not found") {
- m->mothurOut("[ERROR]: " + lookup[i]->getGroup() + " is not in your design file, please correct.\n");
- }else { out << treatment << '\t'; }
+ for (int j = 0; j < categories.size(); j++) {
+ out << categories[j] << "\t";
+ for (int i = 0; i < lookup.size(); i++) {
+ if (m->control_pressed) { out.close(); delete designMap; return 0; }
+ string value = designMap->get(lookup[i]->getGroup(), categories[j]);
+ if (value == "not found") {
+ m->mothurOut("[ERROR]: " + lookup[i]->getGroup() + " is not in your design file, please correct.\n"); m->control_pressed = true;
+ }else { out << value << '\t'; }
+ }
+ out << endl;
}
- out << endl;
}
out << "group\t";
seqPriorityNode(int n, string s, string nm) : numIdentical(n), seq(s), name(nm) {}
~seqPriorityNode() {}
};
+/************************************************************/
+struct compGroup {
+ string group1;
+ string group2;
+ compGroup() {}
+ compGroup(string s, string nm) : group1(s), group2(nm) {}
+ string getCombo() { return group1+"-"+group2; }
+ ~compGroup() {}
+};
/***************************************************************/
struct spearmanRank {
string name;
//sorts lowest to highest
inline bool compareSequenceDistance(seqDist left, seqDist right){
return (left.dist < right.dist);
-}
+}
+//********************************************************************************************************************
+//returns sign of double
+inline double sign(double temp){
+ //find sign
+ if (temp > 0) { return 1.0; }
+ else if (temp < 0) { return -1.0; }
+ return 0;
+}
/***********************************************************************/
// snagged from http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.2
}
}
+//**********************************************************************************************************************
+
+map<string, vector<string> > MothurOut::parseClasses(string classes){
+ try {
+ map<string, vector<string> > parts;
+
+ //treatment<Early|Late>-age<young|old>
+ vector<string> pieces; splitAtDash(classes, pieces); // -> treatment<Early|Late>, age<young|old>
+
+ for (int i = 0; i < pieces.size(); i++) {
+ string category = ""; string value = "";
+ bool foundOpen = false;
+ for (int j = 0; j < pieces[i].length(); j++) {
+ if (control_pressed) { return parts; }
+
+ if (pieces[i][j] == '<') { foundOpen = true; }
+ else if (pieces[i][j] == '>') { j += pieces[i].length(); }
+ else {
+ if (!foundOpen) { category += pieces[i][j]; }
+ else { value += pieces[i][j]; }
+ }
+ }
+ vector<string> values; splitAtChar(value, values, '|');
+ parts[category] = values;
+ }
+
+ return parts;
+ }
+ catch(exception& e) {
+ errorOut(e, "MothurOut", "parseClasses");
+ exit(1);
+ }
+}
/***********************************************************************/
string MothurOut::hasPath(string longName){
string makeList(vector<string>&);
bool isSubset(vector<string>, vector<string>); //bigSet, subset
int checkName(string&);
+ map<string, vector<string> > parseClasses(string);
//math operation
int factorial(int num);
if (abort == true) { if (calledHelp) { return 0; } return 2; }
- commandFactory = CommandFactory::getInstance();
-
- commandFactory->setLogfileName(name, append);
+ commandFactory = CommandFactory::getInstance();
+
+ string directory = m->hasPath(name);
+ if (directory == "") {
+ commandFactory->setLogfileName(name, append);
+ }else if (m->dirCheck(directory)) {
+ commandFactory->setLogfileName(name, append);
+ }
return 0;
}
break;
}
+ if (rawSeq.length() < (foligo.length() + roligo.length())) { success = pdiffs + 10; break; }
+
if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
group = it->first;
string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
break;
}
+ if (rawSeq.length() < (foligo.length() + roligo.length())) { success = pdiffs + 10; break; }
+
if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
group = it->first;
if (!keepForward) {