From b25ede2ad307ae76f8a610443e0ec3ec69621ce7 Mon Sep 17 00:00:00 2001 From: Sarah Westcott Date: Mon, 24 Jun 2013 14:31:29 -0400 Subject: [PATCH] added kruskal.wallis command. added worked on make.lefse. working of lefse command. fixed bug in trim.oligos when there was an exact match to paired primers or barcodes that overlapped causing the sequence to be shorter than the barcodes and primers. added design map class. added kruskal wallis and wilcoxon functions to linear algebra class. --- Mothur.xcodeproj/project.pbxproj | 6 + commandfactory.cpp | 10 + designmap.cpp | 502 +++++++++++++++++++++++++++++++ designmap.h | 70 +++++ kruskalwalliscommand.cpp | 360 +++++++++++++--------- kruskalwalliscommand.h | 25 +- lefsecommand.cpp | 480 +++++++++++++++++++++++++++++ lefsecommand.h | 45 ++- linearalgebra.cpp | 141 ++++++++- linearalgebra.h | 6 +- makecontigscommand.cpp | 1 + makelefsecommand.cpp | 24 +- mothur.h | 19 +- mothurout.cpp | 33 ++ mothurout.h | 1 + setlogfilecommand.cpp | 11 +- trimoligos.cpp | 4 + 17 files changed, 1558 insertions(+), 180 deletions(-) create mode 100644 designmap.cpp create mode 100644 designmap.h diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 60a929d..b09bab4 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -54,6 +54,7 @@ A774104814696F320098E6AC /* myseqdist.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A774104614696F320098E6AC /* myseqdist.cpp */; }; A77410F614697C300098E6AC /* seqnoise.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77410F414697C300098E6AC /* seqnoise.cpp */; }; A778FE6B134CA6CA00C0BA33 /* getcommandinfocommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */; }; + A77916E8176F7F7600EEFE18 /* designmap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77916E6176F7F7600EEFE18 /* designmap.cpp */; }; A77A221F139001B600B0BE70 /* deuniquetreecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */; }; A77B7185173D2240002163C2 /* sparcccommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77B7184173D2240002163C2 /* sparcccommand.cpp */; }; A77B7188173D4042002163C2 /* randomnumber.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77B7186173D4041002163C2 /* randomnumber.cpp */; }; @@ -490,6 +491,8 @@ A77410F514697C300098E6AC /* seqnoise.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = seqnoise.h; sourceTree = ""; }; A778FE69134CA6CA00C0BA33 /* getcommandinfocommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getcommandinfocommand.h; sourceTree = ""; }; A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getcommandinfocommand.cpp; sourceTree = ""; }; + A77916E6176F7F7600EEFE18 /* designmap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = designmap.cpp; sourceTree = ""; }; + A77916E7176F7F7600EEFE18 /* designmap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = designmap.h; sourceTree = ""; }; A77A221D139001B600B0BE70 /* deuniquetreecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deuniquetreecommand.h; sourceTree = ""; }; A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deuniquetreecommand.cpp; sourceTree = ""; }; A77B7183173D222F002163C2 /* sparcccommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = sparcccommand.h; sourceTree = ""; }; @@ -1773,6 +1776,8 @@ A7E9B6BD12D37EC400DA6239 /* database.cpp */, A7E9B6BE12D37EC400DA6239 /* database.hpp */, A7E9B6BF12D37EC400DA6239 /* datavector.hpp */, + A77916E7176F7F7600EEFE18 /* designmap.h */, + A77916E6176F7F7600EEFE18 /* designmap.cpp */, A7E9B6CE12D37EC400DA6239 /* distancedb.hpp */, A7E9B6CD12D37EC400DA6239 /* distancedb.cpp */, A7E9B6DE12D37EC400DA6239 /* fastamap.cpp */, @@ -2382,6 +2387,7 @@ A7CFA4311755401800D9ED4D /* renameseqscommand.cpp in Sources */, A741744C175CD9B1007DF49B /* makelefsecommand.cpp in Sources */, A7190B221768E0DF00A9AFA6 /* lefsecommand.cpp in Sources */, + A77916E8176F7F7600EEFE18 /* designmap.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/commandfactory.cpp b/commandfactory.cpp index 407b674..41a6729 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -146,6 +146,8 @@ #include "makelookupcommand.h" #include "renameseqscommand.h" #include "makelefsecommand.h" +#include "lefsecommand.h" +#include "kruskalwalliscommand.h" /*******************************************************/ @@ -315,6 +317,8 @@ CommandFactory::CommandFactory(){ commands["make.lookup"] = "make.lookup"; commands["rename.seqs"] = "rename.seqs"; commands["make.lefse"] = "make.lefse"; + commands["lefse"] = "lefse"; + commands["kruskal.wallis"] = "kruskal.wallis"; } @@ -540,6 +544,8 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "make.lookup") { command = new MakeLookupCommand(optionString); } else if(commandName == "rename.seqs") { command = new RenameSeqsCommand(optionString); } else if(commandName == "make.lefse") { command = new MakeLefseCommand(optionString); } + else if(commandName == "lefse") { command = new LefseCommand(optionString); } + else if(commandName == "kruskal.wallis") { command = new KruskalWallisCommand(optionString); } else { command = new NoCommand(optionString); } return command; @@ -706,6 +712,8 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str else if(commandName == "make.lookup") { pipecommand = new MakeLookupCommand(optionString); } else if(commandName == "rename.seqs") { pipecommand = new RenameSeqsCommand(optionString); } else if(commandName == "make.lefse") { pipecommand = new MakeLefseCommand(optionString); } + else if(commandName == "lefse") { pipecommand = new LefseCommand(optionString); } + else if(commandName == "kruskal.wallis") { pipecommand = new KruskalWallisCommand(optionString); } else { pipecommand = new NoCommand(optionString); } return pipecommand; @@ -858,6 +866,8 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "make.lookup") { shellcommand = new MakeLookupCommand(); } else if(commandName == "rename.seqs") { shellcommand = new RenameSeqsCommand(); } else if(commandName == "make.lefse") { shellcommand = new MakeLefseCommand(); } + else if(commandName == "lefse") { shellcommand = new LefseCommand(); } + else if(commandName == "kruskal.wallis") { shellcommand = new KruskalWallisCommand(); } else { shellcommand = new NoCommand(); } return shellcommand; diff --git a/designmap.cpp b/designmap.cpp new file mode 100644 index 0000000..dff9700 --- /dev/null +++ b/designmap.cpp @@ -0,0 +1,502 @@ +// +// designmap.cpp +// Mothur +// +// Created by SarahsWork on 6/17/13. +// Copyright (c) 2013 Schloss Lab. All rights reserved. +// + +#include "designmap.h" + +/************************************************************/ +DesignMap::DesignMap(string file) { + try { + m = MothurOut::getInstance(); + defaultClass = "not found"; + read(file); + } + catch(exception& e) { + m->errorOut(e, "DesignMap", "DesignMap"); + exit(1); + } +} +/************************************************************/ +int DesignMap::read(string file) { + try { + ifstream in; + m->openInputFile(file, in); + + string headers = m->getline(in); m->gobble(in); + vector columnHeaders = m->splitWhiteSpace(headers); + + namesOfCategories.clear(); + indexCategoryMap.clear(); + indexNameMap.clear(); + designMap.clear(); + map originalGroupIndexes; + for (int i = 1; i < columnHeaders.size(); i++) { namesOfCategories.push_back(columnHeaders[i]); originalGroupIndexes[i-1] = columnHeaders[i]; } + if (columnHeaders.size() > 1) { defaultClass = columnHeaders[1]; } + + //sort groups to keep consistent with how we store the groups in groupmap + sort(namesOfCategories.begin(), namesOfCategories.end()); + for (int i = 0; i < namesOfCategories.size(); i++) { indexCategoryMap[namesOfCategories[i]] = i; } + int numCategories = namesOfCategories.size(); + + bool error = false; + string group; + totalCategories.resize(numCategories); + int count = 0; + while (!in.eof()) { + + if (m->control_pressed) { break; } + + in >> group; m->gobble(in); + if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + "\n"); } + + //if group info, then read it + vector categoryValues; categoryValues.resize(numCategories, "not found"); + for (int i = 0; i < numCategories; i++) { + int thisIndex = indexCategoryMap[originalGroupIndexes[i]]; //find index of this category because we sort the values. + string temp = "not found"; + in >> temp; categoryValues[thisIndex] = temp; m->gobble(in); + + if (m->debug) { m->mothurOut("[DEBUG]: value = " + temp + "\n"); } + + //do we have this value for this category already + map::iterator it = totalCategories[thisIndex].find(temp); + if (it == totalCategories[thisIndex].end()) { totalCategories[thisIndex][temp] = 1; } + else { totalCategories[thisIndex][temp]++; } + } + + + map::iterator it = indexNameMap.find(group); + if (it == indexNameMap.end()) { + indexNameMap[group] = count; + designMap.push_back(categoryValues); + count++; + }else { + error = true; + m->mothurOut("[ERROR]: Your design file contains more than 1 group named " + group + ", group names must be unique. Please correct."); m->mothurOutEndLine(); + } + } + in.close(); + + //sanity check + for (int i = 0; i < totalCategories.size(); i++) { + map::iterator it = totalCategories[i].find(namesOfCategories[i]); + if (it != totalCategories[i].end()) { //we may have an old style design file since category name matches a value + m->mothurOut("\n[WARNING]: Your design file has a category and value for that category named " + namesOfCategories[i] + ". Perhaps you are using an old style design file without headers? If so, please correct."); m->mothurOutEndLine(); + } + } + + if (error) { m->control_pressed = true; } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "DesignMap", "read"); + exit(1); + } +} +/************************************************************/ +////groupName, returns first categories value. +string DesignMap::get(string groupName) { + try { + string value = "not found"; + + map::iterator it2 = indexNameMap.find(groupName); + if (it2 == indexNameMap.end()) { + m->mothurOut("[ERROR]: group " + groupName + " is not in your design file. Please correct.\n"); m->control_pressed = true; + }else { + return designMap[it2->second][0]; + } + + return value; + } + catch(exception& e) { + m->errorOut(e, "DesignMap", "get"); + exit(1); + } +} +/************************************************************/ +////categoryName, returns category values. +vector DesignMap::getValues(string catName) { + try { + vector values; + + map::iterator it2 = indexCategoryMap.find(catName); + if (it2 == indexCategoryMap.end()) { + m->mothurOut("[ERROR]: category " + catName + " is not in your design file. Please correct.\n"); m->control_pressed = true; + }else { + for (map::iterator it = totalCategories[it2->second].begin(); it != totalCategories[it2->second].end(); it++) { + values.push_back(it->first); + } + } + + return values; + } + catch(exception& e) { + m->errorOut(e, "DesignMap", "getValues"); + exit(1); + } +} + +/************************************************************/ +////groupName, category returns value. example F000132, sex -> male +string DesignMap::get(string groupName, string categoryName) { + try { + string value = "not found"; + map::iterator it = indexCategoryMap.find(categoryName); + if (it == indexCategoryMap.end()) { + m->mothurOut("[ERROR]: category " + categoryName + " is not in your design file. Please correct.\n"); m->control_pressed = true; + }else { + map::iterator it2 = indexNameMap.find(groupName); + if (it2 == indexNameMap.end()) { + m->mothurOut("[ERROR]: group " + groupName + " is not in your design file. Please correct.\n"); m->control_pressed = true; + }else { + return designMap[it2->second][it->second]; + } + } + return value; + } + catch(exception& e) { + m->errorOut(e, "DesignMap", "get"); + exit(1); + } +} +/************************************************************/ +//add group, assumes order is correct +int DesignMap::push_back(string group, vector values) { + try { + map::iterator it = indexNameMap.find(group); + if (it == indexNameMap.end()) { + if (values.size() != getNumCategories()) { m->mothurOut("[ERROR]: Your design file has a " + toString(getNumCategories()) + " categories and " + group + " has " + toString(values.size()) + ", please correct."); m->mothurOutEndLine(); m->control_pressed = true; return 0; } + + for (int i = 0; i < values.size(); i++) { + //do we have this value for this category already + map::iterator it = totalCategories[i].find(values[i]); + if (it == totalCategories[i].end()) { totalCategories[i][values[i]] = 1; } + else { totalCategories[i][values[i]]++; } + } + int count = indexNameMap.size(); + indexNameMap[group] = count; + designMap.push_back(values); + }else { + m->mothurOut("[ERROR]: Your design file contains more than 1 group named " + group + ", group names must be unique. Please correct."); m->mothurOutEndLine(); m->control_pressed = true; + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "DesignMap", "push_back"); + exit(1); + } +} +/************************************************************/ +//set values for group, does not need to set all values. assumes group is in table already +int DesignMap::set(string group, map values) { + try { + map::iterator it = indexNameMap.find(group); + if (it != indexNameMap.end()) { + for (map::iterator it2 = values.begin(); it2 != values.end(); it2++) { + + map::iterator it3 = indexCategoryMap.find(it2->first); //do we have this category + if (it3 == indexCategoryMap.end()) { + m->mothurOut("[ERROR]: Your design file does not contain a category called " + it2->first + ". Please correct."); m->mothurOutEndLine(); m->control_pressed = true; + }else { + string oldCategory = designMap[it->second][it3->second]; + //adjust totals for old category + int oldCount = totalCategories[it3->second][oldCategory]; + if (oldCount == 1) { totalCategories[it3->second].erase(oldCategory); } + else { totalCategories[it3->second][oldCategory]--; } + + designMap[it->second][it3->second] = it2->second; //reset value + + //adjust totals for new category + map::iterator it4 = totalCategories[it3->second].find(it2->second); + if (it4 == totalCategories[it3->second].end()) { totalCategories[it3->second][it2->second] = 1; } + else { totalCategories[it3->second][it2->second]++; } + } + } + }else { + m->mothurOut("[ERROR]: Your design file does not contain a group named " + group + ". Please correct."); m->mothurOutEndLine(); m->control_pressed = true; + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "DesignMap", "set"); + exit(1); + } +} +/************************************************************/ +//get number of groups belonging to a category or set of categories, with value or a set of values. Must have all categories and values. Example: +// map early, late>, male> would return 1. Only one group is male and from early or late. +int DesignMap::getNumUnique(map > selected) { + try { + int num = 0; + + //get indexes of categories + vector indexes; + for (map >::iterator it = selected.begin(); it != selected.end(); it++) { + map::iterator it2 = indexCategoryMap.find(it->first); + if (it2 == indexCategoryMap.end()) { + m->mothurOut("[ERROR]: Your design file does not contain a category named " + it->first + ". Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return 0; + }else { indexes.push_back(it2->second); } + } + + for (int i = 0; i < designMap.size(); i++) { + bool hasAll = true; //innocent til proven guilty + int count = 0; + for (map >::iterator it = selected.begin(); it != selected.end(); it++) { //loop through each + //check category to see if this group meets the requirements + if (!m->inUsersGroups(designMap[i][indexes[count]], it->second)) { hasAll = false; it = selected.end(); } + count++; + } + if (hasAll) { num++; } + } + + return num; + } + catch(exception& e) { + m->errorOut(e, "DesignMap", "getNumUnique"); + exit(1); + } +} +/************************************************************/ +//get number of groups belonging to a category or set of categories, with value or a set of values. Must have at least one categories and values. Example: +// map early, late>, male> would return 3. All three group have are either male or from early or late. +int DesignMap::getNumShared(map > selected) { + try { + int num = 0; + + //get indexes of categories + vector indexes; + for (map >::iterator it = selected.begin(); it != selected.end(); it++) { + map::iterator it2 = indexCategoryMap.find(it->first); + if (it2 == indexCategoryMap.end()) { + m->mothurOut("[ERROR]: Your design file does not contain a category named " + it->first + ". Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return 0; + }else { indexes.push_back(it2->second); } + } + + for (int i = 0; i < designMap.size(); i++) { + bool hasAny = false; //guilty til proven innocent + int count = 0; + for (map >::iterator it = selected.begin(); it != selected.end(); it++) { //loop through each + //check category to see if this group meets the requirements + if (m->inUsersGroups(designMap[i][indexes[count]], it->second)) { hasAny = true; it = selected.end(); } + count++; + } + if (hasAny) { num++; } + } + + + return num; + } + catch(exception& e) { + m->errorOut(e, "DesignMap", "getNumShared"); + exit(1); + } +} + +/************************************************************/ +//get names of groups belonging to a category or set of categories, with value or a set of values. Must have all categories and values. Example: +// map early, late>, male> would return F000132. F000132 is the only group which is male and from early or late. +vector DesignMap::getNamesUnique(map > selected) { + try { + vector names; + + //get indexes of categories + vector indexes; + for (map >::iterator it = selected.begin(); it != selected.end(); it++) { + map::iterator it2 = indexCategoryMap.find(it->first); + if (it2 == indexCategoryMap.end()) { + m->mothurOut("[ERROR]: Your design file does not contain a category named " + it->first + ". Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return names; + }else { indexes.push_back(it2->second); } + } + + //map int to name + map reverse; + for (map::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) { + reverse[it->second] = it->first; + } + + for (int i = 0; i < designMap.size(); i++) { + bool hasAll = true; //innocent til proven guilty + int count = 0; + for (map >::iterator it = selected.begin(); it != selected.end(); it++) { //loop through each + //check category to see if this group meets the requirements + if (!m->inUsersGroups(designMap[i][indexes[count]], it->second)) { hasAll = false; it = selected.end(); } + count++; + } + if (hasAll) { + map::iterator it = reverse.find(i); + if (it == reverse.end()) { + m->mothurOut("[ERROR]: should never get here, oops. Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return names; + }else { names.push_back(it->second); } + } + } + + + return names; + } + catch(exception& e) { + m->errorOut(e, "DesignMap", "getNamesUnique"); + exit(1); + } +} +/************************************************************/ +//get names of groups belonging to a category or set of categories, with value or a set of values. Must have all categories and values. Example: +// map early, late>, male> would return F000132. F000132 is the only group which is male and from early or late. +vector DesignMap::getNamesShared(map > selected) { + try { + vector names; + + //get indexes of categories + vector indexes; + for (map >::iterator it = selected.begin(); it != selected.end(); it++) { + map::iterator it2 = indexCategoryMap.find(it->first); + if (it2 == indexCategoryMap.end()) { + m->mothurOut("[ERROR]: Your design file does not contain a category named " + it->first + ". Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return names; + }else { indexes.push_back(it2->second); } + } + + //map int to name + map reverse; + for (map::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) { + reverse[it->second] = it->first; + } + + for (int i = 0; i < designMap.size(); i++) { + bool hasAny = false; + int count = 0; + for (map >::iterator it = selected.begin(); it != selected.end(); it++) { //loop through each + //check category to see if this group meets the requirements + if (m->inUsersGroups(designMap[i][indexes[count]], it->second)) { hasAny = true; it = selected.end(); } + count++; + } + if (hasAny) { + map::iterator it = reverse.find(i); + if (it == reverse.end()) { + m->mothurOut("[ERROR]: should never get here, oops. Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return names; + }else { names.push_back(it->second); } + } + } + + + return names; + } + catch(exception& e) { + m->errorOut(e, "DesignMap", "getNamesShared"); + exit(1); + } +} + +/************************************************************/ +//get names of groups belonging to a category or set of categories, with value or a set of values. Must have at least one categories and values. Example: +// map early, late>, male> would return F000132, F000142, F000138. All three group have are either male or from early or late. + +vector DesignMap::getNames(string category, string value) { + try { + vector names; + + map::iterator it = indexCategoryMap.find(category); + if (it == indexCategoryMap.end()) { + m->mothurOut("[ERROR]: category " + category + " is not in your design file. Please correct.\n"); m->control_pressed = true; + }else { + int column = it->second; + + //map int to name + map reverse; + for (map::iterator it2 = indexNameMap.begin(); it2 != indexNameMap.end(); it2++) { + reverse[it2->second] = it2->first; + } + + for (int i = 0; i < designMap.size(); i++) { + if (designMap[i][column] == value) { + map::iterator it2 = reverse.find(i); + if (it2 == reverse.end()) { + m->mothurOut("[ERROR]: should never get here, oops. Please correct."); m->mothurOutEndLine(); m->control_pressed = true; return names; + }else { names.push_back(it2->second); } + } + } + } + return names; + + } + catch(exception& e) { + m->errorOut(e, "DesignMap", "getNames"); + exit(1); + } +} + +/************************************************************/ +int DesignMap::print(ofstream& out) { + try { + + out << "group\t"; + for (int i = 0; i < namesOfCategories.size(); i++) { out << namesOfCategories[i] << '\t'; } + out << endl; + + map reverse; //use this to preserve order + for (map::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first; } + + for (int i = 0; i < designMap.size(); i++) { + map::iterator itR = reverse.find(i); + + if (itR != reverse.end()) { //will equal end if seqs were removed because remove just removes from indexNameMap + out << itR->second << '\t'; + + for (int j = 0; j < namesOfCategories.size(); j++) { + out << designMap[i][j] << '\t'; + } + out << endl; + } + } + out.close(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "DesignMap", "print"); + exit(1); + } +} +/************************************************************/ +//print specific categories +int DesignMap::print(ofstream& out, vector cats) { + try { + + out << "group\t"; + for (int i = 0; i < namesOfCategories.size(); i++) { if (m->inUsersGroups(namesOfCategories[i], cats)) { out << namesOfCategories[i] << '\t'; } } + out << endl; + + map reverse; //use this to preserve order + for (map::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first; } + + for (int i = 0; i < designMap.size(); i++) { + map::iterator itR = reverse.find(i); + + if (itR != reverse.end()) { //will equal end if seqs were removed because remove just removes from indexNameMap + out << itR->second << '\t'; + + for (int j = 0; j < namesOfCategories.size(); j++) { + if (m->inUsersGroups(namesOfCategories[i], cats)) { + out << designMap[i][j] << '\t'; + } + } + out << endl; + } + } + out.close(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "DesignMap", "print"); + exit(1); + } +} + +/************************************************************/ + diff --git a/designmap.h b/designmap.h new file mode 100644 index 0000000..b6dec3b --- /dev/null +++ b/designmap.h @@ -0,0 +1,70 @@ +// +// designmap.h +// Mothur +// +// Created by SarahsWork on 6/17/13. +// Copyright (c) 2013 Schloss Lab. All rights reserved. +// + +#ifndef __Mothur__designmap__ +#define __Mothur__designmap__ + +#include "mothurout.h" + +/* This class is a representation of the design file. + + group treatment sex age + F000142 Early female young + F000132 Late male old + F000138 Mid male old + + + */ + +class DesignMap { +public: + DesignMap() { m = MothurOut::getInstance(); defaultClass = "not found"; } + DesignMap(string); + ~DesignMap() {} + + int read(string); + string get(string, string); //groupName, category returns value. example F000132, sex -> male + string get(string); //groupName, returns first categories value. example F000132, -> late + vector getValues(string); //categoryName, returns values. example treatment, -> early,late,mid + int set(string, map); //groupName, map + int push_back(string, vector); //groupName, vector - assumes you put values in order of getNamesOfCategories + vector getNamesOfCategories() { + sort(namesOfCategories.begin(), namesOfCategories.end()); + return namesOfCategories; + } + int getNumCategories() { return namesOfCategories.size(); } + int getNum() { return designMap.size(); } + int getNumUnique(map >); //get number of groups belonging to a category or set of categories, with value or a set of values. Must have all categories and values. Example: + // map early, late>, male> would return 1. Only one group is male and from early or late. + int getNumShared(map >); //get number of groups belonging to a category or set of categories, with value or a set of values. Must have at least one categories and values. Example: + // map early, late>, male> would return 3. All three group have are either male or from early or late. + + vector getNames(); + vector getNames(string, string); //get names group from category and value. + vector getNamesUnique(map >); //get names of groups belonging to a category or set of categories, with value or a set of values. Must have all categories and values. Example: + // map early, late>, male> would return F000132. F000132 is the only group which is male and from early or late. + vector getNamesShared(map >); //get names of groups belonging to a category or set of categories, with value or a set of values. Must have at least one categories and values. Example: + // map early, late>, male> would return F000132, F000142, F000138. All three group have are either male or from early or late. + + int print(ofstream&); + int print(ofstream&, vector); //print certain categories + + string getDefaultClass() { return defaultClass; } + +private: + string defaultClass; + vector namesOfCategories; + MothurOut* m; + vector< vector > designMap; + vector< map > totalCategories; //for each category, total groups assigned to it. vector[0] early -> 1, vector[1] male -> 2 + map indexNameMap; + map indexCategoryMap; +}; + + +#endif /* defined(__Mothur__designmap__) */ diff --git a/kruskalwalliscommand.cpp b/kruskalwalliscommand.cpp index bf4fafa..805d21e 100644 --- a/kruskalwalliscommand.cpp +++ b/kruskalwalliscommand.cpp @@ -6,14 +6,19 @@ */ #include "kruskalwalliscommand.h" +#include "linearalgebra.h" //********************************************************************************************************************** -vector KruskalWallisCommand::setParameters(){ +vector KruskalWallisCommand::setParameters(){ try { + CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign); + CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared); + CommandParameter pclass("class", "String", "", "", "", "", "","",false,false); parameters.push_back(pclass); + CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); + CommandParameter pclasses("classes", "String", "", "", "", "", "","",false,false); parameters.push_back(pclasses); + //every command must have inputdir and outputdir. This allows mothur users to redirect input and output files. CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); - CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false,true); parameters.push_back(pgroups); - CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -25,12 +30,16 @@ vector KruskalWallisCommand::setParameters(){ } } //********************************************************************************************************************** -string KruskalWallisCommand::getHelpString(){ +string KruskalWallisCommand::getHelpString(){ try { string helpString = ""; - helpString += "The kruskalwallis command parameter options are \n"; - helpString += "Kruskal–Wallis one-way analysis of variance is a non-parametric method for testing whether samples originate from the same distribution."; - return helpString; + helpString += "The kruskal.wallis command allows you to ....\n"; + helpString += "The kruskal.wallis command parameters are: shared, design, class, label and classes.\n"; + helpString += "The class parameter is used to indicate the which category you would like used for the Kruskal Wallis analysis. If none is provided first category is used.\n"; + helpString += "The classes parameter is used to indicate the classes you would like to use. Clases should be inputted in the following format: classes=label-label. For example to include groups from treatment with value early or late and age= young or old. class=treatment-age.\n"; + helpString += "The label parameter is used to indicate which distances in the shared file you would like to use. labels are separated by dashes.\n"; + helpString += "The kruskal.wallis command should be in the following format: kruskal.wallis(shared=final.an.shared, design=final.design, class=treatment).\n"; + return helpString; } catch(exception& e) { m->errorOut(e, "KruskalWallisCommand", "getHelpString"); @@ -42,7 +51,7 @@ string KruskalWallisCommand::getOutputPattern(string type) { try { string pattern = ""; - if (type == "summary") { pattern = "[filename],cooccurence.summary"; } + if (type == "kruskall-wallis") { pattern = "[filename],[distance],kruskall_wallis"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } return pattern; @@ -53,13 +62,12 @@ string KruskalWallisCommand::getOutputPattern(string type) { } } //********************************************************************************************************************** -KruskalWallisCommand::KruskalWallisCommand(){ +KruskalWallisCommand::KruskalWallisCommand(){ try { - abort = true; calledHelp = true; + abort = true; calledHelp = true; setParameters(); vector tempOutNames; - outputTypes["summary"] = tempOutNames; - + outputTypes["kruskall-wallis"] = tempOutNames; } catch(exception& e) { m->errorOut(e, "KruskalWallisCommand", "KruskalWallisCommand"); @@ -67,68 +75,95 @@ KruskalWallisCommand::KruskalWallisCommand(){ } } //********************************************************************************************************************** -KruskalWallisCommand::KruskalWallisCommand(string option) { +KruskalWallisCommand::KruskalWallisCommand(string option) { try { - abort = false; calledHelp = false; - + abort = false; calledHelp = false; + allLines = 1; + //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else { + //valid paramters for this command vector myArray = setParameters(); OptionParser parser(option); map parameters = parser.getParameters(); - map::iterator it; ValidParameters validParameter; - + map::iterator it; //check to make sure all parameters are valid for command - for (it = parameters.begin(); it != parameters.end(); it++) { + for (it = parameters.begin(); it != parameters.end(); it++) { if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } } + + vector tempOutNames; + outputTypes["kruskall-wallis"] = tempOutNames; - //get shared file - sharedfile = validParameter.validFile(parameters, "shared", true); - if (sharedfile == "not open") { sharedfile = ""; abort = true; } - else if (sharedfile == "not found") { - //if there is a current shared file, use it - sharedfile = m->getSharedFile(); - if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); } - else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; } - }else { m->setSharedFile(sharedfile); } - - //if the user changes the output directory command factory will send this info to us in the output parameter - outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(sharedfile); } - - groups = validParameter.validFile(parameters, "groups", false); - if (groups == "not found") { groups = ""; } - else { - m->splitAtDash(groups, Groups); - } - m->setGroups(Groups); - - //if the user changes the input directory command factory will send this info to us in the output parameter - string inputDir = validParameter.validFile(parameters, "inputdir", false); + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; } else { - string path; - it = parameters.find("shared"); + + string path; + it = parameters.find("design"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["desing"] = inputDir + it->second; } + } + + it = parameters.find("shared"); //user has given a template file - if(it != parameters.end()){ + if(it != parameters.end()){ path = m->hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["shared"] = inputDir + it->second; } } + } + + //get shared file, it is required + sharedfile = validParameter.validFile(parameters, "shared", true); + if (sharedfile == "not open") { sharedfile = ""; abort = true; } + else if (sharedfile == "not found") { + //if there is a current shared file, use it + sharedfile = m->getSharedFile(); + if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; } + }else { m->setSharedFile(sharedfile); } + + //get shared file, it is required + designfile = validParameter.validFile(parameters, "design", true); + if (designfile == "not open") { designfile = ""; abort = true; } + else if (designfile == "not found") { + //if there is a current shared file, use it + designfile = m->getDesignFile(); + if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; } + }else { m->setDesignFile(designfile); } + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ + outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it } - - vector tempOutNames; - outputTypes["summary"] = tempOutNames; - - + + string label = validParameter.validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { m->splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + + mclass = validParameter.validFile(parameters, "class", false); + if (mclass == "not found") { mclass = ""; } + + classes = validParameter.validFile(parameters, "classes", false); + if (classes == "not found") { classes = ""; } + } - + } catch(exception& e) { m->errorOut(e, "KruskalWallisCommand", "KruskalWallisCommand"); @@ -136,128 +171,165 @@ KruskalWallisCommand::KruskalWallisCommand(string option) { } } //********************************************************************************************************************** + int KruskalWallisCommand::execute(){ try { + if (abort == true) { if (calledHelp) { return 0; } return 2; } - InputData* input = new InputData(sharedfile, "sharedfile"); - vector lookup = input->getSharedRAbundVectors(); - string lastLabel = lookup[0]->getLabel(); - - - //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. - set processedLabels; - set userLabels = labels; - - ofstream out; - map variables; - variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)); - string outputFileName = getOutputFileName("summary",variables); - m->openOutputFile(outputFileName, out); - outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName); - out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); - out << "H\tpvalue\n"; - - //math goes here + DesignMap designMap(designfile); + //if user set classes set groups=those classes + if (classes != "") { + map > thisClasses = m->parseClasses(classes); + vector groups = designMap.getNamesUnique(thisClasses); + if (groups.size() != 0) { m->setGroups(groups); } + else { m->mothurOut("[ERROR]: no groups meet your classes requirement, quitting.\n"); return 0; } + } - int N = m->getNumGroups(); - double H; - double tmp = 0.0; - vector vec; - vector groups = m->getGroups(); - string group; - int count; - double sum; - - //merge all groups into a vector + //if user did not select class use first column + if (mclass == "") { mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); } + InputData input(sharedfile, "sharedfile"); + vector lookup = input.getSharedRAbundVectors(); + string lastLabel = lookup[0]->getLabel(); + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = labels; - //rank function here - assignRank(vec); - //populate counts and ranSums vectors - for (int i=0;icontrol_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; } + + if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ + + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + + process(lookup, designMap); + + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); } - counts[i] = count; - rankSums[i] = sum; + + if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = lookup[0]->getLabel(); + + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + lookup = input.getSharedRAbundVectors(lastLabel); + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + + process(lookup, designMap); + + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + + //restore real lastlabel to save below + lookup[0]->setLabel(saveLabel); + } + + lastLabel = lookup[0]->getLabel(); + //prevent memory leak + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } + + if (m->control_pressed) { return 0; } + + //get next line to process + lookup = input.getSharedRAbundVectors(); } - //test statistic - for (int i=0;icontrol_pressed) { return 0; } - //H = ss / ( (N * (N + 1))/12 ); - - //correction for ties? + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); + } + } - //p-value calculation + //run last label if you need to + if (needToRun == true) { + for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } } + lookup = input.getSharedRAbundVectors(lastLabel); + + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + process(lookup, designMap); + + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + } - return 0; - } + + //output files created by command + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + return 0; + + } catch(exception& e) { m->errorOut(e, "KruskalWallisCommand", "execute"); exit(1); } } //********************************************************************************************************************** -void KruskalWallisCommand::assignRank(vector &vec) { - try { - double rank = 1; - double numRanks, avgRank, j; - vector::iterator it, oldit; - - sort (vec.begin(), vec.end(), comparevalue); - - it = vec.begin(); - - while ( it != vec.end() ) { - j = rank; - oldit = it; - if (!equalvalue(*it, *(it+1))) { - (*it).rank = rank; - rank = rank+1; - it++; } - else { - while(equalrank(*it, *(it+1))) { - j = j + (j+1); - rank++; - it++; - } - numRanks = double (distance(oldit, it)); - avgRank = j / numRanks; - while(oldit != it) { - (*oldit).rank = avgRank; - oldit++; - } - } +int KruskalWallisCommand::process(vector& lookup, DesignMap& designMap) { + try { + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)); + variables["[distance]"] = lookup[0]->getLabel(); + string outputFileName = getOutputFileName("kruskall-wallis",variables); + + ofstream out; + m->openOutputFile(outputFileName, out); + outputNames.push_back(outputFileName); outputTypes["kruskall-wallis"].push_back(outputFileName); + out << "OTULabel\tKW\tPvalue\n"; + + int numBins = lookup[0]->getNumBins(); + //sanity check to make sure each treatment has a group in the shared file + set treatments; + for (int j = 0; j < lookup.size(); j++) { + string group = lookup[j]->getGroup(); + string treatment = designMap.get(group, mclass); //get value for this group in this category + treatments.insert(treatment); } + if (treatments.size() < 2) { m->mothurOut("[ERROR]: need at least 2 things to classes to compare, quitting.\n"); m->control_pressed = true; } - + LinearAlgebra linear; + for (int i = 0; i < numBins; i++) { + if (m->control_pressed) { break; } + + vector values; + for (int j = 0; j < lookup.size(); j++) { + string group = lookup[j]->getGroup(); + string treatment = designMap.get(group, mclass); //get value for this group in this category + spearmanRank temp(treatment, lookup[j]->getAbundance(i)); + values.push_back(temp); + } + + double pValue = 0.0; + double H = linear.calcKruskalWallis(values, pValue); + + //output H and signifigance + out << m->currentBinLabels[i] << '\t' << H << '\t' << pValue << endl; + } + out.close(); + + return 0; } - catch(exception& e) { - m->errorOut(e, "KruskalWallisCommand", "getRank"); + catch(exception& e) { + m->errorOut(e, "KruskalWallisCommand", "process"); exit(1); } - -} -//********************************************************************************************************************** -void KruskalWallisCommand::assignValue(vector &vec) { - } //********************************************************************************************************************** -//********************************************************************************************************************** -//********************************************************************************************************************** + diff --git a/kruskalwalliscommand.h b/kruskalwalliscommand.h index feefebd..a82d208 100644 --- a/kruskalwalliscommand.h +++ b/kruskalwalliscommand.h @@ -10,8 +10,7 @@ #include "command.hpp" #include "inputdata.h" -#include "sharedrabundvector.h" - +#include "designmap.h" class KruskalWallisCommand : public Command { @@ -22,11 +21,11 @@ public: ~KruskalWallisCommand(){} vector setParameters(); - string getCommandName() { return "kruskalwallis"; } + string getCommandName() { return "kruskal.wallis"; } string getCommandCategory() { return "Hypothesis Testing"; } string getOutputPattern(string); string getHelpString(); - string getCitation() { return "http://www.mothur.org/wiki/kruskalwallis"; } + string getCitation() { return "http://www.mothur.org/wiki/Kruskal.wallis"; } string getDescription() { return "Non-parametric method for testing whether samples originate from the same distribution."; } struct groupRank { @@ -42,22 +41,12 @@ public: private: - string outputDir, sharedfile, groups; - bool abort; + bool abort, allLines; + string outputDir, sharedfile, designfile, mclass, classes; + vector outputNames; set labels; - vector outputNames, Groups; - vector counts; - vector rankSums; - vector rankMeans; - - - - static bool comparevalue(const groupRank &a, const groupRank &b) { return a.value < b.value; } - static bool equalvalue(const groupRank &a, const groupRank &b) { return a.value == b.value; } - static bool comparerank(const groupRank &a, const groupRank &b) { return a.rank < b.rank; } - static bool equalrank(const groupRank &a, const groupRank &b) { return a.rank == b.rank; } - static bool equalgroup(const groupRank &a, const groupRank &b) { return a.group == b.group; } + int process(vector&, DesignMap&); }; #endif /* KRUSKALWALLISCOMMAND_H */ diff --git a/lefsecommand.cpp b/lefsecommand.cpp index 455019b..e894d57 100644 --- a/lefsecommand.cpp +++ b/lefsecommand.cpp @@ -7,3 +7,483 @@ // #include "lefsecommand.h" +#include "linearalgebra.h" + +//********************************************************************************************************************** +vector LefseCommand::setParameters(){ + try { + CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign); + CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared); + CommandParameter pclass("class", "String", "", "", "", "", "","",false,false); parameters.push_back(pclass); + CommandParameter psubclass("subclass", "String", "", "", "", "", "","",false,false); parameters.push_back(psubclass); + CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); + CommandParameter pclasses("classes", "String", "", "", "", "", "","",false,false); parameters.push_back(pclasses); + CommandParameter palpha("anova_alpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(palpha); + CommandParameter pwalpha("wilcoxon_alpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(pwalpha); + //every command must have inputdir and outputdir. This allows mothur users to redirect input and output files. + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); + + vector myArray; + for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } + return myArray; + } + catch(exception& e) { + m->errorOut(e, "LefseCommand", "setParameters"); + exit(1); + } +} +//********************************************************************************************************************** +string LefseCommand::getHelpString(){ + try { + string helpString = ""; + helpString += "The lefse command allows you to ....\n"; + helpString += "The lefse command parameters are: shared, design, class, subclass, label, classes, wilcoxon_alpha, anova_alpha.\n"; + helpString += "The class parameter is used to indicate the which category you would like used for the Kruskal Wallis analysis. If none is provided first category is used.\n"; + helpString += "The subclass parameter is used to indicate the .....If none is provided second category is used, or if only one category subclass is ignored. \n"; + helpString += "The classes parameter is used to indicate the classes you would like to use. Clases should be inputted in the following format: classes=label-label. For example to include groups from treatment with value early or late and age= young or old. class=treatment-age.\n"; + helpString += "The label parameter is used to indicate which distances in the shared file you would like to use. labels are separated by dashes.\n"; + helpString += "The lefse command should be in the following format: lefse(shared=final.an.shared, design=final.design, class=treatment, subclass=age).\n"; + return helpString; + } + catch(exception& e) { + m->errorOut(e, "LefseCommand", "getHelpString"); + exit(1); + } +} +//********************************************************************************************************************** +string LefseCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "summary") { pattern = "[filename],[distance],lefse_summary"; } + else if (type == "kruskall-wallis") { pattern = "[filename],[distance],kruskall_wallis"; } + else if (type == "wilcoxon") { pattern = "[filename],[distance],wilcoxon"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "LefseCommand", "getOutputPattern"); + exit(1); + } +} +//********************************************************************************************************************** +LefseCommand::LefseCommand(){ + try { + abort = true; calledHelp = true; + setParameters(); + vector tempOutNames; + outputTypes["summary"] = tempOutNames; + outputTypes["kruskall-wallis"] = tempOutNames; + outputTypes["wilcoxon"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "LefseCommand", "LefseCommand"); + exit(1); + } +} +//********************************************************************************************************************** +LefseCommand::LefseCommand(string option) { + try { + abort = false; calledHelp = false; + allLines = 1; + + //allow user to run help + if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} + + else { + //valid paramters for this command + vector myArray = setParameters(); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + map::iterator it; + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + vector tempOutNames; + outputTypes["summary"] = tempOutNames; + outputTypes["kruskall-wallis"] = tempOutNames; + outputTypes["wilcoxon"] = tempOutNames; + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + + string path; + it = parameters.find("design"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["desing"] = inputDir + it->second; } + } + + it = parameters.find("shared"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["shared"] = inputDir + it->second; } + } + } + + //get shared file, it is required + sharedfile = validParameter.validFile(parameters, "shared", true); + if (sharedfile == "not open") { sharedfile = ""; abort = true; } + else if (sharedfile == "not found") { + //if there is a current shared file, use it + sharedfile = m->getSharedFile(); + if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; } + }else { m->setSharedFile(sharedfile); } + + //get shared file, it is required + designfile = validParameter.validFile(parameters, "design", true); + if (designfile == "not open") { designfile = ""; abort = true; } + else if (designfile == "not found") { + //if there is a current shared file, use it + designfile = m->getDesignFile(); + if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; } + }else { m->setDesignFile(designfile); } + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ + outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it + } + + string label = validParameter.validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { m->splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + + mclass = validParameter.validFile(parameters, "class", false); + if (mclass == "not found") { mclass = ""; } + + subclass = validParameter.validFile(parameters, "subclass", false); + if (subclass == "not found") { subclass = ""; } + + classes = validParameter.validFile(parameters, "classes", false); + if (classes == "not found") { classes = ""; } + + string temp = validParameter.validFile(parameters, "anova_alpha", false); + if (temp == "not found") { temp = "0.05"; } + m->mothurConvert(temp, anovaAlpha); + + temp = validParameter.validFile(parameters, "wilcoxon_alpha", false); + if (temp == "not found") { temp = "0.05"; } + m->mothurConvert(temp, wilcoxonAlpha); + + } + + } + catch(exception& e) { + m->errorOut(e, "LefseCommand", "LefseCommand"); + exit(1); + } +} +//********************************************************************************************************************** + +int LefseCommand::execute(){ + try { + + if (abort == true) { if (calledHelp) { return 0; } return 2; } + + DesignMap designMap(designfile); + //if user set classes set groups=those classes + if (classes != "") { + map > thisClasses = m->parseClasses(classes); + vector groups = designMap.getNamesUnique(thisClasses); + if (groups.size() != 0) { m->setGroups(groups); } + else { m->mothurOut("[ERROR]: no groups meet your classes requirement, quitting.\n"); return 0; } + } + + //if user did not select class use first column + if (mclass == "") { mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); } + + InputData input(sharedfile, "sharedfile"); + vector lookup = input.getSharedRAbundVectors(); + string lastLabel = lookup[0]->getLabel(); + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = labels; + + + //as long as you are not at the end of the file or done wih the lines you want + while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { + + if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; } + + if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ + + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + + process(lookup, designMap); + + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + } + + if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = lookup[0]->getLabel(); + + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + lookup = input.getSharedRAbundVectors(lastLabel); + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + + process(lookup, designMap); + + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + + //restore real lastlabel to save below + lookup[0]->setLabel(saveLabel); + } + + lastLabel = lookup[0]->getLabel(); + //prevent memory leak + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } + + if (m->control_pressed) { return 0; } + + //get next line to process + lookup = input.getSharedRAbundVectors(); + } + + if (m->control_pressed) { return 0; } + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); + } + } + + //run last label if you need to + if (needToRun == true) { + for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } } + lookup = input.getSharedRAbundVectors(lastLabel); + + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + process(lookup, designMap); + + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + } + + + //output files created by command + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + return 0; + + } + catch(exception& e) { + m->errorOut(e, "LefseCommand", "execute"); + exit(1); + } +} +//********************************************************************************************************************** + +int LefseCommand::process(vector& lookup, DesignMap& designMap) { + try { + //run kruskal wallis on each otu + vector significantOtuLabels = runKruskalWallis(lookup, designMap); + + //check for subclass + if (subclass != "") { significantOtuLabels = runWilcoxon(lookup, designMap, significantOtuLabels); } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "LefseCommand", "process"); + exit(1); + } +} +//********************************************************************************************************************** + +vector LefseCommand::runKruskalWallis(vector& lookup, DesignMap& designMap) { + try { + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)); + variables["[distance]"] = lookup[0]->getLabel(); + string outputFileName = getOutputFileName("kruskall-wallis",variables); + + ofstream out; + m->openOutputFile(outputFileName, out); + outputNames.push_back(outputFileName); outputTypes["kruskall-wallis"].push_back(outputFileName); + out << "OTULabel\tKW\tPvalue\n"; + + vector significantOtuLabels; + int numBins = lookup[0]->getNumBins(); + //sanity check to make sure each treatment has a group in the shared file + set treatments; + for (int j = 0; j < lookup.size(); j++) { + string group = lookup[j]->getGroup(); + string treatment = designMap.get(group, mclass); //get value for this group in this category + treatments.insert(treatment); + } + if (treatments.size() < 2) { m->mothurOut("[ERROR]: need at least 2 things to classes to compare, quitting.\n"); m->control_pressed = true; } + + LinearAlgebra linear; + for (int i = 0; i < numBins; i++) { + if (m->control_pressed) { break; } + + vector values; + for (int j = 0; j < lookup.size(); j++) { + string group = lookup[j]->getGroup(); + string treatment = designMap.get(group, mclass); //get value for this group in this category + spearmanRank temp(treatment, lookup[j]->getAbundance(i)); + values.push_back(temp); + } + + double pValue = 0.0; + double H = linear.calcKruskalWallis(values, pValue); + + //output H and signifigance + out << m->currentBinLabels[i] << '\t' << H << '\t' << pValue << endl; + + if (pValue < anovaAlpha) { significantOtuLabels.push_back(i); } + } + out.close(); + + return significantOtuLabels; + } + catch(exception& e) { + m->errorOut(e, "LefseCommand", "runKruskalWallis"); + exit(1); + } +} +//********************************************************************************************************************** + +vector LefseCommand::runWilcoxon(vector& lookup, DesignMap& designMap, vector bins) { + try { + vector significantOtuLabels; + //if it exists and meets the following requirements run Wilcoxon + /* + 1. Subclass members all belong to same main class + 2. Number of groups in each subclass is the same + 3. anything else?? + + */ + vector subclasses; + map subclass2Class; + map subclassCounts; + map > subClass2GroupIndex; //maps subclass name to vector of indexes in lookup from that subclass. old -> 1,2,3 means groups in location 1,2,3 of lookup are from old. Saves time below. + bool error = false; + for (int j = 0; j < lookup.size(); j++) { + string group = lookup[j]->getGroup(); + string treatment = designMap.get(group, mclass); //get value for this group in this category + string thisSub = designMap.get(group, subclass); + map::iterator it = subclass2Class.find(thisSub); + if (it == subclass2Class.end()) { + subclass2Class[thisSub] = treatment; + subclassCounts[thisSub] = 1; + vector temp; temp.push_back(j); + subClass2GroupIndex[thisSub] = temp; + } + else { + subclassCounts[thisSub]++; + subClass2GroupIndex[thisSub].push_back(j); + if (it->second != treatment) { + error = true; + m->mothurOut("[ERROR]: subclass " + thisSub + " has members in " + it->second + " and " + treatment + ". Subclass members must be from the same class. Ignoring wilcoxon.\n"); + } + } + } + + if (error) { return significantOtuLabels; } + else { //check counts to make sure subclasses are the same size + set counts; + for (map::iterator it = subclassCounts.begin(); it != subclassCounts.end(); it++) { counts.insert(it->second); } + if (counts.size() > 1) { m->mothurOut("[ERROR]: subclasses must be the same size. Ignoring wilcoxon.\n"); + return significantOtuLabels; } + } + + int numBins = lookup[0]->getNumBins(); + vector comp; + //find comparisons and fill comp + map::iterator itB; + for(map::iterator it=subclassCounts.begin();it!=subclassCounts.end();it++){ + itB = it;itB++; + for(itB;itB!=subclassCounts.end();itB++){ + compGroup temp(it->first,itB->first); + comp.push_back(temp); + } + } + + int numComp = comp.size(); + if (numComp < 2) { m->mothurOut("[ERROR]: Need at least 2 subclasses, Ignoring Wilcoxon.\n"); + return significantOtuLabels; } + + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)); + variables["[distance]"] = lookup[0]->getLabel(); + string outputFileName = getOutputFileName("wilcoxon",variables); + + ofstream out; + m->openOutputFile(outputFileName, out); + outputNames.push_back(outputFileName); outputTypes["wilcoxon"].push_back(outputFileName); + out << "OTULabel\tComparision\tWilcoxon\tPvalue\n"; + + LinearAlgebra linear; + for (int i = 0; i < numBins; i++) { + if (m->control_pressed) { break; } + + if (m->inUsersGroups(i, bins)) { //flagged in Kruskal Wallis + + bool sig = false; + //for each subclass comparision + for (int j = 0; j < numComp; j++) { + //fill x and y with this comparisons data + vector x; vector y; + + //fill x and y + vector xIndexes = subClass2GroupIndex[comp[j].group1]; //indexes in lookup for this subclass + for (int k = 0; k < xIndexes.size(); k++) { x.push_back(lookup[xIndexes[k]]->getAbundance(i)); } + + vector yIndexes = subClass2GroupIndex[comp[j].group2]; //indexes in lookup for this subclass + for (int k = 0; k < yIndexes.size(); k++) { y.push_back(lookup[yIndexes[k]]->getAbundance(i)); } + + double pValue = 0.0; + double H = linear.calcWilcoxon(x, y, pValue); + + //output H and signifigance + out << m->currentBinLabels[i] << '\t' << comp[j].getCombo() << '\t' << H << '\t' << pValue << endl; + + //set sig - not sure how yet + } + if (sig) { significantOtuLabels.push_back(i); } + } + } + out.close(); + + return significantOtuLabels; + } + catch(exception& e) { + m->errorOut(e, "LefseCommand", "runWilcoxon"); + exit(1); + } +} + +//********************************************************************************************************************** + + diff --git a/lefsecommand.h b/lefsecommand.h index 095ad48..c9c4092 100644 --- a/lefsecommand.h +++ b/lefsecommand.h @@ -12,7 +12,7 @@ #include "command.hpp" /* - Columns = groups, rows are OTUs, class = design?? + Columns = groups, rows are OTUs, class = design From http://huttenhower.sph.harvard.edu/galaxy/root?tool_id=lefse_upload Input data consist of a collection of m samples (columns) each made up of n numerical features (rows, typically normalized per-sample, red representing high values and green low). These samples are labeled with a class (taking two or more possible values) that represents the main biological hypothesis under investigation; they may also have one or more subclass labels reflecting within-class groupings. @@ -22,4 +22,47 @@ Step 3: the resulting subset of vectors is used to build a Linear Discriminant Analysis model from which the relative difference among classes is used to rank the features. The final output thus consists of a list of features that are discriminative with respect to the classes, consistent with the subclass grouping within classes, and ranked according to the effect size with which they differentiate classes. */ + +#include "command.hpp" +#include "inputdata.h" +#include "designmap.h" + +/**************************************************************************************************/ + +class LefseCommand : public Command { +public: + LefseCommand(string); + LefseCommand(); + ~LefseCommand(){} + + vector setParameters(); + string getCommandName() { return "lefse"; } + string getCommandCategory() { return "OTU-Based Approaches"; } + + string getOutputPattern(string); + string getHelpString(); + string getCitation() { return "http://www.mothur.org/wiki/Lefse"; } + string getDescription() { return "brief description"; } + + int execute(); + void help() { m->mothurOut(getHelpString()); } + +private: + bool abort, allLines; + string outputDir, sharedfile, designfile, mclass, subclass, classes; + vector outputNames; + set labels; + float anovaAlpha, wilcoxonAlpha; + + int process(vector&, DesignMap&); + vector runKruskalWallis(vector&, DesignMap&); + vector runWilcoxon(vector&, DesignMap&, vector); + +}; + +/**************************************************************************************************/ + + + + #endif /* defined(__Mothur__lefsecommand__) */ diff --git a/linearalgebra.cpp b/linearalgebra.cpp index e1b9470..e4eaf48 100644 --- a/linearalgebra.cpp +++ b/linearalgebra.cpp @@ -805,7 +805,80 @@ double LinearAlgebra::calcSpearman(vector< vector >& euclidDists, vector exit(1); } } - +/*********************************************************************************************************************************/ +double LinearAlgebra::calcKruskalWallis(vector& values, double& pValue){ + try { + double H; + set treatments; + + //rank values + sort(values.begin(), values.end(), compareSpearman); + vector ties; + int rankTotal = 0; + vector TIES; + for (int j = 0; j < values.size(); j++) { + treatments.insert(values[j].name); + rankTotal += (j+1); + ties.push_back(&(values[j])); + + if (j != values.size()-1) { // you are not the last so you can look ahead + if (values[j].score != values[j+1].score) { // you are done with ties, rank them and continue + if (ties.size() > 1) { TIES.push_back(ties.size()); } + for (int k = 0; k < ties.size(); k++) { + double thisrank = rankTotal / (double) ties.size(); + (*ties[k]).score = thisrank; + } + ties.clear(); + rankTotal = 0; + } + }else { // you are the last one + if (ties.size() > 1) { TIES.push_back(ties.size()); } + for (int k = 0; k < ties.size(); k++) { + double thisrank = rankTotal / (double) ties.size(); + (*ties[k]).score = thisrank; + } + } + } + + + // H = 12/(N*(N+1)) * (sum Ti^2/n) - 3(N+1) + map sums; + map counts; + for (set::iterator it = treatments.begin(); it != treatments.end(); it++) { sums[*it] = 0.0; counts[*it] = 0; } + + for (int j = 0; j < values.size(); j++) { + sums[values[j].name] += values[j].score; + counts[values[j].name]+= 1.0; + } + + double middleTerm = 0.0; + for (set::iterator it = treatments.begin(); it != treatments.end(); it++) { + middleTerm += ((sums[*it]*sums[*it])/counts[*it]); + } + + double firstTerm = 12 / (double) (values.size()*(values.size()+1)); + double lastTerm = 3 * (values.size()+1); + + H = firstTerm * middleTerm - lastTerm; + + //adjust for ties + if (TIES.size() != 0) { + double sum = 0.0; + for (int j = 0; j < TIES.size(); j++) { sum += ((TIES[j]*TIES[j]*TIES[j])-TIES[j]); } + double result = 1.0 - (sum / (double) ((values.size()*values.size()*values.size())-values.size())); + H /= result; + } + + //Numerical Recipes pg221 + pValue = 1.0 - (gammp(((treatments.size()-1)/(double)2.0), H/2.0)); + + return H; + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "calcKruskalWallis"); + exit(1); + } +} /*********************************************************************************************************************************/ //assumes both matrices are square and the same size double LinearAlgebra::calcKendall(vector< vector >& euclidDists, vector< vector >& userDists){ @@ -1228,7 +1301,73 @@ double LinearAlgebra::calcKendallSig(double n, double r){ exit(1); } } +/*********************************************************************************************************************************/ +double LinearAlgebra::calcWilcoxon(vector& x, vector& y, double& sig){ + try { + if (x.size() != y.size()) { m->mothurOut("[ERROR]: vector size mismatch."); m->mothurOutEndLine(); return 0.0; } + + double W = 0.0; + sig = 0.0; + + vector signPairs; + vector absV; + for (int i = 0; i < x.size(); i++) { + if (m->control_pressed) { return W; } + double temp = y[i]-x[i]; + double signV = sign(temp); + if (signV != 0) { // exclude zeros + spearmanRank member(toString(i), abs(temp)); + absV.push_back(member); + signPairs.push_back(signV); + } + } + + //rank absV + //sort xscores + sort(absV.begin(), absV.end(), compareSpearman); + + //convert scores to ranks of x + vector ties; + int rankTotal = 0; + for (int j = 0; j < absV.size(); j++) { + if (m->control_pressed) { return W; } + rankTotal += (j+1); + ties.push_back(&(absV[j])); + + if (j != absV.size()-1) { // you are not the last so you can look ahead + if (absV[j].score != absV[j+1].score) { // you are done with ties, rank them and continue + for (int k = 0; k < ties.size(); k++) { + float thisrank = rankTotal / (float) ties.size(); + (*ties[k]).score = thisrank; + } + ties.clear(); + rankTotal = 0; + } + }else { // you are the last one + for (int k = 0; k < ties.size(); k++) { + float thisrank = rankTotal / (float) ties.size(); + (*ties[k]).score = thisrank; + } + } + } + //sum ranks times sign + for (int i = 0; i < absV.size(); i++) { + if (m->control_pressed) { return W; } + W += (absV[i].score*signPairs[i]); + } + W = abs(W); + + //find zScore + cout << "still need to find sig!!" << endl; + + return W; + } + catch(exception& e) { + m->errorOut(e, "LinearAlgebra", "calcWilcoxon"); + exit(1); + } +} /*********************************************************************************************************************************/ double LinearAlgebra::calcSpearman(vector& x, vector& y, double& sig){ try { diff --git a/linearalgebra.h b/linearalgebra.h index e0933ee..5819ea8 100644 --- a/linearalgebra.h +++ b/linearalgebra.h @@ -29,6 +29,8 @@ public: double calcPearson(vector >&, vector >&); double calcSpearman(vector >&, vector >&); double calcKendall(vector >&, vector >&); + double calcKruskalWallis(vector&, double&); + double calcWilcoxon(vector&, vector&, double&); double calcPearson(vector&, vector&, double&); double calcSpearman(vector&, vector&, double&); @@ -41,7 +43,7 @@ public: vector solveEquations(vector >, vector); vector solveEquations(vector >, vector); vector > getInverse(vector >); - + private: MothurOut* m; @@ -50,11 +52,11 @@ private: double betacf(const double, const double, const double); double betai(const double, const double, const double); double gammln(const double); - double gammp(const double, const double); double gammq(const double, const double); double gser(double&, const double, const double, double&); double gcf(double&, const double, const double, double&); double erfcc(double); + double gammp(const double, const double); double ran0(int&); //for testing double ran1(int&); //for testing diff --git a/makecontigscommand.cpp b/makecontigscommand.cpp index 9eb4b80..f888fbe 100644 --- a/makecontigscommand.cpp +++ b/makecontigscommand.cpp @@ -1541,6 +1541,7 @@ vector< vector > MakeContigsCommand::readFileNames(string filename){ } m->gobble(in); + if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", forward = " + forward + ", reverse = " + reverse + ".\n"); } //check to make sure both are able to be opened ifstream in2; diff --git a/makelefsecommand.cpp b/makelefsecommand.cpp index 8f2857e..dd44249 100644 --- a/makelefsecommand.cpp +++ b/makelefsecommand.cpp @@ -7,6 +7,7 @@ // #include "makelefsecommand.h" +#include "designmap.h" //********************************************************************************************************************** vector MakeLefseCommand::setParameters(){ @@ -258,19 +259,22 @@ int MakeLefseCommand::runRelabund(map& consTax, vectoropenOutputFile(outputFile, out); - GroupMap* designMap = NULL; + DesignMap* designMap = NULL; if (designfile != "") { - designMap = new GroupMap(designfile); - designMap->readDesignMap(); + designMap = new DesignMap(designfile); + vector categories = designMap->getNamesOfCategories(); - out << "treatment\t"; - for (int i = 0; i < lookup.size(); i++) { - string treatment = designMap->getGroup(lookup[i]->getGroup()); - if (treatment == "not found") { - m->mothurOut("[ERROR]: " + lookup[i]->getGroup() + " is not in your design file, please correct.\n"); - }else { out << treatment << '\t'; } + for (int j = 0; j < categories.size(); j++) { + out << categories[j] << "\t"; + for (int i = 0; i < lookup.size(); i++) { + if (m->control_pressed) { out.close(); delete designMap; return 0; } + string value = designMap->get(lookup[i]->getGroup(), categories[j]); + if (value == "not found") { + m->mothurOut("[ERROR]: " + lookup[i]->getGroup() + " is not in your design file, please correct.\n"); m->control_pressed = true; + }else { out << value << '\t'; } + } + out << endl; } - out << endl; } out << "group\t"; diff --git a/mothur.h b/mothur.h index dcdd76b..32f4778 100644 --- a/mothur.h +++ b/mothur.h @@ -173,6 +173,15 @@ struct seqPriorityNode { seqPriorityNode(int n, string s, string nm) : numIdentical(n), seq(s), name(nm) {} ~seqPriorityNode() {} }; +/************************************************************/ +struct compGroup { + string group1; + string group2; + compGroup() {} + compGroup(string s, string nm) : group1(s), group2(nm) {} + string getCombo() { return group1+"-"+group2; } + ~compGroup() {} +}; /***************************************************************/ struct spearmanRank { string name; @@ -209,7 +218,15 @@ inline bool compareDistLinePairs(distlinePair left, distlinePair right){ //sorts lowest to highest inline bool compareSequenceDistance(seqDist left, seqDist right){ return (left.dist < right.dist); -} +} +//******************************************************************************************************************** +//returns sign of double +inline double sign(double temp){ + //find sign + if (temp > 0) { return 1.0; } + else if (temp < 0) { return -1.0; } + return 0; +} /***********************************************************************/ // snagged from http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.2 diff --git a/mothurout.cpp b/mothurout.cpp index 96c7305..1f1c96b 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -796,6 +796,39 @@ bool MothurOut::dirCheck(string& dirName){ } } +//********************************************************************************************************************** + +map > MothurOut::parseClasses(string classes){ + try { + map > parts; + + //treatment-age + vector pieces; splitAtDash(classes, pieces); // -> treatment, age + + for (int i = 0; i < pieces.size(); i++) { + string category = ""; string value = ""; + bool foundOpen = false; + for (int j = 0; j < pieces[i].length(); j++) { + if (control_pressed) { return parts; } + + if (pieces[i][j] == '<') { foundOpen = true; } + else if (pieces[i][j] == '>') { j += pieces[i].length(); } + else { + if (!foundOpen) { category += pieces[i][j]; } + else { value += pieces[i][j]; } + } + } + vector values; splitAtChar(value, values, '|'); + parts[category] = values; + } + + return parts; + } + catch(exception& e) { + errorOut(e, "MothurOut", "parseClasses"); + exit(1); + } +} /***********************************************************************/ string MothurOut::hasPath(string longName){ diff --git a/mothurout.h b/mothurout.h index efb3823..77b1b6d 100644 --- a/mothurout.h +++ b/mothurout.h @@ -153,6 +153,7 @@ class MothurOut { string makeList(vector&); bool isSubset(vector, vector); //bigSet, subset int checkName(string&); + map > parseClasses(string); //math operation int factorial(int num); diff --git a/setlogfilecommand.cpp b/setlogfilecommand.cpp index 128c567..41c8e74 100644 --- a/setlogfilecommand.cpp +++ b/setlogfilecommand.cpp @@ -85,9 +85,14 @@ int SetLogFileCommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } - commandFactory = CommandFactory::getInstance(); - - commandFactory->setLogfileName(name, append); + commandFactory = CommandFactory::getInstance(); + + string directory = m->hasPath(name); + if (directory == "") { + commandFactory->setLogfileName(name, append); + }else if (m->dirCheck(directory)) { + commandFactory->setLogfileName(name, append); + } return 0; } diff --git a/trimoligos.cpp b/trimoligos.cpp index 504a8b5..ddc1053 100644 --- a/trimoligos.cpp +++ b/trimoligos.cpp @@ -855,6 +855,8 @@ int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& grou break; } + if (rawSeq.length() < (foligo.length() + roligo.length())) { success = pdiffs + 10; break; } + if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) { group = it->first; string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode @@ -1078,6 +1080,8 @@ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& grou break; } + if (rawSeq.length() < (foligo.length() + roligo.length())) { success = pdiffs + 10; break; } + if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) { group = it->first; if (!keepForward) { -- 2.39.2