From: Kathryn Iverson Date: Wed, 11 Jul 2012 15:10:56 +0000 (-0400) Subject: merged mgcluster. kw files added, but not working in this commit! X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=bfc3401db792f7630a5bfe7aea44b4eb5bae6e6f;hp=ce8794490ab1d83adcdb2b92e0302a1e43e17adf;p=mothur.git merged mgcluster. kw files added, but not working in this commit! --- diff --git a/kruskalwalliscommand.cpp b/kruskalwalliscommand.cpp new file mode 100644 index 0000000..474e82c --- /dev/null +++ b/kruskalwalliscommand.cpp @@ -0,0 +1,265 @@ +/* + * File: kruskalwalliscommand.cpp + * Author: kiverson + * + * Created on June 26, 2012, 11:06 AM + */ + +#include "kruskalwalliscommand.h" + +//********************************************************************************************************************** +vector KruskalWallisCommand::setParameters(){ + try { + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups); + CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared); + + vector myArray; + for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } + return myArray; + } + catch(exception& e) { + m->errorOut(e, "KruskalWallisCommand", "setParameters"); + exit(1); + } +} +//********************************************************************************************************************** +string KruskalWallisCommand::getHelpString(){ + try { + string helpString = ""; + helpString += "The kruskalwallis command parameter options are \n"; + helpString += "Kruskal–Wallis one-way analysis of variance is a non-parametric method for testing whether samples originate from the same distribution."; + return helpString; + } + catch(exception& e) { + m->errorOut(e, "KruskalWallisCommand", "getHelpString"); + exit(1); + } +} +//********************************************************************************************************************** +string KruskalWallisCommand::getOutputFileNameTag(string type, string inputName=""){ + try { + string outputFileName = ""; + map >::iterator it; + + //is this a type this command creates + it = outputTypes.find(type); + if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } + else { + if (type == "summary") { outputFileName = "cooccurence.summary"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } + } + return outputFileName; + } + catch(exception& e) { + m->errorOut(e, "KruskalWallisCommand", "getOutputFileNameTag"); + exit(1); + } +} +//********************************************************************************************************************** +KruskalWallisCommand::KruskalWallisCommand(){ + try { + abort = true; calledHelp = true; + setParameters(); + vector tempOutNames; + outputTypes["summary"] = tempOutNames; + + } + catch(exception& e) { + m->errorOut(e, "KruskalWallisCommand", "KruskalWallisCommand"); + exit(1); + } +} +//********************************************************************************************************************** +KruskalWallisCommand::KruskalWallisCommand(string option) { + try { + abort = false; calledHelp = false; + + //allow user to run help + if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} + + else { + vector myArray = setParameters(); + + OptionParser parser(option); + map parameters = parser.getParameters(); + map::iterator it; + + ValidParameters validParameter; + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //get shared file + sharedfile = validParameter.validFile(parameters, "shared", true); + if (sharedfile == "not open") { sharedfile = ""; abort = true; } + else if (sharedfile == "not found") { + //if there is a current shared file, use it + sharedfile = m->getSharedFile(); + if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; } + }else { m->setSharedFile(sharedfile); } + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(sharedfile); } + + groups = validParameter.validFile(parameters, "groups", false); + if (groups == "not found") { groups = ""; } + else { + m->splitAtDash(groups, Groups); + } + m->setGroups(Groups); + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("shared"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["shared"] = inputDir + it->second; } + } + } + + vector tempOutNames; + outputTypes["summary"] = tempOutNames; + + + } + + } + catch(exception& e) { + m->errorOut(e, "KruskalWallisCommand", "KruskalWallisCommand"); + exit(1); + } +} +//********************************************************************************************************************** +int KruskalWallisCommand::execute(){ + try { + if (abort == true) { if (calledHelp) { return 0; } return 2; } + + InputData* input = new InputData(sharedfile, "sharedfile"); + vector lookup = input->getSharedRAbundVectors(); + string lastLabel = lookup[0]->getLabel(); + + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = labels; + + ofstream out; + string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + getOutputFileNameTag("summary"); + m->openOutputFile(outputFileName, out); + outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName); + out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); + out << "H\tpvalue\n"; + + //math goes here + + int N = m->getNumGroups(); + double H; + double tmp = 0.0; + vector vec; + vector groups = m->getGroups(); + string group; + int count; + double sum; + + //merge all groups into a vector + + + + //rank function here + assignRank(vec); + + //populate counts and ranSums vectors + for (int i=0;ierrorOut(e, "KruskalWallisCommand", "execute"); + exit(1); + } +} +//********************************************************************************************************************** +void KruskalWallisCommand::assignRank(vector &vec) { + try { + double rank = 1; + double numRanks, avgRank, j; + vector::iterator it, oldit; + + sort (vec.begin(), vec.end(), comparevalue); + + it = vec.begin(); + + while ( it != vec.end() ) { + j = rank; + oldit = it; + if (!equalvalue(*it, *(it+1))) { + (*it).rank = rank; + rank = rank+1; + it++; } + else { + while(equalrank(*it, *(it+1))) { + j = j + (j+1); + rank++; + it++; + } + numRanks = double (distance(oldit, it)); + avgRank = j / numRanks; + while(oldit != it) { + (*oldit).rank = avgRank; + oldit++; + } + } + + } + + + } + catch(exception& e) { + m->errorOut(e, "KruskalWallisCommand", "getRank"); + exit(1); + } + +} +//********************************************************************************************************************** +void KruskalWallisCommand::assignValue(vector &vec) { + +} +//********************************************************************************************************************** +//********************************************************************************************************************** +//********************************************************************************************************************** \ No newline at end of file diff --git a/kruskalwalliscommand.h b/kruskalwalliscommand.h new file mode 100644 index 0000000..da1166e --- /dev/null +++ b/kruskalwalliscommand.h @@ -0,0 +1,64 @@ +/* + * File: kruskalwalliscommand.h + * Author: kiverson + * + * Created on June 26, 2012, 11:07 AM + */ + +#ifndef KRUSKALWALLISCOMMAND_H +#define KRUSKALWALLISCOMMAND_H + +#include "command.hpp" +#include "inputdata.h" +#include "sharedrabundvector.h" + + +class KruskalWallisCommand : public Command { + +public: + + KruskalWallisCommand(string); + KruskalWallisCommand(); + ~KruskalWallisCommand(){} + + vector setParameters(); + string getCommandName() { return "kruskalwallis"; } + string getCommandCategory() { return "Hypothesis Testing"; } + string getOutputFileNameTag(string, string); + string getHelpString(); + string getCitation() { return "http://www.mothur.org/wiki/kruskalwallis"; } + string getDescription() { return "Non-parametric method for testing whether samples originate from the same distribution."; } + + struct groupRank { + string group; + double value; + double rank; + }; + + int execute(); + void help() { m->mothurOut(getHelpString()); } + void assignRank(vector&); + void assignValue(vector&); + + +private: + string outputDir, sharedfile, groups; + bool abort; + set labels; + vector outputNames, Groups; + vector counts; + vector rankSums; + vector rankMeans; + + + + static bool comparevalue(const groupRank &a, const groupRank &b) { return a.value < b.value; } + static bool equalvalue(const groupRank &a, const groupRank &b) { return a.value == b.value; } + static bool comparerank(const groupRank &a, const groupRank &b) { return a.rank < b.rank; } + static bool equalrank(const groupRank &a, const groupRank &b) { return a.rank == b.rank; } + static bool equalgroup(const groupRank &a, const groupRank &b) { return a.group == b.group; } + +}; + +#endif /* KRUSKALWALLISCOMMAND_H */ + diff --git a/mgclustercommand.cpp b/mgclustercommand.cpp index 2143495..cffb80f 100644 --- a/mgclustercommand.cpp +++ b/mgclustercommand.cpp @@ -14,7 +14,8 @@ vector MGClusterCommand::setParameters(){ try { CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast); CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname); - CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge); + CommandParameter pcount("count", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pcount); + //CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge); CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength); CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty); CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff); @@ -165,6 +166,13 @@ MGClusterCommand::MGClusterCommand(string option) { if (namefile == "not open") { abort = true; } else if (namefile == "not found") { namefile = ""; } else { m->setNameFile(namefile); } + + countfile = validParameter.validFile(parameters, "count", true); + if (countfile == "not open") { abort = true; } + else if (countfile == "not found") { countfile = ""; } + else { m->setCountTableFile(countfile); } + + if (countfile != "" && namefile != "") { m->mothurOut("Cannot have both a name file and count file. Please use one or the other."); m->mothurOutEndLine(); abort = true; } if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; } @@ -237,9 +245,11 @@ int MGClusterCommand::execute(){ list = new ListVector(nameMap->getListVector()); RAbundVector* rabund = NULL; - if(large) { - map nameMapCounts = m->readNames(namefile); - createRabund(nameMapCounts); + if(countfile != "") { + //map nameMapCounts = m->readNames(namefile); + ct = new CountTable(); + ct->readTable(countfile); + createRabund(ct, list); rabund = &rav; }else { rabund = new RAbundVector(list->getRAbundVector()); @@ -714,8 +724,25 @@ void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOve //********************************************************************************************************************** -void MGClusterCommand::createRabund(map nameMapCounts){ +void MGClusterCommand::createRabund(CountTable* ct, ListVector* list){ try { + //vector names = ct.getNamesOfSeqs(); + + //for ( int i; i < ct.getNumGroups(); i++ ) { rav.push_back( ct.getNumSeqs(names[i]) ); } + //return rav; + + for(int i = 0; i < list->getNumBins(); i++) { + vector binNames; + string bin = list->get(i); + m->splitAtComma(bin, binNames); + int total = 0; + for (int j = 0; j < binNames.size(); j++) { + total += ct->getNumSeqs(binNames[j]); + } + rav.push_back(total); + } + + } catch(exception& e) { m->errorOut(e, "MGClusterCommand", "createRabund"); diff --git a/mgclustercommand.h b/mgclustercommand.h index ce3ffec..0626b86 100644 --- a/mgclustercommand.h +++ b/mgclustercommand.h @@ -18,6 +18,7 @@ #include "hcluster.h" #include "rabundvector.hpp" #include "sabundvector.hpp" +#include "counttable.h" /**********************************************************************/ @@ -46,12 +47,13 @@ private: Cluster* cluster; HCluster* hcluster; ListVector* list; + CountTable* ct; ListVector oldList; RAbundVector rav; vector overlapMatrix; vector outputNames; - string blastfile, method, namefile, overlapFile, distFile, outputDir; + string blastfile, method, namefile, countfile, overlapFile, distFile, outputDir; ofstream sabundFile, rabundFile, listFile; double cutoff; float penalty; @@ -62,7 +64,7 @@ private: ListVector* mergeOPFs(map, float); void sortHclusterFiles(string, string); vector getSeqs(ifstream&); - void createRabund(map); + void createRabund(CountTable*, ListVector*); };