From: Sarah Westcott Date: Fri, 2 Mar 2012 16:22:17 +0000 (-0500) Subject: added cooccurence command X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=597560b3c23f03d0069082cf096ce65e0c087519 added cooccurence command --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 702e3bb..0951dd2 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -58,6 +58,8 @@ A7A61F2D130062E000E05B6B /* amovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A61F2C130062E000E05B6B /* amovacommand.cpp */; }; A7BF221414587886000AD524 /* myPerseus.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7BF221214587886000AD524 /* myPerseus.cpp */; }; A7BF2232145879B2000AD524 /* chimeraperseuscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7BF2231145879B2000AD524 /* chimeraperseuscommand.cpp */; }; + A7C3DC0B14FE457500FE1924 /* cooccurrencecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0914FE457500FE1924 /* cooccurrencecommand.cpp */; }; + A7C3DC0F14FE469500FE1924 /* trialSwap2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C3DC0D14FE469500FE1924 /* trialSwap2.cpp */; }; A7E9B88112D37EC400DA6239 /* ace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B64F12D37EC300DA6239 /* ace.cpp */; }; A7E9B88212D37EC400DA6239 /* aligncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65112D37EC300DA6239 /* aligncommand.cpp */; }; A7E9B88312D37EC400DA6239 /* alignment.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65312D37EC300DA6239 /* alignment.cpp */; }; @@ -486,6 +488,10 @@ A7BF221314587886000AD524 /* myPerseus.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = myPerseus.h; sourceTree = ""; }; A7BF2230145879B2000AD524 /* chimeraperseuscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraperseuscommand.h; sourceTree = ""; }; A7BF2231145879B2000AD524 /* chimeraperseuscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeraperseuscommand.cpp; sourceTree = ""; }; + A7C3DC0914FE457500FE1924 /* cooccurrencecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = cooccurrencecommand.cpp; sourceTree = ""; }; + A7C3DC0A14FE457500FE1924 /* cooccurrencecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cooccurrencecommand.h; sourceTree = ""; }; + A7C3DC0D14FE469500FE1924 /* trialSwap2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = trialSwap2.cpp; sourceTree = ""; }; + A7C3DC0E14FE469500FE1924 /* trialswap2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = trialswap2.h; sourceTree = ""; }; A7DAAFA3133A254E003956EB /* commandparameter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = commandparameter.h; sourceTree = ""; }; A7E9B64F12D37EC300DA6239 /* ace.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ace.cpp; sourceTree = ""; }; A7E9B65012D37EC300DA6239 /* ace.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ace.h; sourceTree = ""; }; @@ -1185,6 +1191,8 @@ A7E9B82D12D37EC400DA6239 /* singlelinkage.cpp */, A7E9B83012D37EC400DA6239 /* slibshuff.cpp */, A7E9B83112D37EC400DA6239 /* slibshuff.h */, + A7C3DC0E14FE469500FE1924 /* trialswap2.h */, + A7C3DC0D14FE469500FE1924 /* trialSwap2.cpp */, A7FF19F0140FFDA500AD216D /* trimoligos.h */, A7FF19F1140FFDA500AD216D /* trimoligos.cpp */, A7E9B87412D37EC400DA6239 /* validcalculator.cpp */, @@ -1341,6 +1349,8 @@ A7E9B6B512D37EC400DA6239 /* consensuscommand.cpp */, A7E9B6B812D37EC400DA6239 /* consensusseqscommand.h */, A7E9B6B712D37EC400DA6239 /* consensusseqscommand.cpp */, + A7C3DC0A14FE457500FE1924 /* cooccurrencecommand.h */, + A7C3DC0914FE457500FE1924 /* cooccurrencecommand.cpp */, A7E9B6BA12D37EC400DA6239 /* corraxescommand.h */, A7E9B6B912D37EC400DA6239 /* corraxescommand.cpp */, A795840B13F13CD900F201D5 /* countgroupscommand.h */, @@ -2289,6 +2299,8 @@ A7A3C8C914D041AD00B1BFBE /* otuassociationcommand.cpp in Sources */, A7A32DAA14DC43B00001D2E5 /* sortseqscommand.cpp in Sources */, A7EEB0F514F29BFE00344B83 /* classifytreecommand.cpp in Sources */, + A7C3DC0B14FE457500FE1924 /* cooccurrencecommand.cpp in Sources */, + A7C3DC0F14FE469500FE1924 /* trialSwap2.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/commandfactory.cpp b/commandfactory.cpp index 303bf08..bdcfbba 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -129,6 +129,7 @@ #include "otuassociationcommand.h" #include "sortseqscommand.h" #include "classifytreecommand.h" +#include "cooccurrencecommand.h" /*******************************************************/ @@ -279,6 +280,7 @@ CommandFactory::CommandFactory(){ commands["otu.association"] = "otu.association"; commands["sort.seqs"] = "sort.seqs"; commands["classify.tree"] = "classify.tree"; + commands["cooccurrence"] = "cooccurrence"; commands["quit"] = "MPIEnabled"; } @@ -442,6 +444,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "otu.association") { command = new OTUAssociationCommand(optionString); } else if(commandName == "sort.seqs") { command = new SortSeqsCommand(optionString); } else if(commandName == "classify.tree") { command = new ClassifyTreeCommand(optionString); } + else if(commandName == "cooccurrence") { command = new CooccurrenceCommand(optionString); } else { command = new NoCommand(optionString); } return command; @@ -589,6 +592,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str else if(commandName == "otu.association") { pipecommand = new OTUAssociationCommand(optionString); } else if(commandName == "sort.seqs") { pipecommand = new SortSeqsCommand(optionString); } else if(commandName == "classify.tree") { pipecommand = new ClassifyTreeCommand(optionString); } + else if(commandName == "cooccurrence") { pipecommand = new CooccurrenceCommand(optionString); } else { pipecommand = new NoCommand(optionString); } return pipecommand; @@ -724,6 +728,7 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "otu.association") { shellcommand = new OTUAssociationCommand(); } else if(commandName == "sort.seqs") { shellcommand = new SortSeqsCommand(); } else if(commandName == "classify.tree") { shellcommand = new ClassifyTreeCommand(); } + else if(commandName == "cooccurrence") { shellcommand = new CooccurrenceCommand(); } else { shellcommand = new NoCommand(); } return shellcommand; diff --git a/cooccurrencecommand.cpp b/cooccurrencecommand.cpp new file mode 100644 index 0000000..fa9f723 --- /dev/null +++ b/cooccurrencecommand.cpp @@ -0,0 +1,423 @@ +/* + * cooccurrencecommand.cpp + * Mothur + * + * Created by kiverson on 1/2/12. + * Copyright 2012 Schloss Lab. All rights reserved. + * + */ + +#include "cooccurrencecommand.h" + +//********************************************************************************************************************** +vector CooccurrenceCommand::setParameters() { + try { + CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared); + CommandParameter pmetric("metric", "Multiple", "cscore-checker-combo-vratio", "cscore", "", "", "",false,false); parameters.push_back(pmetric); + CommandParameter pmatrix("matrixmodel", "Multiple", "sim1-sim2-sim3-sim4-sim5-sim6-sim7-sim8-sim9", "sim2", "", "", "",false,false); parameters.push_back(pmatrix); + CommandParameter pruns("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(pruns); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel); + CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups); + + vector myArray; + for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } + return myArray; + } + catch(exception& e) { + m->errorOut(e, "CooccurrenceCommand", "setParameters"); + exit(1); + } +} +//********************************************************************************************************************** +string CooccurrenceCommand::getHelpString(){ + try { + string helpString = "help!"; + + return helpString; + } + catch(exception& e) { + m->errorOut(e, "CooccurrenceCommand", "getHelpString"); + exit(1); + } +} +//********************************************************************************************************************** +CooccurrenceCommand::CooccurrenceCommand(){ + try { + abort = true; calledHelp = true; + setParameters(); + vector tempOutNames; + outputTypes["summary"] = tempOutNames; + + } + catch(exception& e) { + m->errorOut(e, "CooccurrenceCommand", "CooccurrenceCommand"); + exit(1); + } +} +//********************************************************************************************************************** +CooccurrenceCommand::CooccurrenceCommand(string option) { + try { + abort = false; calledHelp = false; + allLines = 1; + + //allow user to run help + if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} + + else { + vector myArray = setParameters(); + + OptionParser parser(option); + map parameters = parser.getParameters(); + map::iterator it; + + ValidParameters validParameter; + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("shared"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["shared"] = inputDir + it->second; } + } + } + + vector tempOutNames; + outputTypes["summary"] = tempOutNames; + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + label = validParameter.validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { m->splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + + //get shared file + sharedfile = validParameter.validFile(parameters, "shared", true); + if (sharedfile == "not open") { sharedfile = ""; abort = true; } + else if (sharedfile == "not found") { + //if there is a current shared file, use it + sharedfile = m->getSharedFile(); + if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; } + }else { m->setSharedFile(sharedfile); } + + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(sharedfile); } + + + metric = validParameter.validFile(parameters, "metric", false); if (metric == "not found") { metric = "cscore"; } + + if ((metric != "cscore") && (metric != "checker") && (metric != "combo") && (metric != "vratio")) { + m->mothurOut("[ERROR]: " + metric + " is not a valid metric option for the cooccurrence command. Choices are cscore, checker, combo, vratio."); m->mothurOutEndLine(); abort = true; + } + + matrix = validParameter.validFile(parameters, "matrix", false); if (matrix == "not found") { matrix = "sim2"; } + + if ((matrix != "sim1") && (matrix != "sim2") && (matrix != "sim3") && (matrix != "sim4") && (matrix != "sim5" ) && (matrix != "sim6" ) && (matrix != "sim7" ) && (matrix != "sim8" ) && (matrix != "sim9" )) { + m->mothurOut("[ERROR]: " + matrix + " is not a valid matrix option for the cooccurrence command. Choices are sim1, sim2, sim3, sim4, sim5, sim6, sim7, sim8, sim9."); m->mothurOutEndLine(); abort = true; + } + + groups = validParameter.validFile(parameters, "groups", false); + if (groups == "not found") { groups = ""; } + else { + m->splitAtDash(groups, Groups); + } + m->setGroups(Groups); + + string temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; } + m->mothurConvert(temp, runs); + + } + + } + catch(exception& e) { + m->errorOut(e, "CooccurrenceCommand", "CooccurrenceCommand"); + exit(1); + } +} +//********************************************************************************************************************** + +int CooccurrenceCommand::execute(){ + try { + + if (abort == true) { if (calledHelp) { return 0; } return 2; } + + InputData* input = new InputData(sharedfile, "sharedfile"); + vector lookup = input->getSharedRAbundVectors(); + string lastLabel = lookup[0]->getLabel(); + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = labels; + + ofstream out; + string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "cooccurence.summary"; + m->openOutputFile(outputFileName, out); + outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName); + out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); + out << "metric\tlabel\tScore\tpValue\n"; + + //as long as you are not at the end of the file or done wih the lines you want + while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { + + if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } delete input; out.close(); m->mothurRemove(outputFileName); return 0; } + + if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ + + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + + getCooccurrence(lookup, out); + + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + } + + if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = lookup[0]->getLabel(); + + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + lookup = input->getSharedRAbundVectors(lastLabel); + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + getCooccurrence(lookup, out); + + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + + //restore real lastlabel to save below + lookup[0]->setLabel(saveLabel); + } + + lastLabel = lookup[0]->getLabel(); + //prevent memory leak + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } + + if (m->control_pressed) { outputTypes.clear(); delete input; out.close(); m->mothurRemove(outputFileName); return 0; } + + //get next line to process + lookup = input->getSharedRAbundVectors(); + } + + if (m->control_pressed) { delete input; out.close(); m->mothurRemove(outputFileName); return 0; } + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); + } + } + + //run last label if you need to + if (needToRun == true) { + for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } } + lookup = input->getSharedRAbundVectors(lastLabel); + + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + + getCooccurrence(lookup, out); + + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + } + + out.close(); + + //reset groups parameter + delete input; + + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + m->mothurOut(outputFileName); m->mothurOutEndLine(); + m->mothurOutEndLine(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "CooccurrenceCommand", "execute"); + exit(1); + } +} +//********************************************************************************************************************** + +int CooccurrenceCommand::getCooccurrence(vector& thisLookUp, ofstream& out){ + try { + int numOTUS = thisLookUp[0]->getNumBins(); + vector< vector > initmatrix (thisLookUp.size()); + vector< vector > co_matrix (thisLookUp[0]->getNumBins()); + for (int i = 0; i < thisLookUp[0]->getNumBins(); i++) { co_matrix[i].resize((thisLookUp.size()), 0); } + for (int i = 0; i < thisLookUp.size(); i++) { initmatrix[i].resize((thisLookUp[i]->getNumBins()), 0); } + vector columntotal(thisLookUp.size(), 0); + vector rowtotal(numOTUS, 0); + + int rowcount = 0; + for (int i = 0; i < thisLookUp.size(); i++) { + for (int j = 0; j < thisLookUp[i]->getNumBins(); j++) { + if (m->control_pressed) { return 0; } + int abund = thisLookUp[i]->getAbundance(j); + + if(abund > 0) { + initmatrix[i][j] = 1; + co_matrix[j][i] = 1; + rowcount++; + columntotal[j]++; + } + } + rowtotal[i] = rowcount; + rowcount = 0; + } + + //nrows is ncols of inital matrix. All the functions need this value. They assume the transposition has already taken place and nrows and ncols refer to that matrix. + //comatrix and initmatrix are still vectors of vectors of ints as in the original script. The abundancevector is only what was read in ie not a co-occurrence matrix! + int ncols = numOTUS;//rows of inital matrix + int nrows = thisLookUp.size();//OTUs + double initscore = 0.0; + //transpose matrix + int newmatrows = ncols; + int newmatcols = nrows; + + //swap for transposed matrix + nrows = newmatrows;//ncols; + ncols = newmatcols;//nrows; + + vector initcolumntotal(ncols, 0); + vector initrowtotal(nrows, 0); + vector stats; + + TrialSwap2 trial; + + initcolumntotal = rowtotal; + initrowtotal = columntotal; + trial.update_row_col_totals(co_matrix, rowtotal, columntotal); + + if (metric == "cscore") { initscore = trial.calc_c_score(co_matrix, rowtotal); } + else if (metric == "checker") { initscore = trial.calc_checker(co_matrix, rowtotal); } + else if (metric == "vratio") { initscore = trial.calc_vratio(rowtotal, columntotal); } + else if (metric == "combo") { initscore = trial.calc_combo(co_matrix); } + else { m->mothurOut("[ERROR]: No metric selected!\n"); m->control_pressed = true; return 1; } + + m->mothurOut("Initial c score: " + toString(initscore)); m->mothurOutEndLine(); + + //nullmatrix burn in + for(int i=0;i<10000;i++) { + if (m->control_pressed) { return 0; } + if (matrix == "sim1") { + trial.sim1(co_matrix); + }else if (matrix == "sim2") { + trial.sim2(co_matrix); + }else if (matrix == "sim3") { + trial.sim3(initmatrix); + co_matrix = initmatrix; + }else if (matrix == "sim4") { + trial.sim4(columntotal, rowtotal, co_matrix); + }else if (matrix == "sim5") { + trial.sim5(initcolumntotal, initrowtotal, initmatrix); + trial.transpose_matrix(initmatrix,co_matrix); + }else if (matrix == "sim6") { + trial.sim6(columntotal, co_matrix); + }else if (matrix == "sim7") { + trial.sim7(initcolumntotal, initmatrix); + co_matrix = initmatrix; + }else if (matrix == "sim8") { + trial.sim8(columntotal, rowtotal, co_matrix); + }else if (matrix == "sim9") { + trial.swap_checkerboards (co_matrix); + }else{ + m->mothurOut("[ERROR]: No model selected! \n"); + m->control_pressed = true; + } + } + + //run + for(int i=0;icontrol_pressed) { return 0; } + //calc metric of nullmatrix + if (matrix == "sim1") { + trial.sim1(co_matrix); + }else if (matrix == "sim2") { + trial.sim2(co_matrix); + }else if (matrix == "sim3") { + trial.sim3(initmatrix); + co_matrix = initmatrix; + }else if (matrix == "sim4") { + trial.sim4(columntotal, rowtotal, co_matrix); + }else if (matrix == "sim5") { + trial.sim5(initcolumntotal, initrowtotal, initmatrix); + trial.transpose_matrix(initmatrix,co_matrix); + }else if (matrix == "sim6") { + trial.sim6(columntotal, co_matrix); + }else if (matrix == "sim7") { + trial.sim7(initcolumntotal, initmatrix); + co_matrix = initmatrix; + }else if (matrix == "sim8") { + trial.sim8(columntotal, rowtotal, co_matrix); + }else if (matrix == "sim9") { + trial.swap_checkerboards (co_matrix); + }else{ + m->mothurOut("[ERROR]: No model selected! \n"); + m->control_pressed = true; + } + // + // + trial.update_row_col_totals(co_matrix, rowtotal, columntotal); + + if (metric == "cscore") { + stats.push_back(trial.calc_c_score(co_matrix, rowtotal)); + }else if (metric == "checker") { + stats.push_back(trial.calc_checker(co_matrix, rowtotal)); + }else if (metric == "vratio") { + stats.push_back(trial.calc_vratio(rowtotal, columntotal)); + }else if (metric == "combo") { + stats.push_back(trial.calc_combo(co_matrix)); + }else { + m->mothurOut("[ERROR]: No metric selected!\n"); + m->control_pressed = true; + return 1; + } + + } + + double total = 0.0; + for (int i=0; imothurOutEndLine(); m->mothurOut("average metric score: " + toString(nullMean)); m->mothurOutEndLine(); + + double pvalue = 0.0; + if (metric == "cscore" || metric == "checker") { pvalue = trial.calc_pvalue_greaterthan (stats, initscore); } + else{ pvalue = trial.calc_pvalue_lessthan (stats, initscore); } + + m->mothurOut("pvalue: " + toString(pvalue)); m->mothurOutEndLine(); + out << metric << '\t' << thisLookUp[0]->getLabel() << '\t' << nullMean << '\t' << pvalue << endl; + + return 0; + } + catch(exception& e) { + m->errorOut(e, "CooccurrenceCommand", "Cooccurrence"); + exit(1); + } +} +//********************************************************************************************************************** + + diff --git a/cooccurrencecommand.h b/cooccurrencecommand.h new file mode 100644 index 0000000..da7cde8 --- /dev/null +++ b/cooccurrencecommand.h @@ -0,0 +1,53 @@ +#ifndef COOCCURRENCECOMMAND_H +#define COOCCURRENCECOMMAND_H + +/* + * COOCCURRENCE.h + * Mothur + * + * Created by westcott on 11/10/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + + +#include "command.hpp" +#include "trialswap2.h" +#include "inputdata.h" +#include "sharedrabundvector.h" + + +class CooccurrenceCommand : public Command { + +public: + + CooccurrenceCommand(string); + CooccurrenceCommand(); + ~CooccurrenceCommand(){} + + vector setParameters(); + string getCommandName() { return "Cooccurrence"; } + string getCommandCategory() { return "Hypothesis Testing"; } + string getHelpString(); + string getCitation() { return "http://www.mothur.org/wiki/Cooccurrence"; } + string getDescription() { return "Cooccurrence"; } + + int execute(); + void help() { m->mothurOut(getHelpString()); } + + +private: + string metric, matrix, outputDir; + string label, sharedfile, groups; + bool abort, allLines; + set labels; + vector outputNames, Groups; + int runs; + + int getCooccurrence(vector&, ofstream&); + +}; + +#endif + + diff --git a/metastatscommand.cpp b/metastatscommand.cpp index e23224b..5991649 100644 --- a/metastatscommand.cpp +++ b/metastatscommand.cpp @@ -232,21 +232,19 @@ int MetaStatsCommand::execute(){ //only 1 combo if (numGroups == 2) { processors = 1; } else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; } - -// #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - if(processors != 1){ - int numPairs = namesOfGroupCombos.size(); - int numPairsPerProcessor = numPairs / processors; + + if(processors != 1){ + int numPairs = namesOfGroupCombos.size(); + int numPairsPerProcessor = numPairs / processors; - for (int i = 0; i < processors; i++) { - int startPos = i * numPairsPerProcessor; - if(i == processors - 1){ - numPairsPerProcessor = numPairs - i * numPairsPerProcessor; - } - lines.push_back(linePair(startPos, numPairsPerProcessor)); - } - } -// #endif + for (int i = 0; i < processors; i++) { + int startPos = i * numPairsPerProcessor; + if(i == processors - 1){ + numPairsPerProcessor = numPairs - i * numPairsPerProcessor; + } + lines.push_back(linePair(startPos, numPairsPerProcessor)); + } + } //as long as you are not at the end of the file or done wih the lines you want while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { diff --git a/mothur.h b/mothur.h index 50344e2..57b409e 100644 --- a/mothur.h +++ b/mothur.h @@ -42,6 +42,7 @@ #include #include #include +#include //misc #include diff --git a/trialSwap2.cpp b/trialSwap2.cpp new file mode 100644 index 0000000..750c89f --- /dev/null +++ b/trialSwap2.cpp @@ -0,0 +1,1539 @@ +#include "trialswap2.h" + + +//The sum_of_squares, havel_hakimi and calc_c_score algorithms have been adapted from I. Miklos and J. Podani. 2004. Randomization of presence-absence matrices: comments and new algorithms. Ecology 85:86-92. + + +/**************************************************************************************************/ +int TrialSwap2::intrand(int n){ + try { + double z; + + z = (double)random() * (double)n / (double)RAND_MAX; + if(z>=n) + z=n-1; + if(z<0) + z=0; + return((int)floor(z)); + } + catch(exception& e) { + m->errorOut(e, "TrialSwap2", "intrand"); + exit(1); + } +} +/**************************************************************************************************/ +/* completely random matrix, all column and row totals are variable, matrix size is the same + * + * + */ +/**************************************************************************************************/ +int TrialSwap2::sim1(vector > &co_matrix){ + try { + vector randRow; + vector > tmpmatrix; + int nrows = co_matrix.size(); + int ncols = co_matrix[0].size(); + + //clear co_matrix + // for(i=0;icontrol_pressed) { break; } + + for(int j=0;j 0.5) { + randRow.push_back(1); + }else{ + randRow.push_back(0); + } + } + tmpmatrix.push_back(randRow); + randRow.clear(); + //cout << endl; + } + co_matrix = tmpmatrix; + + return 0; + } + catch(exception& e) { + m->errorOut(e, "TrialSwap2", "sim1"); + exit(1); + } +} +/**************************************************************************************************/ +/* + *row sums fixed, columns equiprobable + */ +void TrialSwap2::sim2(vector > &co_matrix) +{ + try { + + for(int i=0;icontrol_pressed) { break; } + random_shuffle( co_matrix[i].begin(), co_matrix[i].end() ); + } + } + catch(exception& e) { + m->errorOut(e, "TrialSwap2", "sim2"); + exit(1); + } +} +/**************************************************************************************************/ +int TrialSwap2::sim2plus(vector rowtotal, vector > &co_matrix) +{ + try { + int nrows = co_matrix.size(); + int ncols = co_matrix[0].size(); + double cellprob = 1.0/ncols; + vector cellprobvec; + vector tmprow; + vector > tmpmatrix; + //double randNum; + + double start = 0.0; + + for(int i=0; icontrol_pressed) { return 0; } + cellprobvec.push_back(start + cellprob); + start = cellprobvec[i]; + } + + for(int i=0; icontrol_pressed) { return 0; } + double randNum = rand() / double(RAND_MAX); + //cout << randNum << endl; + if(randNum <= cellprobvec[0]) + { + tmprow[0] = 1; + continue; + } + for(int j=1;j cellprobvec[j-1] && tmprow[j] != 1) + { + tmprow[j] = 1; + } + } + } + tmpmatrix.push_back(tmprow); + tmprow.clear(); + } + co_matrix = tmpmatrix; + tmpmatrix.clear(); + cellprobvec.clear(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "TrialSwap2", "sim2plus"); + exit(1); + } +} +/**************************************************************************************************/ +/* + * same as sim2 but using initmatrix which is the initial co-occurrence matrix before transposition + * may have to be changed depending on what matrix 'seed' is used. One way to use is to transpose + * every null matrix before using an index and use the random matrix as a seed for the next null. + */ +/**************************************************************************************************/ +void TrialSwap2::sim3(vector > &initmatrix) +{ + try { + for(int i=0;icontrol_pressed) { break; } + random_shuffle( initmatrix[i].begin(), initmatrix[i].end() ); + } + + } + catch(exception& e) { + m->errorOut(e, "TrialSwap2", "sim3"); + exit(1); + } +} +/**************************************************************************************************/ +/* + * + * + * + */ +/**************************************************************************************************/ +int TrialSwap2::sim4(vector columntotal, vector rowtotal, vector > &co_matrix) +{ + try { + vector colProb; + vector tmprow;//(ncols, 7); + vector > tmpmatrix; + vector range; + vector randNums; + int ncols = columntotal.size(); + int nrows = rowtotal.size(); + tmprow.clear(); + + double colSum = accumulate( columntotal.begin(), columntotal.end(), 0 ); + //cout << "col sum: " << colSum << endl; + for(int i=0;icontrol_pressed) { return 0; } + colProb.push_back(columntotal[i]/colSum); + } + + double start = 0.0; + + for(int i=0;icontrol_pressed) { return 0; } + range.push_back(start + colProb[i]); + start = range[i]; + } + + for(int i=0;icontrol_pressed) { return 0; } + + while ( accumulate( tmprow.begin(), tmprow.end(), 0 ) < rowtotal[i]) + { + if (m->control_pressed) { return 0; } + + double randNum = rand() / double(RAND_MAX); + if(randNum <= range[0]) + { + tmprow[0] = 1; + continue; + } + for(int j=1;j range[j-1] && tmprow[j] != 1) + { + tmprow[j] = 1; + } + + } + } + tmpmatrix.push_back(tmprow); + tmprow.clear(); + } + + co_matrix = tmpmatrix; + + return 0; + } + catch(exception& e) { + m->errorOut(e, "TrialSwap2", "sim4"); + exit(1); + } +} +/**************************************************************************************************/ +/* + * inverse of sim4, MUST BE TRANSPOSED BEFORE CO-OCCURRENCE ANALYSIS + * + * + */ +/**************************************************************************************************/ +int TrialSwap2::sim5(vector initcolumntotal,vector initrowtotal, vector > &initmatrix) +{ + try { + vector colProb; + vector tmprow;//(ncols, 7); + vector > tmpmatrix; + vector range; + vector randNums; + int ncols = initcolumntotal.size(); + int nrows = initrowtotal.size(); + + tmprow.clear(); + + double colSum = accumulate( initcolumntotal.begin(), initcolumntotal.end(), 0 ); + //cout << "col sum: " << colSum << endl; + for(int i=0;icontrol_pressed) { return 0; } + colProb.push_back(initcolumntotal[i]/colSum); + } + + double start = 0.0; + + for(int i=0;icontrol_pressed) { return 0; } + range.push_back(start + colProb[i]); + start = range[i]; + } + + for(int i=0;icontrol_pressed) { return 0; } + + while ( accumulate( tmprow.begin(), tmprow.end(), 0 ) < initrowtotal[i]) + { + if (m->control_pressed) { return 0; } + + double randNum = rand() / double(RAND_MAX); + if(randNum <= range[0]) + { + tmprow[0] = 1; + continue; + } + for(int j=1;j range[j-1] && tmprow[j] != 1) + { + tmprow[j] = 1; + } + + } + } + tmpmatrix.push_back(tmprow); + tmprow.clear(); + } + + initmatrix = tmpmatrix; + return 0; + } + catch(exception& e) { + m->errorOut(e, "TrialSwap2", "sim5"); + exit(1); + } +} +/**************************************************************************************************/ +/* + * + * + * + */ +/**************************************************************************************************/ +int TrialSwap2::sim6(vector columntotal, vector > &co_matrix) +{ + try { + vector > tmpmatrix; + vector colProb; + vector tmprow; + vector range; + int ncols = columntotal.size(); + int nrows = co_matrix.size(); + + int colSum = accumulate( columntotal.begin(), columntotal.end(), 0 ); + + for(int i=0;icontrol_pressed) { return 0; } + colProb.push_back(columntotal[i]/double (colSum)); + } + + double start = 0.0; + + for(int i=0;icontrol_pressed) { return 0; } + range.push_back(start + colProb[i]); + start = range[i]; + } + + for(int i=0;icontrol_pressed) { return 0; } + tmprow.assign(ncols, 0); + int tmprowtotal; + tmprowtotal = (rand() / double (RAND_MAX)) * 10; + while ( tmprowtotal > ncols) { + if (m->control_pressed) { return 0; } + tmprowtotal = (rand() / double (RAND_MAX)) * 10; + } + //cout << tmprowtotal << endl; + //cout << accumulate( tmprow.begin(), tmprow.end(), 0 ) << endl; + + while ( accumulate( tmprow.begin(), tmprow.end(), 0 ) < tmprowtotal) + { + if (m->control_pressed) { return 0; } + double randNum = rand() / double(RAND_MAX); + //cout << randNum << endl; + if(randNum <= range[0]) + { + tmprow[0] = 1; + continue; + } + for(int j=1;j range[j-1] && tmprow[j] != 1) + { + tmprow[j] = 1; + } + + } + + + } + + tmpmatrix.push_back(tmprow); + tmprow.clear(); + } + + co_matrix = tmpmatrix; + tmpmatrix.clear(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "TrialSwap2", "sim6"); + exit(1); + } +} +/**************************************************************************************************/ +/* + * MUST BE TRANSPOSED BEFORE CO-OCCURRENCE ANALYSIS + * + * + */ +/**************************************************************************************************/ +int TrialSwap2::sim7(vector initrowtotal, vector > &co_matrix) +{ + try { + vector > probmatrix; + vector > tmpmatrix; + vector colProb; + vector probrow; + vector tmprow; + vector range; + double nc; + int ncols = co_matrix[0].size(); int nrows = co_matrix.size(); + + tmpmatrix.assign(nrows, vector(ncols, 0.)); + + int rowsum = accumulate( initrowtotal.begin(), initrowtotal.end(), 0 ); + + nc = rowsum * ncols; + //cout << nc << endl; + + //assign null matrix based on probabilities + + double start = 0.0; // don't reset start -- probs should be from 0-1 thoughout the entire matrix + + for(int i=0;icontrol_pressed) { return 0; } + //cout << initrowtotal[i]/double(nc) << endl; + double cellprob = initrowtotal[i]/double(nc); + //cout << cellprob << endl; + for(int j=0;jcontrol_pressed) { return 0; } + done: + //cout << k << endl; + //tmprowsum = accumulate( tmprowtotal.begin(), tmprowtotal.end(), 0 ); + double randNum = rand() / double(RAND_MAX); + //cout << randNum << "+" << endl; + //special case for the first entry + if(randNum <= probmatrix[0][0] && tmpmatrix[0][0] != 1) + { + tmpmatrix[0][0] = 1; + k++; + //cout << k << endl; + continue; + } + + + for(int i=0;icontrol_pressed) { return 0; } + for(int j=0;j probmatrix[i][j-1] && tmpmatrix[i][j] != 1) + { + tmpmatrix[i][j] = 1; + k++; + //cout << k << endl; + goto done; + } + //else + //k = k-1; + } + + } + + } + + co_matrix = tmpmatrix; + return 0; + //build probibility matrix + /* for(int i=0;i ncols) + tmprowtotal = (rand() / double (RAND_MAX)) * 10; + //cout << tmprowtotal << endl; + //cout << accumulate( tmprow.begin(), tmprow.end(), 0 ) << endl; + + while ( accumulate( tmprow.begin(), tmprow.end(), 0 ) < tmprowtotal) + { + double randNum = rand() / double(RAND_MAX); + //cout << randNum << endl; + if(randNum <= range[0]) + { + tmprow[0] = 1; + continue; + } + for(int j=1;j range[j-1] && tmprow[j] != 1) + { + tmprow[j] = 1; + } + } + } + + tmpmatrix.push_back(tmprow); + tmprow.clear(); + } + + initmatrix = tmpmatrix; + */ + } + catch(exception& e) { + m->errorOut(e, "TrialSwap2", "sim7"); + exit(1); + } +} +/**************************************************************************************************/ +/* + * + * + * + */ +/**************************************************************************************************/ +int TrialSwap2::sim8(vector columntotal, vector rowtotal, vector > &co_matrix) +{ + try { + double prob; + double start = 0.0; + int ncols = columntotal.size(); int nrows = rowtotal.size(); + double probarray[nrows * ncols]; + double randnum; + int grandtotal; + int total = 0; + + //double colSum = accumulate( columntotal.begin(), columntotal.end(), 0 ); + double rowSum = accumulate( rowtotal.begin(), rowtotal.end(), 0 ); + + if (m->control_pressed) { return 0; } + + //cout << "rowsum: " << rowSum << endl; + + grandtotal = rowSum; + + //create probability matrix with each site being between 0 and 1 + for (int i=0;icontrol_pressed) { return 0; } + for (int j=0;jcontrol_pressed) { return 0; } + randnum = rand() / double(RAND_MAX); + //cout << "rand num: " << randnum << endl; + if((randnum <= probarray[0]) && (probarray[0] != 2) ) { + probarray[0] = 2; + total++; + continue; + } + for(int i=1;i<(nrows*ncols);i++) { + if (m->control_pressed) { return 0; } + if((randnum <= probarray[i]) && (randnum > probarray[i-1]) && (probarray[i] != 2) ) { + probarray[i] = 2; + total++; + break; + } + else + continue; + } + } + //cout << "prbarray" << endl; + //for(int i=0;i<(nrows*ncols);i++) + //cout << probarray[i] << " "; + //cout << endl; + for(int i=0;icontrol_pressed) { return 0; } + for(int j=0;jerrorOut(e, "TrialSwap2", "sim8"); + exit(1); + } +} +/**************************************************************************************************/ +int TrialSwap2::havel_hakimi(vector rowtotal,vector columntotal,vector > &co_matrix) +{ + try { + int nrows = co_matrix.size(); + int ncols = co_matrix[0].size(); + int i,j,k; + vector r1; r1.resize(nrows,0); + vector c; c.resize(ncols,0); + vector c1; c1.resize(ncols,0); + + + for(i=0;icontrol_pressed) { return 0; } + i=intrand(nrows); + while(r1[i]==0) { + if (m->control_pressed) { return 0; } + i=intrand(nrows); + } + r1[i]=0; + sho(c,c1,ncols); + for(j=0;jcontrol_pressed) { return 0; } + co_matrix[i][c1[j]]=1; + c[j]--; + if(c[j]<0) + m->mothurOut("Uhh! " + toString(c1[j]) + "\n"); + } + } + return 0; + } + catch(exception& e) { + m->errorOut(e, "TrialSwap2", "havel_hakimi"); + exit(1); + } +} +/**************************************************************************************************/ +int TrialSwap2::sho(vector c, vector c1, int k) +{ + try { + int i,j,temp; + + for(j=k-1;j>0;j--) + { + if (m->control_pressed) { return 0; } + for(i=0;icontrol_pressed) { return 0; } + i=intrand(k-1); + if(c[i]==c[i+1]) + { + temp=c[i]; + c[i]=c[i+1]; + c[i+1]=temp; + temp=c1[i]; + c1[i]=c1[i+1]; + c1[i+1]=temp; + } + } + return(0); + } + catch(exception& e) { + m->errorOut(e, "TrialSwap2", "sho"); + exit(1); + } +} +/**************************************************************************************************/ +double TrialSwap2::calc_c_score (vector > &co_matrix,vector rowtotal) +{ + try { + double cscore = 0.0; + double maxD; + double D; + double normcscore = 0.0; + int nonzeros = 0; + int ncols = co_matrix[0].size(); int nrows = rowtotal.size(); + vector > s(nrows, vector(nrows,0.0)); //only fill half the matrix + + for(int i=0;icontrol_pressed) { return 0; } + for(int k=0;kerrorOut(e, "TrialSwap2", "calc_c_score"); + exit(1); + } +} +/**************************************************************************************************/ +int TrialSwap2::calc_checker (vector > &co_matrix, vector rowtotal) +{ + try { + int cunits=0; + //int s[nrows][ncols]; + int ncols = co_matrix[0].size(); int nrows = rowtotal.size(); + vector > s(nrows, vector(nrows,0)); //only fill half the matrix + + for(int i=0;icontrol_pressed) { return 0; } + //s[i][j]=0; + for(int k=0;kerrorOut(e, "TrialSwap2", "calc_checker"); + exit(1); + } +} +/**************************************************************************************************/ +double TrialSwap2::calc_vratio (vector rowtotal, vector columntotal) +{ + try { + int nrows = rowtotal.size(); + int ncols = columntotal.size(); + int sumCol = accumulate(columntotal.begin(), columntotal.end(), 0 ); + // int sumRow = accumulate(rowtotal.begin(), rowtotal.end(), 0 ); + + double colAvg = (double) sumCol / (double) ncols; + // double rowAvg = (double) sumRow / (double) nrows; + + double p = 0.0; + + // double totalRowVar = 0.0; + double rowVar = 0.0; + double colVar = 0.0; + + for(int i=0;icontrol_pressed) { return 0; } + p = (double) rowtotal[i]/(double) ncols; + rowVar += p * (1.0-p); + } + + for(int i=0;icontrol_pressed) { return 0; } + colVar += pow(((double) columntotal[i]-colAvg),2); + } + + colVar = (1.0/(double)ncols) * colVar; + + return colVar/rowVar; + } + catch(exception& e) { + m->errorOut(e, "TrialSwap2", "calc_vratio"); + exit(1); + } + +} +/**************************************************************************************************/ +int TrialSwap2::calc_combo (vector > &initmatrix) +{ + try { + int initrows = initmatrix.size(); + int unique = 0; + int match = 0; + int matches = 0; + for(int i=0;icontrol_pressed) { return 0; } + if( (initmatrix[i] == initmatrix[j])) + { + match++; + matches++; + break; + } + } + + //on the last iteration of a previously matched row it will add itself because it doesn't match any following rows, so that combination is counted + if (match == 0) + unique++; + } + return unique; + } + catch(exception& e) { + m->errorOut(e, "TrialSwap2", "calc_combo"); + exit(1); + } +} +/**************************************************************************************************/ +int TrialSwap2::swap_checkerboards (vector > &co_matrix) +{ + try { + int ncols = co_matrix[0].size(); int nrows = co_matrix.size(); + int i, j, k, l; + i=intrand(nrows); + while((j = intrand(nrows) ) == i ) {;if (m->control_pressed) { return 0; }} + k=intrand(ncols); + while((l = intrand(ncols) ) == k ) {;if (m->control_pressed) { return 0; }} + //cout << co_matrix[i][k] << " " << co_matrix[j][l] << endl; + //cout << co_matrix[i][l] << " " << co_matrix[j][k] << endl; + //cout << co_matrix[i][l] << " " << co_matrix[j][k] << endl; + //cout << co_matrix[i][l] << " " << co_matrix[j][k] << endl; + if((co_matrix[i][k]*co_matrix[j][l]==1 && co_matrix[i][l]+co_matrix[j][k]==0)||(co_matrix[i][k]+co_matrix[j][l]==0 && co_matrix[i][l]*co_matrix[j][k]==1)) //checking for checkerboard value and swap + { + co_matrix[i][k]=1-co_matrix[i][k]; + co_matrix[i][l]=1-co_matrix[i][l]; + co_matrix[j][k]=1-co_matrix[j][k]; + co_matrix[j][l]=1-co_matrix[j][l]; + //cout << "swapped!" << endl; + } + //cout << "i: " << i << " j: " << j << " k: " << " l: " << l << endl; + return 0; + } + catch(exception& e) { + m->errorOut(e, "TrialSwap2", "swap_checkerboards"); + exit(1); + } +} +/**************************************************************************************************/ +double TrialSwap2::calc_pvalue_greaterthan (vector scorevec, double initialscore) +{ + try { + int runs = scorevec.size(); + double p = 0.0; + for( int i=0;icontrol_pressed) { return 0; } + if(scorevec[i]>=initialscore) + p++; + } + return p/(double)runs; + } + catch(exception& e) { + m->errorOut(e, "TrialSwap2", "calc_pvalue_greaterthan"); + exit(1); + } +} +/**************************************************************************************************/ +double TrialSwap2::calc_pvalue_lessthan (vector scorevec, double initialscore) +{ + try { + int runs = scorevec.size(); + double p = 0.0; + for( int i=0;icontrol_pressed) { return 0; } + if(scorevec[i]<=initialscore) + p++; + } + return p/(double)runs; + } + catch(exception& e) { + m->errorOut(e, "TrialSwap2", "calc_pvalue_lessthan"); + exit(1); + } +} +/**************************************************************************************************/ +double TrialSwap2::t_test (double initialscore, int runs, double nullMean, vector scorevec) +{ + try { + double t; + double sampleSD; + double sum = 0; + + for(int i=0;icontrol_pressed) { return 0; } + sum += pow((scorevec[i] - nullMean),2); + //cout << "scorevec[" << i << "]" << scorevec[i] << endl; + } + + m->mothurOut("nullMean: " + toString(nullMean)); m->mothurOutEndLine(); + + m->mothurOut("sum: " + toString(sum)); m->mothurOutEndLine(); + + sampleSD = sqrt( (1/runs) * sum ); + + m->mothurOut("samplSD: " + toString(sampleSD)); m->mothurOutEndLine(); + + t = (nullMean - initialscore) / (sampleSD / sqrt(runs)); + + return t; + } + catch(exception& e) { + m->errorOut(e, "TrialSwap2", "t_test"); + exit(1); + } +} +/**************************************************************************************************/ +int TrialSwap2::print_matrix(vector > &matrix, int nrows, int ncols) +{ + try { + m->mothurOut("matrix:"); m->mothurOutEndLine(); + + for (int i = 0; i < nrows; i++) + { + if (m->control_pressed) { return 0; } + for (int j = 0; j < ncols; j++) + { + m->mothurOut(toString(matrix[i][j])); + } + m->mothurOutEndLine(); + } + return 0; + } + catch(exception& e) { + m->errorOut(e, "TrialSwap2", "print_matrix"); + exit(1); + } +} +/**************************************************************************************************/ +int TrialSwap2::transpose_matrix (vector > &initmatrix, vector > &co_matrix)//, int nrows, int nocols) +{ + try { + int ncols = initmatrix.size(); int nrows = initmatrix[0].size(); + int tmpnrows = nrows; + //vector > tmpvec; + vector tmprow; + if(!co_matrix.empty()) + co_matrix.clear(); + for (int i=0;icontrol_pressed) { return 0; } + for (int j=0;jerrorOut(e, "TrialSwap2", "transpose_matrix"); + exit(1); + } +} +/**************************************************************************************************/ +int TrialSwap2::update_row_col_totals(vector > &co_matrix, vector &rowtotal, vector &columntotal) +{ + try { + //rowtotal.clear(); + //columntotal.clear(); + //generate (rowtotal.begin(), rowtotal.end(), 0); + //generate (columntotal.begin(), columntotal.end(), 0); + int nrows = co_matrix.size(); + int ncols = co_matrix[0].size(); + vector tmpcolumntotal(ncols, 0); + vector tmprowtotal(nrows, 0); + + int rowcount = 0; + + for (int i = 0; i < nrows; i++) + { + if (m->control_pressed) { return 0; } + for (int j = 0; j < ncols; j++) + { + if (co_matrix[i][j] == 1) + { + rowcount++; + tmpcolumntotal[j]++; + } + } + tmprowtotal[i] = rowcount; + rowcount = 0; + } + columntotal = tmpcolumntotal; + rowtotal = tmprowtotal; + /*cout << "rowtotal: "; + for(int i = 0; ierrorOut(e, "TrialSwap2", "update_row_col_totals"); + exit(1); + } +} +/**************************************************************************************************/ +/*int main(int argc, char *argv[]) +{ + srand (time(0)); + char* input_filename = argv[1]; + std::ifstream infile (input_filename); + + //skip the first line of headers + getline(infile, line); + //get the first line of data + getline(infile, line); + + nrows = 0; + ncols = 0; + + //int numspaces = 0; + char nextChar; + + for (int i=0; i > co_matrix;//[nrows][ncols]; + vector > initmatrix; + vector tmprow; + vector stats; + int tmpnrows = nrows; + + for (int row1=0; row1> tmp; + + for (int col=0; col> tmp; + //cout << tmp << endl; + if (atoi(tmp.c_str()) > 0) + tmprow.push_back(1); + else + tmprow.push_back(0); + } + if (accumulate( tmprow.begin(), tmprow.end(), 0 ) == 0) + { + tmpnrows--; + } + else + initmatrix.push_back(tmprow); + //add the row to the matrix + //initmatrix.push_back(tmprow); + tmprow.clear(); + //cout << tmprow << endl; + } + + infile3.close(); + nrows = tmpnrows; + + //print init matrix + /* cout << "original matrix:" << endl; + + for (int i = 0; i < nrows; i++) + { + for (int j = 0; j < ncols; j++) + { + cout << initmatrix[i][j]; + } + cout << endl; + } */ + + //for (i=0;i columntotal(ncols, 0); + vector initcolumntotal(ncols, 0); + vector initrowtotal(nrows, 0); + vector rowtotal(nrows, 0); + + transpose_matrix(initmatrix,co_matrix); + //remove degenerate rows and cols + + //cout << "transposed matrix:" << endl; + int rowcount = 0; + for (int i = 0; i < nrows; i++) + { + for (int j = 0; j < ncols; j++) + { + if (co_matrix[i][j] == 1) + { + rowcount++; + columntotal[j]++; + } + //cout << co_matrix[i][j]; + } + //cout << " row total: " << rowcount << endl; + //cout << endl; + rowtotal[i] = rowcount; + rowcount = 0; + } + + initcolumntotal = rowtotal; + initrowtotal = columntotal; + + cout << endl; + + runs = atol(argv[2]); + int metric = atol(argv[3]); + int nullModel = atol(argv[4]); + double initscore; + update_row_col_totals(co_matrix, rowtotal, columntotal, ncols, nrows); + //do initial metric: checker, c score, v ratio or combo + switch(metric) + { + case 1: + //c score + initscore = calc_c_score(co_matrix, rowtotal); + cout << "initial c score: " << initscore << endl; + //print_matrix(co_matrix, nrows, ncols); + break; + + case 2: + //checker + initscore = calc_checker(co_matrix, rowtotal); + cout << "initial checker score: " << initscore << endl; + break; + + case 3: + //v ratio + initscore = calc_vratio(nrows, ncols, rowtotal, columntotal); + cout << "initial v ratio: " << initscore << endl; + break; + + case 4: + //combo + initscore = calc_combo(initrows, initcols, initmatrix); + cout << "initial combo score: " << initscore << endl; + //set co_matrix equal to initmatrix because combo requires row comparisons + co_matrix = initmatrix; + break; + + case 5: + //test! + + //print_matrix(co_matrix, nrows, ncols); + //sim1(nrows, ncols, co_matrix); + //sim2(nrows, ncols, co_matrix); + //sim3(initrows, initcols, initmatrix); + //sim4(columntotal, rowtotal, co_matrix); + //sim5(initcolumntotal, initmatrix); + //sim6(columntotal, co_matrix); + //sim7(initcolumntotal, initmatrix); + sim8(columntotal, rowtotal, co_matrix); + //print_matrix(initmatrix, initrows, initcols); + //print_matrix(co_matrix, nrows, ncols); + + break; + + default: + cout << "no metric selected!" << endl; + return 1; + + } + + //matrix initialization + //havel_hakimi(nrows, ncols, rowtotal, columntotal, co_matrix); + //sum_of_square(nrows, ncols, rowtotal, columntotal, co_matrix); + //co-matrix is now a random matrix with the same row and column totals as the initial matrix + + //null matrix burn in + cout << "initializing null matrix..."; + for(int l=0;l<10000;l++) + { + //swap_checkerboards (co_matrix); + //if(l%10 == 0) + switch(nullModel) + { + case 1: + // + sim1(nrows, ncols, co_matrix); + break; + + case 2: + //sim2 + sim2(nrows, ncols, co_matrix); + //sim2plus(nrows, ncols, initrowtotal, co_matrix); + break; + + case 3: + //sim3 + sim3(initrows, initcols, initmatrix); + //transpose_matrix(initmatrix,co_matrix); + co_matrix = initmatrix; + break; + + case 4: + //sim4 + sim4(columntotal, rowtotal, co_matrix); + break; + + case 5: + //sim5 + sim5(initcolumntotal, initrowtotal, initmatrix); + transpose_matrix(initmatrix,co_matrix); + //co_matrix = initmatrix; + break; + + case 6: + sim6(columntotal, co_matrix); + break; + + case 7: + //sim7(ncols, nrows, initrowtotal, co_matrix); + //transpose_matrix(initmatrix,co_matrix); + //co_matrix = initmatrix; + break; + + case 8: + sim8(columntotal, rowtotal, co_matrix); + break; + + case 9: + //swap_checkerboards + swap_checkerboards (co_matrix); + break; + + default: + cout << "no null model selected!" << endl; + return 1; + } + } + cout << "done!" << endl; + + //generate null matrices and calculate the metrics + + cout << "run: " << endl; + for(int trial=0;trial, double); + double calc_pvalue_greaterthan (vector, double); + int swap_checkerboards (vector > &); + int calc_combo (vector > &); + double calc_vratio (vector, vector); + int calc_checker (vector > &,vector); + double calc_c_score (vector > &,vector); + int sho(vector,vector,int k); + int havel_hakimi(vector,vector,vector > &); + int intrand(int); + + int sim1 (vector > &); + void sim2(vector >&); + int sim2plus(vector, vector > &); + void sim3(vector > &); + int sim4(vector, vector, vector > &); + int sim5(vector, vector, vector > &); + int sim6(vector, vector > &); + int sim7(vector, vector > &); + int sim8(vector, vector, vector > &); + int transpose_matrix (vector > &, vector > &); + int update_row_col_totals(vector > &, vector&, vector&); + + +private: + MothurOut* m; + + double t_test (double, int, double, vector); + int print_matrix(vector > &, int, int); + + + +}; + +#endif \ No newline at end of file