#include "metastatscommand.h"
#include "sharedutilities.h"
+#include "sharedrabundfloatvector.h"
//**********************************************************************************************************************
vector<string> MetaStatsCommand::setParameters(){
try {
- CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
- CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
- CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
- CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
- CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(pthreshold);
- CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
- CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
- CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
- CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
- CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+ CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","metastats",false,true,true); parameters.push_back(pshared);
+ CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign);
+ CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+ CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
+ CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(pthreshold);
+ CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+ CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
+ CommandParameter psets("sets", "String", "", "", "", "", "","",false,false); parameters.push_back(psets);
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
vector<string> myArray;
for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
}
}
//**********************************************************************************************************************
-string MetaStatsCommand::getOutputFileNameTag(string type, string inputName=""){
- try {
- string outputFileName = "";
- map<string, vector<string> >::iterator it;
+string MetaStatsCommand::getOutputPattern(string type) {
+ try {
+ string pattern = "";
- //is this a type this command creates
- it = outputTypes.find(type);
- if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
- else {
- if (type == "metastats") { outputFileName = "metastats"; }
- else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
- }
- return outputFileName;
- }
- catch(exception& e) {
- m->errorOut(e, "MetaStatsCommand", "getOutputFileNameTag");
- exit(1);
- }
+ if (type == "metastats") { pattern = "[filename],[distance],[group],metastats"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
+
+ return pattern;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MetaStatsCommand", "getOutputPattern");
+ exit(1);
+ }
}
-
//**********************************************************************************************************************
MetaStatsCommand::MetaStatsCommand(){
try {
int MetaStatsCommand::execute(){
try {
- if (abort == true) { if (calledHelp) { return 0; } return 2; }
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
+ //just used to convert files to test metastats online
+ /****************************************************/
+ bool convertInputToShared = false;
+ convertSharedToInput = false;
+ if (convertInputToShared) { convertToShared(sharedfile); return 0; }
+ /****************************************************/
designMap = new GroupMap(designfile);
designMap->readDesignMap();
else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
if(processors != 1){
- int numPairs = namesOfGroupCombos.size();
- int numPairsPerProcessor = numPairs / processors;
-
- for (int i = 0; i < processors; i++) {
- int startPos = i * numPairsPerProcessor;
- if(i == processors - 1){
- numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
- }
- lines.push_back(linePair(startPos, numPairsPerProcessor));
+ int remainingPairs = namesOfGroupCombos.size();
+ int startIndex = 0;
+ for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) {
+ int numPairs = remainingPairs; //case for last processor
+ if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); }
+ lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs
+ startIndex = startIndex + numPairs;
+ remainingPairs = remainingPairs - numPairs;
}
}
//Close all thread handles and free memory allocations.
for(int i=0; i < pDataArray.size(); i++){
+ if (pDataArray[i]->count != (pDataArray[i]->num)) {
+ m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->num) + " groups assigned to it, quitting. \n"); m->control_pressed = true;
+ }
for (int j = 0; j < pDataArray[i]->thisLookUp.size(); j++) { delete pDataArray[i]->thisLookUp[j]; }
for (int j = 0; j < pDataArray[i]->outputNames.size(); j++) {
outputNames.push_back(pDataArray[i]->outputNames[j]);
for (int c = start; c < (start+num); c++) {
//get set names
- string setA = namesOfGroupCombos[c][0];
+ string setA = namesOfGroupCombos[c][0];
string setB = namesOfGroupCombos[c][1];
//get filename
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + "." + getOutputFileNameTag("metastats");
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+ variables["[distance]"] = thisLookUp[0]->getLabel();
+ variables["[group]"] = setA + "-" + setB;
+ string outputFileName = getOutputFileName("metastats",variables);
outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
//int nameLength = outputFileName.length();
//char * output = new char[nameLength];
m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine();
//metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
+ if (convertSharedToInput) { convertToInput(subset, outputFileName); }
m->mothurOutEndLine();
MothurMetastats mothurMeta(threshold, iters);
}
}
//**********************************************************************************************************************
+/*Metastats files look like:
+ 13_0 14_0 13_52 14_52 70S 71S 72S M1 M2 M3 C11 C12 C21 C15 C16 C19 C3 C4 C9
+ Alphaproteobacteria 0 0 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0
+ Mollicutes 0 0 2 0 0 59 5 11 4 1 0 2 8 1 0 1 0 3 0
+ Verrucomicrobiae 0 0 0 0 0 1 6 0 0 0 0 0 0 0 0 0 0 0 0
+ Deltaproteobacteria 0 0 0 0 0 6 1 0 1 0 1 1 7 0 0 0 0 0 0
+ Cyanobacteria 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
+ Epsilonproteobacteria 0 0 0 0 0 0 0 0 6 0 0 3 1 0 0 0 0 0 0
+ Clostridia 75 65 207 226 801 280 267 210 162 197 81 120 106 148 120 94 84 98 121
+ Bacilli 3 2 16 8 21 52 31 70 46 65 4 28 5 23 62 26 20 30 25
+ Bacteroidetes (class) 21 25 22 64 226 193 296 172 98 55 19 149 201 85 50 76 113 92 82
+ Gammaproteobacteria 0 0 0 0 0 1 0 0 0 0 1 1 0 0 0 1 0 0 0
+ TM7_genera_incertae_sedis 0 0 0 0 0 0 0 0 1 0 1 2 0 2 0 0 0 0 0
+ Actinobacteria (class) 1 1 1 2 0 0 0 9 3 7 1 1 1 3 1 2 1 2 3
+ Betaproteobacteria 0 0 3 3 0 0 9 1 1 0 1 2 3 1 1 0 0 0 0
+*/
+//this function is just used to convert files to test the differences between the metastats version and mothurs version
+int MetaStatsCommand::convertToShared(string filename) {
+ try {
+ ifstream in;
+ m->openInputFile(filename, in);
+
+ string header = m->getline(in); m->gobble(in);
+
+ vector<string> groups = m->splitWhiteSpace(header);
+ vector<SharedRAbundFloatVector*> newLookup;
+ cout << groups.size() << endl;
+ for (int i = 0; i < groups.size(); i++) {
+ cout << "creating group " << groups[i] << endl;
+ SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
+ temp->setLabel("0.03");
+ temp->setGroup(groups[i]);
+ newLookup.push_back(temp);
+ }
+
+ int otuCount = 0;
+ while (!in.eof()) {
+ if (m->control_pressed) { break; }
+
+ string otuname;
+ in >> otuname; m->gobble(in);
+ otuCount++;
+ cout << otuname << endl;
+ for (int i = 0; i < groups.size(); i++) {
+ double temp;
+ in >> temp; m->gobble(in);
+ newLookup[i]->push_back(temp, groups[i]);
+ }
+ m->gobble(in);
+ }
+ in.close();
+
+ ofstream out;
+ m->openOutputFile(filename+".shared", out);
+
+ out << "label\tgroup\tnumOTUs\t";
+
+ string snumBins = toString(otuCount);
+ for (int i = 0; i < otuCount; i++) {
+ string binLabel = "Otu";
+ string sbinNumber = toString(i+1);
+ if (sbinNumber.length() < snumBins.length()) {
+ int diff = snumBins.length() - sbinNumber.length();
+ for (int h = 0; h < diff; h++) { binLabel += "0"; }
+ }
+ binLabel += sbinNumber;
+ out << binLabel << '\t';
+ }
+ out << endl;
+
+ for (int i = 0; i < groups.size(); i++) {
+ out << "0.03" << '\t' << groups[i] << '\t';
+ newLookup[i]->print(out);
+ }
+ out.close();
+
+ cout << filename+".shared" << endl;
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MetaStatsCommand", "convertToShared");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+int MetaStatsCommand::convertToInput(vector<SharedRAbundVector*>& subset, string thisfilename) {
+ try {
+ ofstream out;
+ m->openOutputFile(thisfilename+".matrix", out);
+
+ out << "\t";
+ for (int i = 0; i < subset.size()-1; i++) {
+ out << subset[i]->getGroup() << '\t';
+ }
+ out << subset[subset.size()-1]->getGroup() << endl;
+
+ for (int i = 0; i < subset[0]->getNumBins(); i++) {
+ out << m->currentSharedBinLabels[i] << '\t';
+ for (int j = 0; j < subset.size()-1; j++) {
+ out << subset[j]->getAbundance(i) << '\t';
+ }
+ out << subset[subset.size()-1]->getAbundance(i) << endl;
+ }
+ out.close();
+
+ cout << thisfilename+".matrix" << endl;
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MetaStatsCommand", "convertToInput");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************