]> git.donarmstrong.com Git - mothur.git/blobdiff - metastatscommand.cpp
fixes while testing 1.33.0
[mothur.git] / metastatscommand.cpp
index 4744424d426d61243894d4779fa60dcd20781be3..de03dab2a147174384d3e3ffce329c1e16fa07f5 100644 (file)
@@ -9,21 +9,22 @@
 
 #include "metastatscommand.h"
 #include "sharedutilities.h"
+#include "sharedrabundfloatvector.h"
 
 
 //**********************************************************************************************************************
 vector<string> MetaStatsCommand::setParameters(){      
        try {
-               CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
-               CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
-               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
-               CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
-               CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(pthreshold);
-               CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
-               CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
-               CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","metastats",false,true,true); parameters.push_back(pshared);
+               CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign);
+               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+               CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
+               CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(pthreshold);
+               CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+               CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
+               CommandParameter psets("sets", "String", "", "", "", "", "","",false,false); parameters.push_back(psets);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -61,6 +62,21 @@ string MetaStatsCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
+string MetaStatsCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
+        
+        if (type == "metastats") {  pattern = "[filename],[distance],[group],metastats"; } 
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MetaStatsCommand", "getOutputPattern");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
 MetaStatsCommand::MetaStatsCommand(){  
        try {
                abort = true; calledHelp = true;
@@ -198,7 +214,14 @@ MetaStatsCommand::MetaStatsCommand(string option) {
 int MetaStatsCommand::execute(){
        try {
        
-               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+        if (abort == true) { if (calledHelp) { return 0; }  return 2;  }
+        
+        //just used to convert files to test metastats online
+        /****************************************************/
+        bool convertInputToShared = false;
+        convertSharedToInput = false;
+        if (convertInputToShared) { convertToShared(sharedfile); return 0; }
+        /****************************************************/
                
                designMap = new GroupMap(designfile);
                designMap->readDesignMap();
@@ -233,15 +256,14 @@ int MetaStatsCommand::execute(){
                else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
 
         if(processors != 1){
-            int numPairs = namesOfGroupCombos.size();
-            int numPairsPerProcessor = numPairs / processors;
-                       
-            for (int i = 0; i < processors; i++) {
-                int startPos = i * numPairsPerProcessor;
-                if(i == processors - 1){
-                    numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
-                }
-                lines.push_back(linePair(startPos, numPairsPerProcessor));
+            int remainingPairs = namesOfGroupCombos.size();
+            int startIndex = 0;
+            for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) {
+                int numPairs = remainingPairs; //case for last processor
+                if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); }
+                lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs
+                startIndex = startIndex + numPairs;
+                remainingPairs = remainingPairs - numPairs;
             }
         }
                
@@ -416,6 +438,9 @@ int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
                     
                     //Close all thread handles and free memory allocations.
                     for(int i=0; i < pDataArray.size(); i++){
+                        if (pDataArray[i]->count != (pDataArray[i]->num)) {
+                            m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->num) + " groups assigned to it, quitting. \n"); m->control_pressed = true; 
+                        }
                         for (int j = 0; j < pDataArray[i]->thisLookUp.size(); j++) {  delete pDataArray[i]->thisLookUp[j];  } 
                         for (int j = 0; j < pDataArray[i]->outputNames.size(); j++) {  
                             outputNames.push_back(pDataArray[i]->outputNames[j]);
@@ -445,11 +470,15 @@ int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& th
                for (int c = start; c < (start+num); c++) {
                        
                        //get set names
-                       string setA = namesOfGroupCombos[c][0]; 
+                       string setA = namesOfGroupCombos[c][0];
                        string setB = namesOfGroupCombos[c][1];
                
                        //get filename
-                       string outputFileName = outputDir +  m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
+            map<string, string> variables; 
+            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+            variables["[distance]"] = thisLookUp[0]->getLabel();
+            variables["[group]"] = setA + "-" + setB;
+                       string outputFileName = getOutputFileName("metastats",variables);
                        outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
                        //int nameLength = outputFileName.length();
                        //char * output = new char[nameLength];
@@ -483,27 +512,19 @@ int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& th
                                outputNames.pop_back();
                        }else {
                 
-                ofstream outTemp;
-                string tempOut = outputDir + "data." + setA + "-" + setB + ".matrix";
-                m->openOutputFile(tempOut, outTemp);
-                for (int i = 0; i < subset.size(); i++) { outTemp << '\t' << subset[i]->getGroup(); }
-                outTemp << endl;
-                
-                
                                //fill data
                                for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
                                        //data[j] = new double[subset.size()];
                                        data2[j].resize(subset.size(), 0.0);
-                    outTemp << "OTU" << (j+1);
+                   
                                        for (int i = 0; i < subset.size(); i++) {
                                                data2[j][i] = (subset[i]->getAbundance(j));
-                        outTemp << '\t' << subset[i]->getAbundance(j);
                                        }
-                    outTemp << endl;
                                }
-                               outTemp.close();
+                               
                                m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine(); 
                                //metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
+                if (convertSharedToInput) { convertToInput(subset, outputFileName);  }
                                
                                m->mothurOutEndLine();
                                MothurMetastats mothurMeta(threshold, iters);
@@ -527,3 +548,121 @@ int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& th
        }
 }
 //**********************************************************************************************************************
+/*Metastats files look like:
+ 13_0  14_0    13_52   14_52   70S     71S     72S     M1      M2      M3      C11     C12     C21     C15     C16     C19     C3      C4      C9
+ Alphaproteobacteria   0       0       0       0       0       0       5       0       0       0       0       0       0       0       0       0       0       0       0
+ Mollicutes    0       0       2       0       0       59      5       11      4       1       0       2       8       1       0       1       0       3       0
+ Verrucomicrobiae      0       0       0       0       0       1       6       0       0       0       0       0       0       0       0       0       0       0       0
+ Deltaproteobacteria   0       0       0       0       0       6       1       0       1       0       1       1       7       0       0       0       0       0       0
+ Cyanobacteria 0       0       1       0       0       0       1       0       0       0       0       0       0       0       0       0       0       0       0
+ Epsilonproteobacteria 0       0       0       0       0       0       0       0       6       0       0       3       1       0       0       0       0       0       0
+ Clostridia    75      65      207     226     801     280     267     210     162     197     81      120     106     148     120     94      84      98      121
+ Bacilli       3       2       16      8       21      52      31      70      46      65      4       28      5       23      62      26      20      30      25
+ Bacteroidetes (class) 21      25      22      64      226     193     296     172     98      55      19      149     201     85      50      76      113     92      82
+ Gammaproteobacteria   0       0       0       0       0       1       0       0       0       0       1       1       0       0       0       1       0       0       0
+ TM7_genera_incertae_sedis     0       0       0       0       0       0       0       0       1       0       1       2       0       2       0       0       0       0       0
+ Actinobacteria (class)        1       1       1       2       0       0       0       9       3       7       1       1       1       3       1       2       1       2       3
+ Betaproteobacteria    0       0       3       3       0       0       9       1       1       0       1       2       3       1       1       0       0       0       0
+*/
+//this function is just used to convert files to test the differences between the metastats version and mothurs version
+int MetaStatsCommand::convertToShared(string filename) {
+       try {
+        ifstream in;
+        m->openInputFile(filename, in);
+        
+        string header = m->getline(in); m->gobble(in);
+        
+        vector<string> groups = m->splitWhiteSpace(header);
+        vector<SharedRAbundFloatVector*> newLookup;
+        cout << groups.size() << endl;
+        for (int i = 0; i < groups.size(); i++) {
+            cout << "creating group " << groups[i] << endl;
+            SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
+            temp->setLabel("0.03");
+            temp->setGroup(groups[i]);
+            newLookup.push_back(temp);
+        }
+        
+        int otuCount = 0;
+        while (!in.eof()) {
+            if (m->control_pressed) { break; }
+            
+            string otuname;
+            in >> otuname; m->gobble(in);
+            otuCount++;
+            cout << otuname << endl;
+            for (int i = 0; i < groups.size(); i++) {
+                double temp;
+                in >> temp; m->gobble(in);
+                newLookup[i]->push_back(temp, groups[i]);
+            }
+            m->gobble(in);
+        }
+        in.close();
+    
+        ofstream out;
+        m->openOutputFile(filename+".shared", out);
+        
+        out << "label\tgroup\tnumOTUs\t";
+        
+        string snumBins = toString(otuCount);
+        for (int i = 0; i < otuCount; i++) {
+            string binLabel = "Otu";
+            string sbinNumber = toString(i+1);
+            if (sbinNumber.length() < snumBins.length()) {
+                int diff = snumBins.length() - sbinNumber.length();
+                for (int h = 0; h < diff; h++) { binLabel += "0"; }
+            }
+            binLabel += sbinNumber;
+            out << binLabel << '\t';
+        }
+        out << endl;
+        
+        for (int i = 0; i < groups.size(); i++) {
+            out << "0.03" << '\t' << groups[i] << '\t';
+            newLookup[i]->print(out);
+        }
+        out.close();
+        
+        cout << filename+".shared" << endl;
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "MetaStatsCommand", "convertToShared");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int MetaStatsCommand::convertToInput(vector<SharedRAbundVector*>& subset, string thisfilename) {
+       try {
+        ofstream out;
+        m->openOutputFile(thisfilename+".matrix", out);
+        
+        out << "\t";
+        for (int i = 0; i < subset.size()-1; i++) {
+            out << subset[i]->getGroup() << '\t';
+        }
+        out << subset[subset.size()-1]->getGroup() << endl;
+        
+        for (int i = 0; i < subset[0]->getNumBins(); i++) {
+            out << m->currentSharedBinLabels[i] << '\t';
+            for (int j = 0; j < subset.size()-1; j++) {
+                out << subset[j]->getAbundance(i) << '\t';
+            }
+            out << subset[subset.size()-1]->getAbundance(i) << endl;
+        }
+        out.close();
+        
+        cout << thisfilename+".matrix" << endl;
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "MetaStatsCommand", "convertToInput");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************