]> git.donarmstrong.com Git - mothur.git/blobdiff - metastatscommand.cpp
adding labels to list file.
[mothur.git] / metastatscommand.cpp
index 0704f55896e835f331eda28446012e62c790904e..33b559fc2dc358017c90fd44fe77ec0be61acb65 100644 (file)
@@ -8,22 +8,22 @@
  */
 
 #include "metastatscommand.h"
-#include "metastats.h"
 #include "sharedutilities.h"
 
+
 //**********************************************************************************************************************
 vector<string> MetaStatsCommand::setParameters(){      
        try {
-               CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
-               CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
-               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
-               CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
-               CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(pthreshold);
-               CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
-               CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
-               CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","metastats",false,true,true); parameters.push_back(pshared);
+               CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign);
+               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+               CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
+               CommandParameter pthreshold("threshold", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(pthreshold);
+               CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+               CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
+               CommandParameter psets("sets", "String", "", "", "", "", "","",false,false); parameters.push_back(psets);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -61,6 +61,21 @@ string MetaStatsCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
+string MetaStatsCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
+        
+        if (type == "metastats") {  pattern = "[filename],[distance],[group],metastats"; } 
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MetaStatsCommand", "getOutputPattern");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
 MetaStatsCommand::MetaStatsCommand(){  
        try {
                abort = true; calledHelp = true;
@@ -166,7 +181,7 @@ MetaStatsCommand::MetaStatsCommand(string option) {
                        else { 
                                pickedGroups = true;
                                m->splitAtDash(groups, Groups);
-                               m->Groups = Groups;
+                               m->setGroups(Groups);
                        }
                        
                        sets = validParameter.validFile(parameters, "sets", false);                     
@@ -177,14 +192,14 @@ MetaStatsCommand::MetaStatsCommand(string option) {
 
                        
                        string temp = validParameter.validFile(parameters, "iters", false);                     if (temp == "not found") { temp = "1000"; }
-                       convert(temp, iters); 
+                       m->mothurConvert(temp, iters); 
                        
                        temp = validParameter.validFile(parameters, "threshold", false);                        if (temp == "not found") { temp = "0.05"; }
-                       convert(temp, threshold); 
+                       m->mothurConvert(temp, threshold); 
                        
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
-                       convert(temp, processors);
+                       m->mothurConvert(temp, processors);
                }
 
        }
@@ -199,6 +214,14 @@ int MetaStatsCommand::execute(){
        try {
        
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+        
+        //just used to convert files to test metastats online
+        /****************************************************/
+        bool convertInputToShared = false;
+        convertSharedToInput = false;
+        if (convertInputToShared) { convertToShared(sharedfile); return 0; }
+        /****************************************************/
+        
                
                designMap = new GroupMap(designfile);
                designMap->readDesignMap();
@@ -214,8 +237,9 @@ int MetaStatsCommand::execute(){
                //setup the pairwise comparions of sets for metastats
                //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
                //make sure sets are all in designMap
-               SharedUtil* util = new SharedUtil();  
-               util->setGroups(Sets, designMap->namesOfGroups);  
+               SharedUtil* util = new SharedUtil(); 
+               vector<string> dGroups = designMap->getNamesOfGroups();
+               util->setGroups(Sets, dGroups);  
                delete util;
                
                int numGroups = Sets.size();
@@ -230,26 +254,24 @@ int MetaStatsCommand::execute(){
                //only 1 combo
                if (numGroups == 2) { processors = 1; }
                else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
-               
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                       if(processors != 1){
-                               int numPairs = namesOfGroupCombos.size();
-                               int numPairsPerProcessor = numPairs / processors;
+
+        if(processors != 1){
+            int numPairs = namesOfGroupCombos.size();
+            int numPairsPerProcessor = numPairs / processors;
                        
-                               for (int i = 0; i < processors; i++) {
-                                       int startPos = i * numPairsPerProcessor;
-                                       if(i == processors - 1){
-                                               numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
-                                       }
-                                       lines.push_back(linePair(startPos, numPairsPerProcessor));
-                               }
-                       }
-               #endif
+            for (int i = 0; i < processors; i++) {
+                int startPos = i * numPairsPerProcessor;
+                if(i == processors - 1){
+                    numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
+                }
+                lines.push_back(linePair(startPos, numPairsPerProcessor));
+            }
+        }
                
                //as long as you are not at the end of the file or done wih the lines you want
                while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                        
-                       if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } m->Groups.clear(); delete input; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {      remove(outputNames[i].c_str()); } return 0; }
+                       if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } m->clearGroups(); delete input; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
        
                        if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
 
@@ -280,13 +302,13 @@ int MetaStatsCommand::execute(){
                        //prevent memory leak
                        for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
                        
-                       if (m->control_pressed) {  outputTypes.clear(); m->Groups.clear(); delete input;  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
+                       if (m->control_pressed) {  outputTypes.clear(); m->clearGroups(); delete input;  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]); } return 0; }
 
                        //get next line to process
                        lookup = input->getSharedRAbundVectors();                               
                }
                
-               if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); delete input; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str()); }  return 0; }
+               if (m->control_pressed) { outputTypes.clear(); m->clearGroups(); delete input; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }  return 0; }
 
                //output error messages about any remaining user labels
                set<string>::iterator it;
@@ -314,11 +336,11 @@ int MetaStatsCommand::execute(){
                }
        
                //reset groups parameter
-               m->Groups.clear();  
+               m->clearGroups();  
                delete input; 
                delete designMap;
                
-               if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str()); } return 0;}
+               if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]); } return 0;}
                
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
@@ -337,13 +359,13 @@ int MetaStatsCommand::execute(){
 int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
        try {
                
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               
                                if(processors == 1){
                                        driver(0, namesOfGroupCombos.size(), thisLookUp);
                                }else{
                                        int process = 1;
                                        vector<int> processIDS;
-               
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                                        //loop through and create all the processes you want
                                        while (process != processors) {
                                                int pid = fork();
@@ -369,11 +391,70 @@ int MetaStatsCommand::process(vector<SharedRAbundVector*>& thisLookUp){
                                                int temp = processIDS[i];
                                                wait(&temp);
                                        }
-                               }
-               #else
-                               driver(0, namesOfGroupCombos.size(), thisLookUp);
-               #endif
+        #else
+                    
+                    //////////////////////////////////////////////////////////////////////////////////////////////////////
+                    //Windows version shared memory, so be careful when passing variables through the summarySharedData struct. 
+                    //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+                    //Taking advantage of shared memory to pass results vectors.
+                    //////////////////////////////////////////////////////////////////////////////////////////////////////
+                    
+                    vector<metastatsData*> pDataArray; 
+                    DWORD   dwThreadIdArray[processors-1];
+                    HANDLE  hThreadArray[processors-1]; 
+                    
+                    //Create processor worker threads.
+                    for( int i=1; i<processors; i++ ){
+                        
+                        //make copy of lookup so we don't get access violations
+                        vector<SharedRAbundVector*> newLookup;
+                        vector<string> designMapGroups;
+                        for (int k = 0; k < thisLookUp.size(); k++) {
+                            SharedRAbundVector* temp = new SharedRAbundVector();
+                            temp->setLabel(thisLookUp[k]->getLabel());
+                            temp->setGroup(thisLookUp[k]->getGroup());
+                            newLookup.push_back(temp);
+                            designMapGroups.push_back(designMap->getGroup(thisLookUp[k]->getGroup()));
+                        }
+                        
+                        //for each bin
+                        for (int k = 0; k < thisLookUp[0]->getNumBins(); k++) {
+                            if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
+                            for (int j = 0; j < thisLookUp.size(); j++) { newLookup[j]->push_back(thisLookUp[j]->getAbundance(k), thisLookUp[j]->getGroup()); }
+                        }
+                        
+                        // Allocate memory for thread data.
+                        metastatsData* tempSum = new metastatsData(sharedfile, outputDir, m, lines[i].start, lines[i].num, namesOfGroupCombos, newLookup, designMapGroups, iters, threshold);
+                        pDataArray.push_back(tempSum);
+                        processIDS.push_back(i);
+                        
+                        hThreadArray[i-1] = CreateThread(NULL, 0, MyMetastatsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
+                    }
+                    
+                    //do my part
+                                       driver(lines[0].start, lines[0].num, thisLookUp);
+                    
+                    //Wait until all threads have terminated.
+                    WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+                    
+                    //Close all thread handles and free memory allocations.
+                    for(int i=0; i < pDataArray.size(); i++){
+                        if (pDataArray[i]->count != (pDataArray[i]->num)) {
+                            m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->num) + " groups assigned to it, quitting. \n"); m->control_pressed = true; 
+                        }
+                        for (int j = 0; j < pDataArray[i]->thisLookUp.size(); j++) {  delete pDataArray[i]->thisLookUp[j];  } 
+                        for (int j = 0; j < pDataArray[i]->outputNames.size(); j++) {  
+                            outputNames.push_back(pDataArray[i]->outputNames[j]);
+                            outputTypes["metastats"].push_back(pDataArray[i]->outputNames[j]);
+                        }
+                                                
+                        CloseHandle(hThreadArray[i]);
+                        delete pDataArray[i];
+                    }
+        #endif
 
+                               }
+               
                return 0;
                
        }
@@ -390,19 +471,25 @@ int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& th
                for (int c = start; c < (start+num); c++) {
                        
                        //get set names
-                       string setA = namesOfGroupCombos[c][0]; 
+                       string setA = namesOfGroupCombos[c][0];
                        string setB = namesOfGroupCombos[c][1];
-                       
+               
                        //get filename
-                       string outputFileName = outputDir +  m->getRootName(m->getSimpleName(sharedfile)) + thisLookUp[0]->getLabel() + "." + setA + "-" + setB + ".metastats";
+            map<string, string> variables; 
+            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+            variables["[distance]"] = thisLookUp[0]->getLabel();
+            variables["[group]"] = setA + "-" + setB;
+                       string outputFileName = getOutputFileName("metastats",variables);
                        outputNames.push_back(outputFileName); outputTypes["metastats"].push_back(outputFileName);
-                       int nameLength = outputFileName.length();
-                       char * output = new char[nameLength];
-                       strcpy(output, outputFileName.c_str());
+                       //int nameLength = outputFileName.length();
+                       //char * output = new char[nameLength];
+                       //strcpy(output, outputFileName.c_str());
        
                        //build matrix from shared rabunds
-                       double** data;
-                       data = new double*[thisLookUp[0]->getNumBins()];
+                       //double** data;
+                       //data = new double*[thisLookUp[0]->getNumBins()];
+                       
+                       vector< vector<double> > data2; data2.resize(thisLookUp[0]->getNumBins());
                        
                        vector<SharedRAbundVector*> subset;
                        int setACount = 0;
@@ -425,24 +512,32 @@ int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& th
                                m->mothurOut("Missing shared info for " + setA + " or " + setB + ". Skipping comparison."); m->mothurOutEndLine(); 
                                outputNames.pop_back();
                        }else {
+                
                                //fill data
                                for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
-                                       data[j] = new double[subset.size()];
+                                       //data[j] = new double[subset.size()];
+                                       data2[j].resize(subset.size(), 0.0);
+                   
                                        for (int i = 0; i < subset.size(); i++) {
-                                               data[j][i] = (subset[i]->getAbundance(j));
+                                               data2[j][i] = (subset[i]->getAbundance(j));
                                        }
                                }
                                
                                m->mothurOut("Comparing " + setA + " and " + setB + "..."); m->mothurOutEndLine(); 
-                               metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
+                               //metastat_main(output, thisLookUp[0]->getNumBins(), subset.size(), threshold, iters, data, setACount);
+                if (convertSharedToInput) { convertToInput(subset, outputFileName);  }
                                
+                               m->mothurOutEndLine();
+                               MothurMetastats mothurMeta(threshold, iters);
+                               mothurMeta.runMetastats(outputFileName , data2, setACount);
+                               m->mothurOutEndLine();
                                m->mothurOutEndLine(); 
                        }
                        
                        //free memory
-                       delete output;
-                       for(int i = 0; i < thisLookUp[0]->getNumBins(); i++)  {  delete[] data[i];  }
-                       delete[] data; 
+                       //delete output;
+                       //for(int i = 0; i < thisLookUp[0]->getNumBins(); i++)  {  delete[] data[i];  }
+                       //delete[] data; 
                }
                
                return 0;
@@ -454,3 +549,117 @@ int MetaStatsCommand::driver(int start, int num, vector<SharedRAbundVector*>& th
        }
 }
 //**********************************************************************************************************************
+/*Metastats files look like:
+ 13_0  14_0    13_52   14_52   70S     71S     72S     M1      M2      M3      C11     C12     C21     C15     C16     C19     C3      C4      C9
+ Alphaproteobacteria   0       0       0       0       0       0       5       0       0       0       0       0       0       0       0       0       0       0       0
+ Mollicutes    0       0       2       0       0       59      5       11      4       1       0       2       8       1       0       1       0       3       0
+ Verrucomicrobiae      0       0       0       0       0       1       6       0       0       0       0       0       0       0       0       0       0       0       0
+ Deltaproteobacteria   0       0       0       0       0       6       1       0       1       0       1       1       7       0       0       0       0       0       0
+ Cyanobacteria 0       0       1       0       0       0       1       0       0       0       0       0       0       0       0       0       0       0       0
+ Epsilonproteobacteria 0       0       0       0       0       0       0       0       6       0       0       3       1       0       0       0       0       0       0
+ Clostridia    75      65      207     226     801     280     267     210     162     197     81      120     106     148     120     94      84      98      121
+ Bacilli       3       2       16      8       21      52      31      70      46      65      4       28      5       23      62      26      20      30      25
+ Bacteroidetes (class) 21      25      22      64      226     193     296     172     98      55      19      149     201     85      50      76      113     92      82
+ Gammaproteobacteria   0       0       0       0       0       1       0       0       0       0       1       1       0       0       0       1       0       0       0
+ TM7_genera_incertae_sedis     0       0       0       0       0       0       0       0       1       0       1       2       0       2       0       0       0       0       0
+ Actinobacteria (class)        1       1       1       2       0       0       0       9       3       7       1       1       1       3       1       2       1       2       3
+ Betaproteobacteria    0       0       3       3       0       0       9       1       1       0       1       2       3       1       1       0       0       0       0
+*/
+//this function is just used to convert files to test the differences between the metastats version and mothurs version
+int MetaStatsCommand::convertToShared(string filename) {
+       try {
+        ifstream in;
+        m->openInputFile(filename, in);
+        
+        string header = m->getline(in); m->gobble(in);
+        
+        vector<string> groups = m->splitWhiteSpace(header);
+        vector<SharedRAbundVector*> newLookup;
+        for (int i = 0; i < groups.size(); i++) {
+            SharedRAbundVector* temp = new SharedRAbundVector();
+            temp->setLabel("0.03");
+            temp->setGroup(groups[i]);
+            newLookup.push_back(temp);
+        }
+        
+        int otuCount = 0;
+        while (!in.eof()) {
+            if (m->control_pressed) { break; }
+            
+            string otuname;
+            in >> otuname; m->gobble(in);
+            otuCount++;
+            
+            for (int i = 0; i < groups.size(); i++) {
+                int temp;
+                in >> temp; m->gobble(in);
+                newLookup[i]->push_back(temp, groups[i]);
+            }
+            m->gobble(in);
+        }
+        in.close();
+    
+        ofstream out;
+        m->openOutputFile(filename+".shared", out);
+        
+        out << "label\tgroup\tnumOTUs\t";
+        
+        string snumBins = toString(otuCount);
+        for (int i = 0; i < otuCount; i++) {
+            string binLabel = "Otu";
+            string sbinNumber = toString(i+1);
+            if (sbinNumber.length() < snumBins.length()) {
+                int diff = snumBins.length() - sbinNumber.length();
+                for (int h = 0; h < diff; h++) { binLabel += "0"; }
+            }
+            binLabel += sbinNumber;
+            out << binLabel << '\t';
+        }
+        out << endl;
+        
+        for (int i = 0; i < groups.size(); i++) {
+            out << "0.03" << '\t' << groups[i] << '\t';
+            newLookup[i]->print(out);
+        }
+        out.close();
+        
+        cout << filename+".shared" << endl;
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "MetaStatsCommand", "convertToShared");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int MetaStatsCommand::convertToInput(vector<SharedRAbundVector*>& subset, string thisfilename) {
+       try {
+        ofstream out;
+        m->openOutputFile(thisfilename+".matrix", out);
+        
+        out << "\t";
+        for (int i = 0; i < subset.size()-1; i++) {
+            out << subset[i]->getGroup() << '\t';
+        }
+        out << subset[subset.size()-1]->getGroup() << endl;
+        
+        for (int i = 0; i < subset[0]->getNumBins(); i++) {
+            out << m->currentSharedBinLabels[i] << '\t';
+            for (int j = 0; j < subset.size()-1; j++) {
+                out << subset[j]->getAbundance(i) << '\t';
+            }
+            out << subset[subset.size()-1]->getAbundance(i) << endl;
+        }
+        out.close();
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "MetaStatsCommand", "convertToInput");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************