]> git.donarmstrong.com Git - mothur.git/blobdiff - clustersplitcommand.h
changing command name classify.shared to classifyrf.shared
[mothur.git] / clustersplitcommand.h
index 4d1f4358ccf94382af1bb513e5cfaf10d87c8f34..e1f17b1bd0b8c4088d5deb3c06691fb3ccf26999 100644 (file)
 #include "sabundvector.hpp"
 #include "listvector.hpp"
 #include "cluster.hpp"
-#include "sparsematrix.hpp"
-#include "globaldata.hpp"
-
+#include "sparsedistancematrix.h"
+#include "readcluster.h"
+#include "splitmatrix.h"
+#include "readphylip.h"
+#include "readcolumn.h"
+#include "readmatrix.hpp"
+#include "inputdata.h"
+#include "clustercommand.h"
+#include "clusterclassic.h"
 
 // Command subclass exposed under the name "cluster.split" in the "Clustering" category.
 // Per getDescription(): splits sequences by distance or taxonomy, then clusters into OTUs.
 // Only the small inline accessors are defined here; everything else is declaration-only.
 class ClusterSplitCommand : public Command {
        
 public:
        ClusterSplitCommand(string);
-       ~ClusterSplitCommand();
-       int execute();  
-       void help();
+       ClusterSplitCommand();
+       ~ClusterSplitCommand() {}  // empty inline destructor
+       
+       vector<string> setParameters();
+       string getCommandName()                 { return "cluster.split";               }
+       string getCommandCategory()             { return "Clustering";                  }
        
+       string getHelpString(); 
+    string getOutputPattern(string);   
+       string getCitation() { return "Schloss PD, Westcott SL (2011). Assessing and improving methods used in OTU-based approaches for 16S rRNA gene sequence analysis. Appl Environ Microbiol 77:3219. \nhttp://www.mothur.org/wiki/Cluster.split"; }
+       string getDescription()         { return "splits your sequences by distance or taxonomy then clusters into OTUs"; }
+       
+       int execute(); 
+       void help() { m->mothurOut(getHelpString()); }  // writes the help text via m->mothurOut; m is not declared in this class (presumably inherited from Command - confirm)
+
 private:
-       GlobalData* globaldata;
        vector<int> processIDS;   //processid
        vector<string> outputNames;
-
-       string method, fileroot, tag, outputDir, phylipfile, columnfile, namefile, distfile, format, showabund, timing;
+       
        // NOTE(review): the members below are only declared here; their meaning and
        // initialisation live in the implementation file - confirm there before relying on them.
+       string method, fileroot, tag, outputDir, phylipfile, columnfile, namefile, countfile, distfile, format, showabund, timing, splitmethod, taxFile, fastafile;
        double cutoff, splitcutoff;
-       int precision, length, processors;
-       bool print_start, abort, hard;
+       int precision, length, processors, taxLevelCutoff;
+       bool print_start, abort, hard, large, classic, runCluster;
        time_t start;
        ofstream outList, outRabund, outSabund;
        
        void printData(ListVector*);
-       int createProcesses(vector < vector < map<string, string> > >);
+       vector<string> createProcesses(vector< map<string, string> >, set<string>&);
        vector<string> cluster(vector< map<string, string> >, set<string>&);
        // clusterFile and clusterClassicFile share one signature; the set<string>& and double&
        // arguments are non-const references, so the callee may modify them.
        // NOTE(review): presumably chosen according to the 'classic' flag - confirm in the .cpp.
-       int mergeLists(vector<string>, string, set<string>);
+    string clusterFile(string, string, set<string>&, double&);
+    string clusterClassicFile(string, string, set<string>&, double&);
+       int mergeLists(vector<string>, map<float, int>, ListVector*);
+       map<float, int> completeListFile(vector<string>, string, set<string>&, ListVector*&);
+       int createMergedDistanceFile(vector< map<string, string> >);
+    int createRabund(CountTable*& ct, ListVector*& list, RAbundVector*& rabund);
 };
 
+/////////////////not working for Windows////////////////////////////////////////////////////////////
+// Getting an access violation error.  This is most likely caused by the 
+// threads stepping on each other's structures, as I can run the thread function and the cluster function 
+// separately without errors occurring.  I suspect it may be in the use of the
+// static class mothurOut, but I can't pinpoint the problem.  All other objects are made new
+// within the thread.  MothurOut is used by almost all the classes in mothur, so if this was 
+// really the cause I would expect to see all the windows threaded commands to have issues, but not 
+// all do. So far, shhh.flows and trim.flows have similar problems. Another thought: could it have 
+// anything to do with mothur's use of copy constructors in many of our data structures, i.e. listvector 
+// is copied by nameassignment and passed to read, which passes it to the thread?  -westcott 2-8-12
+////////////////////////////////////////////////////////////////////////////////////////////////////
+/**************************************************************************************************
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+struct clusterData {
+       set<string> labels;
+       vector < map<string, string> > distNames; 
+       string method; 
+    MothurOut* m;
+       double cutoff, precision;
+    string tag, outputDir;
+    vector<string> listFiles;
+    bool hard;
+    int length, threadID;
+       
+       
+       clusterData(){}
+       clusterData(vector < map<string, string> > dv, MothurOut* mout, double cu, string me, string ou, bool hd, double pre, int len, int th) {
+               distNames = dv;
+               m = mout;
+               cutoff = cu;
+        method = me;
+               outputDir = ou;
+        hard = hd;
+        precision = pre;
+        length = len;
+        threadID = th;
+       }
+};
+
+/**************************************************************************************************
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MyClusterThreadFunction(LPVOID lpParam){ 
+       clusterData* pDataArray;
+       pDataArray = (clusterData*)lpParam;
+       
+       try {
+               cout << "starting " << endl;            
+               
+               double smallestCutoff = pDataArray->cutoff;
+               
+               //cluster each distance file
+               for (int i = 0; i < pDataArray->distNames.size(); i++) {
+            
+            Cluster* mycluster = NULL;
+            SparseMatrix* mymatrix = NULL;
+            ListVector* mylist = NULL;
+            ListVector myoldList;
+            RAbundVector* myrabund = NULL;
+                        
+                       if (pDataArray->m->control_pressed) { break; }
+                       
+                       string thisNamefile = pDataArray->distNames[i].begin()->second;
+                       string thisDistFile = pDataArray->distNames[i].begin()->first;
+            cout << thisNamefile << '\t' << thisDistFile << endl;      
+                       pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("Reading " + thisDistFile); pDataArray->m->mothurOutEndLine();
+                       
+                       ReadMatrix* myread = new ReadColumnMatrix(thisDistFile);        
+                       myread->setCutoff(pDataArray->cutoff);
+                       NameAssignment* mynameMap = new NameAssignment(thisNamefile);
+                       mynameMap->readMap();
+            cout << "done reading " << thisNamefile << endl;  
+                       myread->read(mynameMap);
+                       cout << "done reading " << thisDistFile << endl;  
+                       if (pDataArray->m->control_pressed) {  delete myread; delete mynameMap; break; }
+            
+                       mylist = myread->getListVector();
+                       myoldList = *mylist;
+                       mymatrix = myread->getMatrix();
+            cout << "here" << endl;    
+                       delete myread; myread = NULL;
+                       delete mynameMap; mynameMap = NULL;
+                       
+            pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("Clustering " + thisDistFile); pDataArray->m->mothurOutEndLine();
+            
+                       myrabund = new RAbundVector(mylist->getRAbundVector());
+                        cout << "here" << endl;        
+                       //create cluster
+                       if (pDataArray->method == "furthest")   {       mycluster = new CompleteLinkage(myrabund, mylist, mymatrix, pDataArray->cutoff, pDataArray->method); }
+                       else if(pDataArray->method == "nearest"){       mycluster = new SingleLinkage(myrabund, mylist, mymatrix, pDataArray->cutoff, pDataArray->method); }
+                       else if(pDataArray->method == "average"){       mycluster = new AverageLinkage(myrabund, mylist, mymatrix, pDataArray->cutoff, pDataArray->method);     }
+                       pDataArray->tag = mycluster->getTag();
+             cout << "here" << endl;   
+                       if (pDataArray->outputDir == "") { pDataArray->outputDir += pDataArray->m->hasPath(thisDistFile); }
+                       string fileroot = pDataArray->outputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(thisDistFile));
+                        cout << "here" << endl;        
+                       ofstream listFile;
+                       pDataArray->m->openOutputFile(fileroot+ pDataArray->tag + ".list",      listFile);
+             cout << "here" << endl;   
+                       pDataArray->listFiles.push_back(fileroot+ pDataArray->tag + ".list");
+            
+                       float previousDist = 0.00000;
+                       float rndPreviousDist = 0.00000;
+                       
+                       myoldList = *mylist;
+        
+                       bool print_start = true;
+                       int start = time(NULL);
+                       double saveCutoff = pDataArray->cutoff;
+            
+                       while (mymatrix->getSmallDist() < pDataArray->cutoff && mymatrix->getNNodes() > 0){
+                
+                               if (pDataArray->m->control_pressed) { //clean up
+                                       delete mymatrix; delete mylist; delete mycluster; delete myrabund;
+                                       listFile.close();
+                                       for (int i = 0; i < pDataArray->listFiles.size(); i++) {        pDataArray->m->mothurRemove(pDataArray->listFiles[i]);  }
+                                       pDataArray->listFiles.clear(); break;
+                               }
+                
+                               mycluster->update(saveCutoff);
+                
+                               float dist = mymatrix->getSmallDist();
+                               float rndDist;
+                               if (pDataArray->hard) {
+                                       rndDist = pDataArray->m->ceilDist(dist, pDataArray->precision); 
+                               }else{
+                                       rndDist = pDataArray->m->roundDist(dist, pDataArray->precision); 
+                               }
+                
+                               if(previousDist <= 0.0000 && dist != previousDist){
+                                       myoldList.setLabel("unique");
+                                       myoldList.print(listFile);
+                                       if (pDataArray->labels.count("unique") == 0) {  pDataArray->labels.insert("unique");  }
+                               }
+                               else if(rndDist != rndPreviousDist){
+                                       myoldList.setLabel(toString(rndPreviousDist,  pDataArray->length-1));
+                                       myoldList.print(listFile);
+                                       if (pDataArray->labels.count(toString(rndPreviousDist,  pDataArray->length-1)) == 0) { pDataArray->labels.insert(toString(rndPreviousDist,  pDataArray->length-1)); }
+                               }
+                       
+                               previousDist = dist;
+                               rndPreviousDist = rndDist;
+                               myoldList = *mylist;
+                       }
+            
+             cout << "here2" << endl;  
+                       if(previousDist <= 0.0000){
+                               myoldList.setLabel("unique");
+                               myoldList.print(listFile);
+                               if (pDataArray->labels.count("unique") == 0) { pDataArray->labels.insert("unique"); }
+                       }
+                       else if(rndPreviousDist<pDataArray->cutoff){
+                               myoldList.setLabel(toString(rndPreviousDist,  pDataArray->length-1));
+                               myoldList.print(listFile);
+                               if (pDataArray->labels.count(toString(rndPreviousDist,  pDataArray->length-1)) == 0) { pDataArray->labels.insert(toString(rndPreviousDist,  pDataArray->length-1)); }
+                       }
+            
+                       delete mymatrix; delete mylist; delete mycluster; delete myrabund; 
+            mymatrix = NULL; mylist = NULL; mycluster = NULL; myrabund = NULL;
+                       listFile.close();
+                       
+                       if (pDataArray->m->control_pressed) { //clean up
+                               for (int i = 0; i < pDataArray->listFiles.size(); i++) {        pDataArray->m->mothurRemove(pDataArray->listFiles[i]);  }
+                               pDataArray->listFiles.clear(); break;
+                       }
+                        cout << "here3" << endl;       
+                       pDataArray->m->mothurRemove(thisDistFile);
+                       pDataArray->m->mothurRemove(thisNamefile);
+                        cout << "here4" << endl;       
+                       if (saveCutoff != pDataArray->cutoff) { 
+                               if (pDataArray->hard)   {  saveCutoff = pDataArray->m->ceilDist(saveCutoff, pDataArray->precision);     }
+                               else            {       saveCutoff = pDataArray->m->roundDist(saveCutoff, pDataArray->precision);  }
+                
+                               pDataArray->m->mothurOut("Cutoff was " + toString(pDataArray->cutoff) + " changed cutoff to " + toString(saveCutoff)); pDataArray->m->mothurOutEndLine();  
+                       }
+                        cout << "here5" << endl;       
+                       if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff;  }
+               }
+               
+               pDataArray->cutoff = smallestCutoff;
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "ClusterSplitCommand", "MyClusterThreadFunction");
+               exit(1);
+       }
+} 
+#endif
+
+*/
+
+
 #endif