]> git.donarmstrong.com Git - mothur.git/blobdiff - clustersplitcommand.h
moved mothur's source into a folder to make grabbing just the source easier on github
[mothur.git] / clustersplitcommand.h
diff --git a/clustersplitcommand.h b/clustersplitcommand.h
deleted file mode 100644 (file)
index 0e32ffa..0000000
+++ /dev/null
@@ -1,260 +0,0 @@
-#ifndef CLUSTERSPLITCOMMAND_H
-#define CLUSTERSPLITCOMMAND_H
-
-/*
- *  clustersplitcommand.h
- *  Mothur
- *
- *  Created by westcott on 5/19/10.
- *  Copyright 2010 Schloss Lab. All rights reserved.
- *
- */
-#include "command.hpp"
-#include "rabundvector.hpp"
-#include "sabundvector.hpp"
-#include "listvector.hpp"
-#include "cluster.hpp"
-#include "sparsematrix.hpp"
-#include "readcluster.h"
-#include "splitmatrix.h"
-#include "readphylip.h"
-#include "readcolumn.h"
-#include "readmatrix.hpp"
-#include "inputdata.h"
-#include "clustercommand.h"
-
-// Command object registered under the name "cluster.split" in the
-// "Clustering" category. Per its own description string it "splits your
-// sequences by distance or taxonomy then clusters into OTUs".
-// Only declarations and trivial inline accessors live in this header; every
-// other member function is defined outside of it.
-class ClusterSplitCommand : public Command {
-       
-public:
-       // NOTE(review): the string argument is presumably the user's option string -- confirm in the implementation file.
-       ClusterSplitCommand(string);
-       ClusterSplitCommand();
-       ~ClusterSplitCommand() {}
-       
-       vector<string> setParameters();
-       string getCommandName()                 { return "cluster.split";               }
-       string getCommandCategory()             { return "Clustering";                  }
-       string getHelpString(); 
-       string getCitation() { return "Schloss PD, Westcott SL (2011). Assessing and improving methods used in OTU-based approaches for 16S rRNA gene sequence analysis. Appl Environ Microbiol. \nhttp://www.mothur.org/wiki/Cluster.split"; }
-       string getDescription()         { return "splits your sequences by distance or taxonomy then clusters into OTUs"; }
-       
-       int execute(); 
-       // Writes the help text returned by getHelpString() through m->mothurOut().
-       void help() { m->mothurOut(getHelpString()); }  
-
-private:
-       vector<int> processIDS;   //processid
-       vector<string> outputNames;
-       
-       // Command settings. NOTE(review): meanings are inferred from the names only -- confirm where they are parsed.
-       string method, fileroot, tag, outputDir, phylipfile, columnfile, namefile, distfile, format, showabund, timing, splitmethod, taxFile, fastafile;
-       double cutoff, splitcutoff;
-       int precision, length, processors, taxLevelCutoff;
-       bool print_start, abort, hard, large;
-       time_t start;
-       ofstream outList, outRabund, outSabund;
-       
-       // Private helpers, all defined outside this header.
-       // NOTE(review): the vector< map<string, string> > arguments presumably pair each distance
-       // file with its name file (that is how the thread function in this header reads them) -- TODO confirm.
-       void printData(ListVector*);
-       vector<string> createProcesses(vector< map<string, string> >, set<string>&);
-       vector<string> cluster(vector< map<string, string> >, set<string>&);
-       int mergeLists(vector<string>, map<float, int>, ListVector*);
-       map<float, int> completeListFile(vector<string>, string, set<string>&, ListVector*&);
-       int createMergedDistanceFile(vector< map<string, string> >);
-};
-
-/////////////////not working for Windows////////////////////////////////////////////////////////////
-// Getting an access violation error.  This is most likely caused by the 
-// threads stepping on each other's structures, as I can run the thread function and the cluster function 
-// separately without errors occurring.  I suspect it may be in the use of the
-// static class mothurOut, but I can't pinpoint the problem.  All other objects are made new
-// within the thread.  MothurOut is used by almost all the classes in mothur, so if this was 
-// really the cause I would expect to see all the Windows threaded commands have issues, but not 
-// all do. So far, shhh.flows and trim.flows have similar problems. Another thought: could it have 
-// anything to do with mothur's use of copy constructors in many of our data structures? i.e. listvector 
-// is copied by nameassignment and passed to read, which passes it to the thread?  -westcott 2-8-12
-////////////////////////////////////////////////////////////////////////////////////////////////////
-/**************************************************************************************************/
-// Argument bundle for MyClusterThreadFunction.
-// A thread entry point receives one void pointer (LPVOID), so everything the
-// worker needs -- and everything it reports back -- is packed into this struct.
-// The thread function reads distNames, method, cutoff, precision, hard,
-// length, outputDir and m; it writes labels, listFiles and tag, fills in
-// outputDir when it is empty, and overwrites cutoff before returning.
-struct clusterData {
-    set<string> labels;                        // labels written to the list files (filled by the thread)
-    vector < map<string, string> > distNames;  // per entry: first key = distance file, its value = name file
-    string method;                             // "furthest", "nearest" or "average"
-    MothurOut* m;                              // shared output/utility object; not owned by this struct
-    double cutoff, precision;
-    string tag, outputDir;
-    vector<string> listFiles;                  // list files created by the thread
-    bool hard;                                 // true: ceilDist is used for rounding, false: roundDist
-    int length, threadID;
-
-    // Every scalar member gets a defined value so that a default-constructed
-    // bundle never exposes indeterminate data.
-    clusterData()
-        : m(NULL), cutoff(0.0), precision(0.0), hard(false), length(0), threadID(0) {}
-
-    // dv   - distance/name file pairs to cluster
-    // mout - shared MothurOut instance
-    // cu   - distance cutoff
-    // me   - clustering method name
-    // ou   - output directory ("" lets the thread derive it from the distance file path)
-    // hd   - selects ceilDist (true) or roundDist (false)
-    // pre  - precision handed to ceilDist/roundDist
-    // len  - label length; the thread formats labels with len-1
-    // th   - thread identifier
-    // The initializer list follows the member declaration order.
-    clusterData(vector < map<string, string> > dv, MothurOut* mout, double cu, string me, string ou, bool hd, double pre, int len, int th)
-        : distNames(dv), method(me), m(mout), cutoff(cu), precision(pre),
-          outputDir(ou), hard(hd), length(len), threadID(th) {}
-};
-
-/**************************************************************************************************/
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-#else
-// Windows thread entry point: clusters every distance file described by one
-// clusterData bundle.
-//
-// lpParam must point to a clusterData. For each map in distNames the first
-// entry supplies the column-format distance file (key) and its name file
-// (value). The matrix is read, clustered with the requested method, and one
-// list-file record is written each time the rounded distance changes. The
-// list file name is appended to listFiles and every label written is added to
-// labels. After a file has been processed its distance and name files are
-// removed.
-//
-// On return, pDataArray->cutoff holds the smallest cutoff seen across all
-// files. If m->control_pressed is set, the list files created so far are
-// removed, listFiles is cleared and the loop stops. Always returns 0; an
-// exception is reported through errorOut() and terminates the process.
-static DWORD WINAPI MyClusterThreadFunction(LPVOID lpParam){
-    clusterData* pDataArray = static_cast<clusterData*>(lpParam);
-
-    try {
-        double smallestCutoff = pDataArray->cutoff;
-
-        //cluster each distance file
-        for (size_t i = 0; i < pDataArray->distNames.size(); i++) {
-
-            if (pDataArray->m->control_pressed) { break; }
-
-            // an empty map has no first entry to take the file names from
-            if (pDataArray->distNames[i].empty()) { continue; }
-
-            string thisNamefile = pDataArray->distNames[i].begin()->second;
-            string thisDistFile = pDataArray->distNames[i].begin()->first;
-
-            pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("Reading " + thisDistFile); pDataArray->m->mothurOutEndLine();
-
-            ReadMatrix* myread = new ReadColumnMatrix(thisDistFile);
-            myread->setCutoff(pDataArray->cutoff);
-            NameAssignment* mynameMap = new NameAssignment(thisNamefile);
-            mynameMap->readMap();
-            myread->read(mynameMap);
-
-            if (pDataArray->m->control_pressed) { delete myread; delete mynameMap; break; }
-
-            ListVector* mylist = myread->getListVector();
-            SparseMatrix* mymatrix = myread->getMatrix();
-
-            delete myread; myread = NULL;
-            delete mynameMap; mynameMap = NULL;
-
-            pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("Clustering " + thisDistFile); pDataArray->m->mothurOutEndLine();
-
-            RAbundVector* myrabund = new RAbundVector(mylist->getRAbundVector());
-
-            //create cluster
-            Cluster* mycluster = NULL;
-            if (pDataArray->method == "furthest")     { mycluster = new CompleteLinkage(myrabund, mylist, mymatrix, pDataArray->cutoff, pDataArray->method); }
-            else if (pDataArray->method == "nearest") { mycluster = new SingleLinkage(myrabund, mylist, mymatrix, pDataArray->cutoff, pDataArray->method); }
-            else if (pDataArray->method == "average") { mycluster = new AverageLinkage(myrabund, mylist, mymatrix, pDataArray->cutoff, pDataArray->method); }
-
-            // An unrecognized method leaves mycluster NULL; report it and stop
-            // instead of dereferencing a null pointer below.
-            if (mycluster == NULL) {
-                pDataArray->m->mothurOut("[ERROR]: " + pDataArray->method + " is not a valid clustering method."); pDataArray->m->mothurOutEndLine();
-                delete mymatrix; delete mylist; delete myrabund;
-                break;
-            }
-
-            pDataArray->tag = mycluster->getTag();
-
-            if (pDataArray->outputDir == "") { pDataArray->outputDir += pDataArray->m->hasPath(thisDistFile); }
-            string fileroot = pDataArray->outputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(thisDistFile));
-            string listFileName = fileroot + pDataArray->tag + ".list";
-
-            ofstream listFile;
-            pDataArray->m->openOutputFile(listFileName, listFile);
-            pDataArray->listFiles.push_back(listFileName);
-
-            float previousDist = 0.00000;
-            float rndPreviousDist = 0.00000;
-
-            // snapshot of the list as it was before the most recent update
-            ListVector myoldList;
-            myoldList = *mylist;
-
-            // update() receives saveCutoff and the value is compared with the
-            // requested cutoff afterwards, so it must stay a separate variable
-            double saveCutoff = pDataArray->cutoff;
-            bool interrupted = false;
-
-            while (mymatrix->getSmallDist() < pDataArray->cutoff && mymatrix->getNNodes() > 0){
-
-                // Only flag the interruption here; the single cleanup path
-                // after the loop releases the objects exactly once.
-                if (pDataArray->m->control_pressed) { interrupted = true; break; }
-
-                mycluster->update(saveCutoff);
-
-                float dist = mymatrix->getSmallDist();
-                float rndDist;
-                if (pDataArray->hard) {
-                    rndDist = pDataArray->m->ceilDist(dist, pDataArray->precision);
-                }else{
-                    rndDist = pDataArray->m->roundDist(dist, pDataArray->precision);
-                }
-
-                if(previousDist <= 0.0000 && dist != previousDist){
-                    myoldList.setLabel("unique");
-                    myoldList.print(listFile);
-                    pDataArray->labels.insert("unique"); // set::insert ignores duplicates
-                }
-                else if(rndDist != rndPreviousDist){
-                    string label = toString(rndPreviousDist,  pDataArray->length-1);
-                    myoldList.setLabel(label);
-                    myoldList.print(listFile);
-                    pDataArray->labels.insert(label);
-                }
-
-                previousDist = dist;
-                rndPreviousDist = rndDist;
-                myoldList = *mylist;
-            }
-
-            // write the final record unless the run was interrupted
-            if (!interrupted) {
-                if(previousDist <= 0.0000){
-                    myoldList.setLabel("unique");
-                    myoldList.print(listFile);
-                    pDataArray->labels.insert("unique");
-                }
-                else if(rndPreviousDist<pDataArray->cutoff){
-                    string label = toString(rndPreviousDist,  pDataArray->length-1);
-                    myoldList.setLabel(label);
-                    myoldList.print(listFile);
-                    pDataArray->labels.insert(label);
-                }
-            }
-
-            delete mymatrix; delete mylist; delete mycluster; delete myrabund;
-            mymatrix = NULL; mylist = NULL; mycluster = NULL; myrabund = NULL;
-            listFile.close();
-
-            if (interrupted || pDataArray->m->control_pressed) { //clean up
-                for (size_t j = 0; j < pDataArray->listFiles.size(); j++) { pDataArray->m->mothurRemove(pDataArray->listFiles[j]); }
-                pDataArray->listFiles.clear(); break;
-            }
-
-            pDataArray->m->mothurRemove(thisDistFile);
-            pDataArray->m->mothurRemove(thisNamefile);
-
-            if (saveCutoff != pDataArray->cutoff) {
-                if (pDataArray->hard)   {  saveCutoff = pDataArray->m->ceilDist(saveCutoff, pDataArray->precision);  }
-                else                    {  saveCutoff = pDataArray->m->roundDist(saveCutoff, pDataArray->precision); }
-
-                pDataArray->m->mothurOut("Cutoff was " + toString(pDataArray->cutoff) + " changed cutoff to " + toString(saveCutoff)); pDataArray->m->mothurOutEndLine();
-            }
-
-            if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff;  }
-        }
-
-        pDataArray->cutoff = smallestCutoff;
-
-        return 0;
-
-    }
-    catch(exception& e) {
-        pDataArray->m->errorOut(e, "ClusterSplitCommand", "MyClusterThreadFunction");
-        exit(1);
-    }
-}
-#endif
-
-
-
-
-#endif
-