changed hard parameter in cluster commands

author westcott <westcott>

Fri, 28 May 2010 12:20:41 +0000 (12:20 +0000)

committer westcott <westcott>

Fri, 28 May 2010 12:20:41 +0000 (12:20 +0000)
author westcott <westcott>
Fri, 28 May 2010 12:20:41 +0000 (12:20 +0000)
committer westcott <westcott>
Fri, 28 May 2010 12:20:41 +0000 (12:20 +0000)
diff --git a/clustercommand.cpp b/clustercommand.cpp

index 9ed67e04433edecd93262c1824732edbb3cd186c..30c45e167ad6a398b23727293577f92362f5d6c7 100644 (file)
--- a/clustercommand.cpp
+++ b/clustercommand.cpp
@@ -62,7 +62,7 @@ ClusterCommand::ClusterCommand(string option)  {
                         temp = validParameter.validFile(parameters, "cutoff", false);
                         if (temp == "not found") { temp = "10"; }
                         convert(temp, cutoff); 
-                       if (!hard) {    cutoff += (5 / (precision * 10.0));  }
+                       cutoff += (5 / (precision * 10.0));  
                         
                         method = validParameter.validFile(parameters, "method", false);
                         if (method == "not found") { method = "furthest"; }
@@ -181,7 +181,12 @@ int ClusterCommand::execute(){
                         cluster->update(cutoff);
         
                         float dist = matrix->getSmallDist();
-                       float rndDist = roundDist(dist, precision);
+                       float rndDist;
+                       if (hard) {
+                               rndDist = ceilDist(dist, precision); 
+                       }else{
+                               rndDist = roundDist(dist, precision); 
+                       }
  
                         if(previousDist <= 0.0000 && dist != previousDist){
                                 printData("unique");
diff --git a/clustersplitcommand.cpp b/clustersplitcommand.cpp

index e9ab530936af07faf4375b263d82d79e6d30846a..4b3c27b14641059d8a61a8dc6d8c7a858ed41f7e 100644 (file)
--- a/clustersplitcommand.cpp
+++ b/clustersplitcommand.cpp
@@ -27,7 +27,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                 
                 else {
                         //valid paramters for this command
-                       string Array[] =  {"phylip","column","name","cutoff","precision","method","showabund","timing","hard","processors","outputdir","inputdir"};
+                       string Array[] =  {"phylip","column","name","cutoff","precision","method","splitmethod","taxonomy","showabund","timing","hard","processors","outputdir","inputdir"};
                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                         
                         OptionParser parser(option);
@@ -76,6 +76,14 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                                         //if the user has not given a path then, add inputdir. else leave path alone.
                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                 }
+                               
+                               it = parameters.find("taxonomy");
+                               //user has given a taxonomy file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
+                               }
                         }
                         
                         //check for required parameters
@@ -93,11 +101,15 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                         if (namefile == "not open") { abort = true; }   
                         else if (namefile == "not found") { namefile = ""; }
                         
+                       taxFile = validParameter.validFile(parameters, "taxonomy", true);
+                       if (taxFile == "not open") { abort = true; }    
+                       else if (taxFile == "not found") { taxFile = ""; }
+                       
                         if ((phylipfile == "") && (columnfile == "")) { m->mothurOut("When executing a cluster.split command you must enter a phylip or a column."); m->mothurOutEndLine(); abort = true; }
                         else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a cluster.split command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
                 
                         if (columnfile != "") {
-                               if (namefile == "") {  cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; }
+                               if (namefile == "") { m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine(); abort = true; }
                         }
                                         
                         //check for optional parameter and set defaults
@@ -119,13 +131,19 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                         temp = validParameter.validFile(parameters, "cutoff", false);
                         if (temp == "not found") { temp = "10"; }
                         convert(temp, cutoff); 
-                       if (!hard) {    cutoff += (5 / (precision * 10.0));  }
+                       cutoff += (5 / (precision * 10.0));  
                         
-                       method = validParameter.validFile(parameters, "method", false);
-                       if (method == "not found") { method = "furthest"; }
+                       method = validParameter.validFile(parameters, "method", false);         if (method == "not found") { method = "furthest"; }
+                       
+                       splitmethod = validParameter.validFile(parameters, "splitmethod", false);               if (splitmethod == "not found") { splitmethod = "distance"; }  //default to splitting by distance; must set splitmethod, not the clustering method
                         
                         if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
                         else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
+                       
+                       if ((splitmethod == "distance") || (splitmethod == "classify")) { }
+                       else { m->mothurOut("Not a valid splitting method.  Valid splitting algorithms are distance or classify."); m->mothurOutEndLine(); abort = true; }
+                       
+                       if ((splitmethod == "classify") && (taxFile == "")) {  m->mothurOut("You need to provide a taxonomy file if you are going to use the classify splitmethod."); m->mothurOutEndLine(); abort = true;  }
  
                         showabund = validParameter.validFile(parameters, "showabund", false);
                         if (showabund == "not found") { showabund = "T"; }
@@ -145,11 +163,13 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
  
  void ClusterSplitCommand::help(){
         try {
-               m->mothurOut("The cluster command can only be executed after a successful read.dist command.\n");
-               m->mothurOut("The cluster command parameter options are method, cuttoff, hard, precision, showabund and timing. No parameters are required.\n");
-               m->mothurOut("The cluster command should be in the following format: \n");
-               m->mothurOut("cluster(method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n");
-               m->mothurOut("The acceptable cluster methods are furthest, nearest and average.  If no method is provided then furthest is assumed.\n\n");      
+               m->mothurOut("The cluster.split command parameter options are cutoff, splitcutoff, precision, method, splitmethod, phylip, column, name, showabund, timing. Phylip or column and name are required.\n");
+               m->mothurOut("The phylip and column parameter allow you to enter your distance file. \n");
+               m->mothurOut("The name parameter allows you to enter your name file and is required if your distance file is in column format. \n");
+               m->mothurOut("The cluster.split command should be in the following format: \n");
+               m->mothurOut("cluster.split(column=youDistanceFile, name=yourNameFile, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n");
+               m->mothurOut("Example: cluster.split(column=abrecovery.dist, name=abrecovery.names, method=furthest, cutoff=0.10, precision=1000, splitmethod=classify) \n");   
+
         }
         catch(exception& e) {
                 m->errorOut(e, "ClusterSplitCommand", "help");
@@ -203,7 +223,7 @@ int ClusterSplitCommand::execute(){
                 time_t estart = time(NULL);
                 
                 //split matrix into non-overlapping groups
-               SplitMatrix* split = new SplitMatrix(distfile, namefile, cutoff);
+               SplitMatrix* split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod);
                 split->split();
                 
                 if (m->control_pressed) { delete split; return 0; }
@@ -212,11 +232,11 @@ int ClusterSplitCommand::execute(){
                 vector< map<string, string> > distName = split->getDistanceFiles();  //returns map of distance files -> namefile sorted by distance file size
                 delete split;
                 
+               if (m->control_pressed) { return 0; }
+               
                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to split the distance file."); m->mothurOutEndLine();
                 estart = time(NULL);
                 
-               if (m->control_pressed) { return 0; }
-               
                 //****************** break up files between processes and cluster each file set ******************************//
                 vector<string> listFileNames;
                 set<string> labels;
@@ -586,7 +606,12 @@ vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNa
                                 cluster->update(cutoff);
         
                                 float dist = matrix->getSmallDist();
-                               float rndDist = roundDist(dist, precision);
+                               float rndDist;
+                               if (hard) {
+                                       rndDist = ceilDist(dist, precision); 
+                               }else{
+                                       rndDist = roundDist(dist, precision); 
+                               }
  
                                 if(previousDist <= 0.0000 && dist != previousDist){
                                         oldList.setLabel("unique");
diff --git a/clustersplitcommand.h b/clustersplitcommand.h

index 4d1f4358ccf94382af1bb513e5cfaf10d87c8f34..631a8b6c8b95bfa87c52035cdb91d53bc7b378aa 100644 (file)
--- a/clustersplitcommand.h
+++ b/clustersplitcommand.h
@@ -32,7 +32,7 @@ private:
         vector<int> processIDS;   //processid
         vector<string> outputNames;
  
-       string method, fileroot, tag, outputDir, phylipfile, columnfile, namefile, distfile, format, showabund, timing;
+       string method, fileroot, tag, outputDir, phylipfile, columnfile, namefile, distfile, format, showabund, timing, splitmethod, taxFile;
         double cutoff, splitcutoff;
         int precision, length, processors;
         bool print_start, abort, hard;
diff --git a/hclustercommand.cpp b/hclustercommand.cpp

index dc14e1670c5221599dd14929dd6d08fde6c6b766..b4d601751630af81741d64ab605c95d5650838a5 100644 (file)
--- a/hclustercommand.cpp
+++ b/hclustercommand.cpp
@@ -110,7 +110,7 @@ HClusterCommand::HClusterCommand(string option)  {
                         temp = validParameter.validFile(parameters, "cutoff", false);
                         if (temp == "not found") { temp = "10"; }
                         convert(temp, cutoff); 
-                       if (!hard) {  cutoff += (5 / (precision * 10.0));  }
+                       cutoff += (5 / (precision * 10.0)); 
                         
                         method = validParameter.validFile(parameters, "method", false);
                         if (method == "not found") { method = "furthest"; }
@@ -163,7 +163,7 @@ void HClusterCommand::help(){
                 m->mothurOut("The name parameter allows you to enter your name file and is required if your distance file is in column format. \n");
                 m->mothurOut("The hcluster command should be in the following format: \n");
                 m->mothurOut("hcluster(column=youDistanceFile, name=yourNameFile, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n");
-               m->mothurOut("The acceptable hcluster methods are furthest and nearest, but we hope to add average in the future.\n\n");        
+               m->mothurOut("The acceptable hcluster methods are furthest, nearest and average.\n\n"); 
         }
         catch(exception& e) {
                 m->errorOut(e, "HClusterCommand", "help");
@@ -279,8 +279,14 @@ int HClusterCommand::execute(){
                                                 return 0;  
                                         }
  
-                                       
-                                       float rndDist = roundDist(seqs[i].dist, precision);
+                       
+                                       float rndDist;
+                                       if (hard) {
+                                               rndDist = ceilDist(seqs[i].dist, precision); 
+                                       }else{
+                                               rndDist = roundDist(seqs[i].dist, precision); 
+                                       }
+
                                         
                                         if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
                                                 printData("unique");
diff --git a/makefile b/makefile

index fdcef2794c06cb81c399376aaf79518246c23aca..f50b5548b982f34189f6ef20fa8bf8ae7979fc78 100644 (file)
--- a/makefile
+++ b/makefile
@@ -1691,7 +1691,7 @@ install : mothur
         $(CC) $(CC_OPTIONS) splitabundcommand.cpp -c $(INCLUDE) -o ./splitabundcommand.o\r
         \r
  # Item # 206 -- splitmatrix --\r
-./splitmatrix.o : splitmatrix.o\r
+./splitmatrix.o : splitmatrix.cpp\r
         $(CC) $(CC_OPTIONS) splitmatrix.cpp -c $(INCLUDE) -o ./splitmatrix.o\r
         \r
  # Item # 207 -- splitmatrix --\r
diff --git a/mgclustercommand.cpp b/mgclustercommand.cpp

index 6eb9c7d72f99590ec22b2f4b8aa758057a7f24f2..023f2142deb8584ad3794170b95e1b1d12889529 100644 (file)
--- a/mgclustercommand.cpp
+++ b/mgclustercommand.cpp
@@ -20,7 +20,7 @@ MGClusterCommand::MGClusterCommand(string option) {
                 
                 else {
                         //valid paramters for this command
-                       string Array[] =  {"blast", "method", "name", "cutoff", "precision", "length", "min", "penalty", "hcluster","merge","outputdir","inputdir"};
+                       string Array[] =  {"blast", "method", "name", "hard", "cutoff", "precision", "length", "min", "penalty", "hcluster","merge","outputdir","inputdir"};
                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                         
                         OptionParser parser(option);
@@ -104,6 +104,9 @@ MGClusterCommand::MGClusterCommand(string option) {
                         
                         temp = validParameter.validFile(parameters, "hcluster", false);                 if (temp == "not found") { temp = "false"; }
                         hclusterWanted = isTrue(temp); 
+                       
+                       temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "F"; }
+                       hard = isTrue(temp);
                 }
  
         }
@@ -214,7 +217,13 @@ int MGClusterCommand::execute(){
                                 }
                                 
                                 float dist = distMatrix->getSmallDist();
-                               float rndDist = roundDist(dist, precision);
+                               float rndDist;
+                               if (hard) {
+                                       rndDist = ceilDist(dist, precision); 
+                               }else{
+                                       rndDist = roundDist(dist, precision); 
+                               }
+
                                 
                                 if(previousDist <= 0.0000 && dist != previousDist){
                                         oldList.setLabel("unique");
@@ -329,7 +338,12 @@ int MGClusterCommand::execute(){
                                                         return 0; 
                                                 }
         
-                                               float rndDist = roundDist(seqs[i].dist, precision);
+                                               float rndDist;
+                                               if (hard) {
+                                                       rndDist = ceilDist(seqs[i].dist, precision); 
+                                               }else{
+                                                       rndDist = roundDist(seqs[i].dist, precision); 
+                                               }
                                                                                                 
                                                 if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
                                                         oldList.setLabel("unique");
diff --git a/mgclustercommand.h b/mgclustercommand.h

index a1dac221b891a8551a36b7493d5bb7a603204cf5..4ef0bd90de3de904e51556e87e31163f4d08d0c7 100644 (file)
--- a/mgclustercommand.h
+++ b/mgclustercommand.h
@@ -43,7 +43,7 @@ private:
         double cutoff;
         float penalty;
         int precision, length, precisionLength;
-       bool abort, minWanted, hclusterWanted, merge;
+       bool abort, minWanted, hclusterWanted, merge, hard;
         
         void printData(ListVector*);
         ListVector* mergeOPFs(map<string, int>, float);
diff --git a/mothur.h b/mothur.h

index ca19aea53139b11da68420de6dc28c239990d626..5d12977383c4d2eb0457cc88190f078a98a5755e 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -254,6 +254,13 @@ inline float roundDist(float dist, int precision){
         
         return int(dist * precision + 0.5)/float(precision);
         
+}
+/***********************************************************************/
+
+inline float ceilDist(float dist, int precision){
+       
+       return int(ceil(dist * precision))/float(precision);
+       
  }
  
  /***********************************************************************/
diff --git a/phylotree.h b/phylotree.h

index a961721a77dad0f32624aa1ff7941651007fc757..50bbb1dd86f3148e10110e3f10407ce754a30766 100644 (file)
--- a/phylotree.h
+++ b/phylotree.h
@@ -45,10 +45,12 @@ public:
         TaxNode get(string seqName);
         string getName(int i);                  
         int getIndex(string seqName);   
-                       
         string getFullTaxonomy(string);  //pass a sequence name return taxonomy
-       int getMaxLevel()                               {       return maxLevel;                                                }
-       int getNumSeqs()  {  return numSeqs;  }
+       
+       int getMaxLevel()               {       return maxLevel;        }
+       int getNumSeqs()                {       return numSeqs;         }
+       int getNumNodes()               {       return tree.size();     }
+       
         bool ErrorCheck(vector<string>);
         
  private:
diff --git a/readdistcommand.cpp b/readdistcommand.cpp

index 414f33087738ba355ad5ff51c2e89349d41b21f9..4ea630f476ae99cef0b9ef8f65b86a2f44a10873 100644 (file)
--- a/readdistcommand.cpp
+++ b/readdistcommand.cpp
@@ -22,7 +22,7 @@ ReadDistCommand::ReadDistCommand(string option) {
                 
                 else {
                         //valid paramters for this command
-                       string Array[] =  {"phylip", "column", "name", "cutoff","hard", "precision", "group","outputdir","inputdir","sim"};
+                       string Array[] =  {"phylip", "column", "name", "cutoff", "precision", "group","outputdir","inputdir","sim"};
                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                         
                         OptionParser parser(option);
@@ -126,12 +126,9 @@ ReadDistCommand::ReadDistCommand(string option) {
                         sim = isTrue(temp); 
                         globaldata->sim = sim;
                         
-                       temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "F"; }
-                       hard = isTrue(temp);
-                       
                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "10"; }
                         convert(temp, cutoff); 
-                       if (!hard) {  cutoff += (5 / (precision * 10.0));  }
+                       cutoff += (5 / (precision * 10.0)); 
                         
                         if (abort == false) {
                                 distFileName = globaldata->inputFileName;
diff --git a/readdistcommand.h b/readdistcommand.h

index 6241f97b5fb1b70f79a326360ace611a1866fa72..937ca3f0797c58bc24dc3a239ac5f7c0e4624339 100644 (file)
--- a/readdistcommand.h
+++ b/readdistcommand.h
@@ -42,7 +42,7 @@ private:
         string phylipfile, columnfile, namefile, groupfile, outputDir;
         NameAssignment* nameMap;
  
-       bool abort, sim, hard;
+       bool abort, sim;
  
  };
  
diff --git a/splitmatrix.cpp b/splitmatrix.cpp

index c52c2878855030932efb7cda659ba5d0f048a32a..0b59fa2e2548ac14c5baa8923510409ad56274b3 100644 (file)
--- a/splitmatrix.cpp
+++ b/splitmatrix.cpp
@@ -8,19 +8,44 @@
   */
  
  #include "splitmatrix.h"
+#include "phylotree.h"
  
  /***********************************************************************/
  
-SplitMatrix::SplitMatrix(string distfile, string name, float c){
+SplitMatrix::SplitMatrix(string distfile, string name, string tax, float c, string t){
         m = MothurOut::getInstance();
         distFile = distfile;
         cutoff = c;
         namefile = name;
+       method = t;
+       taxFile = tax;
  }
  
  /***********************************************************************/
  
  int SplitMatrix::split(){
+       try {
+        
+               if (method == "distance") {  
+                       splitDistance();
+               }else if (method == "classify") {
+                       splitClassify();
+               }else {
+                       m->mothurOut("Unknown splitting method, aborting split."); m->mothurOutEndLine();
+                       map<string, string> temp;
+                       temp[distFile] = namefile;
+                       dists.push_back(temp);
+               }
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SplitMatrix", "split");
+               exit(1);
+       }
+}
+/***********************************************************************/
+int SplitMatrix::splitDistance(){
         try {
          
                 vector<set<string> > groups;
@@ -36,6 +61,8 @@ int SplitMatrix::split(){
  
                         dFile >> seqA >> seqB >> dist;
                         
+                       if (m->control_pressed) {  outFile.close(); dFile.close();  for(int i=0;i<numGroups;i++){       if(groups[i].size() > 0){  remove((distFile + "." + toString(i) + ".temp").c_str()); }  } return 0; }
+                                       
                         if(dist < cutoff){
                                 //cout << "in cutoff: " << dist << endl;
                                 int groupIDA = -1;
@@ -172,7 +199,7 @@ int SplitMatrix::split(){
                                 smallNameFile.close();
                         }
                 }
-                       
+               
                 //names of singletons
                 if (nameMap.size() != 0) {
                         singleton = namefile + ".extra.temp";
@@ -193,12 +220,160 @@ int SplitMatrix::split(){
                                 dists.push_back(temp);
                         }
                 }
+               
+               if (m->control_pressed)  {  
+                       for (int i = 0; i < dists.size(); i++) { 
+                               remove((dists[i].begin()->first).c_str());
+                               remove((dists[i].begin()->second).c_str());
+                       }
+                       dists.clear();
+               }
+               
+               return 0;
+                       
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SplitMatrix", "splitDistance");
+               exit(1);
+       }
+}
+
+/***********************************************************************/
+int SplitMatrix::splitClassify(){
+       try {
+               map<string, int> seqGroup;
+               map<string, int>::iterator it;
+               map<string, int>::iterator it2;
+               
+               int numGroups = 0;
+               
+               //build tree from users taxonomy file
+               PhyloTree* phylo = new PhyloTree();
+               
+               ifstream in;
+               openInputFile(taxFile, in);
+                       
+               //read in users taxonomy file and add sequences to tree
+               string seqname, tax;
+               while(!in.eof()){
+                       in >> seqname >> tax; gobble(in);
+                               
+                       phylo->addSeqToTree(seqname, tax);
+               }
+               in.close();
+               
+               phylo->assignHeirarchyIDs(0);
+
+               //make sure the cutoff is not greater than maxlevel
+               if (cutoff > phylo->getMaxLevel()) { m->mothurOut("splitcutoff is greater than the longest taxonomy, using " + toString(phylo->getMaxLevel())); m->mothurOutEndLine(); cutoff = phylo->getMaxLevel(); }
+               
+               //for each node in tree
+               for (int i = 0; i < phylo->getNumNodes(); i++) {
+               
+                       //is this node within the cutoff
+                       TaxNode taxon = phylo->get(i);
+                       
+                       if (taxon.level == cutoff) {//if yes, then create group containing this nodes sequences
+                               if (taxon.children.size() > 1) { //if this taxon just has one seq its a singleton
+                                       for (it = taxon.children.begin(); it != taxon.children.end(); it++) {
+                                               seqGroup[it->first] = numGroups;
+                                       }
+                                       numGroups++;
+                               }
+                       }
+               }
+               delete phylo;  //the tree is not referenced past this point (groups now live in seqGroup); free it so it does not leak
+               ifstream dFile;
+               openInputFile(distFile, dFile);
+               ofstream outFile;
+               
+               for (int i = 0; i < numGroups; i++) { //remove old temp files, just in case
+                       remove((distFile + "." + toString(i) + ".temp").c_str());
+               }
+               
+               //for each distance
+               while(dFile){
+                       string seqA, seqB;
+                       float dist;
+                       
+                       if (m->control_pressed) { dFile.close(); for (int i = 0; i < numGroups; i++) { remove((distFile + "." + toString(i) + ".temp").c_str());        } return 0; }  //user cancelled: clean up and stop, as splitDistance does, instead of falling through to the name file pass
+                       
+                       dFile >> seqA >> seqB >> dist;  gobble(dFile);
+                       
+                       //if both sequences are in the same group then they are within the cutoff
+                       it = seqGroup.find(seqA);
+                       it2 = seqGroup.find(seqB);
+                       
+                       if ((it != seqGroup.end()) && (it2 != seqGroup.end())) { //they are both not singletons 
+                               if (it->second == it2->second) { //they are from the same group so add the distance
+                                       openOutputFileAppend((distFile + "." + toString(it->second) + ".temp"), outFile);
+                                       outFile << seqA << '\t' << seqB << '\t' << dist << endl;
+                                       outFile.close();
+                               }
+                       }
+               }
+               dFile.close();
+       
+               
+               for (int i = 0; i < numGroups; i++) { //remove old temp files, just in case
+                       remove((namefile + "." + toString(i) + ".temp").c_str());
+               }
+               
+               ifstream bigNameFile;
+               openInputFile(namefile, bigNameFile);
+               
+               singleton = namefile + ".extra.temp";
+               ofstream remainingNames;
+               openOutputFile(singleton, remainingNames);
+               
+               bool wroteExtra = false;
+                                               
+               string name, nameList;
+               while(!bigNameFile.eof()){
+                       bigNameFile >> name >> nameList;  gobble(bigNameFile);
+                       
+                       //did this sequence get assigned a group
+                       it = seqGroup.find(name);
+                       
+                       if (it != seqGroup.end()) {  
+                               openOutputFileAppend((namefile + "." + toString(it->second) + ".temp"), outFile);
+                               outFile << name << '\t' << nameList << endl;
+                               outFile.close();
+                       }else{
+                               wroteExtra = true;
+                               remainingNames << name << '\t' << nameList << endl;
+                       }
+               }
+               bigNameFile.close();
+               remainingNames.close();
+               
+               if (!wroteExtra) { 
+                       remove(singleton.c_str());
+                       singleton = "none";
+               }
+                       
+               for(int i=0;i<numGroups;i++){
+                       string tempNameFile = namefile + "." + toString(i) + ".temp";
+                       string tempDistFile = distFile + "." + toString(i) + ".temp";
                                 
+                       map<string, string> temp;
+                       temp[tempDistFile] = tempNameFile;
+                       dists.push_back(temp);
+               }
+               
+               if (m->control_pressed)  {  
+                       for (int i = 0; i < dists.size(); i++) { 
+                               remove((dists[i].begin()->first).c_str());
+                               remove((dists[i].begin()->second).c_str());
+                       }
+                       dists.clear();
+               }
+               
                 return 0;
                         
         }
         catch(exception& e) {
-               m->errorOut(e, "SplitMatrix", "split");
+               m->errorOut(e, "SplitMatrix", "splitClassify");
                 exit(1);
         }
  }
diff --git a/splitmatrix.h b/splitmatrix.h

index 5974ff1b967a5b3313bea6dcbcf967eb4b1c5023..e38f8fc1ba7afd679a7cc308547e2e0f0c11a545 100644 (file)
--- a/splitmatrix.h
+++ b/splitmatrix.h
@@ -19,18 +19,21 @@ class SplitMatrix  {
         
         public:
  
-               SplitMatrix(string, string, float); //column formatted distance file, namesfile, cutoff
+               SplitMatrix(string, string, string, float, string); //column formatted distance file, namesfile, taxonomy file, cutoff, splitmethod
                 ~SplitMatrix();
                 int split();
                 vector< map<string, string> > getDistanceFiles();  //returns map of distance files -> namefile sorted by distance file size
                 string getSingletonNames() { return singleton; } //returns namesfile containing singletons
         
         private:
-               string distFile, namefile, singleton;
+               MothurOut* m;
+
+               string distFile, namefile, singleton, method, taxFile;
                 vector< map< string, string> > dists;
                 float cutoff;
-               
-               MothurOut* m;
+                               
+               int splitDistance();
+               int splitClassify();
  };
  
  /******************************************************/
author	westcott <westcott>
	Fri, 28 May 2010 12:20:41 +0000 (12:20 +0000)
committer	westcott <westcott>
	Fri, 28 May 2010 12:20:41 +0000 (12:20 +0000)
clustercommand.cpp		patch \| blob \| history
clustersplitcommand.cpp		patch \| blob \| history
clustersplitcommand.h		patch \| blob \| history
hclustercommand.cpp		patch \| blob \| history
makefile		patch \| blob \| history
mgclustercommand.cpp		patch \| blob \| history
mgclustercommand.h		patch \| blob \| history
mothur.h		patch \| blob \| history
phylotree.h		patch \| blob \| history
readdistcommand.cpp		patch \| blob \| history
readdistcommand.h		patch \| blob \| history
splitmatrix.cpp		patch \| blob \| history
splitmatrix.h		patch \| blob \| history