]> git.donarmstrong.com Git - mothur.git/blobdiff - clustersplitcommand.cpp
adds group parameter to chimera.uchime so you can check for chimeras with template...
[mothur.git] / clustersplitcommand.cpp
index bdc8075342de2f484238c4810ee4da5babb0356c..dc913d8c8895522a39b5ee0bd22be15a82eb7a74 100644 (file)
@@ -183,25 +183,27 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                        phylipfile = validParameter.validFile(parameters, "phylip", true);
                        if (phylipfile == "not open") { abort = true; }
                        else if (phylipfile == "not found") { phylipfile = ""; }        
-                       else {  distfile = phylipfile;  format = "phylip";      }
+                       else {  distfile = phylipfile;  format = "phylip";      m->setPhylipFile(phylipfile); }
                        
                        columnfile = validParameter.validFile(parameters, "column", true);
                        if (columnfile == "not open") { abort = true; } 
                        else if (columnfile == "not found") { columnfile = ""; }
-                       else {  distfile = columnfile; format = "column";       }
+                       else {  distfile = columnfile; format = "column";       m->setColumnFile(columnfile); }
                        
                        namefile = validParameter.validFile(parameters, "name", true);
                        if (namefile == "not open") { abort = true; }   
-                       else if (namefile == "not found") { namefile = ""; }
+                       else if (namefile == "not found") { namefile = "";  }
+                       else { m->setNameFile(namefile); }
                        
                        fastafile = validParameter.validFile(parameters, "fasta", true);
                        if (fastafile == "not open") { abort = true; }  
                        else if (fastafile == "not found") { fastafile = ""; }
-                       else { distfile = fastafile;  splitmethod = "fasta";  }
+                       else { distfile = fastafile;  splitmethod = "fasta";  m->setFastaFile(fastafile); }
                        
                        taxFile = validParameter.validFile(parameters, "taxonomy", true);
                        if (taxFile == "not open") { abort = true; }    
                        else if (taxFile == "not found") { taxFile = ""; }
+                       else {  m->setTaxonomyFile(taxFile); }
                        
                        if ((phylipfile == "") && (columnfile == "") && (fastafile == "")) { 
                                //is there a current file available for either of these?
@@ -553,17 +555,23 @@ int ClusterSplitCommand::execute(){
                MPI_Barrier(MPI_COMM_WORLD);
                
        #else
-
+               
+               //sanity check
+               if (processors > distName.size()) { processors = distName.size(); }
+               
                #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                                if(processors == 1){
                                        listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
                                }else{
+                                       
+                                       cout << processors << '\t' << distName.size() << endl;
                                        vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
                                        dividedNames.resize(processors);
                                        
                                        //for each file group figure out which process will complete it
                                        //want to divide the load intelligently so the big files are spread between processes
                                        for (int i = 0; i < distName.size(); i++) { 
+                                               cout << i << endl;
                                                int processToAssign = (i+1) % processors; 
                                                if (processToAssign == 0) { processToAssign = processors; }
                                                
@@ -572,6 +580,7 @@ int ClusterSplitCommand::execute(){
                                        
                                        //now let's reverse the order of every other process, so we balance big files running with little ones
                                        for (int i = 0; i < processors; i++) {
+                                               cout << i << endl;
                                                int remainder = ((i+1) % processors);
                                                if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
                                        }
@@ -594,7 +603,7 @@ int ClusterSplitCommand::execute(){
                                                        listFileNames.push_back(tempName);
                                                }
                                                in.close();
-                                               remove((toString(processIDS[i]) + ".temp").c_str());
+                                               m->mothurRemove((toString(processIDS[i]) + ".temp"));
                                                
                                                //get labels
                                                filename = toString(processIDS[i]) + ".temp.labels";
@@ -611,14 +620,14 @@ int ClusterSplitCommand::execute(){
                                                        if (labels.count(tempName) == 0) { labels.insert(tempName); }
                                                }
                                                in2.close();
-                                               remove((toString(processIDS[i]) + ".temp.labels").c_str());
+                                               m->mothurRemove((toString(processIDS[i]) + ".temp.labels"));
                                        }
                                }
                #else
                                listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
                #endif
        #endif  
-               if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); } return 0; }
+               if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { m->mothurRemove(listFileNames[i]); } return 0; }
                
                if (saveCutoff != cutoff) { m->mothurOut("Cutoff was " + toString(saveCutoff) + " changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();  }
                
@@ -635,11 +644,11 @@ int ClusterSplitCommand::execute(){
                ListVector* listSingle;
                map<float, int> labelBins = completeListFile(listFileNames, singletonName, labels, listSingle); //returns map of label to numBins
                
-               if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
+               if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
                
                mergeLists(listFileNames, labelBins, listSingle);
 
-               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
+               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
                
                m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to merge."); m->mothurOutEndLine();
                
@@ -701,7 +710,7 @@ map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames,
                                listSingle->push_back(secondCol);
                        }
                        in.close();
-                       remove(singleton.c_str());
+                       m->mothurRemove(singleton);
                        
                        numSingleBins = listSingle->getNumBins();
                }else{  listSingle = NULL; numSingleBins = 0;  }
@@ -727,8 +736,8 @@ map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames,
                for (int k = 0; k < listNames.size(); k++) {
        
                        if (m->control_pressed) {  
-                               if (listSingle != NULL) { delete listSingle; listSingle = NULL; remove(singleton.c_str());  }
-                               for (int i = 0; i < listNames.size(); i++) {   remove(listNames[i].c_str());  }
+                               if (listSingle != NULL) { delete listSingle; listSingle = NULL; m->mothurRemove(singleton);  }
+                               for (int i = 0; i < listNames.size(); i++) {   m->mothurRemove(listNames[i]);  }
                                return labelBin;
                        }
                        
@@ -781,7 +790,7 @@ map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames,
                        delete input;
                        
                        outFilled.close();
-                       remove(listNames[k].c_str());
+                       m->mothurRemove(listNames[k]);
                        rename(filledInList.c_str(), listNames[k].c_str());
                }
                
@@ -831,7 +840,7 @@ int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> us
                        //get the list info from each file
                        for (int k = 0; k < listNames.size(); k++) {
        
-                               if (m->control_pressed) {  if (listSingle != NULL) { delete listSingle;   } for (int i = 0; i < listNames.size(); i++) { remove(listNames[i].c_str());  } delete rabund; return 0; }
+                               if (m->control_pressed) {  if (listSingle != NULL) { delete listSingle;   } for (int i = 0; i < listNames.size(); i++) { m->mothurRemove(listNames[i]);  } delete rabund; return 0; }
                                
                                InputData* input = new InputData(listNames[k], "list");
                                ListVector* list = input->getListVector(thisLabel);
@@ -863,7 +872,7 @@ int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> us
                
                if (listSingle != NULL) { delete listSingle;  }
                
-               for (int i = 0; i < listNames.size(); i++) {  remove(listNames[i].c_str());  }
+               for (int i = 0; i < listNames.size(); i++) {  m->mothurRemove(listNames[i]);  }
                
                return 0;
        }
@@ -1045,7 +1054,7 @@ vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNa
                                if (m->control_pressed) { //clean up
                                        delete matrix; delete list;     delete cluster; delete rabund;
                                        listFile.close();
-                                       for (int i = 0; i < listFileNames.size(); i++) {        remove(listFileNames[i].c_str());       }
+                                       for (int i = 0; i < listFileNames.size(); i++) {        m->mothurRemove(listFileNames[i]);      }
                                        listFileNames.clear(); return listFileNames;
                                }
                
@@ -1091,12 +1100,12 @@ vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNa
                        listFile.close();
                        
                        if (m->control_pressed) { //clean up
-                               for (int i = 0; i < listFileNames.size(); i++) {        remove(listFileNames[i].c_str());       }
+                               for (int i = 0; i < listFileNames.size(); i++) {        m->mothurRemove(listFileNames[i]);      }
                                listFileNames.clear(); return listFileNames;
                        }
                        
-                       remove(thisDistFile.c_str());
-                       remove(thisNamefile.c_str());
+                       m->mothurRemove(thisDistFile);
+                       m->mothurRemove(thisNamefile);
                        
                        if (saveCutoff != cutoff) { 
                                if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
@@ -1135,7 +1144,7 @@ int ClusterSplitCommand::createMergedDistanceFile(vector< map<string, string> >
                string thisOutputDir = outputDir;
                if (outputDir == "") { thisOutputDir = m->hasPath(fastafile); }
                string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist";
-               remove(outputFileName.c_str());
+               m->mothurRemove(outputFileName);
                
                
                for (int i = 0; i < distNames.size(); i++) {