git.donarmstrong.com Git - mothur.git/blobdiff - clustersplitcommand.cpp
changes to read.otu(list/shared) - took out substr calls
[mothur.git] / clustersplitcommand.cpp
index 38e839496536b6ae525c94efd0c5b024c7eee384..fb1a7029636d3ee2e28045c8d1b233bec7c8a889 100644 (file)
@@ -34,7 +34,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                        OptionParser parser(option);
                        map<string,string> parameters = parser.getParameters();
                        
-                       ValidParameters validParameter;
+                       ValidParameters validParameter("cluster.split");
                
                        //check to make sure all parameters are valid for command
                        map<string,string>::iterator it;
@@ -57,7 +57,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                                it = parameters.find("phylip");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
                                }
@@ -65,7 +65,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                                it = parameters.find("column");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["column"] = inputDir + it->second;           }
                                }
@@ -73,7 +73,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                                it = parameters.find("name");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                }
@@ -81,7 +81,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                                it = parameters.find("taxonomy");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
                                }
@@ -89,7 +89,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                                it = parameters.find("fasta");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
                                }
@@ -142,10 +142,10 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                        convert(temp, precision); 
                        
                        temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "F"; }
-                       hard = isTrue(temp);
+                       hard = m->isTrue(temp);
                        
                        temp = validParameter.validFile(parameters, "large", false);                    if (temp == "not found") { temp = "F"; }
-                       large = isTrue(temp);
+                       large = m->isTrue(temp);
                        
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = "1";                             }
                        convert(temp, processors); 
@@ -237,6 +237,7 @@ int ClusterSplitCommand::execute(){
                vector<string> listFileNames;
                set<string> labels;
                string singletonName = "";
+               double saveCutoff = cutoff;
 
                //****************** file prep work ******************************//
                #ifdef USE_MPI
@@ -270,7 +271,7 @@ int ClusterSplitCommand::execute(){
                        if (namefile == "") {  //you need to make a namefile for split matrix
                                ofstream out;
                                namefile = phylipfile + ".names";
-                               openOutputFile(namefile, out);
+                               m->openOutputFile(namefile, out);
                                for (int i = 0; i < listToMakeNameFile->getNumBins(); i++) {
                                        string bin = listToMakeNameFile->get(i);
                                        out << bin << '\t' << bin << endl;
@@ -361,6 +362,10 @@ int ClusterSplitCommand::execute(){
                        for(int i = 1; i < processors; i++) { 
                                int num = dividedNames[i].size();
                                
+                               double tempCutoff;
+                               MPI_Recv(&tempCutoff, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
+                               if (tempCutoff < cutoff) { cutoff = tempCutoff; }
+                               
                                //send list filenames to root process
                                for (int j = 0; j < num; j++) {  
                                        int lengthList = 0;
@@ -429,6 +434,9 @@ int ClusterSplitCommand::execute(){
                        //process them
                        listFileNames = cluster(myNames, labels);
                        
+                       //send cutoff
+                       MPI_Send(&cutoff, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
+                       
                        //send list filenames to root process
                        for (int j = 0; j < num; j++) {  
                                char tempListFileName[1024];
@@ -489,13 +497,13 @@ int ClusterSplitCommand::execute(){
                                        for(int i=0;i<processors;i++){
                                                string filename = toString(processIDS[i]) + ".temp";
                                                ifstream in;
-                                               openInputFile(filename, in);
+                                               m->openInputFile(filename, in);
                                                
-                                               in >> tag; gobble(in);
+                                               in >> tag; m->gobble(in);
                                                
                                                while(!in.eof()) {
                                                        string tempName;
-                                                       in >> tempName; gobble(in);
+                                                       in >> tempName; m->gobble(in);
                                                        listFileNames.push_back(tempName);
                                                }
                                                in.close();
@@ -504,11 +512,15 @@ int ClusterSplitCommand::execute(){
                                                //get labels
                                                filename = toString(processIDS[i]) + ".temp.labels";
                                                ifstream in2;
-                                               openInputFile(filename, in2);
+                                               m->openInputFile(filename, in2);
+                                               
+                                               float tempCutoff;
+                                               in2 >> tempCutoff; m->gobble(in2);
+                                               if (tempCutoff < cutoff) { cutoff = tempCutoff; }
                                                
                                                while(!in2.eof()) {
                                                        string tempName;
-                                                       in2 >> tempName; gobble(in2);
+                                                       in2 >> tempName; m->gobble(in2);
                                                        if (labels.count(tempName) == 0) { labels.insert(tempName); }
                                                }
                                                in2.close();
@@ -521,6 +533,8 @@ int ClusterSplitCommand::execute(){
        #endif  
                if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); } return 0; }
                
+               if (saveCutoff != cutoff) { m->mothurOut("Cutoff was " + toString(saveCutoff) + " changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();  }
+               
                m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
                
                //****************** merge list file and create rabund and sabund files ******************************//
@@ -572,12 +586,12 @@ map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames,
                //read in singletons
                if (singleton != "none") {
                        ifstream in;
-                       openInputFile(singleton, in);
+                       m->openInputFile(singleton, in);
                                
                        string firstCol, secondCol;
                        listSingle = new ListVector();
                        while (!in.eof()) {
-                               in >> firstCol >> secondCol; gobble(in);
+                               in >> firstCol >> secondCol; m->gobble(in);
                                listSingle->push_back(secondCol);
                        }
                        in.close();
@@ -592,9 +606,11 @@ map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames,
 
                        if ((*it != "unique") && (convertTestFloat(*it, temp) == true)) {       convert(*it, temp);     }
                        else if (*it == "unique")                                                                               {       temp = -1.0;            }
-                                               
-                       orderFloat.push_back(temp);
-                       labelBin[temp] = numSingleBins; //initialize numbins 
+                       
+                       if (temp <= cutoff) {
+                               orderFloat.push_back(temp);
+                               labelBin[temp] = numSingleBins; //initialize numbins 
+                       }
                }
        
                //sort order
@@ -616,7 +632,7 @@ map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames,
                        
                        string filledInList = listNames[k] + "filledInTemp";
                        ofstream outFilled;
-                       openOutputFile(filledInList, outFilled);
+                       m->openOutputFile(filledInList, outFilled);
        
                        //for each label needed
                        for(int l = 0; l < orderFloat.size(); l++){
@@ -673,12 +689,12 @@ map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames,
 //**********************************************************************************************************************
 int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> userLabels, ListVector* listSingle){
        try {
-               if (outputDir == "") { outputDir += hasPath(distfile); }
-               fileroot = outputDir + getRootName(getSimpleName(distfile));
+               if (outputDir == "") { outputDir += m->hasPath(distfile); }
+               fileroot = outputDir + m->getRootName(m->getSimpleName(distfile));
                
-               openOutputFile(fileroot+ tag + ".sabund",       outSabund);
-               openOutputFile(fileroot+ tag + ".rabund",       outRabund);
-               openOutputFile(fileroot+ tag + ".list",         outList);
+               m->openOutputFile(fileroot+ tag + ".sabund",    outSabund);
+               m->openOutputFile(fileroot+ tag + ".rabund",    outRabund);
+               m->openOutputFile(fileroot+ tag + ".list",              outList);
                                
                outputNames.push_back(fileroot+ tag + ".sabund");
                outputNames.push_back(fileroot+ tag + ".rabund");
@@ -702,7 +718,7 @@ int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> us
                        if (listSingle != NULL) {
                                for (int j = 0; j < listSingle->getNumBins(); j++) {
                                        outList << listSingle->get(j) << '\t';
-                                       rabund->push_back(getNumNames(listSingle->get(j)));
+                                       rabund->push_back(m->getNumNames(listSingle->get(j)));
                                }
                        }
                        
@@ -719,7 +735,7 @@ int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> us
                                else {          
                                        for (int j = 0; j < list->getNumBins(); j++) {
                                                outList << list->get(j) << '\t';
-                                               rabund->push_back(getNumNames(list->get(j)));
+                                               rabund->push_back(m->getNumNames(list->get(j)));
                                        }
                                        delete list;
                                }
@@ -759,7 +775,7 @@ void ClusterSplitCommand::printData(ListVector* oldList){
                RAbundVector oldRAbund = oldList->getRAbundVector();
                
                oldRAbund.setLabel(label);
-               if (isTrue(showabund)) {
+               if (m->isTrue(showabund)) {
                        oldRAbund.getSAbundVector().print(cout);
                }
                oldRAbund.print(outRabund);
@@ -795,7 +811,7 @@ int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> >
                                //write out names to file
                                string filename = toString(getpid()) + ".temp";
                                ofstream out;
-                               openOutputFile(filename, out);
+                               m->openOutputFile(filename, out);
                                out << tag << endl;
                                for (int j = 0; j < listFileNames.size(); j++) { out << listFileNames[j] << endl;  }
                                out.close();
@@ -803,8 +819,9 @@ int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> >
                                //print out labels
                                ofstream outLabels;
                                filename = toString(getpid()) + ".temp.labels";
-                               openOutputFile(filename, outLabels);
-               
+                               m->openOutputFile(filename, outLabels);
+                               
+                               outLabels << cutoff << endl;
                                for (set<string>::iterator it = labels.begin(); it != labels.end(); it++) {
                                        outLabels << (*it) << endl;
                                }
@@ -841,8 +858,11 @@ vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNa
                
                vector<string> listFileNames;
                
+               double smallestCutoff = cutoff;
+               
                //cluster each distance file
                for (int i = 0; i < distNames.size(); i++) {
+                       if (m->control_pressed) { return listFileNames; }
                        
                        string thisNamefile = distNames[i].begin()->second;
                        string thisDistFile = distNames[i].begin()->first;
@@ -897,11 +917,11 @@ vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNa
                        else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
                        tag = cluster->getTag();
                
-                       if (outputDir == "") { outputDir += hasPath(thisDistFile); }
-                       fileroot = outputDir + getRootName(getSimpleName(thisDistFile));
+                       if (outputDir == "") { outputDir += m->hasPath(thisDistFile); }
+                       fileroot = outputDir + m->getRootName(m->getSimpleName(thisDistFile));
                        
                        ofstream listFile;
-                       openOutputFile(fileroot+ tag + ".list", listFile);
+                       m->openOutputFile(fileroot+ tag + ".list",      listFile);
                
                        listFileNames.push_back(fileroot+ tag + ".list");
                
@@ -925,14 +945,14 @@ vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNa
                                        listFileNames.clear(); return listFileNames;
                                }
                
-                               cluster->update(cutoff);
+                               cluster->update(saveCutoff);
        
                                float dist = matrix->getSmallDist();
                                float rndDist;
                                if (hard) {
-                                       rndDist = ceilDist(dist, precision); 
+                                       rndDist = m->ceilDist(dist, precision); 
                                }else{
-                                       rndDist = roundDist(dist, precision); 
+                                       rndDist = m->roundDist(dist, precision); 
                                }
 
                                if(previousDist <= 0.0000 && dist != previousDist){
@@ -973,8 +993,18 @@ vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNa
                        
                        remove(thisDistFile.c_str());
                        remove(thisNamefile.c_str());
+                       
+                       if (saveCutoff != cutoff) { 
+                               if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
+                               else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
+                       
+                               m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine();  
+                       }
+                       
+                       if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff;  }
                }
                
+               cutoff = smallestCutoff;
                                        
                return listFileNames;