git.donarmstrong.com Git - mothur.git/commitdiff
fixed cluster.split command
author westcott <westcott>
Thu, 3 Jun 2010 17:02:17 +0000 (17:02 +0000)
committer westcott <westcott>
Thu, 3 Jun 2010 17:02:17 +0000 (17:02 +0000)
chopseqscommand.cpp
clustersplitcommand.cpp
clustersplitcommand.h
readtree.cpp
tree.cpp

index 9020d2b4763515e1b6e1ead544247e676b043f41..47f151de14c5e1494485b7eebc445125f74a4d30 100644 (file)
@@ -77,7 +77,7 @@ ChopSeqsCommand::ChopSeqsCommand(string option)  {
 
 void ChopSeqsCommand::help(){
        try {
-               m->mothurOut("The chop.seqs command reads a fasta file and outputs a .chop.fasta with sequences trimmed to the end position.\n");
+               m->mothurOut("The chop.seqs command reads a fasta file and outputs a .chop.fasta containing the trimmed sequences.\n");
                m->mothurOut("The chop.seqs command parameters are fasta, end and fromend, fasta is required.\n");
                m->mothurOut("The chop.seqs command should be in the following format: chop.seqs(fasta=yourFasta, end=yourEnd).\n");
                m->mothurOut("The end parameter allows you to specify an end base position for your sequences, default = 0.\n");
index ed995fcd9586d5f2e5f86b1c5b733f247925a9f1..d10a51fde293fa203e03a856ba67c63d5895a55b 100644 (file)
@@ -313,8 +313,12 @@ int ClusterSplitCommand::execute(){
                if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); } return 0; }
                
                //****************** merge list file and create rabund and sabund files ******************************//
-                               
-               mergeLists(listFileNames, singletonName, labels);
+               ListVector* listSingle;
+               map<float, int> labelBins = completeListFile(listFileNames, singletonName, labels, listSingle); //returns map of label to numBins
+               
+               if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
+               
+               mergeLists(listFileNames, labelBins, listSingle);
 
                if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
                
@@ -333,21 +337,14 @@ int ClusterSplitCommand::execute(){
        }
 }
 //**********************************************************************************************************************
-int ClusterSplitCommand::mergeLists(vector<string> listNames, string singleton, set<string> userLabels){
+map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames, string singleton, set<string> userLabels, ListVector*& listSingle){
        try {
-               if (outputDir == "") { outputDir += hasPath(distfile); }
-               fileroot = outputDir + getRootName(getSimpleName(distfile));
-               
-               openOutputFile(fileroot+ tag + ".sabund",       outSabund);
-               openOutputFile(fileroot+ tag + ".rabund",       outRabund);
-               openOutputFile(fileroot+ tag + ".list",         outList);
                                
-               outputNames.push_back(fileroot+ tag + ".sabund");
-               outputNames.push_back(fileroot+ tag + ".rabund");
-               outputNames.push_back(fileroot+ tag + ".list");
+               map<float, int> labelBin;
+               vector<float> orderFloat;
+               int numSingleBins;
                
                //read in singletons
-               ListVector* listSingle = NULL;
                if (singleton != "none") {
                        ifstream in;
                        openInputFile(singleton, in);
@@ -359,102 +356,167 @@ int ClusterSplitCommand::mergeLists(vector<string> listNames, string singleton,
                                listSingle->push_back(secondCol);
                        }
                        in.close();
-               }
+                       remove(singleton.c_str());
+                       
+                       numSingleBins = listSingle->getNumBins();
+               }else{  listSingle = NULL; numSingleBins = 0;  }
                
-               vector<float> orderFloat;
-       
                //go through users set and make them floats so we can sort them 
                for(set<string>::iterator it = userLabels.begin(); it != userLabels.end(); ++it) {
-                       float temp;
+                       float temp = -10.0;
 
-                       if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){
-                               convert(*it, temp);
-                               orderFloat.push_back(temp);
-                       }else if (*it == "unique") { orderFloat.push_back(-1.0); }
-                       else {
-                               userLabels.erase(*it); 
-                               it--;
-                       }
+                       if ((*it != "unique") && (convertTestFloat(*it, temp) == true)) {       convert(*it, temp);     }
+                       else if (*it == "unique")                                                                               {       temp = -1.0;            }
+                       
+                       orderFloat.push_back(temp);
+                       labelBin[temp] = numSingleBins; //initialize numbins 
                }
        
                //sort order
                sort(orderFloat.begin(), orderFloat.end());
-
-               vector<InputData*> inputs;
-               vector<string> lastLabels;
-               for (int i = 0; i < listNames.size(); i++) {
-                       InputData* input = new InputData(listNames[i], "list");
-                       inputs.push_back(input);
+               userLabels.clear();
                        
-                       ifstream in;
-                       openInputFile(listNames[i], in);
-                       ListVector tempList(in);
-                       lastLabels.push_back(tempList.getLabel());
-                       in.close();
-               }
+               //get the list info from each file
+               for (int k = 0; k < listNames.size(); k++) {
        
-               ListVector* merged = NULL;
-                               
-               //for each label needed
-               for(int l = 0; l < orderFloat.size(); l++){
+                       if (m->control_pressed) {  
+                               if (listSingle != NULL) { delete listSingle; listSingle = NULL; remove(singleton.c_str());  }
+                               for (int i = 0; i < listNames.size(); i++) {   remove(listNames[i].c_str());  }
+                               return labelBin;
+                       }
                        
-                       string thisLabel;
-                       if (orderFloat[l] == -1) { thisLabel = "unique"; }
-                       else { thisLabel = toString(orderFloat[l],  length-1);  } 
-       
-                       //get the list info from each file
-                       for (int k = 0; k < listNames.size(); k++) {
+                       InputData* input = new InputData(listNames[k], "list");
+                       ListVector* list = input->getListVector();
+                       string lastLabel = list->getLabel();
+                       
+                       string filledInList = listNames[k] + "filledInTemp";
+                       ofstream outFilled;
+                       openOutputFile(filledInList, outFilled);
        
-                               if (m->control_pressed) {  
-                                       if (listSingle != NULL) { delete listSingle; remove(singleton.c_str());  }
-                                       for (int i = 0; i < listNames.size(); i++) {  delete inputs[i];  remove(listNames[i].c_str());  }
-                                       delete merged; merged = NULL;
-                                       return 0;
-                               }
-                               
-                               ListVector* list = inputs[k]->getListVector();
-                               
+                       //for each label needed
+                       for(int l = 0; l < orderFloat.size(); l++){
+                       
+                               string thisLabel;
+                               if (orderFloat[l] == -1) { thisLabel = "unique"; }
+                               else { thisLabel = toString(orderFloat[l],  length-1);  } 
+
                                //this file has reached the end
-                               if (list == NULL) { list = inputs[k]->getListVector(lastLabels[k], true); }     
+                               if (list == NULL) { 
+                                       list = input->getListVector(lastLabel, true); 
+                               }else{  //do you have the distance, or do you need to fill in
                                                
-                               float labelFloat;
-                               if (list->getLabel() == "unique") {  labelFloat = -1.0;  }
-                               else { convert(list->getLabel(), labelFloat); }
-
-                               //check for missing labels
-                               if (labelFloat > orderFloat[l]) { //you are missing the label, get the next smallest one
-                                       //if its bigger get last label, otherwise keep it
-                                       delete list;
-                                       list = inputs[k]->getListVector(lastLabels[k], true); //get last list vector to use, you actually want to move back in the file
-                               }
-                               lastLabels[k] = list->getLabel();
+                                       float labelFloat;
+                                       if (list->getLabel() == "unique") {  labelFloat = -1.0;  }
+                                       else { convert(list->getLabel(), labelFloat); }
 
-                               //is this the first file
-                               if (merged == NULL) {  merged = new ListVector();  merged->setLabel(thisLabel); }
-                               
-                               for (int j = 0; j < list->getNumBins(); j++) {
-                                       merged->push_back(list->get(j));
+                                       //check for missing labels
+                                       if (labelFloat > orderFloat[l]) { //you are missing the label, get the next smallest one
+                                               //if its bigger get last label, otherwise keep it
+                                               delete list;
+                                               list = input->getListVector(lastLabel, true);  //get last list vector to use, you actually want to move back in the file
+                                       }
+                                       lastLabel = list->getLabel();
                                }
                                
+                               //print to new file
+                               list->setLabel(thisLabel);
+                               list->print(outFilled);
+               
+                               //update labelBin
+                               labelBin[orderFloat[l]] += list->getNumBins();
+                                                                       
                                delete list;
+                                                                       
+                               list = input->getListVector();
                        }
                        
+                       if (list != NULL) { delete list; }
+                       delete input;
+                       
+                       outFilled.close();
+                       remove(listNames[k].c_str());
+                       rename(filledInList.c_str(), listNames[k].c_str());
+               }
+               
+               return labelBin;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterSplitCommand", "completeListFile");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> userLabels, ListVector* listSingle){
+       try {
+               if (outputDir == "") { outputDir += hasPath(distfile); }
+               fileroot = outputDir + getRootName(getSimpleName(distfile));
+               
+               openOutputFile(fileroot+ tag + ".sabund",       outSabund);
+               openOutputFile(fileroot+ tag + ".rabund",       outRabund);
+               openOutputFile(fileroot+ tag + ".list",         outList);
+                               
+               outputNames.push_back(fileroot+ tag + ".sabund");
+               outputNames.push_back(fileroot+ tag + ".rabund");
+               outputNames.push_back(fileroot+ tag + ".list");
+               
+               map<float, int>::iterator itLabel;
+
+               //for each label needed
+               for(itLabel = userLabels.begin(); itLabel != userLabels.end(); itLabel++) {
+                       
+                       string thisLabel;
+                       if (itLabel->first == -1) { thisLabel = "unique"; }
+                       else { thisLabel = toString(itLabel->first,  length-1);  } 
+                       
+                       outList << thisLabel << '\t' << itLabel->second << '\t';
+
+                       RAbundVector* rabund = new RAbundVector();
+                       rabund->setLabel(thisLabel);
+
                        //add in singletons
                        if (listSingle != NULL) {
                                for (int j = 0; j < listSingle->getNumBins(); j++) {
-                                       merged->push_back(listSingle->get(j));
+                                       outList << listSingle->get(j) << '\t';
+                                       rabund->push_back(getNumNames(listSingle->get(j)));
                                }
                        }
                        
-                       //print to files
-                       printData(merged);
+                       //get the list info from each file
+                       for (int k = 0; k < listNames.size(); k++) {
+       
+                               if (m->control_pressed) {  if (listSingle != NULL) { delete listSingle;   } for (int i = 0; i < listNames.size(); i++) { remove(listNames[i].c_str());  } delete rabund; return 0; }
+                               
+                               InputData* input = new InputData(listNames[k], "list");
+                               ListVector* list = input->getListVector(thisLabel);
+                               
+                               //this file has reached the end
+                               if (list == NULL) { m->mothurOut("Error merging listvectors in file " + listNames[k]); m->mothurOutEndLine();  }        
+                               else {          
+                                       for (int j = 0; j < list->getNumBins(); j++) {
+                                               outList << list->get(j) << '\t';
+                                               rabund->push_back(getNumNames(list->get(j)));
+                                       }
+                                       delete list;
+                               }
+                               delete input;
+                       }
+                       
+                       SAbundVector sabund = rabund->getSAbundVector();
+                       
+                       sabund.print(outSabund);
+                       rabund->print(outRabund);
+                       outList << endl;
                        
-                       delete merged; merged = NULL;
+                       delete rabund;
                }
                
-               if (listSingle != NULL) { delete listSingle; remove(singleton.c_str());  }
+               outList.close();
+               outRabund.close();
+               outSabund.close();
                
-               for (int i = 0; i < listNames.size(); i++) {  delete inputs[i];  remove(listNames[i].c_str());  }
+               if (listSingle != NULL) { delete listSingle;  }
+               
+               for (int i = 0; i < listNames.size(); i++) {  remove(listNames[i].c_str());  }
                
                return 0;
        }
@@ -463,6 +525,7 @@ int ClusterSplitCommand::mergeLists(vector<string> listNames, string singleton,
                exit(1);
        }
 }
+
 //**********************************************************************************************************************
 
 void ClusterSplitCommand::printData(ListVector* oldList){
index e838bc41ee1b3616d3f0f95e6a512c5bde136d33..05ae8b8b78fe921092acb06ec4ae7ea3d4ba997f 100644 (file)
@@ -42,7 +42,8 @@ private:
        void printData(ListVector*);
        int createProcesses(vector < vector < map<string, string> > >);
        vector<string> cluster(vector< map<string, string> >, set<string>&);
-       int mergeLists(vector<string>, string, set<string>);
+       int mergeLists(vector<string>, map<float, int>, ListVector*);
+       map<float, int> completeListFile(vector<string>, string, set<string>, ListVector*&);
 };
 
 #endif
index 3062f6677fa8527b4eb59a9ab979458465bfb777..0d25f7e2f4c4e0232cd137b184da0077585edefb 100644 (file)
@@ -230,7 +230,7 @@ int ReadNewickTree::readTreeString() {
 
                        lc = readNewickInt(filehandle, n, T);
                        if (lc == -1) { m->mothurOut("error with lc"); m->mothurOutEndLine(); return -1; } //reports an error in reading
-               
+       
                        if(filehandle.peek()==','){                                                     
                                readSpecialChar(filehandle,',',"comma");
                        }
@@ -291,6 +291,7 @@ int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T) {
                if(c == '('){
                        int lc = readNewickInt(f, n, T);
                        if (lc == -1) { return -1; } //reports an error in reading
+                       
                        readSpecialChar(f,',',"comma");
 
                        int rc = readNewickInt(f, n, T);
index b6cfa0b2429a0e7a6c94f4f5ef5c36ad7f04f473..d6634bcdbb59f05c8ef9cc04561a00fe1e489186 100644 (file)
--- a/tree.cpp
+++ b/tree.cpp
@@ -713,6 +713,10 @@ void Tree::parseTreeFile() {
                        }
                }
                filehandle.close();
+               
+               for (int i = 0; i < globaldata->Treenames.size(); i++) {
+cout << globaldata->Treenames[i] << endl; }
+cout << "done" << endl;
        }
        catch(exception& e) {
                m->errorOut(e, "Tree", "parseTreeFile");