]> git.donarmstrong.com Git - mothur.git/blobdiff - binsequencecommand.cpp
added smart distance feature and optimized all commands using line by line processing
[mothur.git] / binsequencecommand.cpp
index f1ba8b5f38effa755a215e93a87dcf322001dc6d..c21757850b858d4a53173619da911ffcab5d24a3 100644 (file)
@@ -53,7 +53,7 @@ BinSeqCommand::~BinSeqCommand(){
 int BinSeqCommand::execute(){
        try {
                int count = 1;
-               string binnames, name, sequence;
+               int error = 0;
                
                //read fastafile
                fasta->readFastaFile(in);
@@ -72,11 +72,117 @@ int BinSeqCommand::execute(){
                
                input = globaldata->ginput;
                list = globaldata->gListVector;
+               ListVector* lastList = list;
+               
+               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+               set<string> processedLabels;
+               set<string> userLabels = globaldata->labels;
+
                                
-               while(list != NULL){
+               while((list != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0))) {
                        
                        if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(list->getLabel()) == 1){
                                
+                               error = process(list, count);   
+                               if (error == 1) { return 0; }   
+                                                       
+                               processedLabels.insert(list->getLabel());
+                               userLabels.erase(list->getLabel());
+
+                       }
+                       
+                       if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastList->getLabel()) != 1)) {
+                               
+                               error = process(lastList, count);       
+                               if (error == 1) { return 0; }
+                                                                                                       
+                               processedLabels.insert(lastList->getLabel());
+                               userLabels.erase(lastList->getLabel());
+                               
+                       }
+                       
+                       if (count != 1) { delete lastList; }
+                       lastList = list;                        
+
+                       list = input->getListVector();
+                       count++;
+               }
+               
+               
+               //output error messages about any remaining user labels
+               set<string>::iterator it;
+               bool needToRun = false;
+               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                       cout << "Your file does not include the label "<< *it; 
+                       if (processedLabels.count(lastList->getLabel()) != 1) {
+                               cout << ". I will use " << lastList->getLabel() << "." << endl;
+                               needToRun = true;
+                       }else {
+                               cout << ". Please refer to " << lastList->getLabel() << "." << endl;
+                       }
+               }
+               
+               //run last line if you need to
+               if (needToRun == true)  {
+                       error = process(lastList, count);       
+                       if (error == 1) { return 0; }                   
+               }
+               
+               delete lastList;
+               return 0;
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the BinSeqCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }       
+}
+
+//**********************************************************************************************************************
+void BinSeqCommand::readNamesFile() {
+       try {
+               vector<string> dupNames;
+               openInputFile(namesfile, inNames);
+               
+               string name, names, sequence;
+       
+               while(inNames){
+                       inNames >> name;                        //read from first column  A
+                       inNames >> names;               //read from second column  A,B,C,D
+                       
+                       dupNames.clear();
+                       
+                       //parse names into vector
+                       splitAtComma(names, dupNames);
+                       
+                       //store names in fasta map
+                       sequence = fasta->getSequence(name);
+                       for (int i = 0; i < dupNames.size(); i++) {
+                               fasta->push_back(dupNames[i], sequence);
+                       }
+               
+                       gobble(inNames);
+               }
+               inNames.close();
+
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the BinSeqCommand class function readNamesFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }       
+}
+//**********************************************************************************************************************
+//return 1 if error, 0 otherwise
+int BinSeqCommand::process(ListVector* list, int count) {
+       try {
+                               string binnames, name, sequence;
                                string outputFileName = getRootName(globaldata->getListFile()) + list->getLabel() + ".fasta";
                                openOutputFile(outputFileName, out);
 
@@ -103,7 +209,7 @@ int BinSeqCommand::execute(){
                                                                if (group == "not found") {  
                                                                        cout << name << " is missing from your group file. Please correct. " << endl;
                                                                        remove(outputFileName.c_str());
-                                                                       return 0;
+                                                                       return 1;
                                                                }else{
                                                                        name = name + "|" + group + "|" + toString(i+1);
                                                                        out << ">" << name << endl;
@@ -113,7 +219,7 @@ int BinSeqCommand::execute(){
                                                }else { 
                                                        cout << name << " is missing from your fasta or name file. Please correct. " << endl; 
                                                        remove(outputFileName.c_str());
-                                                       return 0;
+                                                       return 1;
                                                }
                                                
                                        }
@@ -131,7 +237,7 @@ int BinSeqCommand::execute(){
                                                        if (group == "not found") {  
                                                                cout << binnames << " is missing from your group file. Please correct. " << endl;
                                                                remove(outputFileName.c_str());
-                                                               return 0;
+                                                               return 1;
                                                        }else{
                                                                binnames = binnames + "|" + group + "|" + toString(i+1);
                                                                out << ">" << binnames << endl;
@@ -141,56 +247,12 @@ int BinSeqCommand::execute(){
                                        }else { 
                                                cout << binnames << " is missing from your fasta or name file. Please correct. " << endl; 
                                                remove(outputFileName.c_str());
-                                               return 0;
+                                               return 1;
                                        }
-                                       
                                }
+                                       
                                out.close();
-                       }
-                       
-                       delete list;
-                       list = input->getListVector();
-                       count++;
-               }
-               
-               return 0;
-       }
-       catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the BinSeqCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-       catch(...) {
-               cout << "An unknown error has occurred in the BinSeqCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }       
-}
-
-//**********************************************************************************************************************
-void BinSeqCommand::readNamesFile() {
-       try {
-               vector<string> dupNames;
-               openInputFile(namesfile, inNames);
-               
-               string name, names, sequence;
-       
-               while(inNames){
-                       inNames >> name;                        //read from first column  A
-                       inNames >> names;               //read from second column  A,B,C,D
-                       
-                       dupNames.clear();
-                       
-                       //parse names into vector
-                       splitAtComma(names, dupNames);
-                       
-                       //store names in fasta map
-                       sequence = fasta->getSequence(name);
-                       for (int i = 0; i < dupNames.size(); i++) {
-                               fasta->push_back(dupNames[i], sequence);
-                       }
-               
-                       gobble(inNames);
-               }
-               inNames.close();
+                               return 0;
 
        }
        catch(exception& e) {
@@ -205,4 +267,3 @@ void BinSeqCommand::readNamesFile() {
 //**********************************************************************************************************************
 
 
-