git.donarmstrong.com Git - mothur.git/blobdiff - binsequencecommand.cpp
added code to format fasta files for uchime. started work on sff.multiple command
[mothur.git] / binsequencecommand.cpp
index 8e8ec2bc51362d8b8876d6e3cbb10ef9ff5e167e..0c867e35f730cc8e72c66153b14c23b5f54ce6f3 100644 (file)
@@ -50,6 +50,26 @@ string BinSeqCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
+string BinSeqCommand::getOutputFileNameTag(string type, string inputName=""){  // returns the file-name tag for output type `type`; inputName is not read in this body
+       try {
+        string outputFileName = "";  // stays empty unless a tag is assigned below
+               map<string, vector<string> >::iterator it;
+        
+        //look the requested type up in outputTypes to see whether this command creates it
+        it = outputTypes.find(type);
+        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }  // unknown type: message only, "" is still returned
+        else {
+            if (type == "fasta") {  outputFileName =  "fasta"; }  // NOTE(review): tag has no leading '.'; process() appends it directly after the label where the old code used ".fasta" - confirm the separator
+            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }  // type is in outputTypes but has no tag here: message plus control_pressed is set
+        }
+        return outputFileName;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "BinSeqCommand", "getOutputFileNameTag");  // report the exception with class and method name
+               exit(1);  // terminate the process after reporting
+       }
+}
+//**********************************************************************************************************************
 BinSeqCommand::BinSeqCommand(){        
        try {
                abort = true; calledHelp = true; 
@@ -167,7 +187,7 @@ BinSeqCommand::BinSeqCommand(string option) {
                        }
                        
                        namesfile = validParameter.validFile(parameters, "name", true);
-                       if (namesfile == "not open") { abort = true; }  
+                       if (namesfile == "not open") { namesfile = ""; abort = true; }  
                        else if (namesfile == "not found") { namesfile = ""; }
                        else {  m->setNameFile(namesfile); }
 
@@ -176,6 +196,11 @@ BinSeqCommand::BinSeqCommand(string option) {
                        else if (groupfile == "not found") { groupfile = ""; }
                        else { m->setGroupFile(groupfile); }
                        
+                       if (namesfile == ""){
+                               vector<string> files; files.push_back(fastafile); 
+                               parser.getNameFile(files);
+                       }
+                       
                }
        }
        catch(exception& e) {
@@ -221,12 +246,12 @@ int BinSeqCommand::execute(){
                                
                while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                        
-                       if(m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str());         } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; } 
+                       if(m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);                } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; } 
                        
                        if(allLines == 1 || labels.count(list->getLabel()) == 1){
                                
                                error = process(list);  
-                               if (error == 1) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());         } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; } 
+                               if (error == 1) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);                } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; } 
                                                        
                                processedLabels.insert(list->getLabel());
                                userLabels.erase(list->getLabel());
@@ -239,7 +264,7 @@ int BinSeqCommand::execute(){
                                list = input->getListVector(lastLabel);
                                
                                error = process(list);  
-                               if (error == 1) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());         } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; }
+                               if (error == 1) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);                } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; }
                                                                                                        
                                processedLabels.insert(list->getLabel());
                                userLabels.erase(list->getLabel());
@@ -254,7 +279,7 @@ int BinSeqCommand::execute(){
                        list = input->getListVector();
                }
                
-               if(m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str());         } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; } 
+               if(m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);                } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; } 
 
                //output error messages about any remaining user labels
                set<string>::iterator it;
@@ -275,7 +300,7 @@ int BinSeqCommand::execute(){
                        list = input->getListVector(lastLabel);
                                
                        error = process(list);  
-                       if (error == 1) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());         } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; }
+                       if (error == 1) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);                } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; }
                        
                        delete list;  
                }
@@ -284,7 +309,7 @@ int BinSeqCommand::execute(){
                delete fasta; 
                if (groupfile != "") {  delete groupMap;   } 
                
-               if(m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str());         }  return 0; }  
+               if(m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);                }  return 0; }  
                
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
@@ -337,79 +362,48 @@ void BinSeqCommand::readNamesFile() {
 //return 1 if error, 0 otherwise
 int BinSeqCommand::process(ListVector* list) {
        try {
-                               string binnames, name, sequence;
-                               
-                               string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + list->getLabel() + ".fasta";
-                               m->openOutputFile(outputFileName, out);
-                               
-                               //save to output list of output file names
-                               outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName);
-
-                               m->mothurOut(list->getLabel()); m->mothurOutEndLine();
-                               
-                               //for each bin in the list vector
-                               for (int i = 0; i < list->size(); i++) {
-                                       
-                                       if (m->control_pressed) {  return 1; }
-                                       
-                                       binnames = list->get(i);
-                                       while (binnames.find_first_of(',') != -1) { 
-                                               name = binnames.substr(0,binnames.find_first_of(','));
-                                               binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
-                                               
-                                               //do work for that name
-                                               sequence = fasta->getSequence(name);
-                                               if (sequence != "not found") {
-                                                       //if you don't have groups
-                                                       if (groupfile == "") {
-                                                               name = name + "\t" + toString(i+1);
-                                                               out << ">" << name << endl;
-                                                               out << sequence << endl;
-                                                       }else {//if you do have groups
-                                                               string group = groupMap->getGroup(name);
-                                                               if (group == "not found") {  
-                                                                       m->mothurOut(name + " is missing from your group file. Please correct. ");  m->mothurOutEndLine();
-                                                                       return 1;
-                                                               }else{
-                                                                       name = name + "\t" + group + "\t" + toString(i+1);
-                                                                       out << ">" << name << endl;
-                                                                       out << sequence << endl;
-                                                               }
-                                                       }
-                                               }else { 
-                                                       m->mothurOut(name + " is missing from your fasta or name file. Please correct. "); m->mothurOutEndLine();
-                                                       return 1;
-                                               }
-                                               
-                                       }
-                                       
-                                       //get last name
-                                       sequence = fasta->getSequence(binnames);
-                                       if (sequence != "not found") {
-                                               //if you don't have groups
-                                               if (groupfile == "") {
-                                                       binnames = binnames + "\t" + toString(i+1);
-                                                       out << ">" << binnames << endl;
-                                                       out << sequence << endl;
-                                               }else {//if you do have groups
-                                                       string group = groupMap->getGroup(binnames);
-                                                       if (group == "not found") {  
-                                                               m->mothurOut(binnames + " is missing from your group file. Please correct. "); m->mothurOutEndLine();
-                                                               return 1;
-                                                       }else{
-                                                               binnames = binnames + "\t" + group + "\t" + toString(i+1);
-                                                               out << ">" << binnames << endl;
-                                                               out << sequence << endl;
-                                                       }
-                                               }
-                                       }else { 
-                                               m->mothurOut(binnames + " is missing from your fasta or name file. Please correct. "); m->mothurOutEndLine();
-                                               return 1;
-                                       }
-                               }
-                                       
-                               out.close();
-                               return 0;
+        string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + list->getLabel() + getOutputFileNameTag("fasta");
+        m->openOutputFile(outputFileName, out);
+        outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName);
+        
+        m->mothurOut(list->getLabel()); m->mothurOutEndLine();
+        
+        //for each bin in the list vector
+        for (int i = 0; i < list->size(); i++) {
+            
+            if (m->control_pressed) {  return 1; }
+            
+            string binnames = list->get(i);
+            vector<string> names;
+            m->splitAtComma(binnames, names);
+            for (int j = 0; j < names.size(); j++) {
+                string name = names[j];
+                
+                //do work for that name
+                string sequence = fasta->getSequence(name);
+                if (sequence != "not found") {
+                    //if you don't have groups
+                    if (groupfile == "") {
+                        name = name + "\t" + toString(i+1);
+                        out << ">" << name << endl;
+                        out << sequence << endl;
+                    }else {//if you do have groups
+                        string group = groupMap->getGroup(name);
+                        if (group == "not found") {  
+                            m->mothurOut(name + " is missing from your group file. Please correct. ");  m->mothurOutEndLine();
+                            return 1;
+                        }else{
+                            name = name + "\t" + group + "\t" + toString(i+1);
+                            out << ">" << name << endl;
+                            out << sequence << endl;
+                        }
+                    }
+                }else { m->mothurOut(name + " is missing from your fasta or name file. Please correct. "); m->mothurOutEndLine(); return 1; }
+            }
+        }
+        
+        out.close();
+        return 0;
 
        }
        catch(exception& e) {