]> git.donarmstrong.com Git - mothur.git/blobdiff - binsequencecommand.cpp
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / binsequencecommand.cpp
index 3f9378fbdb4650078cf27091326c0dc5e87c993a..7798a4d74f749eed04d4554c796776a58697b98b 100644 (file)
 //**********************************************************************************************************************
 vector<string> BinSeqCommand::setParameters(){ 
        try {
-               CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
-               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
-               CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(plist);
+               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
+               CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
        
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -34,7 +35,7 @@ vector<string> BinSeqCommand::setParameters(){
 string BinSeqCommand::getHelpString(){ 
        try {
                string helpString = "";
-               helpString += "The bin.seqs command parameters are list, fasta, name, label and group.  The fasta and list are required, unless you have a valid current list and fasta file.\n";
+               helpString += "The bin.seqs command parameters are list, fasta, name, count, label and group.  The fasta and list are required, unless you have a valid current list and fasta file.\n";
                helpString += "The label parameter allows you to select what distance levels you would like a output files created for, and are separated by dashes.\n";
                helpString += "The bin.seqs command should be in the following format: bin.seqs(fasta=yourFastaFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n";
                helpString += "Example bin.seqs(fasta=amazon.fasta, group=amazon.groups, name=amazon.names).\n";
@@ -50,6 +51,21 @@ string BinSeqCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
+string BinSeqCommand::getOutputPattern(string type) { // returns the output-filename pattern for the requested output type; only "fasta" is defined here
+    try {
+        string pattern = ""; // stays empty when the type is not recognized
+        
+        if (type == "fasta") {  pattern = "[filename],[distance],fasta"; } //makes file like: amazon.0.03.fasta (process() supplies the [filename] and [distance] values)
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  } // unknown type: report it and raise control_pressed, which execute() checks before removing outputs and returning
+        
+        return pattern; // "" for any type other than "fasta"
+    }
+    catch(exception& e) { // any std::exception raised above lands here
+        m->errorOut(e, "BinSeqCommand", "getOutputPattern"); // report the exception together with the class and function name
+        exit(1); // terminate the program with a non-zero status
+    }
+}
+//**********************************************************************************************************************
 BinSeqCommand::BinSeqCommand(){        
        try {
                abort = true; calledHelp = true; 
@@ -71,6 +87,7 @@ BinSeqCommand::BinSeqCommand(string option) {
                
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
                
                else {
                        vector<string> myArray = setParameters();
@@ -126,6 +143,14 @@ BinSeqCommand::BinSeqCommand(string option) {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["group"] = inputDir + it->second;            }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a count file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
 
                        
@@ -137,6 +162,7 @@ BinSeqCommand::BinSeqCommand(string option) {
                                else {  m->mothurOut("You have no current fasta file and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
                        }
                        else if (fastafile == "not open") { abort = true; }     
+                       else { m->setFastaFile(fastafile); }
                        
                        listfile = validParameter.validFile(parameters, "list", true);
                        if (listfile == "not found") {                  
@@ -145,6 +171,7 @@ BinSeqCommand::BinSeqCommand(string option) {
                                else {  m->mothurOut("You have no current list file and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
                        }
                        else if (listfile == "not open") { listfile = ""; abort = true; }       
+                       else { m->setListFile(listfile); }
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
@@ -164,12 +191,34 @@ BinSeqCommand::BinSeqCommand(string option) {
                        }
                        
                        namesfile = validParameter.validFile(parameters, "name", true);
-                       if (namesfile == "not open") { abort = true; }  
+                       if (namesfile == "not open") { namesfile = ""; abort = true; }  
                        else if (namesfile == "not found") { namesfile = ""; }
+                       else {  m->setNameFile(namesfile); }
 
                        groupfile = validParameter.validFile(parameters, "group", true);
                        if (groupfile == "not open") { abort = true; }
                        else if (groupfile == "not found") { groupfile = ""; }
+                       else { m->setGroupFile(groupfile); }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { countfile = ""; abort = true; }
+                       else if (countfile == "not found") { countfile = "";  } 
+                       else { m->setCountTableFile(countfile); }
+            
+            if ((namesfile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+            }
+                       
+            if ((groupfile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+            }
+                       
+            if (countfile == "") {
+                if (namesfile == ""){
+                    vector<string> files; files.push_back(fastafile); 
+                    parser.getNameFile(files);
+                }
+            }
                        
                }
        }
@@ -199,9 +248,8 @@ int BinSeqCommand::execute(){
                fasta->readFastaFile(fastafile);
                
                //if user gave a namesfile then use it
-               if (namesfile != "") {
-                       readNamesFile();
-               }
+               if (namesfile != "") {  readNamesFile();  }
+        if (countfile != "") {  ct.readTable(countfile);  }
                
                input = new InputData(listfile, "list");
                list = input->getListVector();
@@ -216,12 +264,12 @@ int BinSeqCommand::execute(){
                                
                while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                        
-                       if(m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str());         } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; } 
+                       if(m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);                } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; } 
                        
                        if(allLines == 1 || labels.count(list->getLabel()) == 1){
                                
                                error = process(list);  
-                               if (error == 1) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());         } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; } 
+                               if (error == 1) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);                } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; } 
                                                        
                                processedLabels.insert(list->getLabel());
                                userLabels.erase(list->getLabel());
@@ -234,7 +282,7 @@ int BinSeqCommand::execute(){
                                list = input->getListVector(lastLabel);
                                
                                error = process(list);  
-                               if (error == 1) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());         } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; }
+                               if (error == 1) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);                } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; }
                                                                                                        
                                processedLabels.insert(list->getLabel());
                                userLabels.erase(list->getLabel());
@@ -249,7 +297,7 @@ int BinSeqCommand::execute(){
                        list = input->getListVector();
                }
                
-               if(m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str());         } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; } 
+               if(m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);                } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; } 
 
                //output error messages about any remaining user labels
                set<string>::iterator it;
@@ -270,7 +318,7 @@ int BinSeqCommand::execute(){
                        list = input->getListVector(lastLabel);
                                
                        error = process(list);  
-                       if (error == 1) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());         } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; }
+                       if (error == 1) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);                } delete input;  delete fasta; if (groupfile != "") {  delete groupMap;   } return 0; }
                        
                        delete list;  
                }
@@ -279,11 +327,14 @@ int BinSeqCommand::execute(){
                delete fasta; 
                if (groupfile != "") {  delete groupMap;   } 
                
-               if(m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str());         }  return 0; }  
-               
-               delete input;  
-               delete fasta; 
-               if (groupfile != "") {  delete groupMap;   } 
+               if(m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);                }  return 0; }  
+        
+        //set the first fasta output file as the new current fastafile
+               string currentFasta = "";
+               itTypes = outputTypes.find("fasta");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; m->setFastaFile(currentFasta); }
+               }
                
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
@@ -336,79 +387,75 @@ void BinSeqCommand::readNamesFile() {
 //return 1 if error, 0 otherwise
 int BinSeqCommand::process(ListVector* list) {
        try {
-                               string binnames, name, sequence;
-                               
-                               string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + list->getLabel() + ".fasta";
-                               m->openOutputFile(outputFileName, out);
-                               
-                               //save to output list of output file names
-                               outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName);
-
-                               m->mothurOut(list->getLabel()); m->mothurOutEndLine();
-                               
-                               //for each bin in the list vector
-                               for (int i = 0; i < list->size(); i++) {
-                                       
-                                       if (m->control_pressed) {  return 1; }
-                                       
-                                       binnames = list->get(i);
-                                       while (binnames.find_first_of(',') != -1) { 
-                                               name = binnames.substr(0,binnames.find_first_of(','));
-                                               binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
-                                               
-                                               //do work for that name
-                                               sequence = fasta->getSequence(name);
-                                               if (sequence != "not found") {
-                                                       //if you don't have groups
-                                                       if (groupfile == "") {
-                                                               name = name + "\t" + toString(i+1);
-                                                               out << ">" << name << endl;
-                                                               out << sequence << endl;
-                                                       }else {//if you do have groups
-                                                               string group = groupMap->getGroup(name);
-                                                               if (group == "not found") {  
-                                                                       m->mothurOut(name + " is missing from your group file. Please correct. ");  m->mothurOutEndLine();
-                                                                       return 1;
-                                                               }else{
-                                                                       name = name + "\t" + group + "\t" + toString(i+1);
-                                                                       out << ">" << name << endl;
-                                                                       out << sequence << endl;
-                                                               }
-                                                       }
-                                               }else { 
-                                                       m->mothurOut(name + " is missing from your fasta or name file. Please correct. "); m->mothurOutEndLine();
-                                                       return 1;
-                                               }
-                                               
-                                       }
-                                       
-                                       //get last name
-                                       sequence = fasta->getSequence(binnames);
-                                       if (sequence != "not found") {
-                                               //if you don't have groups
-                                               if (groupfile == "") {
-                                                       binnames = binnames + "\t" + toString(i+1);
-                                                       out << ">" << binnames << endl;
-                                                       out << sequence << endl;
-                                               }else {//if you do have groups
-                                                       string group = groupMap->getGroup(binnames);
-                                                       if (group == "not found") {  
-                                                               m->mothurOut(binnames + " is missing from your group file. Please correct. "); m->mothurOutEndLine();
-                                                               return 1;
-                                                       }else{
-                                                               binnames = binnames + "\t" + group + "\t" + toString(i+1);
-                                                               out << ">" << binnames << endl;
-                                                               out << sequence << endl;
-                                                       }
-                                               }
-                                       }else { 
-                                               m->mothurOut(binnames + " is missing from your fasta or name file. Please correct. "); m->mothurOutEndLine();
-                                               return 1;
-                                       }
-                               }
-                                       
-                               out.close();
-                               return 0;
+        map<string, string> variables; 
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
+        variables["[distance]"] = list->getLabel();
+        string outputFileName = getOutputFileName("fasta", variables);
+        
+        m->openOutputFile(outputFileName, out);
+        outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName);
+        
+        m->mothurOut(list->getLabel()); m->mothurOutEndLine();
+        
+        //for each bin in the list vector
+        for (int i = 0; i < list->size(); i++) {
+            
+            if (m->control_pressed) {  return 1; }
+            
+            string binnames = list->get(i);
+            vector<string> names;
+            m->splitAtComma(binnames, names);
+            for (int j = 0; j < names.size(); j++) {
+                string name = names[j];
+                
+                //do work for that name
+                string sequence = fasta->getSequence(name);
+                
+                if (countfile != "") {
+                    if (sequence != "not found") {
+                        if (ct.hasGroupInfo()) {
+                            vector<string> groups = ct.getGroups(name);
+                            string groupInfo = "";
+                            for (int k = 0; k < groups.size()-1; k++) {
+                                groupInfo += groups[k] + "-";
+                            }
+                            if (groups.size() != 0) { groupInfo += groups[groups.size()-1]; }
+                            else { groupInfo = "not found";  }
+                            name = name + "\t" + groupInfo + "\t" + toString(i+1)+ "\tNumRep=" + toString(ct.getNumSeqs(name));
+                            out << ">" << name << endl;
+                            out << sequence << endl;
+                        }else {
+                            name = name + "\t" + toString(i+1) + "\tNumRep=" + toString(ct.getNumSeqs(name));
+                            out << ">" << name << endl;
+                            out << sequence << endl;
+                        }
+                        
+                    }else { m->mothurOut(name + " is missing from your fasta. Does your list file contain all sequence names or just the uniques?"); m->mothurOutEndLine(); return 1; }
+                }else {
+                    if (sequence != "not found") {
+                        //if you don't have groups
+                        if (groupfile == "") {
+                            name = name + "\t" + toString(i+1);
+                            out << ">" << name << endl;
+                            out << sequence << endl;
+                        }else {//if you do have groups
+                            string group = groupMap->getGroup(name);
+                            if (group == "not found") {  
+                                m->mothurOut(name + " is missing from your group file. Please correct. ");  m->mothurOutEndLine();
+                                return 1;
+                            }else{
+                                name = name + "\t" + group + "\t" + toString(i+1);
+                                out << ">" << name << endl;
+                                out << sequence << endl;
+                            }
+                        }
+                    }else { m->mothurOut(name + " is missing from your fasta or name file. Please correct. "); m->mothurOutEndLine(); return 1; }
+                }
+            }
+        }
+        
+        out.close();
+        return 0;
 
        }
        catch(exception& e) {