]> git.donarmstrong.com Git - mothur.git/blobdiff - aligncommand.cpp
added shared file type to get.groups and remove.groups
[mothur.git] / aligncommand.cpp
index 5f64411f0d9d8df558fdf6d8b9282227899e1e57..a32ec4df279b2fa1158ec5e818ce03545de4ed0c 100644 (file)
@@ -24,6 +24,7 @@
 
 #include "nast.hpp"
 #include "nastreport.hpp"
+#include "referencedb.h"
 
 //**********************************************************************************************************************
 vector<string> AlignCommand::setParameters(){  
@@ -39,6 +40,7 @@ vector<string> AlignCommand::setParameters(){
                CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
+               CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
                CommandParameter pthreshold("threshold", "Number", "", "0.50", "", "", "",false,false); parameters.push_back(pthreshold);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
@@ -56,24 +58,25 @@ vector<string> AlignCommand::setParameters(){
 string AlignCommand::getHelpString(){  
        try {
                string helpString = "";
-               helpString += "The align.seqs command reads a file containing sequences and creates an alignment file and a report file.\n";
-               helpString += "The align.seqs command parameters are reference, fasta, search, ksize, align, match, mismatch, gapopen, gapextend and processors.\n";
-               helpString += "The reference and fasta parameters are required. You may leave fasta blank if you have a valid fasta file. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
-               helpString += "The search parameter allows you to specify the method to find most similar template.  Your options are: suffix, kmer and blast. The default is kmer.\n";
-               helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh, needleman, blast and noalign. The default is needleman.\n";
-               helpString += "The ksize parameter allows you to specify the kmer size for finding most similar template to candidate.  The default is 8.\n";
-               helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
-               helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
-               helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
-               helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
-               helpString += "The flip parameter is used to specify whether or not you want mothur to try the reverse complement if a sequence falls below the threshold.  The default is false.\n";
-               helpString += "The threshold is used to specify a cutoff at which an alignment is deemed 'bad' and the reverse complement may be tried. The default threshold is 0.50, meaning 50% of the bases are removed in the alignment.\n";
-               helpString += "If the flip parameter is set to true the reverse complement of the sequence is aligned and the better alignment is reported.\n";
-               helpString += "The default for the threshold parameter is 0.50, meaning at least 50% of the bases must remain or the sequence is reported as potentially reversed.\n";
-               helpString += "The align.seqs command should be in the following format: \n";
-               helpString += "align.seqs(reference=yourTemplateFile, fasta=yourCandidateFile, align=yourAlignmentMethod, search=yourSearchmethod, ksize=yourKmerSize, match=yourMatchBonus, mismatch=yourMismatchpenalty, gapopen=yourGapopenPenalty, gapextend=yourGapExtendPenalty) \n";
-               helpString += "Example align.seqs(candidate=candidate.fasta, template=core.filtered, align=kmer, search=gotoh, ksize=8, match=2.0, mismatch=3.0, gapopen=-2.0, gapextend=-1.0)\n";
-               helpString += "Note: No spaces between parameter labels (i.e. candidate), '=' and parameters (i.e.yourFastaFile).\n\n";
+               helpString += "The align.seqs command reads a file containing sequences and creates an alignment file and a report file.";
+               helpString += "The align.seqs command parameters are reference, fasta, search, ksize, align, match, mismatch, gapopen, gapextend and processors.";
+               helpString += "The reference and fasta parameters are required. You may leave fasta blank if you have a valid fasta file. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta.";
+               helpString += "The search parameter allows you to specify the method to find most similar template.  Your options are: suffix, kmer and blast. The default is kmer.";
+               helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh, needleman, blast and noalign. The default is needleman.";
+               helpString += "The ksize parameter allows you to specify the kmer size for finding most similar template to candidate.  The default is 8.";
+               helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.";
+               helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.";
+               helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.";
+               helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.";
+               helpString += "The flip parameter is used to specify whether or not you want mothur to try the reverse complement if a sequence falls below the threshold.  The default is false.";
+               helpString += "The threshold is used to specify a cutoff at which an alignment is deemed 'bad' and the reverse complement may be tried. The default threshold is 0.50, meaning 50% of the bases are removed in the alignment.";
+               helpString += "If the flip parameter is set to true the reverse complement of the sequence is aligned and the better alignment is reported.";
+               helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
+               helpString += "The default for the threshold parameter is 0.50, meaning at least 50% of the bases must remain or the sequence is reported as potentially reversed.";
+               helpString += "The align.seqs command should be in the following format:";
+               helpString += "align.seqs(reference=yourTemplateFile, fasta=yourCandidateFile, align=yourAlignmentMethod, search=yourSearchmethod, ksize=yourKmerSize, match=yourMatchBonus, mismatch=yourMismatchpenalty, gapopen=yourGapopenPenalty, gapextend=yourGapExtendPenalty)";
+               helpString += "Example align.seqs(candidate=candidate.fasta, template=core.filtered, align=kmer, search=gotoh, ksize=8, match=2.0, mismatch=3.0, gapopen=-2.0, gapextend=-1.0)";
+               helpString += "Note: No spaces between parameter labels (i.e. candidate), '=' and parameters (i.e.yourFastaFile).";
                return helpString;
        }
        catch(exception& e) {
@@ -99,10 +102,12 @@ AlignCommand::AlignCommand(){
 //**********************************************************************************************************************
 AlignCommand::AlignCommand(string option)  {
        try {
-               abort = false; calledHelp = false;   
-       
+               abort = false; calledHelp = false;  
+               ReferenceDB* rdb = ReferenceDB::getInstance();
+               
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true;}
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
                
                else {
                        vector<string> myArray = setParameters();
@@ -145,15 +150,6 @@ AlignCommand::AlignCommand(string option)  {
                                }
                        }
 
-                       //check for required parameters
-                       templateFileName = validParameter.validFile(parameters, "reference", true);
-                       
-                       if (templateFileName == "not found") { 
-                               m->mothurOut("reference is a required parameter for the align.seqs command."); 
-                               m->mothurOutEndLine();
-                               abort = true; 
-                       }else if (templateFileName == "not open") { abort = true; }     
-                       
                        candidateFileName = validParameter.validFile(parameters, "fasta", false);
                        if (candidateFileName == "not found") { 
                                //if there is a current fasta file, use it
@@ -167,50 +163,66 @@ AlignCommand::AlignCommand(string option)  {
                                for (int i = 0; i < candidateFileNames.size(); i++) {
                                        //candidateFileNames[i] = m->getFullPathName(candidateFileNames[i]);
                                        
-                                       if (inputDir != "") {
-                                               string path = m->hasPath(candidateFileNames[i]);
-                                               //if the user has not given a path then, add inputdir. else leave path alone.
-                                               if (path == "") {       candidateFileNames[i] = inputDir + candidateFileNames[i];               }
-                                       }
-       
-                                       int ableToOpen;
-                                       ifstream in;
-                                       ableToOpen = m->openInputFile(candidateFileNames[i], in, "noerror");
-                                       in.close();     
-                                       
-                                       //if you can't open it, try default location
-                                       if (ableToOpen == 1) {
-                                               if (m->getDefaultPath() != "") { //default path is set
-                                                       string tryPath = m->getDefaultPath() + m->getSimpleName(candidateFileNames[i]);
-                                                       m->mothurOut("Unable to open " + candidateFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
-                                                       ifstream in2;
-                                                       ableToOpen = m->openInputFile(tryPath, in2, "noerror");
-                                                       in2.close();
-                                                       candidateFileNames[i] = tryPath;
+                                       bool ignore = false;
+                                       if (candidateFileNames[i] == "current") { 
+                                               candidateFileNames[i] = m->getFastaFile(); 
+                                               if (candidateFileNames[i] != "") {  m->mothurOut("Using " + candidateFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
+                                               else {  
+                                                       m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
+                                                       //erase from file list
+                                                       candidateFileNames.erase(candidateFileNames.begin()+i);
+                                                       i--;
                                                }
                                        }
                                        
-                                       //if you can't open it, try output location
-                                       if (ableToOpen == 1) {
-                                               if (m->getOutputDir() != "") { //default path is set
-                                                       string tryPath = m->getOutputDir() + m->getSimpleName(candidateFileNames[i]);
-                                                       m->mothurOut("Unable to open " + candidateFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
-                                                       ifstream in2;
-                                                       ableToOpen = m->openInputFile(tryPath, in2, "noerror");
-                                                       in2.close();
-                                                       candidateFileNames[i] = tryPath;
-                                               }
-                                       }
+                                       if (!ignore) {
                                        
-                                                                       
-
-                                       if (ableToOpen == 1) { 
-                                               m->mothurOut("Unable to open " + candidateFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
-                                               //erase from file list
-                                               candidateFileNames.erase(candidateFileNames.begin()+i);
-                                               i--;
+                                               if (inputDir != "") {
+                                                       string path = m->hasPath(candidateFileNames[i]);
+                                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                                       if (path == "") {       candidateFileNames[i] = inputDir + candidateFileNames[i];               }
+                                               }
+               
+                                               int ableToOpen;
+                                               ifstream in;
+                                               ableToOpen = m->openInputFile(candidateFileNames[i], in, "noerror");
+                                               in.close();     
+                                               
+                                               //if you can't open it, try default location
+                                               if (ableToOpen == 1) {
+                                                       if (m->getDefaultPath() != "") { //default path is set
+                                                               string tryPath = m->getDefaultPath() + m->getSimpleName(candidateFileNames[i]);
+                                                               m->mothurOut("Unable to open " + candidateFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
+                                                               ifstream in2;
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                                               in2.close();
+                                                               candidateFileNames[i] = tryPath;
+                                                       }
+                                               }
+                                               
+                                               //if you can't open it, try output location
+                                               if (ableToOpen == 1) {
+                                                       if (m->getOutputDir() != "") { //default path is set
+                                                               string tryPath = m->getOutputDir() + m->getSimpleName(candidateFileNames[i]);
+                                                               m->mothurOut("Unable to open " + candidateFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
+                                                               ifstream in2;
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                                               in2.close();
+                                                               candidateFileNames[i] = tryPath;
+                                                       }
+                                               }
+                                               
+                                                                               
+
+                                               if (ableToOpen == 1) { 
+                                                       m->mothurOut("Unable to open " + candidateFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
+                                                       //erase from file list
+                                                       candidateFileNames.erase(candidateFileNames.begin()+i);
+                                                       i--;
+                                               }else {
+                                                       m->setFastaFile(candidateFileNames[i]);
+                                               }
                                        }
-                                       
                                }
                                
                                //make sure there is at least one valid file left
@@ -242,6 +254,27 @@ AlignCommand::AlignCommand(string option)  {
                        temp = validParameter.validFile(parameters, "flip", false);                     if (temp == "not found"){       temp = "f";                             }
                        flip = m->isTrue(temp); 
                        
+                       temp = validParameter.validFile(parameters, "save", false);                     if (temp == "not found"){       temp = "f";                             }
+                       save = m->isTrue(temp); 
+                       rdb->save = save; 
+                       if (save) { //clear out old references
+                               rdb->clearMemory();
+                       }
+                       
+                       //this has to go after save so that if the user sets save=t and provides no reference we abort
+                       templateFileName = validParameter.validFile(parameters, "reference", true);
+                       if (templateFileName == "not found") { 
+                               //check for saved reference sequences
+                               if (rdb->referenceSeqs.size() != 0) {
+                                       templateFileName = "saved";
+                               }else {
+                                       m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required for the align.seqs command."); 
+                                       m->mothurOutEndLine();
+                                       abort = true; 
+                               }
+                       }else if (templateFileName == "not open") { abort = true; }     
+                       else {  if (save) {     rdb->setSavedReference(templateFileName);       }       }
+                       
                        temp = validParameter.validFile(parameters, "threshold", false);        if (temp == "not found"){       temp = "0.50";                  }
                        convert(temp, threshold); 
                        
@@ -403,7 +436,7 @@ int AlignCommand::execute(){
                                                //MPI_Info info;
                                                //MPI_File_delete(outAccnosFilename, info);
                                                hasAccnos = false;      
-                                               remove(accnosFileName.c_str()); 
+                                               m->mothurRemove(accnosFileName); 
                                        }
                                }
                                
@@ -423,10 +456,10 @@ int AlignCommand::execute(){
                        numFastaSeqs = driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]);
        #endif
                        
-                       if (m->control_pressed) { remove(accnosFileName.c_str()); remove(alignFileName.c_str()); remove(reportFileName.c_str()); outputTypes.clear();  return 0; }
+                       if (m->control_pressed) { m->mothurRemove(accnosFileName); m->mothurRemove(alignFileName); m->mothurRemove(reportFileName); outputTypes.clear();  return 0; }
                        
                        //delete accnos file if its blank else report to user
-                       if (m->isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
+                       if (m->isBlank(accnosFileName)) {  m->mothurRemove(accnosFileName);  hasAccnos = false; }
                        else { 
                                m->mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
                                if (!flip) {
@@ -816,24 +849,24 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s
                
                vector<string> nonBlankAccnosFiles;
                if (!(m->isBlank(accnosFName))) { nonBlankAccnosFiles.push_back(accnosFName); }
-               else { remove(accnosFName.c_str()); } //remove so other files can be renamed to it
+               else { m->mothurRemove(accnosFName); } //remove so other files can be renamed to it
                        
                for (int i = 0; i < processIDS.size(); i++) {
                        ifstream in;
                        string tempFile =  alignFileName + toString(processIDS[i]) + ".num.temp";
                        m->openInputFile(tempFile, in);
                        if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
-                       in.close(); remove(tempFile.c_str());
+                       in.close(); m->mothurRemove(tempFile);
                        
                        appendAlignFiles((alignFileName + toString(processIDS[i]) + ".temp"), alignFileName);
-                       remove((alignFileName + toString(processIDS[i]) + ".temp").c_str());
+                       m->mothurRemove((alignFileName + toString(processIDS[i]) + ".temp"));
                        
                        appendReportFiles((reportFileName + toString(processIDS[i]) + ".temp"), reportFileName);
-                       remove((reportFileName + toString(processIDS[i]) + ".temp").c_str());
+                       m->mothurRemove((reportFileName + toString(processIDS[i]) + ".temp"));
                        
                        if (!(m->isBlank(accnosFName + toString(processIDS[i]) + ".temp"))) {
                                nonBlankAccnosFiles.push_back(accnosFName + toString(processIDS[i]) + ".temp");
-                       }else { remove((accnosFName + toString(processIDS[i]) + ".temp").c_str());  }
+                       }else { m->mothurRemove((accnosFName + toString(processIDS[i]) + ".temp"));  }
                        
                }
                
@@ -843,7 +876,7 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s
                        
                        for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
                                appendAlignFiles(nonBlankAccnosFiles[h], accnosFName);
-                               remove(nonBlankAccnosFiles[h].c_str());
+                               m->mothurRemove(nonBlankAccnosFiles[h]);
                        }
                }else { //recreate the accnosfile if needed
                        ofstream out;