]> git.donarmstrong.com Git - mothur.git/commitdiff
v 19.3
authorwestcott <westcott>
Thu, 26 May 2011 14:20:36 +0000 (14:20 +0000)
committerwestcott <westcott>
Thu, 26 May 2011 14:20:36 +0000 (14:20 +0000)
blastdb.cpp
chimeraslayer.cpp
consensusseqscommand.cpp
consensusseqscommand.h
tree.cpp

index b995bd378a46d064ea36773e05bd9b0afe84fefa..48ae1686a35aae2f1a19e980c8b057c827c40021 100644 (file)
@@ -20,6 +20,7 @@ gapOpen(gO), gapExtend(gE), match(m), misMatch(mM) {
        count = 0;
 
        int randNumber = rand();
+       //int randNumber = 12345;
        dbFileName = tag + toString(randNumber) + ".template.unaligned.fasta";
        queryFileName = tag + toString(randNumber) + ".candidate.unaligned.fasta";
        blastFileName = tag + toString(randNumber) + ".blast";
@@ -32,6 +33,7 @@ BlastDB::BlastDB() : Database() {
                count = 0;
 
                int randNumber = rand();
+               //int randNumber = 12345;
                dbFileName = toString(randNumber) + ".template.unaligned.fasta";
                queryFileName = toString(randNumber) + ".candidate.unaligned.fasta";
                blastFileName = toString(randNumber) + ".blast";
@@ -89,6 +91,7 @@ vector<int> BlastDB::findClosestSequences(Sequence* seq, int n) {
                        //wrap entire string in ""
                        blastCommand = "\"" + blastCommand + "\"";
                #endif
+               
                system(blastCommand.c_str());
                
                ifstream m8FileHandle;
@@ -129,6 +132,7 @@ vector<int> BlastDB::findClosestMegaBlast(Sequence* seq, int n, int minPerID) {
                
                ofstream queryFile;
                int randNumber = rand();
+               //int randNumber = 12345;
                m->openOutputFile((queryFileName+toString(randNumber)), queryFile);
                queryFile << '>' << seq->getName() << endl;
                queryFile << seq->getUnaligned() << endl;
@@ -143,13 +147,16 @@ vector<int> BlastDB::findClosestMegaBlast(Sequence* seq, int n, int minPerID) {
                        blastCommand = path + "blast/bin/megablast -e 1e-10 -d " + dbFileName + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn
                        blastCommand += (" -i " + (queryFileName+toString(randNumber)) + " -o " + blastFileName+toString(randNumber));
                #else
+               //blastCommand = path + "blast\\bin\\megablast -e 1e-10 -d " + dbFileName + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn
+               //blastCommand += (" -i " + (queryFileName+toString(randNumber)) + " -o " + blastFileName+toString(randNumber));
+
                        blastCommand =  "\"" + path + "blast\\bin\\megablast\" -e 1e-10 -d " + "\"" + dbFileName + "\"" + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn
                        blastCommand += (" -i " + (queryFileName+toString(randNumber)) + " -o " + blastFileName+toString(randNumber));
                        //wrap entire string in ""
                        blastCommand = "\"" + blastCommand + "\"";
 
                #endif
-               
+               //cout << blastCommand << endl;
                system(blastCommand.c_str());
 
                ifstream m8FileHandle;
@@ -219,10 +226,13 @@ void BlastDB::generateDB() {
                #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                        formatdbCommand = path + "blast/bin/formatdb -p F -o T -i " + dbFileName;       //      format the database, -o option gives us the ability
                #else
+                       //formatdbCommand = path + "blast\\bin\\formatdb -p F -o T -i " + dbFileName;   //      format the database, -o option gives us the ability
+
                        formatdbCommand = "\"" + path + "blast\\bin\\formatdb\" -p F -o T -i " + "\"" +  dbFileName + "\"";
                        //wrap entire string in ""
                        formatdbCommand = "\"" + formatdbCommand + "\"";
                #endif
+               //cout << formatdbCommand << endl;
                system(formatdbCommand.c_str());                                                                //      to get the right sequence names, i think. -p F
                                                                                                                                        //      option tells formatdb that seqs are DNA, not prot
                //m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine(); cout.flush();
index 729ef1f854fa445aed5a84b5ca5b90a3b2255aaa..70208607a00373eac0e13cfa4eb639d9025be99d 100644 (file)
@@ -1117,18 +1117,20 @@ vector<Sequence*> ChimeraSlayer::getBlastSeqs(Sequence* q, vector<Sequence*>& db
                //cout << qname << endl;        
                
                for (int i = 0; i < mergedResults.size(); i++) {
-                       //cout << mergedResults[i]  << '\t' << db[mergedResults[i]]->getName() << endl; 
+                       //cout << q->getName() << mergedResults[i]  << '\t' << db[mergedResults[i]]->getName() << endl; 
                        if (db[mergedResults[i]]->getName() != q->getName()) { 
                                Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
                                refResults.push_back(temp);
                                
                        }
                }
-               
+               //cout << endl << endl;
 
                delete queryRight;
                delete queryLeft;
                
+               if (refResults.size() == 0) { m->mothurOut("[WARNING]: mothur found 0 potential parents, so we are not able to check " + q->getName() + ". This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
+               
                return refResults;
        }
        catch(exception& e) {
index e81627df1be2e26b2c40d6322416d96d9bdcf893..323dd45a5690cd047742b16b9bdb9f9c08972a5a 100644 (file)
@@ -18,6 +18,7 @@ vector<string> ConsensusSeqsCommand::setParameters(){
                CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
                CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(plist);
                CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
+               CommandParameter pcutoff("cutoff", "Number", "", "100", "", "", "",false,false); parameters.push_back(pcutoff);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                
@@ -35,11 +36,12 @@ string ConsensusSeqsCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The consensus.seqs command can be used in 2 ways: create a consensus sequence from a fastafile, or with a listfile create a consensus sequence for each otu. Sequences must be aligned.\n";
-               helpString += "The consensus.seqs command parameters are fasta, list, name and label.\n";
+               helpString += "The consensus.seqs command parameters are fasta, list, name, cutoff and label.\n";
                helpString += "The fasta parameter allows you to enter the fasta file containing your sequences, and is required, unless you have a valid current fasta file. \n";
                helpString += "The list parameter allows you to enter a your list file. \n";
                helpString += "The name parameter allows you to enter a names file associated with the fasta file. \n";
                helpString += "The label parameter allows you to select what distance levels you would like output files for, and are separated by dashes.\n";
+               helpString += "The cutoff parameter allows you set a percentage of sequences that support the base. For example: cutoff=97 would only return a sequence that only showed ambiguities for bases that were not supported by at least 97% of sequences.\n";
                helpString += "The consensus.seqs command should be in the following format: \n";
                helpString += "consensus.seqs(fasta=yourFastaFile, list=yourListFile) \n";      
                helpString += "Example: consensus.seqs(fasta=abrecovery.align, list=abrecovery.fn.list) \n";
@@ -154,6 +156,9 @@ ConsensusSeqsCommand::ConsensusSeqsCommand(string option)  {
                                else { allLines = 1;  }
                        }
                        
+                       string temp = validParameter.validFile(parameters, "cutoff", false);  if (temp == "not found") { temp = "100"; }
+                       convert(temp, cutoff); 
+                       
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(fastafile);      }
 
@@ -234,7 +239,7 @@ int ConsensusSeqsCommand::execute(){
                                }
                                
                                char conBase = '.';
-                               if (numDots != seqs.size()) { conBase = getBase(counts); }
+                               if (numDots != seqs.size()) { conBase = getBase(counts, seqs.size()); }
                                
                                consSeq += conBase;
                                
@@ -477,7 +482,7 @@ string ConsensusSeqsCommand::getConsSeq(string bin, ofstream& outSummary, string
                        }
                        
                        char conBase = '.';
-                       if (numDots != seqs.size()) { conBase = getBase(counts); }
+                       if (numDots != seqs.size()) { conBase = getBase(counts, seqs.size()); }
                        
                        consSeq += conBase;
                        
@@ -503,7 +508,7 @@ string ConsensusSeqsCommand::getConsSeq(string bin, ofstream& outSummary, string
 }
 //***************************************************************************************************************
 
-char ConsensusSeqsCommand::getBase(vector<int> counts){  //A,T,G,C,Gap
+char ConsensusSeqsCommand::getBase(vector<int> counts, int size){  //A,T,G,C,Gap
        try{
                /* A = adenine
                * C = cytosine
@@ -523,6 +528,15 @@ char ConsensusSeqsCommand::getBase(vector<int> counts){  //A,T,G,C,Gap
                
                char conBase = 'N';
                
+               //zero out counts that don't make the cutoff
+               float percentage = (100.0 - cutoff) / 100.0;
+               int zeroCutoff = percentage * size;
+               
+               for (int i = 0; i < counts.size(); i++) {
+                       if (counts[i] < zeroCutoff) { counts[i] = 0; }
+               }
+               
+               
                //any
                if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'n'; }
                //any no gap
@@ -585,6 +599,8 @@ char ConsensusSeqsCommand::getBase(vector<int> counts){  //A,T,G,C,Gap
                else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'C'; }  
                //only gap
                else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = '-'; }
+               //cutoff removed all counts
+               else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) {  m->mothurOut("cutoff ...."); m->mothurOutEndLine(); }
                else{ m->mothurOut("[ERROR]: cannot find consensus base."); m->mothurOutEndLine(); }
                
                return conBase;
index 226b4c959bf25f9c0629a556755641809401baa4..fd2f4f535dfa3ca7b4be2c99b2e323a2d4d0b49c 100644 (file)
@@ -38,12 +38,13 @@ private:
        map<string, string> fastaMap;
        map<string, string> nameMap;
        map<string, string> nameFileMap;
+       int cutoff;
        
        int readFasta();
        int readNames();
        int processList(ListVector*&);
        string getConsSeq(string, ofstream&, string&, int);
-       char getBase(vector<int>);
+       char getBase(vector<int>, int);
 };
 
 #endif
index 53aa4025bef940b9d3fcb830e6686760338ee8ef..cd3d7a6789c9179066899ac62f0db115f14e295b 100644 (file)
--- a/tree.cpp
+++ b/tree.cpp
@@ -808,7 +808,7 @@ void Tree::print(ostream& out, string mode) {
 void Tree::createNewickFile(string f) {
        try {
                int root = findRoot();
-               //filename = m->getRootName(globaldata->getTreeFile()) + "newick";
+       
                filename = f;
 
                m->openOutputFile(filename, out);