From: pschloss Date: Thu, 9 Jun 2011 15:21:36 +0000 (+0000) Subject: some of pat's small modifications X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=ef2b09798e381dbb9b9836e09c72f8af57825830 some of pat's small modifications --- diff --git a/bayesian.cpp b/bayesian.cpp index 9ae0777..f715787 100644 --- a/bayesian.cpp +++ b/bayesian.cpp @@ -134,13 +134,16 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) { count[temp]++; //increment count of seq in this genus who have this word } - //probabilityInTemplate = (# of seqs with that word in template + 0.05) / (total number of seqs in template + 1); + //probabilityInTemplate = (# of seqs with that word in template + 0.50) / (total number of seqs in template + 1); float probabilityInTemplate = (seqsWithWordi.size() + 0.50) / (float) (names.size() + 1); int numNotZero = 0; for (int k = 0; k < genusNodes.size(); k++) { //probabilityInThisTaxonomy = (# of seqs with that word in this taxonomy + probabilityInTemplate) / (total number of seqs in this taxonomy + 1); + + wordGenusProb[i][k] = log((count[genusNodes[k]] + probabilityInTemplate) / (float) (genusTotals[k] + 1)); + if (count[genusNodes[k]] != 0) { #ifdef USE_MPI int pid; @@ -228,7 +231,7 @@ string Bayesian::getTaxonomy(Sequence* seq) { string queryKmerString = kmer.getKmerString(seq->getUnaligned()); vector queryKmers; - for (int i = 0; i < queryKmerString.length(); i++) { + for (int i = 0; i < queryKmerString.length()-1; i++) { // the -1 is to ignore any kmer with an N in it if (queryKmerString[i] != '!') { //this kmer is in the query queryKmers.push_back(i); } @@ -244,7 +247,7 @@ string Bayesian::getTaxonomy(Sequence* seq) { //bootstrap - to set confidenceScore int numToSelect = queryKmers.size() / 8; - tax = bootstrapResults(queryKmers, index, numToSelect); +// tax = bootstrapResults(queryKmers, index, numToSelect); return tax; } @@ -273,7 +276,7 @@ string Bayesian::bootstrapResults(vector kmers, int tax, int numToSelect) { map::iterator itBoot; map::iterator itBoot2; map::iterator itConvert; - + for (int i = 0; i < iters; i++) { if (m->control_pressed) { return "control"; } @@ -344,20 +347,29 @@ int Bayesian::getMostProbableTaxonomy(vector queryKmer) { double maxProbability = -1000000.0; //find taxonomy with highest probability that this sequence is from it + + + cout << genusNodes.size() << endl; + + for (int k = 0; k < genusNodes.size(); k++) { //for each taxonomy calc its probability - double prob = 1.0; + + double prob = 0.0000; for (int i = 0; i < queryKmer.size(); i++) { prob += wordGenusProb[queryKmer[i]][k]; } + cout << phyloTree->get(genusNodes[k]).name << '\t' << prob << endl; + //is this the taxonomy with the greatest probability? if (prob > maxProbability) { indexofGenus = genusNodes[k]; maxProbability = prob; } } -// cout << phyloTree->get(indexofGenus).name << '\t' << maxProbability << endl; + + return indexofGenus; } catch(exception& e) { diff --git a/engine.cpp b/engine.cpp index 469ff40..3108f37 100644 --- a/engine.cpp +++ b/engine.cpp @@ -222,7 +222,7 @@ string Engine::getCommand() { if(nextCommand != NULL) { add_history(nextCommand); } else{ //^D causes null string and we want it to quit mothur - strcpy(nextCommand, "quit"); + nextCommand = strdup("quit"); mout->mothurOut(nextCommand); } diff --git a/shhhercommand.cpp b/shhhercommand.cpp index 459dca3..469bd4d 100644 --- a/shhhercommand.cpp +++ b/shhhercommand.cpp @@ -167,11 +167,12 @@ ShhherCommand::ShhherCommand(string option) { else{ ofstream temp; - compositeFASTAFileName = flowFilesFileName.substr(0, flowFilesFileName.length()-10) + "shhh.fasta"; + //flow.files = 9 character offset + compositeFASTAFileName = flowFilesFileName.substr(0, flowFilesFileName.length()-9) + "shhh.fasta"; m->openOutputFile(compositeFASTAFileName, temp); temp.close(); - compositeNamesFileName = flowFilesFileName.substr(0, flowFilesFileName.length()-10) + "shhh.names"; + compositeNamesFileName = flowFilesFileName.substr(0, flowFilesFileName.length()-9) + "shhh.names"; m->openOutputFile(compositeNamesFileName, temp); temp.close(); } diff --git a/trimflowscommand.cpp b/trimflowscommand.cpp index 6ee8655..bcf996b 100644 --- a/trimflowscommand.cpp +++ b/trimflowscommand.cpp @@ -18,8 +18,6 @@ vector TrimFlowsCommand::setParameters(){ CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "",false,false); parameters.push_back(pmaxhomop); CommandParameter pmaxflows("maxflows", "Number", "", "720", "", "", "",false,false); parameters.push_back(pmaxflows); CommandParameter pminflows("minflows", "Number", "", "360", "", "", "",false,false); parameters.push_back(pminflows); - CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength); - CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength); CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs); CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs); CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs); @@ -172,13 +170,7 @@ TrimFlowsCommand::TrimFlowsCommand(string option) { temp = validParameter.validFile(parameters, "noise", false); if (temp == "not found"){ temp = "0.70"; } convert(temp, noise); - - temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found"){ temp = "0"; } - convert(temp, minLength); - - temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found"){ temp = "0"; } - convert(temp, maxLength); - + temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found"){ temp = "0"; } convert(temp, bdiffs); @@ -190,9 +182,6 @@ TrimFlowsCommand::TrimFlowsCommand(string option) { convert(temp, tdiffs); if(tdiffs == 0){ tdiffs = bdiffs + pdiffs; } - temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found"){ temp = "T"; } - allFiles = m->isTrue(temp); - temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } m->setProcessors(temp); convert(temp, processors); @@ -203,7 +192,8 @@ TrimFlowsCommand::TrimFlowsCommand(string option) { m->mothurOut("The value of the order option must be four bases long\n"); } - if(oligoFileName == ""){ allFiles = 0; } + if(oligoFileName == "") { allFiles = 0; } + else { allFiles = 1; } numFPrimers = 0; numRPrimers = 0; @@ -380,14 +370,6 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN trashCode += 'l'; } - if(minLength > 0 || maxLength > 0){ //screen to see if sequence is above and below a specific number of bases - int seqLength = currSeq.getNumBases(); - if(seqLength < minLength || seqLength > maxLength){ - success = 0; - trashCode += 'l'; - } - } - int primerIndex = 0; int barcodeIndex = 0;