X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=classifyseqscommand.cpp;h=e6eb560e6880ff500f47d8c629f1a0b0c04066b7;hb=c3f0a9c8f932b923f3a6fbbf143e8f4b85fd6f5f;hp=52248979080ce4a8b3676fb3fa96baa3e7ddd24a;hpb=92f998cc7debc4bf3e8594848586b8153d96db16;p=mothur.git diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp index 5224897..e6eb560 100644 --- a/classifyseqscommand.cpp +++ b/classifyseqscommand.cpp @@ -25,19 +25,45 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option){ else { //valid paramters for this command - string AlignArray[] = {"template","fasta","name","search","ksize","method","processors","taxonomy","match","mismatch","gapopen","gapextend","numwanted","cutoff","probs","iters"}; + string AlignArray[] = {"template","fasta","name","search","ksize","method","processors","taxonomy","match","mismatch","gapopen","gapextend","numwanted","cutoff","probs","iters", "outputdir","inputdir"}; vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); OptionParser parser(option); map parameters = parser.getParameters(); ValidParameters validParameter; + map::iterator it; //check to make sure all parameters are valid for command - for (map::iterator it = parameters.begin(); it != parameters.end(); it++) { + for (it = parameters.begin(); it != parameters.end(); it++) { if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } } + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("template"); + //user has given a template file + if(it != parameters.end()){ + path = hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["template"] = inputDir + it->second; } + } + + it = parameters.find("taxonomy"); + //user has given a template file + if(it != parameters.end()){ + path = hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["taxonomy"] = inputDir + it->second; } + } + } + //check for required parameters templateFileName = validParameter.validFile(parameters, "template", true); if (templateFileName == "not found") { @@ -54,6 +80,12 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option){ //go through files and make sure they are good, if not, then disregard them for (int i = 0; i < fastaFileNames.size(); i++) { + if (inputDir != "") { + string path = hasPath(fastaFileNames[i]); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; } + } + int ableToOpen; ifstream in; ableToOpen = openInputFile(fastaFileNames[i], in); @@ -81,12 +113,19 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option){ namefile = validParameter.validFile(parameters, "name", false); - if (fastaFileName == "not found") { namefile = ""; } + if (namefile == "not found") { namefile = ""; } + else { splitAtDash(namefile, namefileNames); //go through files and make sure they are good, if not, then disregard them for (int i = 0; i < namefileNames.size(); i++) { + if (inputDir != "") { + string path = hasPath(namefileNames[i]); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { namefileNames[i] = inputDir + namefileNames[i]; } + } + int ableToOpen; ifstream in; ableToOpen = openInputFile(namefileNames[i], in); @@ -165,9 +204,10 @@ ClassifySeqsCommand::~ClassifySeqsCommand(){ void ClassifySeqsCommand::help(){ try { mothurOut("The classify.seqs command reads a fasta file containing sequences and creates a .taxonomy file and a .tax.summary file.\n"); - mothurOut("The classify.seqs command parameters are template, fasta, search, ksize, method, taxonomy, processors, match, mismatch, gapopen, gapextend, numwanted and probs.\n"); + mothurOut("The classify.seqs command parameters are template, fasta, name, search, ksize, method, taxonomy, processors, match, mismatch, gapopen, gapextend, numwanted and probs.\n"); mothurOut("The template, fasta and taxonomy parameters are required. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n"); - mothurOut("The search parameter allows you to specify the method to find most similar template. Your options are: suffix, kmer and blast. The default is kmer.\n"); + mothurOut("The search parameter allows you to specify the method to find most similar template. Your options are: suffix, kmer, blast and distance. The default is kmer.\n"); + mothurOut("The name parameter allows you add a names file with your fasta file, if you enter multiple fasta files, you must enter matching names files for them.\n"); mothurOut("The method parameter allows you to specify classification method to use. Your options are: bayesian and knn. The default is bayesian.\n"); mothurOut("The ksize parameter allows you to specify the kmer size for finding most similar template to candidate. The default is 8.\n"); mothurOut("The processors parameter allows you to specify the number of processors to use. The default is 1.\n"); @@ -199,8 +239,8 @@ int ClassifySeqsCommand::execute(){ try { if (abort == true) { return 0; } - if(method == "bayesian") { classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters); } - else if(method == "knn") { classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted); } + if(method == "bayesian"){ classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters); } + else if(method == "knn"){ classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted); } else { mothurOut(search + " is not a valid method option. I will run the command using bayesian."); mothurOutEndLine(); @@ -226,9 +266,11 @@ int ClassifySeqsCommand::execute(){ } mothurOut("Classifying sequences from " + fastaFileNames[s] + " ..." ); mothurOutEndLine(); - string newTaxonomyFile = getRootName(fastaFileNames[s]) + getRootName(taxonomyFileName) + "taxonomy"; - string tempTaxonomyFile = getRootName(fastaFileNames[s]) + "taxonomy.temp"; - string taxSummary = getRootName(fastaFileNames[s]) + getRootName(taxonomyFileName) + "tax.summary"; + + if (outputDir == "") { outputDir += hasPath(fastaFileNames[s]); } + string newTaxonomyFile = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + getRootName(getSimpleName(taxonomyFileName)) + "taxonomy"; + string tempTaxonomyFile = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + "taxonomy.temp"; + string taxSummary = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + getRootName(getSimpleName(taxonomyFileName)) + "tax.summary"; int start = time(NULL); int numFastaSeqs = 0; @@ -297,7 +339,7 @@ int ClassifySeqsCommand::execute(){ #endif //make taxonomy tree from new taxonomy file PhyloTree taxaBrowser; - + ifstream in; openInputFile(tempTaxonomyFile, in); @@ -379,7 +421,7 @@ string ClassifySeqsCommand::addUnclassifieds(string tax, int maxlevel) { //keep what you have counting the levels while (tax.find_first_of(';') != -1) { //get taxon - taxon = tax.substr(0,tax.find_first_of(';')); + taxon = tax.substr(0,tax.find_first_of(';'))+';'; tax = tax.substr(tax.find_first_of(';')+1, tax.length()); newTax += taxon; level++; @@ -479,13 +521,14 @@ int ClassifySeqsCommand::driver(linePair* line, string taxFName, string tempTFNa if (candidateSeq->getName() != "") { taxonomy = classify->getTaxonomy(candidateSeq); - + if (taxonomy != "bad seq") { //output confidence scores or not if (probs) { outTax << candidateSeq->getName() << '\t' << taxonomy << endl; }else{ outTax << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl; + cout << classify->getSimpleTax() << endl; } outTaxSimple << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;