X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=classifyseqscommand.cpp;h=a95f436dd7ffdf4403d316be77a1f8ab580f21f5;hb=9013e13ecfb2fda3c2664a76f76cc99b8c7fa74c;hp=eee316093d81b9385cac7ff428ed2671cf3aee1e;hpb=725a3d4ff2442c79bfde0a75ed3e0904edcf03b7;p=mothur.git diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp index eee3160..a95f436 100644 --- a/classifyseqscommand.cpp +++ b/classifyseqscommand.cpp @@ -32,7 +32,7 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option) { OptionParser parser(option); map parameters = parser.getParameters(); - ValidParameters validParameter; + ValidParameters validParameter("classify.seqs"); map::iterator it; //check to make sure all parameters are valid for command @@ -107,7 +107,17 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option) { #endif ifstream in; - ableToOpen = openInputFile(fastaFileNames[i], in); + ableToOpen = openInputFile(fastaFileNames[i], in, "noerror"); + + //if you can't open it, try default location + if (ableToOpen == 1) { + if (m->getDefaultPath() != "") { //default path is set + string tryPath = m->getDefaultPath() + getSimpleName(fastaFileNames[i]); + m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine(); + ableToOpen = openInputFile(tryPath, in, "noerror"); + fastaFileNames[i] = tryPath; + } + } in.close(); #ifdef USE_MPI @@ -122,7 +132,7 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option) { #endif if (ableToOpen == 1) { - m->mothurOut(fastaFileNames[i] + " will be disregarded."); m->mothurOutEndLine(); + m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); //erase from file list fastaFileNames.erase(fastaFileNames.begin()+i); i--; @@ -168,7 +178,17 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option) { #endif ifstream in; - ableToOpen = openInputFile(namefileNames[i], in); + ableToOpen = openInputFile(namefileNames[i], in, "noerror"); + + //if you can't open it, try default location + if (ableToOpen == 1) { + if (m->getDefaultPath() != "") { //default path is set + string tryPath = m->getDefaultPath() + getSimpleName(namefileNames[i]); + m->mothurOut("Unable to open " + namefileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine(); + ableToOpen = openInputFile(tryPath, in, "noerror"); + namefileNames[i] = tryPath; + } + } in.close(); #ifdef USE_MPI @@ -181,8 +201,14 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option) { } #endif - if (ableToOpen == 1) { m->mothurOut("Unable to match name file with fasta file."); m->mothurOutEndLine(); abort = true; } + if (ableToOpen == 1) { + m->mothurOut("Unable to open " + namefileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); abort = true; + //erase from file list + namefileNames.erase(namefileNames.begin()+i); + i--; + } + } } @@ -213,7 +239,17 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option) { #endif ifstream in; - ableToOpen = openInputFile(groupfileNames[i], in); + ableToOpen = openInputFile(groupfileNames[i], in, "noerror"); + + //if you can't open it, try default location + if (ableToOpen == 1) { + if (m->getDefaultPath() != "") { //default path is set + string tryPath = m->getDefaultPath() + getSimpleName(groupfileNames[i]); + m->mothurOut("Unable to open " + groupfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine(); + ableToOpen = openInputFile(tryPath, in, "noerror"); + groupfileNames[i] = tryPath; + } + } in.close(); #ifdef USE_MPI @@ -226,8 +262,13 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option) { } #endif - if (ableToOpen == 1) { m->mothurOut("Unable to match group file with fasta file, not using " + groupfileNames[i] + "."); m->mothurOutEndLine(); groupfileNames[i] = ""; } + if (ableToOpen == 1) { + m->mothurOut("Unable to open " + groupfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); groupfileNames[i] = ""; + //erase from file list + groupfileNames.erase(groupfileNames.begin()+i); + i--; + } } } @@ -320,7 +361,7 @@ void ClassifySeqsCommand::help(){ m->mothurOut("The gapextend parameter allows you to specify the penalty for extending a gap in an alignment. The default is -1.0.\n"); m->mothurOut("The numwanted parameter allows you to specify the number of sequence matches you want with the knn method. The default is 10.\n"); m->mothurOut("The cutoff parameter allows you to specify a bootstrap confidence threshold for your taxonomy. The default is 0.\n"); - m->mothurOut("The probs parameter shut off the bootstrapping results for the bayesian method. The default is true, meaning you want the bootstrapping to be run.\n"); + m->mothurOut("The probs parameter shuts off the bootstrapping results for the bayesian method. The default is true, meaning you want the bootstrapping to be shown.\n"); m->mothurOut("The iters parameter allows you to specify how many iterations to do when calculating the bootstrap confidence score for your taxonomy with the bayesian method. The default is 100.\n"); m->mothurOut("The classify.seqs command should be in the following format: \n"); m->mothurOut("classify.seqs(template=yourTemplateFile, fasta=yourFastaFile, method=yourClassificationMethod, search=yourSearchmethod, ksize=yourKmerSize, taxonomy=yourTaxonomyFile, processors=yourProcessors) \n"); @@ -358,10 +399,15 @@ int ClassifySeqsCommand::execute(){ m->mothurOut("Classifying sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine(); + string RippedTaxName = getRootName(getSimpleName(taxonomyFileName)); + RippedTaxName = getExtension(RippedTaxName.substr(0, RippedTaxName.length()-1)); + if (RippedTaxName[0] == '.') { RippedTaxName = RippedTaxName.substr(1, RippedTaxName.length()); } + RippedTaxName += "."; + if (outputDir == "") { outputDir += hasPath(fastaFileNames[s]); } - string newTaxonomyFile = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + getRootName(getSimpleName(taxonomyFileName)) + "taxonomy"; + string newTaxonomyFile = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + RippedTaxName + "taxonomy"; string tempTaxonomyFile = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + "taxonomy.temp"; - string taxSummary = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + getRootName(getSimpleName(taxonomyFileName)) + "tax.summary"; + string taxSummary = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + RippedTaxName + "tax.summary"; outputNames.push_back(newTaxonomyFile); outputNames.push_back(taxSummary); @@ -373,7 +419,7 @@ int ClassifySeqsCommand::execute(){ #ifdef USE_MPI int pid, end, numSeqsPerProcessor; int tag = 2001; - vector MPIPos; + vector MPIPos; MPI_Status status; MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are @@ -470,7 +516,7 @@ int ClassifySeqsCommand::execute(){ if(processors == 1){ ifstream inFASTA; openInputFile(fastaFileNames[s], inFASTA); - numFastaSeqs=count(istreambuf_iterator(inFASTA),istreambuf_iterator(), '>'); + getNumSeqs(inFASTA, numFastaSeqs); inFASTA.close(); lines.push_back(new linePair(0, numFastaSeqs)); @@ -478,7 +524,7 @@ int ClassifySeqsCommand::execute(){ driver(lines[0], newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]); } else{ - vector positions; + vector positions; processIDS.resize(0); ifstream inFASTA; @@ -488,7 +534,7 @@ int ClassifySeqsCommand::execute(){ while(!inFASTA.eof()){ input = getline(inFASTA); if (input.length() != 0) { - if(input[0] == '>'){ int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); } + if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); } } } inFASTA.close(); @@ -496,9 +542,9 @@ int ClassifySeqsCommand::execute(){ numFastaSeqs = positions.size(); int numSeqsPerProcessor = numFastaSeqs / processors; - + for (int i = 0; i < processors; i++) { - int startPos = positions[ i * numSeqsPerProcessor ]; + unsigned long int startPos = positions[ i * numSeqsPerProcessor ]; if(i == processors - 1){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; } @@ -520,7 +566,7 @@ int ClassifySeqsCommand::execute(){ #else ifstream inFASTA; openInputFile(fastaFileNames[s], inFASTA); - numFastaSeqs=count(istreambuf_iterator(inFASTA),istreambuf_iterator(), '>'); + getNumSeqs(inFASTA, numFastaSeqs); inFASTA.close(); lines.push_back(new linePair(0, numFastaSeqs)); @@ -635,7 +681,7 @@ int ClassifySeqsCommand::execute(){ rename(unclass.c_str(), newTaxonomyFile.c_str()); m->mothurOutEndLine(); - m->mothurOut("It took " + toString(time(NULL) - start) + " secs to create the summary file for " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); + m->mothurOut("It took " + toString(time(NULL) - start) + " secs to create the summary file for " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); #ifdef USE_MPI } @@ -761,9 +807,9 @@ int ClassifySeqsCommand::driver(linePair* line, string taxFName, string tempTFNa for(int i=0;inumSeqs;i++){ if (m->control_pressed) { return 0; } - - Sequence* candidateSeq = new Sequence(inFASTA); - + + Sequence* candidateSeq = new Sequence(inFASTA); gobble(inFASTA); + if (candidateSeq->getName() != "") { taxonomy = classify->getTaxonomy(candidateSeq); @@ -800,7 +846,7 @@ int ClassifySeqsCommand::driver(linePair* line, string taxFName, string tempTFNa } //********************************************************************************************************************** #ifdef USE_MPI -int ClassifySeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& newFile, MPI_File& tempFile, vector& MPIPos){ +int ClassifySeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& newFile, MPI_File& tempFile, vector& MPIPos){ try { MPI_Status statusNew; MPI_Status statusTemp;