git.donarmstrong.com Git - mothur.git/blobdiff - classifyseqscommand.cpp
fixes while testing 1.12.0
[mothur.git] / classifyseqscommand.cpp
index 85883282ed841c4f22bba3dda6e39e83fe5287c7..a20a383b6125e135fc1aefeec47e2bc6b892f1e9 100644 (file)
@@ -107,7 +107,17 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                                        #endif
                                        
                                        ifstream in;
-                                       ableToOpen = openInputFile(fastaFileNames[i], in);
+                                       ableToOpen = openInputFile(fastaFileNames[i], in, "noerror");
+                               
+                                       //if you can't open it, try default location
+                                       if (ableToOpen == 1) {
+                                               if (m->getDefaultPath() != "") { //default path is set
+                                                       string tryPath = m->getDefaultPath() + getSimpleName(fastaFileNames[i]);
+                                                       m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
+                                                       ableToOpen = openInputFile(tryPath, in, "noerror");
+                                                       fastaFileNames[i] = tryPath;
+                                               }
+                                       }
                                        in.close();
                                        
                                        #ifdef USE_MPI  
@@ -122,7 +132,7 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                                        #endif
                                        
                                        if (ableToOpen == 1) { 
-                                               m->mothurOut(fastaFileNames[i] + " will be disregarded."); m->mothurOutEndLine(); 
+                                               m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
                                                //erase from file list
                                                fastaFileNames.erase(fastaFileNames.begin()+i);
                                                i--;
@@ -168,7 +178,17 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                                        #endif
 
                                        ifstream in;
-                                       ableToOpen = openInputFile(namefileNames[i], in);
+                                       ableToOpen = openInputFile(namefileNames[i], in, "noerror");
+                               
+                                       //if you can't open it, try default location
+                                       if (ableToOpen == 1) {
+                                               if (m->getDefaultPath() != "") { //default path is set
+                                                       string tryPath = m->getDefaultPath() + getSimpleName(namefileNames[i]);
+                                                       m->mothurOut("Unable to open " + namefileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
+                                                       ableToOpen = openInputFile(tryPath, in, "noerror");
+                                                       namefileNames[i] = tryPath;
+                                               }
+                                       }
                                        in.close();
                                        
                                        #ifdef USE_MPI  
@@ -181,8 +201,14 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                                                }
                                                
                                        #endif
-                                       if (ableToOpen == 1) {  m->mothurOut("Unable to match name file with fasta file."); m->mothurOutEndLine(); abort = true;        }
                                        
+                                       if (ableToOpen == 1) { 
+                                               m->mothurOut("Unable to open " + namefileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();  abort = true;
+                                               //erase from file list
+                                               namefileNames.erase(namefileNames.begin()+i);
+                                               i--;
+                                       }
+
                                }
                        }
 
@@ -213,7 +239,17 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                                        #endif
 
                                        ifstream in;
-                                       ableToOpen = openInputFile(groupfileNames[i], in);
+                                       ableToOpen = openInputFile(groupfileNames[i], in, "noerror");
+                               
+                                       //if you can't open it, try default location
+                                       if (ableToOpen == 1) {
+                                               if (m->getDefaultPath() != "") { //default path is set
+                                                       string tryPath = m->getDefaultPath() + getSimpleName(groupfileNames[i]);
+                                                       m->mothurOut("Unable to open " + groupfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
+                                                       ableToOpen = openInputFile(tryPath, in, "noerror");
+                                                       groupfileNames[i] = tryPath;
+                                               }
+                                       }
                                        in.close();
                                        
                                        #ifdef USE_MPI  
@@ -226,8 +262,13 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                                                }
                                                
                                        #endif
-                                       if (ableToOpen == 1) {  m->mothurOut("Unable to match group file with fasta file, not using " + groupfileNames[i] + "."); m->mothurOutEndLine(); groupfileNames[i] = "";        }
                                        
+                                       if (ableToOpen == 1) { 
+                                               m->mothurOut("Unable to open " + groupfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); groupfileNames[i] = "";
+                                               //erase from file list
+                                               groupfileNames.erase(groupfileNames.begin()+i);
+                                               i--;
+                                       }
                                }
                        }
 
@@ -320,7 +361,7 @@ void ClassifySeqsCommand::help(){
                m->mothurOut("The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n");
                m->mothurOut("The numwanted parameter allows you to specify the number of sequence matches you want with the knn method.  The default is 10.\n");
                m->mothurOut("The cutoff parameter allows you to specify a bootstrap confidence threshold for your taxonomy.  The default is 0.\n");
-               m->mothurOut("The probs parameter shut off the bootstrapping results for the bayesian method. The default is true, meaning you want the bootstrapping to be run.\n");
+               m->mothurOut("The probs parameter shuts off the bootstrapping results for the bayesian method. The default is true, meaning you want the bootstrapping to be shown.\n");
                m->mothurOut("The iters parameter allows you to specify how many iterations to do when calculating the bootstrap confidence score for your taxonomy with the bayesian method.  The default is 100.\n");
                m->mothurOut("The classify.seqs command should be in the following format: \n");
                m->mothurOut("classify.seqs(template=yourTemplateFile, fasta=yourFastaFile, method=yourClassificationMethod, search=yourSearchmethod, ksize=yourKmerSize, taxonomy=yourTaxonomyFile, processors=yourProcessors) \n");
@@ -419,8 +460,10 @@ int ClassifySeqsCommand::execute(){
                                        MPIPos = setFilePosFasta(fastaFileNames[s], numFastaSeqs); //fills MPIPos, returns numSeqs
                                        
                                        //send file positions to all processes
-                                       MPI_Bcast(&numFastaSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
-                                       MPI_Bcast(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos   
+                                       for(int i = 1; i < processors; i++) { 
+                                               MPI_Send(&numFastaSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                               MPI_Send(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                                       }
                                        
                                        //figure out how many sequences you have to align
                                        numSeqsPerProcessor = numFastaSeqs / processors;
@@ -438,9 +481,9 @@ int ClassifySeqsCommand::execute(){
                                                MPI_Recv(&done, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
                                        }
                                }else{ //you are a child process
-                                       MPI_Bcast(&numFastaSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+                                       MPI_Recv(&numFastaSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
                                        MPIPos.resize(numFastaSeqs+1);
-                                       MPI_Bcast(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                                       MPI_Recv(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
                                        
                                        //figure out how many sequences you have to align
                                        numSeqsPerProcessor = numFastaSeqs / processors;
@@ -461,13 +504,14 @@ int ClassifySeqsCommand::execute(){
                                MPI_File_close(&inMPI);
                                MPI_File_close(&outMPINewTax);
                                MPI_File_close(&outMPITempTax);
+                               MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                                
 #else
                #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                        if(processors == 1){
                                ifstream inFASTA;
                                openInputFile(fastaFileNames[s], inFASTA);
-                               numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+                               getNumSeqs(inFASTA, numFastaSeqs);
                                inFASTA.close();
                                
                                lines.push_back(new linePair(0, numFastaSeqs));
@@ -475,7 +519,7 @@ int ClassifySeqsCommand::execute(){
                                driver(lines[0], newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]);
                        }
                        else{
-                               vector<int> positions;
+                               vector<unsigned long int> positions;
                                processIDS.resize(0);
                                
                                ifstream inFASTA;
@@ -485,7 +529,7 @@ int ClassifySeqsCommand::execute(){
                                while(!inFASTA.eof()){
                                        input = getline(inFASTA);
                                        if (input.length() != 0) {
-                                               if(input[0] == '>'){    int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);       }
+                                               if(input[0] == '>'){    unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
                                        }
                                }
                                inFASTA.close();
@@ -493,9 +537,9 @@ int ClassifySeqsCommand::execute(){
                                numFastaSeqs = positions.size();
                                
                                int numSeqsPerProcessor = numFastaSeqs / processors;
-                               
+       
                                for (int i = 0; i < processors; i++) {
-                                       int startPos = positions[ i * numSeqsPerProcessor ];
+                                       unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
                                        if(i == processors - 1){
                                                numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
                                        }
@@ -517,7 +561,7 @@ int ClassifySeqsCommand::execute(){
        #else
                        ifstream inFASTA;
                        openInputFile(fastaFileNames[s], inFASTA);
-                       numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+                       getNumSeqs(inFASTA, numFastaSeqs);
                        inFASTA.close();
                        
                        lines.push_back(new linePair(0, numFastaSeqs));
@@ -526,6 +570,11 @@ int ClassifySeqsCommand::execute(){
        #endif  
 #endif
 
+               m->mothurOutEndLine();
+               m->mothurOut("It took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
+               start = time(NULL);
+
+
                #ifdef USE_MPI  
                        if (pid == 0) {  //this part does not need to be paralellized
                        
@@ -556,10 +605,6 @@ int ClassifySeqsCommand::execute(){
                        }
                #endif
 
-                       m->mothurOutEndLine();
-                       m->mothurOut("It took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
-                       start = time(NULL);
-                       
                        string group = "";
                        if (groupfile != "") {  group = groupfileNames[s]; }
                        
@@ -631,7 +676,7 @@ int ClassifySeqsCommand::execute(){
                        rename(unclass.c_str(), newTaxonomyFile.c_str());
                        
                        m->mothurOutEndLine();
-                       m->mothurOut("It took " + toString(time(NULL) - start) + " secs to create the summary file for  " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
+                       m->mothurOut("It took " + toString(time(NULL) - start) + " secs to create the summary file for " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
                        
                        #ifdef USE_MPI  
                                }
@@ -757,9 +802,9 @@ int ClassifySeqsCommand::driver(linePair* line, string taxFName, string tempTFNa
 
                for(int i=0;i<line->numSeqs;i++){
                        if (m->control_pressed) { return 0; }
-                       
-                       Sequence* candidateSeq = new Sequence(inFASTA);
-                       
+               
+                       Sequence* candidateSeq = new Sequence(inFASTA); gobble(inFASTA);
+               
                        if (candidateSeq->getName() != "") {
                                taxonomy = classify->getTaxonomy(candidateSeq);