]> git.donarmstrong.com Git - mothur.git/blobdiff - classifyseqscommand.cpp
fixed problem with classify.seqs where group totals did not add up. added constructo...
[mothur.git] / classifyseqscommand.cpp
index 580dd9b6e056197bfc48cb39abcfb526237e1487..7ae2ee5041af97ff462909095aa2af0ec24e5b0d 100644 (file)
@@ -226,7 +226,7 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                                                }
                                                
                                        #endif
-                                       if (ableToOpen == 1) {  m->mothurOut("Unable to match group file with fasta file."); m->mothurOutEndLine(); abort = true;       }
+                                       if (ableToOpen == 1) {  m->mothurOut("Unable to match group file with fasta file, not using " + groupfileNames[i] + "."); m->mothurOutEndLine(); groupfileNames[i] = "";        }
                                        
                                }
                        }
@@ -413,8 +413,6 @@ int ClassifySeqsCommand::execute(){
                                //delete inFileName;
 
                                if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax);   MPI_File_close(&outMPITempTax);  delete classify; return 0;  }
-
-                               if(namefile != "") {  MPIReadNamesFile(namefileNames[s]);  }
                                
                                if (pid == 0) { //you are the root process 
                                        
@@ -465,22 +463,7 @@ int ClassifySeqsCommand::execute(){
                                MPI_File_close(&outMPITempTax);
                                
 #else
-                       //read namefile
-                       if(namefile != "") {
-                               nameMap.clear(); //remove old names
-                               
-                               ifstream inNames;
-                               openInputFile(namefileNames[s], inNames);
-                               
-                               string firstCol, secondCol;
-                               while(!inNames.eof()) {
-                                       inNames >> firstCol >> secondCol; gobble(inNames);
-                                       nameMap[firstCol] = getNumNames(secondCol);  //ex. seq1 seq1,seq3,seq5 -> seq1 = 3.
-                               }
-                               inNames.close();
-                       }
-
-       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                        if(processors == 1){
                                ifstream inFASTA;
                                openInputFile(fastaFileNames[s], inFASTA);
@@ -543,18 +526,48 @@ int ClassifySeqsCommand::execute(){
        #endif  
 #endif
 
+               m->mothurOutEndLine();
+               m->mothurOut("It took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
+               start = time(NULL);
+
+
                #ifdef USE_MPI  
                        if (pid == 0) {  //this part does not need to be paralellized
+                       
+                               if(namefile != "") { m->mothurOut("Reading " + namefileNames[s] + "..."); cout.flush();  MPIReadNamesFile(namefileNames[s]);  m->mothurOut("  Done."); m->mothurOutEndLine(); }
+               #else
+                       //read namefile
+                       if(namefile != "") {
+                       
+                           m->mothurOut("Reading " + namefileNames[s] + "..."); cout.flush();
+                               
+                               nameMap.clear(); //remove old names
+                               
+                               ifstream inNames;
+                               openInputFile(namefileNames[s], inNames);
+                               
+                               string firstCol, secondCol;
+                               while(!inNames.eof()) {
+                                       inNames >> firstCol >> secondCol; gobble(inNames);
+                                       
+                                       vector<string> temp;
+                                       splitAtComma(secondCol, temp);
+                       
+                                       nameMap[firstCol] = temp;  
+                               }
+                               inNames.close();
+                               
+                               m->mothurOut("  Done."); m->mothurOutEndLine();
+                       }
                #endif
 
-                       m->mothurOutEndLine();
-                       m->mothurOut("It took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
-                       start = time(NULL);
+                       string group = "";
+                       if (groupfile != "") {  group = groupfileNames[s]; }
                        
-                       PhyloSummary taxaSum(taxonomyFileName, groupfileNames[s]);
+                       PhyloSummary taxaSum(taxonomyFileName, group);
                        
                        if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str()); } delete classify; return 0; }
-                       
+               
                        if (namefile == "") {  taxaSum.summarize(tempTaxonomyFile);  }
                        else {
                                ifstream in;
@@ -562,6 +575,7 @@ int ClassifySeqsCommand::execute(){
                                
                                //read in users taxonomy file and add sequences to tree
                                string name, taxon;
+                               
                                while(!in.eof()){
                                        in >> name >> taxon; gobble(in);
                                        
@@ -570,9 +584,11 @@ int ClassifySeqsCommand::execute(){
                                        if (itNames == nameMap.end()) { 
                                                m->mothurOut(name + " is not in your name file please correct."); m->mothurOutEndLine(); exit(1);
                                        }else{
-                                               for (int i = 0; i < itNames->second; i++) { 
-                                                       taxaSum.addSeqToTree(name, taxon);  //add it as many times as there are identical seqs
+                                               for (int i = 0; i < itNames->second.size(); i++) { 
+                                                       taxaSum.addSeqToTree(itNames->second[i], taxon);  //add it as many times as there are identical seqs
                                                }
+                                               itNames->second.clear();
+                                               nameMap.erase(itNames->first);
                                        }
                                }
                                in.close();
@@ -883,7 +899,11 @@ int ClassifySeqsCommand::MPIReadNamesFile(string nameFilename){
                string firstCol, secondCol;
                while(!iss.eof()) {
                        iss >> firstCol >> secondCol; gobble(iss);
-                       nameMap[firstCol] = getNumNames(secondCol);  //ex. seq1 seq1,seq3,seq5 -> seq1 = 3.
+                       
+                       vector<string> temp;
+                       splitAtComma(secondCol, temp);
+                       
+                       nameMap[firstCol] = temp;  
                }
        
                MPI_File_close(&inMPI);