git.donarmstrong.com Git - mothur.git/blobdiff - chimerauchimecommand.cpp
fixes while testing 1.33.0
[mothur.git] / chimerauchimecommand.cpp
index 3ed61707385f4859a4b4558bd35124fac1b2b417..7a48cf70d092f7d05c8b3bc3656bd66bf17c02d8 100644 (file)
@@ -564,14 +564,12 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
             
             
                        temp = validParameter.validFile(parameters, "dereplicate", false);      
-                       if (temp == "not found") { 
-                               if (groupfile != "")    {  temp = "false";                                      }
-                               else                    {  temp = "true";       }
-                       }
+                       if (temp == "not found") { temp = "false";                      }
                        dups = m->isTrue(temp);
 
                        
                        if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
+            if (hasCount && (templatefile != "self")) { m->mothurOut("You have provided a countfile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
                        if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
                        
                        //look for uchime exe
@@ -677,7 +675,7 @@ int ChimeraUchimeCommand::execute(){
                 int error;
                 if (hasCount) {
                     CountTable ct;
-                    ct.readTable(nameFile);
+                    ct.readTable(nameFile, true, false);
                     for(map<string, string>::iterator it = seqs.begin(); it != seqs.end(); it++) {
                         int num = ct.getNumSeqs(it->first);
                         if (num == 0) { error = 1; }
@@ -727,10 +725,8 @@ int ChimeraUchimeCommand::execute(){
                                
                                if(processors == 1)     {       totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, 0, groups.size(), groups);
                     
-                    //read my own
-                    if (hasCount && !dups) {
-                        CountTable newCount; newCount.readTable(nameFile);
-                        
+                    if (hasCount && dups) {
+                        CountTable c; c.readTable(nameFile, true, false);
                         if (!m->isBlank(newCountFile)) {
                             ifstream in2;
                             m->openInputFile(newCountFile, in2);
@@ -738,12 +734,12 @@ int ChimeraUchimeCommand::execute(){
                             string name, group;
                             while (!in2.eof()) {
                                 in2 >> name >> group; m->gobble(in2);
-                                newCount.setAbund(name, group, 0);
+                                c.setAbund(name, group, 0);
                             }
                             in2.close();
                         }
                         m->mothurRemove(newCountFile);
-                        newCount.printTable(newCountFile);
+                        c.printTable(newCountFile);
                     }
 
                 }else                          {       totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, groups, nameFile, groupFile, fastaFileNames[s]);                        }
@@ -757,25 +753,27 @@ int ChimeraUchimeCommand::execute(){
                     m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found.");     m->mothurOutEndLine();
                     m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
                                }else {
-                    /*if (hasCount) {  //removed empty seqs
+                    
+                    if (hasCount) {
+                        set<string> doNotRemove;
+                        CountTable c; c.readTable(newCountFile, true, true);
+                        vector<string> namesInTable = c.getNamesOfSeqs();
+                        for (int i = 0; i < namesInTable.size(); i++) {
+                            int temp = c.getNumSeqs(namesInTable[i]);
+                            if (temp == 0) {  c.remove(namesInTable[i]);  }
+                            else { doNotRemove.insert((namesInTable[i])); }
+                        }
+                        //remove names we want to keep from accnos file.
+                        set<string> accnosNames = m->readAccnos(accnosFileName);
                         ofstream out2;
                         m->openOutputFile(accnosFileName, out2);
-                        
-                        CountTable c; c.readTable(newCountFile);
-                        vector<string> nseqs = c.getNamesOfSeqs();
-                        vector<string> ngroups = c.getNamesOfGroups();
-                        for (int l = 0; l < nseqs.size(); l++) {
-                            if (c.getNumSeqs(nseqs[l]) == 0) {
-                                c.remove(nseqs[l]);
-                                out2 << nseqs[l] << endl;
-                            }
-                        }
-                        for (int l = 0; l < ngroups.size(); l++) {
-                            if (c.getGroupCount(ngroups[l]) == 0) {  c.removeGroup(ngroups[l]); }
+                        for (set<string>::iterator it = accnosNames.begin(); it != accnosNames.end(); it++) {
+                            if (doNotRemove.count(*it) == 0) {  out2 << (*it) << endl; }
                         }
                         out2.close();
                         c.printTable(newCountFile);
-                    }*/
+                        outputNames.push_back(newCountFile); outputTypes["count"].push_back(newCountFile);
+                    }
                 }
                 
                 if (hasCount) { delete cparser; }
@@ -820,6 +818,11 @@ int ChimeraUchimeCommand::execute(){
                if (itTypes != outputTypes.end()) {
                        if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
                }
+        
+        itTypes = outputTypes.find("count");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+               }
                
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
@@ -840,10 +843,6 @@ int ChimeraUchimeCommand::deconvoluteResults(map<string, string>& uniqueNames, s
                map<string, string>::iterator itUnique;
                int total = 0;
                
-               //edit accnos file
-               ifstream in2; 
-               m->openInputFile(accnosFileName, in2);
-               
                ofstream out2;
                m->openOutputFile(accnosFileName+".temp", out2);
                
@@ -853,27 +852,32 @@ int ChimeraUchimeCommand::deconvoluteResults(map<string, string>& uniqueNames, s
                set<string> chimerasInFile;
                set<string>::iterator itChimeras;
 
-               
-               while (!in2.eof()) {
-                       if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
-                       
-                       in2 >> name; m->gobble(in2);
-                       
-                       //find unique name
-                       itUnique = uniqueNames.find(name);
-                       
-                       if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
-                       else {
-                               itChimeras = chimerasInFile.find((itUnique->second));
-                               
-                               if (itChimeras == chimerasInFile.end()) {
-                                       out2 << itUnique->second << endl;
-                                       chimerasInFile.insert((itUnique->second));
-                                       total++;
-                               }
-                       }
-               }
-               in2.close();
+        if (!m->isBlank(accnosFileName)) {
+            //edit accnos file
+            ifstream in2;
+            m->openInputFile(accnosFileName, in2);
+            
+            while (!in2.eof()) {
+                if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
+                
+                in2 >> name; m->gobble(in2);
+                
+                //find unique name
+                itUnique = uniqueNames.find(name);
+                
+                if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                else {
+                    itChimeras = chimerasInFile.find((itUnique->second));
+                    
+                    if (itChimeras == chimerasInFile.end()) {
+                        out2 << itUnique->second << endl;
+                        chimerasInFile.insert((itUnique->second));
+                        total++;
+                    }
+                }
+            }
+            in2.close();
+        }
                out2.close();
                
                m->mothurRemove(accnosFileName);
@@ -1182,6 +1186,7 @@ int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, stri
                int totalSeqs = 0;
                int numChimeras = 0;
         
+        
         ofstream outCountList;
         if (hasCount && dups) { m->openOutputFile(countlist, outCountList); }
         
@@ -1791,21 +1796,23 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen
                int num = 0;
         
         CountTable newCount;
-        if (hasCount && dups) { newCount.readTable(nameFile); }
+        if (hasCount && dups) { newCount.readTable(nameFile, true, false); }
                
                //sanity check
                if (groups.size() < processors) { processors = groups.size(); }
                
                //divide the groups between the processors
                vector<linePair> lines;
-               int numGroupsPerProcessor = groups.size() / processors;
-               for (int i = 0; i < processors; i++) {
-                       int startIndex =  i * numGroupsPerProcessor;
-                       int endIndex = (i+1) * numGroupsPerProcessor;
-                       if(i == (processors - 1)){      endIndex = groups.size();       }
-                       lines.push_back(linePair(startIndex, endIndex));
-               }
-               
+        int remainingPairs = groups.size();
+        int startIndex = 0;
+        for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) {
+            int numPairs = remainingPairs; //case for last processor
+            if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); }
+            lines.push_back(linePair(startIndex, (startIndex+numPairs))); //startIndex, endIndex
+            startIndex = startIndex + numPairs;
+            remainingPairs = remainingPairs - numPairs;
+        }
+
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)         
                                
                //loop through and create all the processes you want
@@ -1893,7 +1900,7 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen
         
         
 #endif         
-               
+      
         //read my own
         if (hasCount && dups) {
             if (!m->isBlank(accnos + ".byCount")) {
@@ -1909,7 +1916,7 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen
             }
             m->mothurRemove(accnos + ".byCount");
         }
-                               
+       
                //append output files
                for(int i=0;i<processIDS.size();i++){
                        m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
@@ -1941,8 +1948,8 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen
                }
         
         //print new *.pick.count_table
-        if (hasCount && dups) { newCount.printTable(newCountFile); }
-               
+        if (hasCount && dups) {  newCount.printTable(newCountFile);   }
+               
                return num;     
                
        }