]> git.donarmstrong.com Git - mothur.git/commitdiff
<= change for anova and homova. finished chimera.uchime dereplicate=t issue.
authorSarahsWork <sarahswork@imac.westcotts.net>
Wed, 6 Mar 2013 13:54:53 +0000 (08:54 -0500)
committerSarahsWork <sarahswork@imac.westcotts.net>
Wed, 6 Mar 2013 13:54:53 +0000 (08:54 -0500)
amovacommand.cpp
chimerauchimecommand.cpp
counttable.cpp
homovacommand.cpp

index a78aafcbd92a344420c29598f6cac4456404a890..492a096f76cc1de12514731f66514e8d632c0c5f 100644 (file)
@@ -307,7 +307,7 @@ double AmovaCommand::runAMOVA(ofstream& AMOVAFile, map<string, vector<int> > gro
                for(int i=0;i<iters;i++){
                        map<string, vector<int> > randomizedGroup = getRandomizedGroups(groupSampleMap);
                        double ssWithinRand = calcSSWithin(randomizedGroup);
-                       if(ssWithinRand < ssWithinOrig){        counter++;      }
+                       if(ssWithinRand <= ssWithinOrig){       counter++;      }
                }
                
                double pValue = (double)counter / (double) iters;
index 3ed61707385f4859a4b4558bd35124fac1b2b417..cb155eb2e53c3b8ba47c4ed36aa495efaef4460a 100644 (file)
@@ -727,10 +727,8 @@ int ChimeraUchimeCommand::execute(){
                                
                                if(processors == 1)     {       totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, 0, groups.size(), groups);
                     
-                    //read my own
-                    if (hasCount && !dups) {
-                        CountTable newCount; newCount.readTable(nameFile);
-                        
+                    if (hasCount && dups) {
+                        CountTable c; c.readTable(nameFile);
                         if (!m->isBlank(newCountFile)) {
                             ifstream in2;
                             m->openInputFile(newCountFile, in2);
@@ -738,12 +736,12 @@ int ChimeraUchimeCommand::execute(){
                             string name, group;
                             while (!in2.eof()) {
                                 in2 >> name >> group; m->gobble(in2);
-                                newCount.setAbund(name, group, 0);
+                                c.setAbund(name, group, 0);
                             }
                             in2.close();
                         }
                         m->mothurRemove(newCountFile);
-                        newCount.printTable(newCountFile);
+                        c.printTable(newCountFile);
                     }
 
                 }else                          {       totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, groups, nameFile, groupFile, fastaFileNames[s]);                        }
@@ -757,25 +755,26 @@ int ChimeraUchimeCommand::execute(){
                     m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found.");     m->mothurOutEndLine();
                     m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); 
                                }else {
-                    /*if (hasCount) {  //removed empty seqs
-                        ofstream out2;
-                        m->openOutputFile(accnosFileName, out2);
-                        
+                    
+                    if (hasCount) {
+                        set<string> doNotRemove;
                         CountTable c; c.readTable(newCountFile);
-                        vector<string> nseqs = c.getNamesOfSeqs();
-                        vector<string> ngroups = c.getNamesOfGroups();
-                        for (int l = 0; l < nseqs.size(); l++) {
-                            if (c.getNumSeqs(nseqs[l]) == 0) {
-                                c.remove(nseqs[l]);
-                                out2 << nseqs[l] << endl;
-                            }
+                        vector<string> namesInTable = c.getNamesOfSeqs();
+                        for (int i = 0; i < namesInTable.size(); i++) {
+                            int temp = c.getNumSeqs(namesInTable[i]);
+                            if (temp == 0) {  c.remove(namesInTable[i]);  }
+                            else { doNotRemove.insert((namesInTable[i])); }
                         }
-                        for (int l = 0; l < ngroups.size(); l++) {
-                            if (c.getGroupCount(ngroups[l]) == 0) {  c.removeGroup(ngroups[l]); }
+                        //remove names we want to keep from accnos file.
+                        set<string> accnosNames = m->readAccnos(accnosFileName);
+                        ofstream out2;
+                        m->openOutputFile(accnosFileName, out2);
+                        for (set<string>::iterator it = accnosNames.begin(); it != accnosNames.end(); it++) {
+                            if (doNotRemove.count(*it) == 0) {  out2 << (*it) << endl; }
                         }
                         out2.close();
                         c.printTable(newCountFile);
-                    }*/
+                    }
                 }
                 
                 if (hasCount) { delete cparser; }
@@ -1893,7 +1892,7 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen
         
         
 #endif         
-               
+      
         //read my own
         if (hasCount && dups) {
             if (!m->isBlank(accnos + ".byCount")) {
@@ -1909,7 +1908,7 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen
             }
             m->mothurRemove(accnos + ".byCount");
         }
-                               
+       
                //append output files
                for(int i=0;i<processIDS.size();i++){
                        m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
@@ -1941,8 +1940,8 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen
                }
         
         //print new *.pick.count_table
-        if (hasCount && dups) { newCount.printTable(newCountFile); }
-               
+        if (hasCount && dups) {  newCount.printTable(newCountFile);   }
+               
                return num;     
                
        }
index ad0b2dadfcba7eb2f866b44d3d9b3011242b096a..c16bf9227a51580cac00de78a215b89e2a93e001 100644 (file)
@@ -283,7 +283,23 @@ int CountTable::printTable(string file) {
         for (int i = 0; i < groups.size(); i++) { out << groups[i] << '\t'; }
         out << endl;
         
-        for (map<string, int>::iterator itNames = indexNameMap.begin(); itNames != indexNameMap.end(); itNames++) {
+        map<int, string> reverse; //use this to preserve order
+        for (map<string, int>::iterator it = indexNameMap.begin(); it !=indexNameMap.end(); it++) { reverse[it->second] = it->first;  }
+        
+        for (int i = 0; i < totals.size(); i++) {
+            map<int, string>::iterator itR = reverse.find(i);
+            
+            if (itR != reverse.end()) { //will equal end if seqs were removed because remove just removes from indexNameMap
+                out << itR->second << '\t' << totals[i] << '\t';
+                if (hasGroups) {
+                    for (int j = 0; j < groups.size(); j++) {
+                        out << counts[i][j] << '\t';
+                    }
+                }
+                out << endl;
+            }
+        }
+        /*for (map<string, int>::iterator itNames = indexNameMap.begin(); itNames != indexNameMap.end(); itNames++) {
             out << itNames->first << '\t' << totals[itNames->second] << '\t';
             if (hasGroups) {
                 
@@ -292,7 +308,7 @@ int CountTable::printTable(string file) {
                 }
             }
             out << endl;
-        }
+        }*/
         out.close();
         return 0;
     }
@@ -364,7 +380,7 @@ int CountTable::getGroupCount(string groupName) {
         if (hasGroups) {
             map<string, int>::iterator it = indexGroupMap.find(groupName);
             if (it == indexGroupMap.end()) {
-                m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+                m->mothurOut("[ERROR]: group " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
             }else { 
                 return totalGroups[it->second];
             }
@@ -384,11 +400,11 @@ int CountTable::getGroupCount(string seqName, string groupName) {
         if (hasGroups) {
             map<string, int>::iterator it = indexGroupMap.find(groupName);
             if (it == indexGroupMap.end()) {
-                m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+                m->mothurOut("[ERROR]: group " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
             }else { 
                 map<string, int>::iterator it2 = indexNameMap.find(seqName);
                 if (it2 == indexNameMap.end()) {
-                    m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+                    m->mothurOut("[ERROR]: seq " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
                 }else { 
                     return counts[it2->second][it->second];
                 }
index 49b8b64a0ebea8d5c6dae20118a19e0b814806b1..488b68e0b6b69a8eb8e351d39be9ff09193fb36f 100644 (file)
@@ -308,7 +308,7 @@ double HomovaCommand::runHOMOVA(ofstream& HOMOVAFile, map<string, vector<int> >
                        vector<double> ssWithinRandVector;
                        map<string, vector<int> > randomizedGroup = getRandomizedGroups(groupSampleMap);
                        double bValueRand = calcBValue(randomizedGroup, ssWithinRandVector);
-                       if(bValueRand > bValueOrig){    counter++;      }
+                       if(bValueRand >= bValueOrig){   counter++;      }
                }
                
                double pValue = (double) counter / (double) iters;