git.donarmstrong.com Git - mothur.git/commitdiff
*** empty log message ***
author pschloss <pschloss>
Thu, 3 Feb 2011 12:29:09 +0000 (12:29 +0000)
committer pschloss <pschloss>
Thu, 3 Feb 2011 12:29:09 +0000 (12:29 +0000)
Mothur.xcodeproj/project.pbxproj
corraxescommand.cpp
distancedb.cpp
eachgapdist.h
getseqscommand.cpp
seqerrorcommand.cpp
seqerrorcommand.h

index 2f3cc5520b1b3454081b9db51fe4d75662070c18..112cf3c7fdc7305c62bce17667f81232492860f6 100644 (file)
@@ -7,6 +7,7 @@
        objects = {
 
 /* Begin PBXBuildFile section */
+               7E6BE10A12F710D8007ADDBE /* refchimeratest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7E6BE10912F710D8007ADDBE /* refchimeratest.cpp */; };
                8DD76FB00486AB0100D96B5E /* mothur.1 in CopyFiles */ = {isa = PBXBuildFile; fileRef = C6A0FF2C0290799A04C91782 /* mothur.1 */; };
                A70332B712D3A13400761E33 /* makefile in Sources */ = {isa = PBXBuildFile; fileRef = A70332B512D3A13400761E33 /* makefile */; };
                A713EBAC12DC7613000092AC /* readphylipvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBAB12DC7613000092AC /* readphylipvector.cpp */; };
 /* End PBXCopyFilesBuildPhase section */
 
 /* Begin PBXFileReference section */
+               7E6BE10812F710D8007ADDBE /* refchimeratest.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = refchimeratest.h; sourceTree = "<group>"; };
+               7E6BE10912F710D8007ADDBE /* refchimeratest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = refchimeratest.cpp; sourceTree = "<group>"; };
                8DD76FB20486AB0100D96B5E /* Mothur */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = Mothur; sourceTree = BUILT_PRODUCTS_DIR; };
                A70332B512D3A13400761E33 /* makefile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.make; path = makefile; sourceTree = "<group>"; };
                A713EBAA12DC7613000092AC /* readphylipvector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readphylipvector.h; sourceTree = "<group>"; };
                A7E9BA3812D3956100DA6239 /* commands */ = {
                        isa = PBXGroup;
                        children = (
+                               7E6BE10812F710D8007ADDBE /* refchimeratest.h */,
+                               7E6BE10912F710D8007ADDBE /* refchimeratest.cpp */,
                                A7E9B6AE12D37EC400DA6239 /* command.hpp */,
                                A7E9B65112D37EC300DA6239 /* aligncommand.cpp */,
                                A7E9B65212D37EC300DA6239 /* aligncommand.h */,
                        attributes = {
                                ORGANIZATIONNAME = "Schloss Lab";
                        };
-                       buildConfigurationList = 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "mothur" */;
+                       buildConfigurationList = 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "Mothur" */;
                        compatibilityVersion = "Xcode 3.1";
                        developmentRegion = English;
                        hasScannedForEncodings = 1;
                                A713EBED12DC7C5E000092AC /* nmdscommand.cpp in Sources */,
                                A727864412E9E28C00F86ABA /* removerarecommand.cpp in Sources */,
                                A71FE12C12EDF72400963CA7 /* mergegroupscommand.cpp in Sources */,
+                               7E6BE10A12F710D8007ADDBE /* refchimeratest.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
                                ARCHS = "$(ARCHS_STANDARD_32_64_BIT)";
                                DEPLOYMENT_LOCATION = NO;
                                GCC_C_LANGUAGE_STANDARD = gnu99;
-                               GCC_OPTIMIZATION_LEVEL = 0;
+                               GCC_OPTIMIZATION_LEVEL = 3;
                                GCC_PREPROCESSOR_DEFINITIONS = (
                                        "MOTHUR_FILES=\"\\\"../release\\\"\"",
                                        "VERSION=\"\\\"1.16.0\\\"\"",
                                GCC_WARN_ABOUT_RETURN_TYPE = YES;
                                GCC_WARN_UNUSED_VARIABLE = YES;
                                INSTALL_PATH = "";
+                               MACH_O_TYPE = mh_execute;
                                ONLY_ACTIVE_ARCH = YES;
                                OTHER_CPLUSPLUSFLAGS = (
                                        "-DUSE_READLINE",
                                ARCHS = "$(ARCHS_STANDARD_32_64_BIT)";
                                DEPLOYMENT_LOCATION = NO;
                                GCC_C_LANGUAGE_STANDARD = gnu99;
+                               GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
+                               GCC_MODEL_TUNING = "";
+                               GCC_OPTIMIZATION_LEVEL = 3;
                                GCC_PREPROCESSOR_DEFINITIONS = (
                                        "MOTHUR_FILES=\"\\\"../release\\\"\"",
                                        "VERSION=\"\\\"1.15.0\\\"\"",
                                GCC_WARN_UNUSED_VALUE = YES;
                                GCC_WARN_UNUSED_VARIABLE = YES;
                                INSTALL_PATH = "";
+                               MACH_O_TYPE = mh_execute;
                                OTHER_CPLUSPLUSFLAGS = (
                                        "-DUSE_READLINE",
                                        "-DBIT_VERSION",
                        defaultConfigurationIsVisible = 0;
                        defaultConfigurationName = Release;
                };
-               1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "mothur" */ = {
+               1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "Mothur" */ = {
                        isa = XCConfigurationList;
                        buildConfigurations = (
                                1DEB928A08733DD80010E9CD /* Debug */,
index 8ebd22c50144bf60ee5c7ff0a6f608745707a045..db2e9891738cdf57b78dffe1a9c232f18e5de31c 100644 (file)
@@ -283,11 +283,11 @@ int CorrAxesCommand::execute(){
                out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
                
                //output headings
-               if (metadatafile == "") {  out << "OTU\t";      }
-               else {  out << "Feature\t";                                             }
+               if (metadatafile == "") {  out << "OTU";        }
+               else {  out << "Feature";                                               }
 
-               for (int i = 0; i < numaxes; i++) { out << "axis" << (i+1) << '\t'; }
-               out << endl;
+               for (int i = 0; i < numaxes; i++) { out << '\t' << "axis" << (i+1); }
+               out << "\tlength" << endl;
                
                if (method == "pearson")                {  calcPearson(axes, out);      }
                else if (method == "spearman")  {  calcSpearman(axes, out); }
@@ -329,9 +329,9 @@ int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& ou
           //for each otu
           for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
                   
-                  if (metadatafile == "") {  out << i+1 << '\t';       }
-                  else {  out << metadataLabels[i] << '\t';            }
-                  
+                  if (metadatafile == "") {  out << i+1;       }
+                  else {  out << metadataLabels[i];            }
+                                  
                   //find the averages this otu - Y
                   float sumOtu = 0.0;
                   for (int j = 0; j < lookupFloat.size(); j++) {
@@ -339,6 +339,8 @@ int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& ou
                   }
                   float Ybar = sumOtu / (float) lookupFloat.size();
                   
+                  vector<float> rValues(averageAxes.size());
+
                   //find r value for each axis
                   for (int k = 0; k < averageAxes.size(); k++) {
                           
@@ -358,11 +360,15 @@ int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& ou
                           double denom = (sqrt(denomTerm1) * sqrt(denomTerm2));
                           
                           r = numerator / denom;
-                          
-                          out << r << '\t'
+                          rValues[k] = r;
+                          out << '\t' << r
                   }
                   
-                  out << endl;
+                  double sum = 0;
+                  for(int k=0;k<rValues.size();k++){
+                          sum += rValues[k] * rValues[k];
+                  }
+                  out << '\t' << sqrt(sum) << endl;
           }
                   
           return 0;
@@ -422,8 +428,8 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
                //for each otu
                for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
                        
-                       if (metadatafile == "") {  out << i+1 << '\t';  }
-                       else {  out << metadataLabels[i] << '\t';               }
+                       if (metadatafile == "") {  out << i+1;  }
+                       else {  out << metadataLabels[i];               }
                        
                        //find the ranks of this otu - Y
                        vector<spearmanRank> otuScores;
@@ -458,6 +464,7 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
                                }
                        }
                        
+                       vector<double> pValues(numaxes);
                        //calc spearman ranks for each axis for this otu
                        for (int j = 0; j < numaxes; j++) {
                                
@@ -473,11 +480,16 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
                                int n = lookupFloat.size();
                                double p = 1.0 - ((6 * di) / (float) (n * ((n*n) - 1)));
                                
-                               out << p << '\t';
+                               out  << '\t' << p;
+                               pValues[j] = p;
+
                        }
-                       
-                       
-                       out << endl;
+
+                       double sum = 0;
+                       for(int k=0;k<numaxes;k++){
+                               sum += pValues[k] * pValues[k];
+                       }
+                       out << '\t' << sqrt(sum) << endl;
                }
                
                return 0;
@@ -534,8 +546,8 @@ int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& ou
                //for each otu
                for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
                
-                       if (metadatafile == "") {  out << i+1 << '\t';  }
-                       else {  out << metadataLabels[i] << '\t';               }
+                       if (metadatafile == "") {  out << i+1;  }
+                       else {  out << metadataLabels[i];               }
                        
                        //find the ranks of this otu - Y
                        vector<spearmanRank> otuScores;
@@ -569,6 +581,7 @@ int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& ou
                                        }
                                }
                        }
+                       vector<double> pValues(numaxes);
                        
                        //calc spearman ranks for each axis for this otu
                        for (int j = 0; j < numaxes; j++) {
@@ -597,10 +610,16 @@ int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& ou
                                
                                double p = ( (4 * P) / (float) (n * (n - 1)) ) - 1.0;
                                
-                               out << p << '\t';
+                               out << '\t' << p;
+                               pValues[j] = p;
+
                        }
                        
-                       out << endl;
+                       double sum = 0;
+                       for(int k=0;k<numaxes;k++){
+                               sum += pValues[k] * pValues[k];
+                       }
+                       out << '\t' << sqrt(sum) << endl;
                }
                
                return 0;
index b5c22b35bf7a58addcff79651e6841b9506ec8e2..8d0c6298578c4311a59407610ae28a87f48ee493 100644 (file)
 #include "database.hpp"
 #include "sequence.hpp"
 #include "distancedb.hpp"
-#include "eachgapdist.h"
+#include "onegapignore.h"
 
 /**************************************************************************************************/
 DistanceDB::DistanceDB() { 
        try {
                templateAligned = true;  
                templateSeqsLength = 0; 
-               distCalculator = new eachGapDist();
+               distCalculator = new oneGapIgnoreTermGapDist();
        }
        catch(exception& e) {
                m->errorOut(e, "DistanceDB", "DistanceDB");
@@ -29,7 +29,11 @@ DistanceDB::DistanceDB() {
 void DistanceDB::addSequence(Sequence seq) {
        try {
                //are the template sequences aligned
-               if (!isAligned(seq.getAligned())) { templateAligned = false; m->mothurOut(seq.getName() + " is not aligned. Sequences must be aligned to use the distance method."); m->mothurOutEndLine(); }
+               if (!isAligned(seq.getAligned())) {
+                       templateAligned = false;
+                       m->mothurOut(seq.getName() + " is not aligned. Sequences must be aligned to use the distance method.");
+                       m->mothurOutEndLine(); 
+               }
                
                if (templateSeqsLength == 0) { templateSeqsLength = seq.getAligned().length(); }
                                
@@ -51,7 +55,11 @@ vector<int> DistanceDB::findClosestSequences(Sequence* query, int numWanted){
                
                searchScore = -1.0;
        
-               if (numWanted > data.size()) { m->mothurOut("numwanted is larger than the number of template sequences, using "+ toString(data.size()) + "."); m->mothurOutEndLine(); numWanted = data.size(); }
+               if (numWanted > data.size()){
+                       m->mothurOut("numwanted is larger than the number of template sequences, using "+ toString(data.size()) + ".");
+                       m->mothurOutEndLine();
+                       numWanted = data.size();
+               }
                
                if (sequence.length() != templateSeqsLength) { templateSameLength = false; }
                
@@ -93,13 +101,13 @@ vector<int> DistanceDB::findClosestSequences(Sequence* query, int numWanted){
                                                smallDist = dist;
                                        }
                                }
-                               
                                searchScore = smallDist;
                                topMatches.push_back(bestIndex);
                        }
                
                }else{
-                       m->mothurOut("cannot find closest matches using distance method for " + query->getName() + " without aligned template sequences of the same length."); m->mothurOutEndLine();
+                       m->mothurOut("cannot find closest matches using distance method for " + query->getName() + " without aligned template sequences of the same length.");
+                       m->mothurOutEndLine();
                        exit(1);
                }
                
index 9034dbebbeb8ddb49fbf256663987b7f5a60df8c..d66b40c8878cb41cf933fbf3589e82369b72cd4c 100644 (file)
@@ -49,8 +49,6 @@ public:
                
                if(length == 0) {       dist = 1.0000;                                                          }
                else                    {       dist = ((double)diff  / (double)length);        }
-
-               
        }
 };
 
index a191515cb601408b2c7d884b895f7e14d668d798..9f2b5060fcaa715f9a2fd3fc484ba585d6322da9 100644 (file)
@@ -188,8 +188,6 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
                        if (accnosfile == "not open") { abort = true; }
                        else if (accnosfile == "not found") {  accnosfile = "";  m->mothurOut("You must provide an accnos file."); m->mothurOutEndLine(); abort = true; }       
                        
-                       accnosfile2 = validParameter.validFile(parameters, "accnos2", true);
-                       if (accnosfile2 == "not open") { abort = true; }
                        if (accnosfile2 == "not found") { accnosfile2 = ""; }
                        
                        fastafile = validParameter.validFile(parameters, "fasta", true);
index fe6d4ddf8699e03eab527033f763692288b44072..d2c0fcb64f92652a14dee26e40e89ae5dc9a19dd 100644 (file)
@@ -10,6 +10,7 @@
 #include "seqerrorcommand.h"
 #include "reportfile.h"
 #include "qualityscores.h"
+#include "refchimeratest.h"
 
 //**********************************************************************************************************************
 vector<string> SeqErrorCommand::getValidParameters(){  
@@ -81,7 +82,7 @@ SeqErrorCommand::SeqErrorCommand(string option)  {
                        string temp;
                        
                        //valid paramters for this command
-                       string AlignArray[] =  {"query", "reference", "name", "qfile", "report", "threshold", "inputdir", "outputdir"};
+                       string AlignArray[] =  {"query", "reference", "name", "qfile", "report", "threshold", "inputdir", "ignorechimeras", "outputdir"};
                        
                        vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
                        
@@ -192,6 +193,9 @@ SeqErrorCommand::SeqErrorCommand(string option)  {
                        temp = validParameter.validFile(parameters, "threshold", false);        if (temp == "not found") { temp = "1.00"; }
                        convert(temp, threshold);  
                        
+                       temp = validParameter.validFile(parameters, "ignorechimeras", false);   if (temp == "not found") { temp = "1"; }
+                       convert(temp, ignoreChimeras);  
+
                        substitutionMatrix.resize(6);
                        for(int i=0;i<6;i++){   substitutionMatrix[i].assign(6,0);      }
                }
@@ -228,16 +232,15 @@ SeqErrorCommand::~SeqErrorCommand(){
 int SeqErrorCommand::execute(){
        try{
                if (abort == true) { return 0; }
-               
-               errorSummaryFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.summary";
+
+               string errorSummaryFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.summary";
                m->openOutputFile(errorSummaryFileName, errorSummaryFile);
                outputNames.push_back(errorSummaryFileName); outputTypes["error.summary"].push_back(errorSummaryFileName);
                printErrorHeader();
                
-               errorSeqFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq";
+               string errorSeqFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq";
                m->openOutputFile(errorSeqFileName, errorSeqFile);
                outputNames.push_back(errorSeqFileName); outputTypes["error.seq"].push_back(errorSeqFileName);
-               printErrorHeader();
 
                getReferences();        //read in reference sequences - make sure there's no ambiguous bases
 
@@ -281,8 +284,6 @@ int SeqErrorCommand::execute(){
                qScoreErrorMap['i'].assign(41, 0);
                qScoreErrorMap['a'].assign(41, 0);
                
-               
-               
                map<char, vector<int> > errorForward;
                errorForward['m'].assign(1000,0);
                errorForward['s'].assign(1000,0);
@@ -295,39 +296,47 @@ int SeqErrorCommand::execute(){
                errorReverse['s'].assign(1000,0);
                errorReverse['i'].assign(1000,0);
                errorReverse['d'].assign(1000,0);
-               errorReverse['a'].assign(1000,0);
+               errorReverse['a'].assign(1000,0);       
                
+
+               string errorChimeraFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.chimera";
+               RefChimeraTest chimeraTest(referenceSeqs, errorChimeraFileName);
+               outputNames.push_back(errorChimeraFileName); outputTypes["error.chimera"].push_back(errorChimeraFileName);
                
+               int index = 0;
+               bool ignoreSeq = 0;
                
                while(queryFile){
                        
                        if (m->control_pressed) { errorSummaryFile.close();     errorSeqFile.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
-                       
-                       Compare minCompare;
+               
                        Sequence query(queryFile);
-                       
-                       for(int i=0;i<numRefs;i++){
-                               Compare currCompare = getErrors(query, referenceSeqs[i]);
-                               
-                               if(currCompare.errorRate < minCompare.errorRate){
-                                       minCompare = currCompare;
-                               }
-                       }
+                                               
+                       int numParentSeqs = chimeraTest.analyzeQuery(query.getName(), query.getAligned());
+                       int closestRefIndex = chimeraTest.getClosestRefIndex();
+
+                       if(numParentSeqs > 1 && ignoreChimeras == 1)    {       ignoreSeq = 1;  }
+                       else                                                                                    {       ignoreSeq = 0;  }
+
 
+                       Compare minCompare = getErrors(query, referenceSeqs[closestRefIndex]);
+                       
                        if(namesFileName != ""){
                                it = weights.find(query.getName());
                                minCompare.weight = it->second;
                        }
                        else    {       minCompare.weight = 1;  }
 
-                       printErrorData(minCompare);
+                       printErrorData(minCompare, numParentSeqs);
 
-                       for(int i=0;i<minCompare.total;i++){
-                               char letter = minCompare.sequence[i];
-                               errorForward[letter][i] += minCompare.weight;
-                               errorReverse[letter][minCompare.total-i-1] += minCompare.weight;                                
+                       if(!ignoreSeq){
+                               for(int i=0;i<minCompare.total;i++){
+                                       char letter = minCompare.sequence[i];
+                                       errorForward[letter][i] += minCompare.weight;
+                                       errorReverse[letter][minCompare.total-i-1] += minCompare.weight;                                
+                               }
                        }
-                                               
+                       
                        if(qualFileName != "" && reportFileName != ""){
                                report = ReportFile(reportFile);
                                
@@ -336,12 +345,15 @@ int SeqErrorCommand::execute(){
                                int endBase = report.getQueryEnd();
 
                                quality = QualityScores(qualFile, origLength);
-                               quality.updateQScoreErrorMap(qScoreErrorMap, minCompare.sequence, startBase, endBase, minCompare.weight);
-                               quality.updateForwardMap(qualForwardMap, startBase, endBase, minCompare.weight);
-                               quality.updateReverseMap(qualReverseMap, startBase, endBase, minCompare.weight);
+
+                               if(!ignoreSeq){
+                                       quality.updateQScoreErrorMap(qScoreErrorMap, minCompare.sequence, startBase, endBase, minCompare.weight);
+                                       quality.updateForwardMap(qualForwardMap, startBase, endBase, minCompare.weight);
+                                       quality.updateReverseMap(qualReverseMap, startBase, endBase, minCompare.weight);
+                               }
                        }                       
                        
-                       if(minCompare.errorRate < threshold){
+                       if(minCompare.errorRate < threshold && !ignoreSeq){
                                totalBases += (minCompare.total * minCompare.weight);
                                totalMatches += minCompare.matches * minCompare.weight;
                                if(minCompare.mismatches > maxMismatch){
@@ -352,116 +364,21 @@ int SeqErrorCommand::execute(){
                                numSeqs++;
                        }
                        
-                       
+                       index++;
+                       if(index % 1000 == 0){  cout << index << endl;  }
                }
                queryFile.close();
-                               
-               if(qualFileName != "" && reportFileName != ""){
-                       string errorQualityFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.quality";
-                       ofstream errorQualityFile;
-                       m->openOutputFile(errorQualityFileName, errorQualityFile);
-                       outputNames.push_back(errorQualityFileName);  outputTypes["error.quality"].push_back(errorQualityFileName);
-                       
-                       errorQualityFile << "qscore\tmatches\tsubstitutions\tinsertions\tambiguous" << endl;
-                       for(int i=0;i<41;i++){
-                               errorQualityFile << i << '\t' << qScoreErrorMap['m'][i] << '\t' << qScoreErrorMap['s'][i] << '\t' << qScoreErrorMap['i'][i] << '\t'<< qScoreErrorMap['a'][i] << endl;
-                       }
-                       errorQualityFile.close();
-                       
-
-                       
-                       int lastRow = 0;
-                       int lastColumn = 0;
-                       
-                       for(int i=0;i<qualForwardMap.size();i++){
-                               for(int j=0;j<qualForwardMap[i].size();j++){
-                                       if(qualForwardMap[i][j] != 0){
-                                               if(lastRow < i)         {       lastRow = i+2;  }
-                                               if(lastColumn < j)      {       lastColumn = j+2;       }
-                                       }
-                                       
-                               }
-                       }
-
-                       
-                       string qualityForwardFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.qual.forward";
-                       ofstream qualityForwardFile;
-                       m->openOutputFile(qualityForwardFileName, qualityForwardFile);
-                       outputNames.push_back(errorQualityFileName);  outputTypes["error.qual.forward"].push_back(qualityForwardFileName);
-                       
-                       for(int i=0;i<lastColumn;i++){  qualityForwardFile << '\t' << i;        }       qualityForwardFile << endl;
-                       for(int i=0;i<lastRow;i++){
-                               
-                               if (m->control_pressed) { qualityForwardFile.close(); errorSummaryFile.close(); errorSeqFile.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
-                               
-                               qualityForwardFile << i+1;
-                               for(int j=0;j<lastColumn;j++){
-                                       qualityForwardFile << '\t' << qualForwardMap[i][j];
-                               }
-                               qualityForwardFile << endl;
-                       }
-                       qualityForwardFile.close();
-                       
-                       
-                       string qualityReverseFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.qual.reverse";
-                       ofstream qualityReverseFile;
-                       m->openOutputFile(qualityReverseFileName, qualityReverseFile);
-                       outputNames.push_back(errorQualityFileName);  outputTypes["error.qual.reverse"].push_back(qualityReverseFileName);
+               errorSummaryFile.close();       
+               errorSeqFile.close();
 
-                       for(int i=0;i<lastColumn;i++){  qualityReverseFile << '\t' << i;        }       qualityReverseFile << endl;
-                       for(int i=0;i<lastRow;i++){
-                               
-                               if (m->control_pressed) { qualityReverseFile.close(); errorSummaryFile.close(); errorSeqFile.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
-                               
-                               qualityReverseFile << i+1;
-                               for(int j=0;j<lastColumn;j++){
-                                       qualityReverseFile << '\t' << qualReverseMap[i][j];
-                               }
-                               qualityReverseFile << endl;
-                       }
-                       qualityReverseFile.close();
-               }
-               
-               
-               string errorForwardFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq.forward";
-               ofstream errorForwardFile;
-               m->openOutputFile(errorForwardFileName, errorForwardFile);
-               outputNames.push_back(errorForwardFileName);  outputTypes["error.forward"].push_back(errorForwardFileName);
-               
-               errorForwardFile << "position\ttotalseqs\tmatch\tsubstitution\tinsertion\tdeletion\tambiguous" << endl;
-               for(int i=0;i<1000;i++){
-                       float match = (float)errorForward['m'][i];
-                       float subst = (float)errorForward['s'][i];
-                       float insert = (float)errorForward['i'][i];
-                       float del = (float)errorForward['d'][i];
-                       float amb = (float)errorForward['a'][i];
-                       float total = match + subst + insert + del + amb;
-                       if(total == 0){ break;  }
-                       errorForwardFile << i+1 << '\t' << total << '\t' << match/total  << '\t' << subst/total  << '\t' << insert/total  << '\t' << del/total  << '\t' << amb/total << endl;
+               if(qualFileName != "" && reportFileName != ""){         
+                       printErrorQuality(qScoreErrorMap);
+                       printQualityFR(qualForwardMap, qualReverseMap);
                }
-               errorForwardFile.close();
-               
-               if (m->control_pressed) { errorSummaryFile.close();     errorSeqFile.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
-               
-               string errorReverseFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq.reverse";
-               ofstream errorReverseFile;
-               m->openOutputFile(errorReverseFileName, errorReverseFile);
-               outputNames.push_back(errorReverseFileName);  outputTypes["error.reverse"].push_back(errorReverseFileName);
                
-               errorReverseFile << "position\ttotalseqs\tmatch\tsubstitution\tinsertion\tdeletion\tambiguous" << endl;
-               for(int i=0;i<1000;i++){
-                       float match = (float)errorReverse['m'][i];
-                       float subst = (float)errorReverse['s'][i];
-                       float insert = (float)errorReverse['i'][i];
-                       float del = (float)errorReverse['d'][i];
-                       float amb = (float)errorReverse['a'][i];
-                       float total = match + subst + insert + del + amb;
-                       if(total == 0){ break;  }
-                       errorReverseFile << i+1 << '\t' << total << '\t' << match/total  << '\t' << subst/total  << '\t' << insert/total  << '\t' << del/total  << '\t' << amb/total << endl;
-               }
-               errorReverseFile.close();
+               printErrorFRFile(errorForward, errorReverse);
                
-               if (m->control_pressed) { errorSummaryFile.close();     errorSeqFile.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
+               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
 
                string errorCountFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.count";
                ofstream errorCountFile;
@@ -476,50 +393,10 @@ int SeqErrorCommand::execute(){
                }
                errorCountFile.close();
                
-               if (m->control_pressed) { errorSummaryFile.close();     errorSeqFile.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
+               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
 
-               string subMatrixFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.matrix";
-               ofstream subMatrixFile;
-               m->openOutputFile(subMatrixFileName, subMatrixFile);
-               outputNames.push_back(subMatrixFileName);  outputTypes["error.matrix"].push_back(subMatrixFileName);
-               vector<string> bases(6);
-               bases[0] = "A";
-               bases[1] = "T";
-               bases[2] = "G";
-               bases[3] = "C";
-               bases[4] = "Gap";
-               bases[5] = "N";
-               vector<int> refSums(5,1);
-               
-               for(int i=0;i<5;i++){
-                       subMatrixFile << "\tr" << bases[i];
-                       
-                       for(int j=0;j<6;j++){
-                               refSums[i] += substitutionMatrix[i][j];                         
-                       }
-                       
-               }
-               subMatrixFile << endl;
-               
-               for(int i=0;i<6;i++){
-                       subMatrixFile << 'q' << bases[i];
-                       for(int j=0;j<5;j++){
-                               subMatrixFile << '\t' << substitutionMatrix[j][i];                              
-                       }
-                       subMatrixFile << endl;
-               }
-               subMatrixFile << "total";
-               for(int i=0;i<5;i++){
-                       subMatrixFile << '\t' << refSums[i];
-               }
-               subMatrixFile << endl;
-               subMatrixFile.close();
-               
-               errorSummaryFile.close();       
-               errorSeqFile.close();
-               
-               if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
-               
+               printSubMatrix();
+                               
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
                for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
@@ -675,7 +552,7 @@ void SeqErrorCommand::printErrorHeader(){
        try {
                errorSummaryFile << "query\treference\tweight\t";
                errorSummaryFile << "AA\tAT\tAG\tAC\tTA\tTT\tTG\tTC\tGA\tGT\tGG\tGC\tCA\tCT\tCG\tCC\tNA\tNT\tNG\tNC\tAi\tTi\tGi\tCi\tNi\tdA\tdT\tdG\tdC\t";
-               errorSummaryFile << "insertions\tdeletions\tsubstitutions\tambig\tmatches\tmismatches\ttotal\terror\n";
+               errorSummaryFile << "insertions\tdeletions\tsubstitutions\tambig\tmatches\tmismatches\ttotal\terror\tnumparents\n";
                
                errorSummaryFile << setprecision(6);
                errorSummaryFile.setf(ios::fixed);
@@ -688,7 +565,7 @@ void SeqErrorCommand::printErrorHeader(){
 
 //***************************************************************************************************************
 
-void SeqErrorCommand::printErrorData(Compare error){
+void SeqErrorCommand::printErrorData(Compare error, int numParentSeqs){
        try {
                errorSummaryFile << error.queryName << '\t' << error.refName << '\t' << error.weight << '\t';
                errorSummaryFile << error.AA << '\t' << error.AT << '\t' << error.AG << '\t' << error.AC << '\t';
@@ -703,48 +580,48 @@ void SeqErrorCommand::printErrorData(Compare error){
                errorSummaryFile << error.dA + error.dT + error.dG + error.dC << '\t';                  //deletions
                errorSummaryFile << error.mismatches - (error.Ai + error.Ti + error.Gi + error.Ci) - (error.dA + error.dT + error.dG + error.dC) - (error.NA + error.NT + error.NG + error.NC + error.Ni) << '\t';      //substitutions
                errorSummaryFile << error.NA + error.NT + error.NG + error.NC + error.Ni << '\t';       //ambiguities
-               errorSummaryFile << error.matches << '\t' << error.mismatches << '\t' << error.total << '\t' << error.errorRate << endl;
+               errorSummaryFile << error.matches << '\t' << error.mismatches << '\t' << error.total << '\t' << error.errorRate << '\t' << numParentSeqs << endl;
                
                errorSeqFile << '>' << error.queryName << "\tref:" << error.refName << '\n' << error.sequence << endl;
                
                
                int a=0;                int t=1;                int g=2;                int c=3;
                int gap=4;              int n=5;
-               
-               substitutionMatrix[a][a] += error.weight * error.AA;
-               substitutionMatrix[a][t] += error.weight * error.TA;
-               substitutionMatrix[a][g] += error.weight * error.GA;
-               substitutionMatrix[a][c] += error.weight * error.CA;
-               substitutionMatrix[a][gap] += error.weight * error.dA;
-               substitutionMatrix[a][n] += error.weight * error.NA;
-
-               substitutionMatrix[t][a] += error.weight * error.AT;
-               substitutionMatrix[t][t] += error.weight * error.TT;
-               substitutionMatrix[t][g] += error.weight * error.GT;
-               substitutionMatrix[t][c] += error.weight * error.CT;
-               substitutionMatrix[t][gap] += error.weight * error.dT;
-               substitutionMatrix[t][n] += error.weight * error.NT;
-
-               substitutionMatrix[g][a] += error.weight * error.AG;
-               substitutionMatrix[g][t] += error.weight * error.TG;
-               substitutionMatrix[g][g] += error.weight * error.GG;
-               substitutionMatrix[g][c] += error.weight * error.CG;
-               substitutionMatrix[g][gap] += error.weight * error.dG;
-               substitutionMatrix[g][n] += error.weight * error.NG;
-
-               substitutionMatrix[c][a] += error.weight * error.AC;
-               substitutionMatrix[c][t] += error.weight * error.TC;
-               substitutionMatrix[c][g] += error.weight * error.GC;
-               substitutionMatrix[c][c] += error.weight * error.CC;
-               substitutionMatrix[c][gap] += error.weight * error.dC;
-               substitutionMatrix[c][n] += error.weight * error.NC;
-
-               substitutionMatrix[gap][a] += error.weight * error.Ai;
-               substitutionMatrix[gap][t] += error.weight * error.Ti;
-               substitutionMatrix[gap][g] += error.weight * error.Gi;
-               substitutionMatrix[gap][c] += error.weight * error.Ci;
-               substitutionMatrix[gap][n] += error.weight * error.Ni;
-               
+               if(numParentSeqs == 1 || ignoreChimeras == 0){
+                       substitutionMatrix[a][a] += error.weight * error.AA;
+                       substitutionMatrix[a][t] += error.weight * error.TA;
+                       substitutionMatrix[a][g] += error.weight * error.GA;
+                       substitutionMatrix[a][c] += error.weight * error.CA;
+                       substitutionMatrix[a][gap] += error.weight * error.dA;
+                       substitutionMatrix[a][n] += error.weight * error.NA;
+
+                       substitutionMatrix[t][a] += error.weight * error.AT;
+                       substitutionMatrix[t][t] += error.weight * error.TT;
+                       substitutionMatrix[t][g] += error.weight * error.GT;
+                       substitutionMatrix[t][c] += error.weight * error.CT;
+                       substitutionMatrix[t][gap] += error.weight * error.dT;
+                       substitutionMatrix[t][n] += error.weight * error.NT;
+
+                       substitutionMatrix[g][a] += error.weight * error.AG;
+                       substitutionMatrix[g][t] += error.weight * error.TG;
+                       substitutionMatrix[g][g] += error.weight * error.GG;
+                       substitutionMatrix[g][c] += error.weight * error.CG;
+                       substitutionMatrix[g][gap] += error.weight * error.dG;
+                       substitutionMatrix[g][n] += error.weight * error.NG;
+
+                       substitutionMatrix[c][a] += error.weight * error.AC;
+                       substitutionMatrix[c][t] += error.weight * error.TC;
+                       substitutionMatrix[c][g] += error.weight * error.GC;
+                       substitutionMatrix[c][c] += error.weight * error.CC;
+                       substitutionMatrix[c][gap] += error.weight * error.dC;
+                       substitutionMatrix[c][n] += error.weight * error.NC;
+
+                       substitutionMatrix[gap][a] += error.weight * error.Ai;
+                       substitutionMatrix[gap][t] += error.weight * error.Ti;
+                       substitutionMatrix[gap][g] += error.weight * error.Gi;
+                       substitutionMatrix[gap][c] += error.weight * error.Ci;
+                       substitutionMatrix[gap][n] += error.weight * error.Ni;
+               }
        }
        catch(exception& e) {
                m->errorOut(e, "SeqErrorCommand", "printErrorData");
@@ -754,9 +631,176 @@ void SeqErrorCommand::printErrorData(Compare error){
 
 //***************************************************************************************************************
 
+void SeqErrorCommand::printSubMatrix(){        // writes the counts held in substitutionMatrix to a tab-delimited ".error.matrix" file
+       try {
+               string subMatrixFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.matrix";   // query file name up to its last '.', plus the new suffix
+               ofstream subMatrixFile;
+               m->openOutputFile(subMatrixFileName, subMatrixFile);
+               outputNames.push_back(subMatrixFileName);  outputTypes["error.matrix"].push_back(subMatrixFileName);    // register the file under the "error.matrix" output type
+               vector<string> bases(6);        // text labels for matrix indices 0..5
+               bases[0] = "A";
+               bases[1] = "T";
+               bases[2] = "G";
+               bases[3] = "C";
+               bases[4] = "Gap";
+               bases[5] = "N";
+               vector<int> refSums(5,1);       // one total per first index 0..4; NOTE(review): every total starts at 1, not 0 - confirm the offset is intended
+               // header row: "r" + label for first indices 0..4 (presumably r = reference - TODO confirm); the same pass accumulates the totals
+               for(int i=0;i<5;i++){
+                       subMatrixFile << "\tr" << bases[i];
+                       // refSums[i] adds up substitutionMatrix[i][0..5]
+                       for(int j=0;j<6;j++){
+                               refSums[i] += substitutionMatrix[i][j];                         
+                       }
+               }
+               subMatrixFile << endl;
+               // body: one line per second index 0..5, labelled "q" + label (presumably q = query - TODO confirm)
+               for(int i=0;i<6;i++){
+                       subMatrixFile << 'q' << bases[i];
+                       for(int j=0;j<5;j++){
+                               subMatrixFile << '\t' << substitutionMatrix[j][i];      // note the swapped subscripts: first index runs across the line
+                       }
+                       subMatrixFile << endl;
+               }
+               // last line: the per-column totals gathered above
+               subMatrixFile << "total";
+               for(int i=0;i<5;i++){
+                       subMatrixFile << '\t' << refSums[i];
+               }
+               subMatrixFile << endl;
+               subMatrixFile.close();
+       }
+       catch(exception& e) {   // report through m->errorOut, then terminate with exit status 1
+               m->errorOut(e, "SeqErrorCommand", "printSubMatrix");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+
+void SeqErrorCommand::printErrorFRFile(map<char, vector<int> > errorForward, map<char, vector<int> > errorReverse){    // writes ".error.seq.forward" and ".error.seq.reverse" tables, one from each map
+       try{    // both maps are read under keys 'm','s','i','d','a'; assumes each key holds enough entries for the loops below - TODO confirm against caller
+               string errorForwardFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq.forward";   // query file name up to its last '.', plus the new suffix
+               ofstream errorForwardFile;
+               m->openOutputFile(errorForwardFileName, errorForwardFile);
+               outputNames.push_back(errorForwardFileName);  outputTypes["error.forward"].push_back(errorForwardFileName);     // register the file under the "error.forward" output type
+               // columns after position and total are fractions of total, in the order named by this header
+               errorForwardFile << "position\ttotalseqs\tmatch\tsubstitution\tinsertion\tdeletion\tambiguous" << endl;
+               for(int i=0;i<1000;i++){        // at most 1000 positions are reported
+                       float match = (float)errorForward['m'][i];
+                       float subst = (float)errorForward['s'][i];
+                       float insert = (float)errorForward['i'][i];
+                       float del = (float)errorForward['d'][i];
+                       float amb = (float)errorForward['a'][i];
+                       float total = match + subst + insert + del + amb;
+                       if(total == 0){ break;  }       // first position with no counts ends the table and avoids dividing by zero
+                       errorForwardFile << i+1 << '\t' << total << '\t' << match/total  << '\t' << subst/total  << '\t' << insert/total  << '\t' << del/total  << '\t' << amb/total << endl;   // position is printed 1-based
+               }
+               errorForwardFile.close();
+               // second file: identical layout, built from errorReverse
+               string errorReverseFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq.reverse";
+               ofstream errorReverseFile;
+               m->openOutputFile(errorReverseFileName, errorReverseFile);
+               outputNames.push_back(errorReverseFileName);  outputTypes["error.reverse"].push_back(errorReverseFileName);     // register the file under the "error.reverse" output type
+               // same header as the forward file
+               errorReverseFile << "position\ttotalseqs\tmatch\tsubstitution\tinsertion\tdeletion\tambiguous" << endl;
+               for(int i=0;i<1000;i++){        // same 1000-position cap as above
+                       float match = (float)errorReverse['m'][i];
+                       float subst = (float)errorReverse['s'][i];
+                       float insert = (float)errorReverse['i'][i];
+                       float del = (float)errorReverse['d'][i];
+                       float amb = (float)errorReverse['a'][i];
+                       float total = match + subst + insert + del + amb;
+                       if(total == 0){ break;  }       // stop at the first position with no counts
+                       errorReverseFile << i+1 << '\t' << total << '\t' << match/total  << '\t' << subst/total  << '\t' << insert/total  << '\t' << del/total  << '\t' << amb/total << endl;
+               }
+               errorReverseFile.close();
+       }
+       catch(exception& e) {   // report through m->errorOut, then terminate with exit status 1
+               m->errorOut(e, "SeqErrorCommand", "printErrorFRFile");
+               exit(1);
+       }
+}
+
+//***************************************************************************************************************
+
+void SeqErrorCommand::printErrorQuality(map<char, vector<int> > qScoreErrorMap){
+       try{
 
+               string errorQualityFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.quality";
+               ofstream errorQualityFile;
+               m->openOutputFile(errorQualityFileName, errorQualityFile);
+               outputNames.push_back(errorQualityFileName);  outputTypes["error.quality"].push_back(errorQualityFileName);
 
+               errorQualityFile << "qscore\tmatches\tsubstitutions\tinsertions\tambiguous" << endl;
+               for(int i=0;i<41;i++){
+                       errorQualityFile << i << '\t' << qScoreErrorMap['m'][i] << '\t' << qScoreErrorMap['s'][i] << '\t' << qScoreErrorMap['i'][i] << '\t'<< qScoreErrorMap['a'][i] << endl;
+               }
+               errorQualityFile.close();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SeqErrorCommand", "printErrorFRFile");
+               exit(1);
+       }
+}
+
+
+//***************************************************************************************************************
+
+void SeqErrorCommand::printQualityFR(vector<vector<int> > qualForwardMap, vector<vector<int> > qualReverseMap){
+       try{
+
+
+               int lastRow = 0;
+               int lastColumn = 0;
+
+               for(int i=0;i<qualForwardMap.size();i++){
+                       for(int j=0;j<qualForwardMap[i].size();j++){
+                               if(qualForwardMap[i][j] != 0){
+                                       if(lastRow < i)         {       lastRow = i+2;          }
+                                       if(lastColumn < j)      {       lastColumn = j+2;       }
+                               }
+                       }
+               }
 
+               string qualityForwardFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.qual.forward";
+               ofstream qualityForwardFile;
+               m->openOutputFile(qualityForwardFileName, qualityForwardFile);
+               outputNames.push_back(qualityForwardFileName);  outputTypes["error.qual.forward"].push_back(qualityForwardFileName);
 
+               for(int i=0;i<lastColumn;i++){  qualityForwardFile << '\t' << i;        }       qualityForwardFile << endl;
 
+               for(int i=0;i<lastRow;i++){
+                       qualityForwardFile << i+1;
+                       for(int j=0;j<lastColumn;j++){
+                               qualityForwardFile << '\t' << qualForwardMap[i][j];
+                       }
 
+                       qualityForwardFile << endl;
+               }
+               qualityForwardFile.close();
+
+               
+               string qualityReverseFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.qual.reverse";
+               ofstream qualityReverseFile;
+               m->openOutputFile(qualityReverseFileName, qualityReverseFile);
+               outputNames.push_back(qualityReverseFileName);  outputTypes["error.qual.reverse"].push_back(qualityReverseFileName);
+               
+               for(int i=0;i<lastColumn;i++){  qualityReverseFile << '\t' << i;        }       qualityReverseFile << endl;
+               for(int i=0;i<lastRow;i++){
+                       
+                       qualityReverseFile << i+1;
+                       for(int j=0;j<lastColumn;j++){
+                               qualityReverseFile << '\t' << qualReverseMap[i][j];
+                       }
+                       qualityReverseFile << endl;
+               }
+               qualityReverseFile.close();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SeqErrorCommand", "printErrorFRFile");
+               exit(1);
+       }
+}
+
+
+//***************************************************************************************************************
index e8ca96a6328ee56cf56ff1e9a1869107b359896f..00b536771ca730945324a3674d2a34646cddf25d 100644 (file)
@@ -58,10 +58,15 @@ private:
        map<string,int> getWeights();
        Compare getErrors(Sequence, Sequence);
        void printErrorHeader();
-       void printErrorData(Compare);
-       
-       string queryFileName, referenceFileName, qualFileName, reportFileName, namesFileName, errorSummaryFileName, errorSeqFileName, outputDir;
+       void printErrorData(Compare, int);
+       void printSubMatrix();
+       void printErrorFRFile(map<char, vector<int> >, map<char, vector<int> >);
+       void printErrorQuality(map<char, vector<int> >);
+       void printQualityFR(vector<vector<int> >, vector<vector<int> >);
+
+       string queryFileName, referenceFileName, qualFileName, reportFileName, namesFileName, outputDir;
        double threshold;
+       bool ignoreChimeras;
        int numRefs;
        ofstream errorSummaryFile, errorSeqFile;
        vector<string> outputNames;