]> git.donarmstrong.com Git - mothur.git/blobdiff - seqerrorcommand.cpp
changes to seqerrorcommand
[mothur.git] / seqerrorcommand.cpp
index 82af49ed27f5e6dffbe52e50d413439c3983c270..ea049544c818f217f84931577b604af9e24e4d6e 100644 (file)
@@ -9,6 +9,55 @@
 
 #include "seqerrorcommand.h"
 
+//**********************************************************************************************************************
+vector<string> SeqErrorCommand::getValidParameters(){  
+       try {
+               string Array[] =  {"query", "reference", "name", "threshold", "inputdir", "outputdir"};
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SeqErrorCommand", "getValidParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+SeqErrorCommand::SeqErrorCommand(){    
+       try {
+               abort = true;
+               //initialize outputTypes
+               vector<string> tempOutNames;
+               outputTypes["error"] = tempOutNames;
+               outputTypes["count"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SeqErrorCommand", "SeqErrorCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<string> SeqErrorCommand::getRequiredParameters(){       
+       try {
+               string Array[] =  {"query","reference"};
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SeqErrorCommand", "getRequiredParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<string> SeqErrorCommand::getRequiredFiles(){    
+       try {
+               vector<string> myArray;
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SeqErrorCommand", "getRequiredFiles");
+               exit(1);
+       }
+}
 //***************************************************************************************************************
 
 SeqErrorCommand::SeqErrorCommand(string option)  {
@@ -23,7 +72,7 @@ SeqErrorCommand::SeqErrorCommand(string option)  {
                        string temp;
                        
                        //valid paramters for this command
-                       string AlignArray[] =  {"query", "reference", "name", "threshold"};
+                       string AlignArray[] =  {"query", "reference", "name", "threshold", "inputdir", "outputdir"};
                        
 //need to implement name file option
                        
@@ -40,6 +89,11 @@ SeqErrorCommand::SeqErrorCommand(string option)  {
                                if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
                        }
                        
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["error"] = tempOutNames;
+                       outputTypes["count"] = tempOutNames;
+                       
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
                        if (inputDir == "not found"){   inputDir = "";          }
@@ -48,7 +102,7 @@ SeqErrorCommand::SeqErrorCommand(string option)  {
                                it = parameters.find("query");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["query"] = inputDir + it->second;            }
                                }
@@ -56,7 +110,7 @@ SeqErrorCommand::SeqErrorCommand(string option)  {
                                it = parameters.find("reference");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["reference"] = inputDir + it->second;                }
                                }
@@ -64,7 +118,7 @@ SeqErrorCommand::SeqErrorCommand(string option)  {
                                it = parameters.find("name");
                                //user has given a template file
                                if(it != parameters.end()){ 
-                                       path = hasPath(it->second);
+                                       path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                }
@@ -87,7 +141,7 @@ SeqErrorCommand::SeqErrorCommand(string option)  {
                        outputDir = validParameter.validFile(parameters, "outputdir", false);
                        if (outputDir == "not found"){  
                                outputDir = ""; 
-                               outputDir += hasPath(queryFileName); //if user entered a file with a path then preserve it      
+                               outputDir += m->hasPath(queryFileName); //if user entered a file with a path then preserve it   
                        }
                        
                        //check for optional parameter and set defaults
@@ -95,9 +149,16 @@ SeqErrorCommand::SeqErrorCommand(string option)  {
                        temp = validParameter.validFile(parameters, "threshold", false);        if (temp == "not found") { temp = "1.00"; }
                        convert(temp, threshold);  
                                                
-                       errorFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".errors";
-                       openOutputFile(errorFileName, errorFile);
+                       errorSummaryFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.summary";
+                       m->openOutputFile(errorSummaryFileName, errorSummaryFile);
+                       outputNames.push_back(errorSummaryFileName); outputTypes["error.summary"].push_back(errorSummaryFileName);
+                       printErrorHeader();
+
+                       errorSeqFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq";
+                       m->openOutputFile(errorSeqFileName, errorSeqFile);
+                       outputNames.push_back(errorSeqFileName); outputTypes["error.seq"].push_back(errorSeqFileName);
                        printErrorHeader();
+                       
                }
        }
        catch(exception& e) {
@@ -126,7 +187,10 @@ void SeqErrorCommand::help(){
 
 //***************************************************************************************************************
 
-SeqErrorCommand::~SeqErrorCommand(){   errorFile.close();      }
+SeqErrorCommand::~SeqErrorCommand(){
+       errorSummaryFile.close();       
+       errorSeqFile.close();   
+}
 
 //***************************************************************************************************************
 
@@ -140,7 +204,7 @@ int SeqErrorCommand::execute(){
                if(namesFileName != ""){        weights = getWeights(); }
                
                ifstream queryFile;
-               openInputFile(queryFileName, queryFile);
+               m->openInputFile(queryFileName, queryFile);
                                
                int totalBases = 0;
                int totalMatches = 0;
@@ -192,9 +256,10 @@ int SeqErrorCommand::execute(){
                int total = 0;
                
                
-               string errorCountFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".count";
+               string errorCountFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.count";
                ofstream errorCountFile;
-               openOutputFile(errorCountFileName, errorCountFile);
+               m->openOutputFile(errorCountFileName, errorCountFile);
+               outputNames.push_back(errorCountFileName);  outputTypes["error.count"].push_back(errorCountFileName);
                
                m->mothurOut("Overall error rate:\t" + toString((double)(totalBases - totalMatches) / (double)totalBases) + "\n\n");
                m->mothurOut("Errors\tSequences\n");
@@ -220,7 +285,7 @@ void SeqErrorCommand::getReferences(){
        try {
                
                ifstream referenceFile;
-               openInputFile(referenceFileName, referenceFile);
+               m->openInputFile(referenceFileName, referenceFile);
                
                while(referenceFile){
                        Sequence currentSeq(referenceFile);
@@ -231,7 +296,7 @@ void SeqErrorCommand::getReferences(){
                                currentSeq.removeAmbigBases();
                        }
                        referenceSeqs.push_back(currentSeq);
-                       gobble(referenceFile);
+                       m->gobble(referenceFile);
                }
                numRefs = referenceSeqs.size();
                
@@ -255,6 +320,7 @@ Compare SeqErrorCommand::getErrors(Sequence query, Sequence reference){
                string q = query.getAligned();
                string r = reference.getAligned();
 
+               
                int started = 0;
                Compare errors;
 
@@ -263,45 +329,45 @@ Compare SeqErrorCommand::getErrors(Sequence query, Sequence reference){
                                started = 1;
                                
                                if(q[i] == 'A'){
-                                       if(r[i] == 'A'){        errors.AA++;    errors.matches++;       }
-                                       if(r[i] == 'T'){        errors.AT++;    }
-                                       if(r[i] == 'G'){        errors.AG++;    }
-                                       if(r[i] == 'C'){        errors.AC++;    }
-                                       if(r[i] == '-'){        errors.Ai++;    }
+                                       if(r[i] == 'A'){        errors.AA++;    errors.matches++;       errors.sequence += 'm'; }
+                                       if(r[i] == 'T'){        errors.AT++;    errors.sequence += 's'; }
+                                       if(r[i] == 'G'){        errors.AG++;    errors.sequence += 's'; }
+                                       if(r[i] == 'C'){        errors.AC++;    errors.sequence += 's'; }
+                                       if(r[i] == '-'){        errors.Ai++;    errors.sequence += 'i'; }
                                }
                                else if(q[i] == 'T'){
-                                       if(r[i] == 'A'){        errors.TA++;    }
-                                       if(r[i] == 'T'){        errors.TT++;    errors.matches++;       }
-                                       if(r[i] == 'G'){        errors.TG++;    }
-                                       if(r[i] == 'C'){        errors.TC++;    }
-                                       if(r[i] == '-'){        errors.Ti++;    }
+                                       if(r[i] == 'A'){        errors.TA++;    errors.sequence += 's'; }
+                                       if(r[i] == 'T'){        errors.TT++;    errors.matches++;       errors.sequence += 'm'; }
+                                       if(r[i] == 'G'){        errors.TG++;    errors.sequence += 's'; }
+                                       if(r[i] == 'C'){        errors.TC++;    errors.sequence += 's'; }
+                                       if(r[i] == '-'){        errors.Ti++;    errors.sequence += 'i'; }
                                }
                                else if(q[i] == 'G'){
-                                       if(r[i] == 'A'){        errors.GA++;    }
-                                       if(r[i] == 'T'){        errors.GT++;    }
-                                       if(r[i] == 'G'){        errors.GG++;    errors.matches++;       }
-                                       if(r[i] == 'C'){        errors.GC++;    }
-                                       if(r[i] == '-'){        errors.Gi++;    }
+                                       if(r[i] == 'A'){        errors.GA++;    errors.sequence += 's'; }
+                                       if(r[i] == 'T'){        errors.GT++;    errors.sequence += 's'; }
+                                       if(r[i] == 'G'){        errors.GG++;    errors.matches++;       errors.sequence += 'm'; }
+                                       if(r[i] == 'C'){        errors.GC++;    errors.sequence += 's'; }
+                                       if(r[i] == '-'){        errors.Gi++;    errors.sequence += 'i'; }
                                }
                                else if(q[i] == 'C'){
-                                       if(r[i] == 'A'){        errors.CA++;    }
-                                       if(r[i] == 'T'){        errors.CT++;    }
-                                       if(r[i] == 'G'){        errors.CG++;    }
-                                       if(r[i] == 'C'){        errors.CC++;    errors.matches++;       }
-                                       if(r[i] == '-'){        errors.Ci++;    }
+                                       if(r[i] == 'A'){        errors.CA++;    errors.sequence += 's'; }
+                                       if(r[i] == 'T'){        errors.CT++;    errors.sequence += 's'; }
+                                       if(r[i] == 'G'){        errors.CG++;    errors.sequence += 's'; }
+                                       if(r[i] == 'C'){        errors.CC++;    errors.matches++;       errors.sequence += 'm'; }
+                                       if(r[i] == '-'){        errors.Ci++;    errors.sequence += 'i'; }
                                }
                                else if(q[i] == 'N'){
-                                       if(r[i] == 'A'){        errors.NA++;    }
-                                       if(r[i] == 'T'){        errors.NT++;    }
-                                       if(r[i] == 'G'){        errors.NG++;    }
-                                       if(r[i] == 'C'){        errors.NC++;    }
-                                       if(r[i] == '-'){        errors.Ni++;    }
+                                       if(r[i] == 'A'){        errors.NA++;    errors.sequence += 'a'; }
+                                       if(r[i] == 'T'){        errors.NT++;    errors.sequence += 'a'; }
+                                       if(r[i] == 'G'){        errors.NG++;    errors.sequence += 'a'; }
+                                       if(r[i] == 'C'){        errors.NC++;    errors.sequence += 'a'; }
+                                       if(r[i] == '-'){        errors.Ni++;    errors.sequence += 'a'; }
                                }
                                else if(q[i] == '-' && r[i] != '-'){
-                                       if(r[i] == 'A'){        errors.dA++;    }
-                                       if(r[i] == 'T'){        errors.dT++;    }
-                                       if(r[i] == 'G'){        errors.dG++;    }
-                                       if(r[i] == 'C'){        errors.dC++;    }
+                                       if(r[i] == 'A'){        errors.dA++;    errors.sequence += 'd'; }
+                                       if(r[i] == 'T'){        errors.dT++;    errors.sequence += 'd'; }
+                                       if(r[i] == 'G'){        errors.dG++;    errors.sequence += 'd'; }
+                                       if(r[i] == 'C'){        errors.dC++;    errors.sequence += 'd'; }
                                }
                                errors.total++; 
                                
@@ -335,7 +401,7 @@ Compare SeqErrorCommand::getErrors(Sequence query, Sequence reference){
 
 map<string, int> SeqErrorCommand::getWeights(){
        ifstream nameFile;
-       openInputFile(namesFileName, nameFile);
+       m->openInputFile(namesFileName, nameFile);
        
        string seqName;
        string redundantSeqs;
@@ -343,8 +409,8 @@ map<string, int> SeqErrorCommand::getWeights(){
        
        while(nameFile){
                nameFile >> seqName >> redundantSeqs;
-               nameCountMap[seqName] = getNumNames(redundantSeqs); 
-               gobble(nameFile);
+               nameCountMap[seqName] = m->getNumNames(redundantSeqs); 
+               m->gobble(nameFile);
        }
        return nameCountMap;
 }
@@ -354,12 +420,12 @@ map<string, int> SeqErrorCommand::getWeights(){
 
 void SeqErrorCommand::printErrorHeader(){
        try {
-               errorFile << "query\treference\tweight\t";
-               errorFile << "AA\tAT\tAG\tAC\tTA\tTT\tTG\tTC\tGA\tGT\tGG\tGC\tCA\tCT\tCG\tCC\tNA\tNT\tNG\tNC\tAi\tTi\tGi\tCi\tNi\tdA\tdT\tdG\tdC\t";
-               errorFile << "insertions\tdeletions\tsubstitutions\tambig\tmatches\tmismatches\ttotal\terror\n";
+               errorSummaryFile << "query\treference\tweight\t";
+               errorSummaryFile << "AA\tAT\tAG\tAC\tTA\tTT\tTG\tTC\tGA\tGT\tGG\tGC\tCA\tCT\tCG\tCC\tNA\tNT\tNG\tNC\tAi\tTi\tGi\tCi\tNi\tdA\tdT\tdG\tdC\t";
+               errorSummaryFile << "insertions\tdeletions\tsubstitutions\tambig\tmatches\tmismatches\ttotal\terror\n";
                
-               errorFile << setprecision(6);
-               errorFile.setf(ios::fixed);
+               errorSummaryFile << setprecision(6);
+               errorSummaryFile.setf(ios::fixed);
        }
        catch(exception& e) {
                m->errorOut(e, "SeqErrorCommand", "printErrorHeader");
@@ -371,21 +437,22 @@ void SeqErrorCommand::printErrorHeader(){
 
 void SeqErrorCommand::printErrorData(Compare error){
        try {
-               errorFile << error.queryName << '\t' << error.refName << '\t' << error.weight << '\t';
-               errorFile << error.AA << '\t' << error.AT << '\t' << error.AG << '\t' << error.AC << '\t';
-               errorFile << error.TA << '\t' << error.TT << '\t' << error.TG << '\t' << error.TC << '\t';
-               errorFile << error.GA << '\t' << error.GT << '\t' << error.GG << '\t' << error.GC << '\t';
-               errorFile << error.CA << '\t' << error.CT << '\t' << error.CG << '\t' << error.CC << '\t';
-               errorFile << error.NA << '\t' << error.NT << '\t' << error.NG << '\t' << error.NC << '\t';
-               errorFile << error.Ai << '\t' << error.Ti << '\t' << error.Gi << '\t' << error.Ci << '\t' << error.Ni << '\t' ;
-               errorFile << error.dA << '\t' << error.dT << '\t' << error.dG << '\t' << error.dC << '\t';
+               errorSummaryFile << error.queryName << '\t' << error.refName << '\t' << error.weight << '\t';
+               errorSummaryFile << error.AA << '\t' << error.AT << '\t' << error.AG << '\t' << error.AC << '\t';
+               errorSummaryFile << error.TA << '\t' << error.TT << '\t' << error.TG << '\t' << error.TC << '\t';
+               errorSummaryFile << error.GA << '\t' << error.GT << '\t' << error.GG << '\t' << error.GC << '\t';
+               errorSummaryFile << error.CA << '\t' << error.CT << '\t' << error.CG << '\t' << error.CC << '\t';
+               errorSummaryFile << error.NA << '\t' << error.NT << '\t' << error.NG << '\t' << error.NC << '\t';
+               errorSummaryFile << error.Ai << '\t' << error.Ti << '\t' << error.Gi << '\t' << error.Ci << '\t' << error.Ni << '\t' ;
+               errorSummaryFile << error.dA << '\t' << error.dT << '\t' << error.dG << '\t' << error.dC << '\t';
                
-               errorFile << error.Ai + error.Ti + error.Gi + error.Ci << '\t';                 //insertions
-               errorFile << error.dA + error.dT + error.dG + error.dC << '\t';                 //deletions
-               errorFile << error.mismatches - (error.Ai + error.Ti + error.Gi + error.Ci) - (error.dA + error.dT + error.dG + error.dC) - (error.NA + error.NT + error.NG + error.NC + error.Ni) << '\t';     //substitutions
-               errorFile << error.NA + error.NT + error.NG + error.NC + error.Ni << '\t';      //ambiguities
-               errorFile << error.matches << '\t' << error.mismatches << '\t' << error.total << '\t' << error.errorRate << endl;
+               errorSummaryFile << error.Ai + error.Ti + error.Gi + error.Ci << '\t';                  //insertions
+               errorSummaryFile << error.dA + error.dT + error.dG + error.dC << '\t';                  //deletions
+               errorSummaryFile << error.mismatches - (error.Ai + error.Ti + error.Gi + error.Ci) - (error.dA + error.dT + error.dG + error.dC) - (error.NA + error.NT + error.NG + error.NC + error.Ni) << '\t';      //substitutions
+               errorSummaryFile << error.NA + error.NT + error.NG + error.NC + error.Ni << '\t';       //ambiguities
+               errorSummaryFile << error.matches << '\t' << error.mismatches << '\t' << error.total << '\t' << error.errorRate << endl;
                
+               errorSeqFile << '>' << error.queryName << "\tref:" << error.refName << '\n' << error.sequence << endl;
        }
        catch(exception& e) {
                m->errorOut(e, "SeqErrorCommand", "printErrorData");