]> git.donarmstrong.com Git - mothur.git/blobdiff - seqerrorcommand.cpp
added oligos class. added check orient parameter to trim.flows, sffinfo, fastq.info...
[mothur.git] / seqerrorcommand.cpp
index 67e43aa0d4b7f6a6ff251bc45a8254ca49b8b162..4e5abc95b7189bd8d5178fae77c00ac17117311d 100644 (file)
@@ -23,7 +23,8 @@ vector<string> SeqErrorCommand::setParameters(){
                CommandParameter preference("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(preference);
                CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "QualReport","",false,false); parameters.push_back(pqfile);
                CommandParameter preport("report", "InputTypes", "", "", "none", "none", "QualReport","",false,false); parameters.push_back(preport);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pname);
+               CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","",false,false,true); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","",false,false,true); parameters.push_back(pcount);
                CommandParameter pignorechimeras("ignorechimeras", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pignorechimeras);
                CommandParameter pthreshold("threshold", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pthreshold);
                CommandParameter paligned("aligned", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(paligned);
@@ -52,7 +53,8 @@ string SeqErrorCommand::getHelpString(){
                helpString += "The reference parameter...\n";
                helpString += "The qfile parameter...\n";
                helpString += "The report parameter...\n";
-               helpString += "The name parameter...\n";
+               helpString += "The name parameter allows you to provide a name file associated with the fasta file.\n";
+        helpString += "The count parameter allows you to provide a count file associated with the fasta file.\n";
                helpString += "The ignorechimeras parameter...\n";
                helpString += "The threshold parameter...\n";
                helpString += "The processors parameter...\n";
@@ -190,6 +192,14 @@ SeqErrorCommand::SeqErrorCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a count file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
 
                                it = parameters.find("qfile");
                                //user has given a quality score file
@@ -227,6 +237,12 @@ SeqErrorCommand::SeqErrorCommand(string option)  {
                        if(namesFileName == "not found"){       namesFileName = "";     }
                        else if (namesFileName == "not open") { namesFileName = ""; abort = true; }     
                        else { m->setNameFile(namesFileName); }
+            
+            //check for optional parameters
+                       countfile = validParameter.validFile(parameters, "count", true);
+                       if(countfile == "not found"){   countfile = ""; }
+                       else if (countfile == "not open") { countfile = ""; abort = true; }
+                       else { m->setCountTableFile(countfile); }
                        
                        qualFileName = validParameter.validFile(parameters, "qfile", true);
                        if(qualFileName == "not found"){        qualFileName = "";      }
@@ -243,6 +259,8 @@ SeqErrorCommand::SeqErrorCommand(string option)  {
                                outputDir += m->hasPath(queryFileName); //if user entered a file with a path then preserve it   
                        }
                        
+            if ((countfile != "") && (namesFileName != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
+            
                        //check for optional parameter and set defaults
                        // ...at some point we should add some additional type checking...
                        temp = validParameter.validFile(parameters, "threshold", false);        if (temp == "not found") { temp = "1.00"; }
@@ -337,7 +355,12 @@ int SeqErrorCommand::execute(){
                
                getReferences();        //read in reference sequences - make sure there's no ambiguous bases
 
-               if(namesFileName != ""){        weights = getWeights(); }
+               if(namesFileName != "")     {   weights = getWeights();         }
+        else if (countfile != "")   {
+            CountTable ct;
+            ct.readTable(countfile, false, false);
+            weights = ct.getNameMap();
+        }
                
                vector<unsigned long long> fastaFilePos;
                vector<unsigned long long> qFilePos;
@@ -432,18 +455,18 @@ int SeqErrorCommand::createProcesses(string filename, string qFileName, string r
                
                //loop through and create all the processes you want
                while (process != processors) {
-                       int pid = fork();
+                       pid_t pid = fork();
                        
                        if (pid > 0) {
                                processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                process++;
                        }else if (pid == 0){
                                
-                               num = driver(filename, qFileName, rFileName, summaryFileName + toString(getpid()) + ".temp", errorOutputFileName+ toString(getpid()) + ".temp", chimeraOutputFileName + toString(getpid()) + ".temp", lines[process], qLines[process], rLines[process]);
+                               num = driver(filename, qFileName, rFileName, summaryFileName + m->mothurGetpid(process) + ".temp", errorOutputFileName+ m->mothurGetpid(process) + ".temp", chimeraOutputFileName + m->mothurGetpid(process) + ".temp", lines[process], qLines[process], rLines[process]);
                                
                                //pass groupCounts to parent
                                ofstream out;
-                               string tempFile = filename + toString(getpid()) + ".info.temp";
+                               string tempFile = filename + m->mothurGetpid(process) + ".info.temp";
                                m->openOutputFile(tempFile, out);
                                
                                //output totalBases and totalMatches
@@ -636,7 +659,7 @@ int SeqErrorCommand::createProcesses(string filename, string qFileName, string r
                        int misMatchSize;
                        in >> misMatchSize; m->gobble(in);
                        if (misMatchSize > misMatchCounts.size()) {     misMatchCounts.resize(misMatchSize, 0); }
-                       for (int j = 0; j < misMatchCounts.size(); j++) {
+                       for (int j = 0; j < misMatchSize; j++) {
                                in >> tempNum; misMatchCounts[j] += tempNum;
                        }
                        m->gobble(in);
@@ -673,10 +696,10 @@ int SeqErrorCommand::driver(string filename, string qFileName, string rFileName,
                int numSeqs = 0;
                
                map<string, int>::iterator it;
-               qScoreErrorMap['m'].assign(41, 0);
-               qScoreErrorMap['s'].assign(41, 0);
-               qScoreErrorMap['i'].assign(41, 0);
-               qScoreErrorMap['a'].assign(41, 0);
+               qScoreErrorMap['m'].assign(101, 0);
+               qScoreErrorMap['s'].assign(101, 0);
+               qScoreErrorMap['i'].assign(101, 0);
+               qScoreErrorMap['a'].assign(101, 0);
                
                errorForward['m'].assign(maxLength,0);
                errorForward['s'].assign(maxLength,0);
@@ -712,8 +735,8 @@ int SeqErrorCommand::driver(string filename, string qFileName, string rFileName,
                        qualForwardMap.resize(maxLength);
                        qualReverseMap.resize(maxLength);
                        for(int i=0;i<maxLength;i++){
-                               qualForwardMap[i].assign(41,0);
-                               qualReverseMap[i].assign(41,0);
+                               qualForwardMap[i].assign(101,0);
+                               qualReverseMap[i].assign(101,0);
                        }       
                }
                else if(qFileName != "" && !aligned){
@@ -724,8 +747,8 @@ int SeqErrorCommand::driver(string filename, string qFileName, string rFileName,
                        qualForwardMap.resize(maxLength);
                        qualReverseMap.resize(maxLength);
                        for(int i=0;i<maxLength;i++){
-                               qualForwardMap[i].assign(41,0);
-                               qualReverseMap[i].assign(41,0);
+                               qualForwardMap[i].assign(101,0);
+                               qualReverseMap[i].assign(101,0);
                        }       
         }
         
@@ -778,7 +801,7 @@ int SeqErrorCommand::driver(string filename, string qFileName, string rFileName,
             
             getErrors(query, reference, minCompare);
                        
-                       if(namesFileName != ""){
+                       if((namesFileName != "") || (countfile != "")){
                                it = weights.find(query.getName());
                                minCompare.weight = it->second;
                        }
@@ -851,7 +874,7 @@ int SeqErrorCommand::driver(string filename, string qFileName, string rFileName,
                                if (queryFile.eof()) { break; }
                        #endif
                        
-                       if(index % 100 == 0){   m->mothurOut(toString(index));  m->mothurOutEndLine(); }
+                       if(index % 100 == 0){   m->mothurOutJustToScreen(toString(index)+"\n");  }
                }
                queryFile.close();
                outChimeraReport.close();
@@ -862,7 +885,7 @@ int SeqErrorCommand::driver(string filename, string qFileName, string rFileName,
                else if(qFileName != "" && aligned == false){   qualFile.close();                       }
         
                //report progress
-               m->mothurOut(toString(index));  m->mothurOutEndLine();
+               m->mothurOutJustToScreen(toString(index)+"\n"); 
                
                return index;
        }
@@ -1271,7 +1294,7 @@ void SeqErrorCommand::printErrorQuality(map<char, vector<int> > qScoreErrorMap){
                outputNames.push_back(errorQualityFileName);  outputTypes["errorquality"].push_back(errorQualityFileName);
 
                errorQualityFile << "qscore\tmatches\tsubstitutions\tinsertions\tambiguous" << endl;
-               for(int i=0;i<41;i++){
+               for(int i=0;i<101;i++){
                        errorQualityFile << i << '\t' << qScoreErrorMap['m'][i] << '\t' << qScoreErrorMap['s'][i] << '\t' << qScoreErrorMap['i'][i] << '\t'<< qScoreErrorMap['a'][i] << endl;
                }
                errorQualityFile.close();
@@ -1381,6 +1404,8 @@ int SeqErrorCommand::setLines(string filename, string qfilename, string rfilenam
                                        
                                        string sname = "";  nameStream >> sname;
                                        sname = sname.substr(1);
+                    
+                    m->checkName(sname);
                                        
                                        map<string, int>::iterator it = firstSeqNames.find(sname);
                                        
@@ -1441,6 +1466,8 @@ int SeqErrorCommand::setLines(string filename, string qfilename, string rfilenam
                     istringstream nameStream(input);
                     string sname = "";  nameStream >> sname;
                     
+                    m->checkName(sname);
+                    
                     map<string, int>::iterator it = firstSeqNamesReport.find(sname);
                 
                     if(it != firstSeqNamesReport.end()) { //this is the start of a new chunk