]> git.donarmstrong.com Git - mothur.git/commitdiff
added pacbio parameter to fastq.info. fixed issue with paired primers in trim.seqs
authorSarahsWork <sarahswork@imac.westcotts.net>
Wed, 6 Mar 2013 19:45:11 +0000 (14:45 -0500)
committerSarahsWork <sarahswork@imac.westcotts.net>
Wed, 6 Mar 2013 19:45:11 +0000 (14:45 -0500)
parsefastaqcommand.cpp
parsefastaqcommand.h
qualityscores.h
trimseqscommand.cpp
trimseqscommand.h

index 704e752341320dd75b9f213658d74530583ec9cb..74e3e2bc9f0bdedc88f90e7f923eafdb511470d6 100644 (file)
@@ -16,6 +16,7 @@ vector<string> ParseFastaQCommand::setParameters(){
                CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pfastq);
                CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "","fasta",false,false); parameters.push_back(pfasta);
                CommandParameter pqual("qfile", "Boolean", "", "T", "", "", "","qfile",false,false); parameters.push_back(pqual);
+        CommandParameter ppacbio("pacbio", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppacbio);
                CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "sanger", "", "", "","",false,false,true); parameters.push_back(pformat);
         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
@@ -39,6 +40,7 @@ string ParseFastaQCommand::getHelpString(){
                helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=sanger.\n";
         helpString += "The fasta parameter allows you to indicate whether you want a fasta file generated. Default=T.\n";
         helpString += "The qfile parameter allows you to indicate whether you want a quality file generated. Default=T.\n";
+        helpString += "The pacbio parameter allows you to indicate .... When set to true, quality scores of 0 will results in a corresponding base of N. Default=F.\n";
                helpString += "Example fastq.info(fastaq=test.fastaq).\n";
                helpString += "Note: No spaces between parameter labels (i.e. fastq), '=' and yourFastQFile.\n";
                return helpString;
@@ -132,7 +134,11 @@ ParseFastaQCommand::ParseFastaQCommand(string option){
                        fasta = m->isTrue(temp); 
 
                        temp = validParameter.validFile(parameters, "qfile", false);    if(temp == "not found"){        temp = "T";     }
-                       qual = m->isTrue(temp); 
+                       qual = m->isTrue(temp);
+            
+            temp = validParameter.validFile(parameters, "pacbio", false);      if(temp == "not found"){        temp = "F";     }
+                       pacbio = m->isTrue(temp);
+
                        
             format = validParameter.validFile(parameters, "format", false);            if (format == "not found"){     format = "sanger";      }
             
@@ -209,22 +215,33 @@ int ParseFastaQCommand::execute(){
                        if (name2 != "") { if (name != name2) { m->mothurOut("[ERROR]: names do not match. read " + name + " for fasta and " + name2 + " for quality."); m->mothurOutEndLine(); m->control_pressed = true; break; } }
                        if (quality.length() != sequence.length()) { m->mothurOut("[ERROR]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; break; }
                        
-                       //print sequence info to files
-                       if (fasta) { outFasta << ">" << name << endl << sequence << endl; }
-                       
-                       if (qual) { 
-                               vector<int> qualScores = convertQual(quality);
+            vector<int> qualScores;
+            if (qual) {
+                               qualScores = convertQual(quality);
                                outQual << ">" << name << endl;
                                for (int i = 0; i < qualScores.size(); i++) { outQual << qualScores[i] << " "; }
                                outQual << endl;
                        }
+            
+            if (m->control_pressed) { break; }
+            
+            if (pacbio) {
+                if (!qual) { qualScores = convertQual(quality); } //get scores if we didn't already
+                for (int i = 0; i < qualScores.size(); i++) {
+                    if (qualScores[i] == 0){ sequence[i] = 'N'; }
+                }
+            }
+            
+                       //print sequence info to files
+                       if (fasta) { outFasta << ">" << name << endl << sequence << endl; }
+                       
                }
                
                in.close();
                if (fasta)      { outFasta.close();     }
                if (qual)       { outQual.close();      }
                
-               if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(fastaFile); m->mothurRemove(qualFile); return 0; }
+               if (m->control_pressed) { outputTypes.clear(); outputNames.clear(); m->mothurRemove(fastaFile); m->mothurRemove(qualFile); return 0; }
                
                //set fasta file as new current fastafile
                string current = "";
index cb86bd66c0ae8597f605006893fd70371d16b5fc..5ae6b9c482fe6b4022bb4d4c5d4ae2e79ea3aad8 100644 (file)
@@ -36,7 +36,7 @@ private:
 
        vector<string> outputNames;     
        string outputDir, fastaQFile, format;
-       bool abort, fasta, qual;
+       bool abort, fasta, qual, pacbio;
        
        vector<int> convertQual(string);
     vector<char> convertTable;
index 87699739d34ad012b121a295be53f976b122a5b7..500d3e98db79b934effee830b65f54afb0023af3 100644 (file)
@@ -38,7 +38,7 @@ public:
        void updateReverseMap(vector<vector<int> >&, int, int, int);
     void setName(string n); 
     void setScores(vector<int> qs) { qScores = qs; seqLength = qScores.size(); }
-    
+    vector<int> getScores() { return qScores; }
        
 private:
        
index 4b90cda9355112d746999e2a90f9752f5befdf38..935b9a9297e08d1f50e89d4b9a059cf9cbe8e54a 100644 (file)
@@ -686,10 +686,12 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
             for (map<int, oligosPair>::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) {
                   oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
                 rpairedPrimers[it->first] = tempPair;
+                //cout  << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << primerNameVector[it->first] << endl;
             }
             for (map<int, oligosPair>::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) {
                  oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
                 rpairedBarcodes[it->first] = tempPair;
+                 //cout  << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl;
             }
             rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
         }
@@ -712,13 +714,15 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
 
                        Sequence currSeq(inFASTA); m->gobble(inFASTA);
                        //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
+            Sequence savedSeq(currSeq.getName(), currSeq.getAligned());
             
-                       QualityScores currQual;
+                       QualityScores currQual; QualityScores savedQual;
                        if(qFileName != ""){
                                currQual = QualityScores(qFile);  m->gobble(qFile);
+                savedQual.setName(currQual.getName()); savedQual.setScores(currQual.getScores());
                 //cout << currQual.getName() << endl;
                        }
-                       
+                         
                        string origSeq = currSeq.getUnaligned();
                        if (origSeq != "") {
                                
@@ -747,7 +751,11 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                 
                                if(numFPrimers != 0){
                                        success = trimOligos->stripForward(currSeq, currQual, primerIndex, keepforward);
-                                       if(success > pdiffs)            {       trashCode += 'f';       }
+                                       if(success > pdiffs)            {
+                        //if (pairedOligos) {  trashCode += trimOligos->getTrashCode(); }
+                        //else {  trashCode += 'f';  }
+                        trashCode += 'f';
+                    }
                                        else{ currentSeqsDiffs += success;  }
                                }
                                
@@ -767,17 +775,21 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                     int thisPrimerIndex = 0;
                     
                     if(numBarcodes != 0){
-                        thisSuccess = rtrimOligos->stripBarcode(currSeq, currQual, thisBarcodeIndex);
+                        thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);
                         if(thisSuccess > bdiffs)               {       thisTrashCode += 'b';   }
                         else{ thisCurrentSeqsDiffs += thisSuccess;  }
                     }
                     
                     if(numFPrimers != 0){
-                        thisSuccess = rtrimOligos->stripForward(currSeq, currQual, thisPrimerIndex, keepforward);
-                        if(thisSuccess > pdiffs)               {       thisTrashCode += 'f';   }
+                        thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, keepforward);
+                            if(thisSuccess > pdiffs)           {
+                            //if (pairedOligos) {  thisTrashCode += rtrimOligos->getTrashCode(); }
+                            //else {  thisTrashCode += 'f';  }
+                            thisTrashCode += 'f'; 
+                        }
                         else{ thisCurrentSeqsDiffs += thisSuccess;  }
                     }
-                    
+                   
                     if (thisCurrentSeqsDiffs > tdiffs) {       thisTrashCode += 't';   }
                     
                     if (thisTrashCode == "") { 
@@ -786,9 +798,11 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                         currentSeqsDiffs = thisCurrentSeqsDiffs;
                         barcodeIndex = thisBarcodeIndex;
                         primerIndex = thisPrimerIndex;
-                        currSeq.reverseComplement();
+                        savedSeq.reverseComplement();
+                        currSeq.setAligned(savedSeq.getAligned());
                         if(qFileName != ""){
-                            currQual.flipQScores();
+                            savedQual.flipQScores();
+                            currQual.setScores(savedQual.getScores());
                         }
                     }
                 }
@@ -1551,7 +1565,7 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                        // get rest of line in case there is a primer name
                                        while (!inOligos.eof()) {
                                                char c = inOligos.get();
-                                               if (c == 10 || c == 13){        break;  }
+                                               if (c == 10 || c == 13 || c == -1){     break;  }
                                                else if (c == 32 || c == 9){;} //space or tab
                                                else {  group += c;  }
                                        }
@@ -1586,7 +1600,7 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                     string temp = "";
                     while (!inOligos.eof())    {
                                                char c = inOligos.get();
-                                               if (c == 10 || c == 13){        break;  }
+                                               if (c == 10 || c == 13 || c == -1){     break;  }
                                                else if (c == 32 || c == 9){;} //space or tab
                                                else {  temp += c;  }
                                        }
index 882f716357515b008c06889f3f1559a4e4525cd5..de3d839901762893a7ce2c4d41ce5f9db2f9cbaa 100644 (file)
@@ -299,11 +299,14 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                        int currentSeqsDiffs = 0;
             
                        Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA);
+            Sequence savedSeq(currSeq.getName(), currSeq.getAligned());
                        
-                       QualityScores currQual;
+                       QualityScores currQual; QualityScores savedQual;
                        if(pDataArray->qFileName != ""){
                                currQual = QualityScores(qFile);  pDataArray->m->gobble(qFile);
+                savedQual.setName(currQual.getName()); savedQual.setScores(currQual.getScores());
                        }
+              
                        
                        string origSeq = currSeq.getUnaligned();
                        if (origSeq != "") {
@@ -353,13 +356,13 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                     int thisPrimerIndex = 0;
                     
                     if(numBarcodes != 0){
-                        thisSuccess = rtrimOligos->stripBarcode(currSeq, currQual, thisBarcodeIndex);
+                        thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);
                         if(thisSuccess > pDataArray->bdiffs)           {       thisTrashCode += 'b';   }
                         else{ thisCurrentSeqsDiffs += thisSuccess;  }
                     }
                     
                     if(pDataArray->numFPrimers != 0){
-                        thisSuccess = rtrimOligos->stripForward(currSeq, currQual, thisPrimerIndex, pDataArray->keepforward);
+                        thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, pDataArray->keepforward);
                         if(thisSuccess > pDataArray->pdiffs)           {       thisTrashCode += 'f';   }
                         else{ thisCurrentSeqsDiffs += thisSuccess;  }
                     }
@@ -372,9 +375,11 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                         currentSeqsDiffs = thisCurrentSeqsDiffs;
                         barcodeIndex = thisBarcodeIndex;
                         primerIndex = thisPrimerIndex;
-                        currSeq.reverseComplement();
+                        savedSeq.reverseComplement();
+                        currSeq.setAligned(savedSeq.getAligned());
                         if(pDataArray->qFileName != ""){
-                            currQual.flipQScores();
+                            savedQual.flipQScores();
+                            currQual.setScores(savedQual.getScores());
                         }
                     }
                 }