CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pfastq);
CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "","fasta",false,false); parameters.push_back(pfasta);
CommandParameter pqual("qfile", "Boolean", "", "T", "", "", "","qfile",false,false); parameters.push_back(pqual);
+ CommandParameter ppacbio("pacbio", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppacbio);
CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "sanger", "", "", "","",false,false,true); parameters.push_back(pformat);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=sanger.\n";
helpString += "The fasta parameter allows you to indicate whether you want a fasta file generated. Default=T.\n";
helpString += "The qfile parameter allows you to indicate whether you want a quality file generated. Default=T.\n";
+ helpString += "The pacbio parameter allows you to indicate .... When set to true, quality scores of 0 will results in a corresponding base of N. Default=F.\n";
helpString += "Example fastq.info(fastaq=test.fastaq).\n";
helpString += "Note: No spaces between parameter labels (i.e. fastq), '=' and yourFastQFile.\n";
return helpString;
fasta = m->isTrue(temp);
temp = validParameter.validFile(parameters, "qfile", false); if(temp == "not found"){ temp = "T"; }
- qual = m->isTrue(temp);
+ qual = m->isTrue(temp);
+
+ temp = validParameter.validFile(parameters, "pacbio", false); if(temp == "not found"){ temp = "F"; }
+ pacbio = m->isTrue(temp);
+
format = validParameter.validFile(parameters, "format", false); if (format == "not found"){ format = "sanger"; }
if (name2 != "") { if (name != name2) { m->mothurOut("[ERROR]: names do not match. read " + name + " for fasta and " + name2 + " for quality."); m->mothurOutEndLine(); m->control_pressed = true; break; } }
if (quality.length() != sequence.length()) { m->mothurOut("[ERROR]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; break; }
- //print sequence info to files
- if (fasta) { outFasta << ">" << name << endl << sequence << endl; }
-
- if (qual) {
- vector<int> qualScores = convertQual(quality);
+ vector<int> qualScores;
+ if (qual) {
+ qualScores = convertQual(quality);
outQual << ">" << name << endl;
for (int i = 0; i < qualScores.size(); i++) { outQual << qualScores[i] << " "; }
outQual << endl;
}
+
+ if (m->control_pressed) { break; }
+
+ if (pacbio) {
+ if (!qual) { qualScores = convertQual(quality); } //get scores if we didn't already
+ for (int i = 0; i < qualScores.size(); i++) {
+ if (qualScores[i] == 0){ sequence[i] = 'N'; }
+ }
+ }
+
+ //print sequence info to files
+ if (fasta) { outFasta << ">" << name << endl << sequence << endl; }
+
}
in.close();
if (fasta) { outFasta.close(); }
if (qual) { outQual.close(); }
- if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(fastaFile); m->mothurRemove(qualFile); return 0; }
+ if (m->control_pressed) { outputTypes.clear(); outputNames.clear(); m->mothurRemove(fastaFile); m->mothurRemove(qualFile); return 0; }
//set fasta file as new current fastafile
string current = "";
vector<string> outputNames;
string outputDir, fastaQFile, format;
- bool abort, fasta, qual;
+ bool abort, fasta, qual, pacbio;
vector<int> convertQual(string);
vector<char> convertTable;
void updateReverseMap(vector<vector<int> >&, int, int, int);
void setName(string n);
void setScores(vector<int> qs) { qScores = qs; seqLength = qScores.size(); }
-
+ vector<int> getScores() { return qScores; }
private:
for (map<int, oligosPair>::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) {
oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
rpairedPrimers[it->first] = tempPair;
+ //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << primerNameVector[it->first] << endl;
}
for (map<int, oligosPair>::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) {
oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
rpairedBarcodes[it->first] = tempPair;
+ //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl;
}
rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
}
Sequence currSeq(inFASTA); m->gobble(inFASTA);
//cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
+ Sequence savedSeq(currSeq.getName(), currSeq.getAligned());
- QualityScores currQual;
+ QualityScores currQual; QualityScores savedQual;
if(qFileName != ""){
currQual = QualityScores(qFile); m->gobble(qFile);
+ savedQual.setName(currQual.getName()); savedQual.setScores(currQual.getScores());
//cout << currQual.getName() << endl;
}
-
+
string origSeq = currSeq.getUnaligned();
if (origSeq != "") {
if(numFPrimers != 0){
success = trimOligos->stripForward(currSeq, currQual, primerIndex, keepforward);
- if(success > pdiffs) { trashCode += 'f'; }
+ if(success > pdiffs) {
+ //if (pairedOligos) { trashCode += trimOligos->getTrashCode(); }
+ //else { trashCode += 'f'; }
+ trashCode += 'f';
+ }
else{ currentSeqsDiffs += success; }
}
int thisPrimerIndex = 0;
if(numBarcodes != 0){
- thisSuccess = rtrimOligos->stripBarcode(currSeq, currQual, thisBarcodeIndex);
+ thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);
if(thisSuccess > bdiffs) { thisTrashCode += 'b'; }
else{ thisCurrentSeqsDiffs += thisSuccess; }
}
if(numFPrimers != 0){
- thisSuccess = rtrimOligos->stripForward(currSeq, currQual, thisPrimerIndex, keepforward);
- if(thisSuccess > pdiffs) { thisTrashCode += 'f'; }
+ thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, keepforward);
+ if(thisSuccess > pdiffs) {
+ //if (pairedOligos) { thisTrashCode += rtrimOligos->getTrashCode(); }
+ //else { thisTrashCode += 'f'; }
+ thisTrashCode += 'f';
+ }
else{ thisCurrentSeqsDiffs += thisSuccess; }
}
-
+
if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; }
if (thisTrashCode == "") {
currentSeqsDiffs = thisCurrentSeqsDiffs;
barcodeIndex = thisBarcodeIndex;
primerIndex = thisPrimerIndex;
- currSeq.reverseComplement();
+ savedSeq.reverseComplement();
+ currSeq.setAligned(savedSeq.getAligned());
if(qFileName != ""){
- currQual.flipQScores();
+ savedQual.flipQScores();
+ currQual.setScores(savedQual.getScores());
}
}
}
// get rest of line in case there is a primer name
while (!inOligos.eof()) {
char c = inOligos.get();
- if (c == 10 || c == 13){ break; }
+ if (c == 10 || c == 13 || c == -1){ break; }
else if (c == 32 || c == 9){;} //space or tab
else { group += c; }
}
string temp = "";
while (!inOligos.eof()) {
char c = inOligos.get();
- if (c == 10 || c == 13){ break; }
+ if (c == 10 || c == 13 || c == -1){ break; }
else if (c == 32 || c == 9){;} //space or tab
else { temp += c; }
}
int currentSeqsDiffs = 0;
Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA);
+ Sequence savedSeq(currSeq.getName(), currSeq.getAligned());
- QualityScores currQual;
+ QualityScores currQual; QualityScores savedQual;
if(pDataArray->qFileName != ""){
currQual = QualityScores(qFile); pDataArray->m->gobble(qFile);
+ savedQual.setName(currQual.getName()); savedQual.setScores(currQual.getScores());
}
+
string origSeq = currSeq.getUnaligned();
if (origSeq != "") {
int thisPrimerIndex = 0;
if(numBarcodes != 0){
- thisSuccess = rtrimOligos->stripBarcode(currSeq, currQual, thisBarcodeIndex);
+ thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);
if(thisSuccess > pDataArray->bdiffs) { thisTrashCode += 'b'; }
else{ thisCurrentSeqsDiffs += thisSuccess; }
}
if(pDataArray->numFPrimers != 0){
- thisSuccess = rtrimOligos->stripForward(currSeq, currQual, thisPrimerIndex, pDataArray->keepforward);
+ thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, pDataArray->keepforward);
if(thisSuccess > pDataArray->pdiffs) { thisTrashCode += 'f'; }
else{ thisCurrentSeqsDiffs += thisSuccess; }
}
currentSeqsDiffs = thisCurrentSeqsDiffs;
barcodeIndex = thisBarcodeIndex;
primerIndex = thisPrimerIndex;
- currSeq.reverseComplement();
+ savedSeq.reverseComplement();
+ currSeq.setAligned(savedSeq.getAligned());
if(pDataArray->qFileName != ""){
- currQual.flipQScores();
+ savedQual.flipQScores();
+ currQual.setScores(savedQual.getScores());
}
}
}