helpString += "The collect.single command parameters are list, sabund, rabund, shared, label, freq, calc and abund. list, sabund, rabund or shared is required unless you have a valid current file. \n";
helpString += "The collect.single command should be in the following format: \n";
helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
- helpString += "collect.single(label=yourLabel, iters=yourIters, freq=yourFreq, calc=yourEstimators).\n";
- helpString += "Example collect(label=unique-.01-.03, iters=10000, freq=10, calc=sobs-chao-ace-jack).\n";
+ helpString += "collect.single(label=yourLabel, freq=yourFreq, calc=yourEstimators).\n";
+ helpString += "Example collect(label=unique-.01-.03, freq=10, calc=sobs-chao-ace-jack).\n";
helpString += "The default values for freq is 100, and calc are sobs-chao-ace-jack-shannon-npshannon-simpson.\n";
helpString += validCalculator.printCalc("single");
helpString += "The label parameter is used to analyze specific labels in your input.\n";
return (left.index > right.index);
}
//********************************************************************************************************************
-//sorts highest to lowest
inline bool compareSpearman(spearmanRank left, spearmanRank right){
return (left.score < right.score);
}
}
return false;
}
-//********************************************************************************************************************
-//sorts lowest to highest
-inline bool compareSpearmanReverse(spearmanRank left, spearmanRank right){
- return (left.score < right.score);
-}
+
/************************************************************/
//sorts lowest to highest
inline bool compareDistLinePairs(distlinePair left, distlinePair right){
if (object == "cluster"){
mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. There are two common causes for this, file size and format.\n\nFile Size:\nThe cluster command loads your distance matrix into RAM, and your distance file is most likely too large to fit in RAM. There are two options to help with this. The first is to use a cutoff. By using a cutoff mothur will only load distances that are below the cutoff. If that is still not enough, there is a command called cluster.split, http://www.mothur.org/wiki/cluster.split which divides the distance matrix, and clusters the smaller pieces separately. You may also be able to reduce the size of the original distance matrix by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. \n\nWrong Format:\nThis error can be caused by trying to read a column formatted distance matrix using the phylip parameter. By default, the dist.seqs command generates a column formatted distance matrix. To make a phylip formatted matrix set the dist.seqs command parameter output to lt. \n\nIf you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
}else {
- mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. This is most commonly caused by trying to process a dataset too large, or a file format issue. If you are running our 32bit version, your memory usage is limited to 4G. If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue. Also, you may be able to reduce the size of your dataset by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
+ mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. This is most commonly caused by trying to process a dataset too large, using multiple processors, or a file format issue. If you are running our 32bit version, your memory usage is limited to 4G. If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Also, you may be able to reduce the size of your dataset by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
}
}
}
}
for (int i = index; i >= 0; i--) {
- newFileName = dirs[i] + "\\\\" + newFileName;
+ newFileName = dirs[i] + "\\" + newFileName;
}
return newFileName;
string individual = "";
int estimLength = estim.size();
bool prevEscape = false;
+ /*
for(int i=0;i<estimLength;i++){
if(prevEscape){
individual += estim[i];
}
}
}
- container.insert(individual);
+ */
+
+ for(int i=0;i<estimLength;i++){
+ if(estim[i] == '-'){
+ if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
+ else {
+ container.insert(individual);
+ individual = "";
+ }
+ }else if(estim[i] == '\\'){
+ if (i < estimLength-1) {
+ if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
+ else { individual += estim[i]; prevEscape = false; } //if no, add in
+ }else { individual += estim[i]; }
+ }else {
+ individual += estim[i];
+ }
+ }
+ container.insert(individual);
}
catch(exception& e) {
int lineNum;
int estimLength = estim.size();
bool prevEscape = false;
+ /*
for(int i=0;i<estimLength;i++){
if(prevEscape){
individual += estim[i];
prevEscape = false;
}
}
- }
+ }*/
+
+ for(int i=0;i<estimLength;i++){
+ if(estim[i] == '-'){
+ if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
+ else {
+ convert(individual, lineNum); //convert the string to int
+ container.insert(lineNum);
+ individual = "";
+ }
+ }else if(estim[i] == '\\'){
+ if (i < estimLength-1) {
+ if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
+ else { individual += estim[i]; prevEscape = false; } //if no, add in
+ }else { individual += estim[i]; }
+ }else {
+ individual += estim[i];
+ }
+ }
+
convert(individual, lineNum); //convert the string to int
container.insert(lineNum);
}
it->second = m->getDesignFile();
}else if (it->first == "sff") {
it->second = m->getSFFFile();
+ }else if (it->first == "flow") {
+ it->second = m->getFlowFile();
}else if (it->first == "oligos") {
it->second = m->getOligosFile();
}else if (it->first == "accnos") {
string name = m->getline(in); m->gobble(in);
if (name == "") { m->mothurOut("[ERROR]: Blank fasta name."); m->mothurOutEndLine(); m->control_pressed = true; break; }
else if (name[0] != '@') { m->mothurOut("[ERROR]: reading " + name + " expected a name with @ as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
- else { name = name.substr(1); }
+ else {
+ name = name.substr(1);
+ for (int i = 0; i < name.length(); i++) { if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; } }
+ }
//read sequence
string sequence = m->getline(in); m->gobble(in);
string name2 = m->getline(in); m->gobble(in);
if (name2 == "") { m->mothurOut("[ERROR]: Blank quality name."); m->mothurOutEndLine(); m->control_pressed = true; break; }
else if (name2[0] != '+') { m->mothurOut("[ERROR]: reading " + name2 + " expected a name with + as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
- else { name2 = name2.substr(1); }
+ else {
+ name2 = name2.substr(1);
+ for (int i = 0; i < name2.length(); i++) { if (name2[i] == ':') { name2[i] = '_'; m->changedSeqNames = true; } }
+ }
//read quality scores
string quality = m->getline(in); m->gobble(in);
bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
bool abort, keepprimer, keepdots;
string fastafile, oligosfile, taxfile, groupfile, namefile, countfile, ecolifile, outputDir, nomatch;
- int start, end, processors, length;
+ int start, end, processors, length, pdiffs;
vector<string> revPrimer, outputNames;
- vector<string> primers;
+ map<string, int> primers;
int writeAccnos(set<string>);
int readName(set<string>&);
bool readEcoli();
int driverPcr(string, string, string, set<string>&, linePair);
int createProcesses(string, string, string, set<string>&);
- bool findForward(Sequence&, int&, int&);
- bool findReverse(Sequence&, int&, int&);
bool isAligned(string, map<int, int>&);
- bool compareDNASeq(string, string);
string reverseOligo(string);
};
string goodFasta, badFasta, oligosfile, ecolifile, nomatch;
unsigned long long fstart;
unsigned long long fend;
- int count, start, end, length;
+ int count, start, end, length, pdiffs;
MothurOut* m;
vector<string> primers;
vector<string> revPrimer;
pcrData(){}
- pcrData(string f, string gf, string bfn, MothurOut* mout, string ol, string ec, vector<string> pr, vector<string> rpr, string nm, bool kp, bool kd, int st, int en, int l, unsigned long long fst, unsigned long long fen) {
+ pcrData(string f, string gf, string bfn, MothurOut* mout, string ol, string ec, vector<string> pr, vector<string> rpr, string nm, bool kp, bool kd, int st, int en, int l, int pd, unsigned long long fst, unsigned long long fen) {
filename = f;
goodFasta = gf;
badFasta = bfn;
length = l;
fstart = fst;
fend = fen;
+ pdiffs = pd;
count = 0;
}
};
}
set<int> lengths;
+ //pdiffs, bdiffs, primers, barcodes, revPrimers
+ map<string, int> faked;
+ TrimOligos trim(pdiffs, 0, primers, faked, revPrimer);
for(int i = 0; i < pDataArray->fend; i++){ //end is the number of sequences to process
pDataArray->count++;
//process primers
if (pDataArray->primers.size() != 0) {
int primerStart = 0; int primerEnd = 0;
- //bool good = findForward(currSeq, primerStart, primerEnd);
- ///////////////////////////////////////////////////////////////
- bool good = false;
- string rawSequence = currSeq.getUnaligned();
-
- for(int j=0;j<pDataArray->primers.size();j++){
- string oligo = pDataArray->primers[j];
-
- if (pDataArray->m->control_pressed) { primerStart = 0; primerEnd = 0; good = false; break; }
-
- if(rawSequence.length() < oligo.length()) { break; }
-
- //search for primer
- int olength = oligo.length();
- for (int l = 0; l < rawSequence.length()-olength; l++){
- if (pDataArray->m->control_pressed) { primerStart = 0; primerEnd = 0; good = false; break; }
- string rawChunk = rawSequence.substr(l, olength);
- //compareDNASeq(oligo, rawChunk)
- ////////////////////////////////////////////////////////
- bool success = 1;
- for(int k=0;k<olength;k++){
-
- if(oligo[k] != rawChunk[k]){
- if(oligo[k] == 'A' || oligo[k] == 'T' || oligo[k] == 'G' || oligo[k] == 'C') { success = 0; }
- else if((oligo[k] == 'N' || oligo[k] == 'I') && (rawChunk[k] == 'N')) { success = 0; }
- else if(oligo[k] == 'R' && (rawChunk[k] != 'A' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'Y' && (rawChunk[k] != 'C' && rawChunk[k] != 'T')) { success = 0; }
- else if(oligo[k] == 'M' && (rawChunk[k] != 'C' && rawChunk[k] != 'A')) { success = 0; }
- else if(oligo[k] == 'K' && (rawChunk[k] != 'T' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'W' && (rawChunk[k] != 'T' && rawChunk[k] != 'A')) { success = 0; }
- else if(oligo[k] == 'S' && (rawChunk[k] != 'C' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'B' && (rawChunk[k] != 'C' && rawChunk[k] != 'T' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'D' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'H' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'C')) { success = 0; }
- else if(oligo[k] == 'V' && (rawChunk[k] != 'A' && rawChunk[k] != 'C' && rawChunk[k] != 'G')) { success = 0; }
-
- if(success == 0) { break; }
- }
- else{
- success = 1;
- }
- }
-
- ////////////////////////////////////////////////////////////////////
- if(success) {
- primerStart = j;
- primerEnd = primerStart + olength;
- good = true; break;
- }
- }
- if (good) { break; }
- }
-
- if (!good) { primerStart = 0; primerEnd = 0; }
- ///////////////////////////////////////////////////////////////
-
+ bool good = trim.findForward(currSeq, primerStart, primerEnd);
if(!good){ if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "f"; }
else{
if (pDataArray->keepdots) { currSeq.filterToPos(mapAligned[primerStart]); }
else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart])); }
}
+ ///////////////////////////////////////////////////////////////
+ mapAligned.clear();
+ string seq = currSeq.getAligned();
+ int countBases = 0;
+ for (int k = 0; k < seq.length(); k++) {
+ if (!isalpha(seq[k])) { ; }
+ else { mapAligned[countBases] = k; countBases++; }
+ }
+ ///////////////////////////////////////////////////////////////
}else {
if (!pDataArray->keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); }
else { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); }
//process reverse primers
if (pDataArray->revPrimer.size() != 0) {
int primerStart = 0; int primerEnd = 0;
- bool good = false;
- //findReverse(currSeq, primerStart, primerEnd);
- ///////////////////////////////////////////////////////////////
- string rawSequence = currSeq.getUnaligned();
-
- for(int j=0;j<pDataArray->revPrimer.size();j++){
- string oligo = pDataArray->revPrimer[j];
- if (pDataArray->m->control_pressed) { primerStart = 0; primerEnd = 0; good = false; break; }
- if(rawSequence.length() < oligo.length()) { break; }
-
- //search for primer
- int olength = oligo.length();
- for (int l = rawSequence.length()-olength; l >= 0; l--){
-
- string rawChunk = rawSequence.substr(l, olength);
- //compareDNASeq(oligo, rawChunk)
- ////////////////////////////////////////////////////////
- bool success = 1;
- for(int k=0;k<olength;k++){
-
- if(oligo[k] != rawChunk[k]){
- if(oligo[k] == 'A' || oligo[k] == 'T' || oligo[k] == 'G' || oligo[k] == 'C') { success = 0; }
- else if((oligo[k] == 'N' || oligo[k] == 'I') && (rawChunk[k] == 'N')) { success = 0; }
- else if(oligo[k] == 'R' && (rawChunk[k] != 'A' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'Y' && (rawChunk[k] != 'C' && rawChunk[k] != 'T')) { success = 0; }
- else if(oligo[k] == 'M' && (rawChunk[k] != 'C' && rawChunk[k] != 'A')) { success = 0; }
- else if(oligo[k] == 'K' && (rawChunk[k] != 'T' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'W' && (rawChunk[k] != 'T' && rawChunk[k] != 'A')) { success = 0; }
- else if(oligo[k] == 'S' && (rawChunk[k] != 'C' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'B' && (rawChunk[k] != 'C' && rawChunk[k] != 'T' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'D' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'H' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'C')) { success = 0; }
- else if(oligo[k] == 'V' && (rawChunk[k] != 'A' && rawChunk[k] != 'C' && rawChunk[k] != 'G')) { success = 0; }
-
- if(success == 0) { break; }
- }
- else{
- success = 1;
- }
- }
-
- ////////////////////////////////////////////////////////////////////
- if(success) {
- primerStart = j;
- primerEnd = primerStart + olength;
- good = true; break;
- }
- }
- if (good) { break; }
- }
-
- if (!good) { primerStart = 0; primerEnd = 0; }
-
- ///////////////////////////////////////////////////////////////
+ bool good = trim.findReverse(currSeq, primerStart, primerEnd);
+
if(!good){ if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "r"; }
else{
//are you aligned
CommandParameter pstart("start", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pstart);
CommandParameter pend("end", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pend);
CommandParameter pnomatch("nomatch", "Multiple", "reject-keep", "reject", "", "", "","",false,false); parameters.push_back(pnomatch);
+ CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
+
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
CommandParameter pkeepprimer("keepprimer", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepprimer);
CommandParameter pkeepdots("keepdots", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pkeepdots);
try {
string helpString = "";
helpString += "The pcr.seqs command reads a fasta file.\n";
- helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, count, taxonomy, ecoli, start, end, nomatch, processors, keepprimer and keepdots.\n";
+ helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, count, taxonomy, ecoli, start, end, nomatch, pdiffs, processors, keepprimer and keepdots.\n";
helpString += "The ecoli parameter is used to provide a fasta file containing a single reference sequence (e.g. for e. coli) this must be aligned. Mothur will trim to the start and end positions of the reference sequence.\n";
helpString += "The start parameter allows you to provide a starting position to trim to.\n";
helpString += "The end parameter allows you to provide a ending position to trim from.\n";
helpString += "The processors parameter allows you to use multiple processors.\n";
helpString += "The keepprimer parameter allows you to keep the primer, default=false.\n";
helpString += "The keepdots parameter allows you to keep the leading and trailing .'s, default=true.\n";
+ helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Pcr.seqs .\n";
return helpString;
temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
m->setProcessors(temp);
m->mothurConvert(temp, processors);
+
+ temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
+ m->mothurConvert(temp, pdiffs);
nomatch = validParameter.validFile(parameters, "nomatch", false); if (nomatch == "not found") { nomatch = "reject"; }
length = 0;
- if(oligosfile != ""){ readOligos(); } if (m->control_pressed) { return 0; }
+ if(oligosfile != ""){ readOligos(); if (m->debug) { m->mothurOut("[DEBUG]: read oligos file. numprimers = " + toString(primers.size()) + ", revprimers = " + toString(revPrimer.size()) + ".\n"); } } if (m->control_pressed) { return 0; }
if(ecolifile != "") { readEcoli(); } if (m->control_pressed) { return 0; }
vector<unsigned long long> positions;
if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
// Allocate memory for thread data.
- pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, lines[i].start, lines[i].end);
+ pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, pdiffs, lines[i].start, lines[i].end);
pDataArray.push_back(tempPcr);
//default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
int count = 0;
set<int> lengths;
+ //pdiffs, bdiffs, primers, barcodes, revPrimers
+ map<string, int> faked;
+ TrimOligos trim(pdiffs, 0, primers, faked, revPrimer);
+
while (!done) {
if (m->control_pressed) { break; }
string trashCode = "";
if (currSeq.getName() != "") {
+ if (m->debug) { m->mothurOut("[DEBUG]: seq name = " + currSeq.getName() + ".\n"); }
+
bool goodSeq = true;
if (oligosfile != "") {
map<int, int> mapAligned;
//process primers
if (primers.size() != 0) {
int primerStart = 0; int primerEnd = 0;
- bool good = findForward(currSeq, primerStart, primerEnd);
+ bool good = trim.findForward(currSeq, primerStart, primerEnd);
if(!good){ if (nomatch == "reject") { goodSeq = false; } trashCode += "f"; }
else{
}
else {
if (keepdots) { currSeq.filterToPos(mapAligned[primerStart]); }
- else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart])); }
+ else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart])); }
}
+ isAligned(currSeq.getAligned(), mapAligned);
}else {
if (!keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); }
else { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); }
//process reverse primers
if (revPrimer.size() != 0) {
int primerStart = 0; int primerEnd = 0;
- bool good = findReverse(currSeq, primerStart, primerEnd);
+ bool good = trim.findReverse(currSeq, primerStart, primerEnd);
if(!good){ if (nomatch == "reject") { goodSeq = false; } trashCode += "r"; }
else{
- //are you aligned
+ //are you aligned
if (aligned) {
if (!keepprimer) {
if (keepdots) { currSeq.filterFromPos(mapAligned[primerStart]); }
}
}
//********************************************************************/
-bool PcrSeqsCommand::findForward(Sequence& seq, int& primerStart, int& primerEnd){
- try {
-
- string rawSequence = seq.getUnaligned();
-
- for(int j=0;j<primers.size();j++){
- string oligo = primers[j];
-
- if(rawSequence.length() < oligo.length()) { break; }
-
- //search for primer
- int olength = oligo.length();
- for (int j = 0; j < rawSequence.length()-olength; j++){
- if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
- string rawChunk = rawSequence.substr(j, olength);
- if(compareDNASeq(oligo, rawChunk)) {
- primerStart = j;
- primerEnd = primerStart + olength;
- return true;
- }
-
- }
- }
-
- primerStart = 0; primerEnd = 0;
- return false;
-
- }
- catch(exception& e) {
- m->errorOut(e, "TrimOligos", "stripForward");
- exit(1);
- }
-}
-//******************************************************************/
-bool PcrSeqsCommand::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
- try {
-
- string rawSequence = seq.getUnaligned();
-
- for(int i=0;i<revPrimer.size();i++){
- string oligo = revPrimer[i];
- if(rawSequence.length() < oligo.length()) { break; }
-
- //search for primer
- int olength = oligo.length();
- for (int j = rawSequence.length()-olength; j >= 0; j--){
- if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
- string rawChunk = rawSequence.substr(j, olength);
-
- if(compareDNASeq(oligo, rawChunk)) {
- primerStart = j;
- primerEnd = primerStart + olength;
- return true;
- }
-
- }
- }
-
- primerStart = 0; primerEnd = 0;
- return false;
- }
- catch(exception& e) {
- m->errorOut(e, "PcrSeqsCommand", "findReverse");
- exit(1);
- }
-}
-//********************************************************************/
bool PcrSeqsCommand::isAligned(string seq, map<int, int>& aligned){
try {
+ aligned.clear();
bool isAligned = false;
int countBases = 0;
m->openInputFile(oligosfile, inOligos);
string type, oligo, group;
+ int primerCount = 0;
while(!inOligos.eof()){
if (c == 10 || c == 13){ break; }
else if (c == 32 || c == 9){;} //space or tab
}
- primers.push_back(oligo);
+ primers[oligo] = primerCount; primerCount++;
}else if(type == "REVERSE"){
string oligoRC = reverseOligo(oligo);
revPrimer.push_back(oligoRC);
exit(1);
}
-}
-//******************************************************************/
-bool PcrSeqsCommand::compareDNASeq(string oligo, string seq){
- try {
- bool success = 1;
- int length = oligo.length();
-
- for(int i=0;i<length;i++){
-
- if(oligo[i] != seq[i]){
- if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C') { success = 0; }
- else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { success = 0; }
- else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { success = 0; }
- else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { success = 0; }
- else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { success = 0; }
- else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
- else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { success = 0; }
- else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
- else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
- else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
- else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { success = 0; }
- else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
-
- if(success == 0) { break; }
- }
- else{
- success = 1;
- }
- }
-
- return success;
- }
- catch(exception& e) {
- m->errorOut(e, "PcrSeqsCommand", "compareDNASeq");
- exit(1);
- }
-
}
//***************************************************************************************************************
int PcrSeqsCommand::readName(set<string>& names){
int score;
seqName = getSequenceName(qFile);
+ if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "'\n."); }
+
if (!m->control_pressed) {
string qScoreString = m->getline(qFile);
- //cout << qScoreString << endl;
+
+ if (m->debug) { m->mothurOut("[DEBUG]: scores = '" + qScoreString + "'\n."); }
+
while(qFile.peek() != '>' && qFile.peek() != EOF){
if (m->control_pressed) { break; }
string temp = m->getline(qFile);
- //cout << temp << endl;
+ if (m->debug) { m->mothurOut("[DEBUG]: scores = '" + temp + "'\n."); }
qScoreString += ' ' + temp;
}
//cout << "done reading " << endl;
string temp;
qScoreStringStream >> temp; m->gobble(qScoreStringStream);
+ if (m->debug) { m->mothurOut("[DEBUG]: score " + toString(qScores.size()) + " = '" + temp + "'\n."); }
+
//check temp to make sure its a number
if (!m->isContainingOnlyDigits(temp)) { m->mothurOut("[ERROR]: In sequence " + seqName + "'s quality scores, expected a number and got " + temp + ", setting score to 0."); m->mothurOutEndLine(); temp = "0"; }
convert(temp, score);
string sname = ""; nameStream >> sname;
sname = sname.substr(1);
+
+ for (int i = 0; i < sname.length(); i++) {
+ if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; }
+ }
map<string, int>::iterator it = firstSeqNames.find(sname);
istringstream nameStream(input);
string sname = ""; nameStream >> sname;
+ for (int i = 0; i < sname.length(); i++) {
+ if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; }
+ }
+
map<string, int>::iterator it = firstSeqNamesReport.find(sname);
if(it != firstSeqNamesReport.end()) { //this is the start of a new chunk
if (trim) {
if(header.clipQualRight < header.clipQualLeft){
- seq = "NNNN";
+ if (header.clipQualRight == 0) { //don't trim right
+ seq = seq.substr(header.clipQualLeft-1);
+ }else {
+ seq = "NNNN";
+ }
}
else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));
if (trim) {
if(header.clipQualRight < header.clipQualLeft){
- seq = "NNNN";
+ if (header.clipQualRight == 0) { //don't trim right
+ seq = seq.substr(header.clipQualLeft-1);
+ }else {
+ seq = "NNNN";
+ }
}
else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));
if (trim) {
if(header.clipQualRight < header.clipQualLeft){
- out << ">" << header.name << " xy=" << header.xy << endl;
- out << "0\t0\t0\t0";
+ if (header.clipQualRight == 0) { //don't trim right
+ out << ">" << header.name << " xy=" << header.xy << " length=" << (read.qualScores.size()-header.clipQualLeft) << endl;
+ for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) { out << read.qualScores[i] << '\t'; }
+ }else {
+ out << ">" << header.name << " xy=" << header.xy << endl;
+ out << "0\t0\t0\t0";
+ }
}
else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;
//**********************************************************************************************************************
int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) {
try {
+ if (header.clipQualRight == 0) { header.clipQualRight = read.flowgram.size(); }
+
if(header.clipQualRight > header.clipQualLeft){
int rightIndex = 0;
barcodes = br;
primers = pr;
revPrimer = r;
+
+ maxFBarcodeLength = 0;
+ for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
+ string oligo = it->first;
+ if(oligo.length() > maxFBarcodeLength){
+ maxFBarcodeLength = oligo.length();
+ }
+ }
+ maxRBarcodeLength = maxFBarcodeLength;
+
+ maxFPrimerLength = 0;
+ for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
+ string oligo = it->first;
+ if(oligo.length() > maxFPrimerLength){
+ maxFPrimerLength = oligo.length();
+ }
+ }
+ maxRPrimerLength = maxFPrimerLength;
}
catch(exception& e) {
m->errorOut(e, "TrimOligos", "TrimOligos");
}
/********************************************************************/
TrimOligos::~TrimOligos() {}
+//********************************************************************/
+bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){
+ try {
+
+ string rawSequence = seq.getUnaligned();
+
+ for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
+ string oligo = it->first;
+
+ if(rawSequence.length() < oligo.length()) { break; }
+
+ //search for primer
+ int olength = oligo.length();
+ for (int j = 0; j < rawSequence.length()-olength; j++){
+ if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
+ string rawChunk = rawSequence.substr(j, olength);
+ if(compareDNASeq(oligo, rawChunk)) {
+ primerStart = j;
+ primerEnd = primerStart + olength;
+ return true;
+ }
+
+ }
+ }
+
+ primerStart = 0; primerEnd = 0;
+ //if you don't want to allow for diffs
+ if (pdiffs == 0) { return false; }
+ else { //try aligning and see if you can find it
+
+ //can you find the barcode
+ int minDiff = 1e6;
+ int minCount = 1;
+
+ Alignment* alignment;
+ if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
+ else{ alignment = NULL; }
+
+ for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
+
+ string prim = it->first;
+ //search for primer
+ int olength = prim.length();
+ if (rawSequence.length() < olength) {} //ignore primers too long for this seq
+ else{
+ for (int j = 0; j < rawSequence.length()-olength; j++){
+
+ string oligo = it->first;
+
+ if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
+
+ string rawChunk = rawSequence.substr(j, olength+pdiffs);
+
+ //use needleman to align first primer.length()+numdiffs of sequence to each barcode
+ alignment->align(oligo, rawChunk);
+ oligo = alignment->getSeqAAln();
+ string temp = alignment->getSeqBAln();
+
+ int alnLength = oligo.length();
+
+ for(int i=oligo.length()-1;i>=0;i--){
+ if(oligo[i] != '-'){ alnLength = i+1; break; }
+ }
+ oligo = oligo.substr(0,alnLength);
+ temp = temp.substr(0,alnLength);
+
+ int numDiff = countDiffs(oligo, temp);
+
+ if(numDiff < minDiff){
+ minDiff = numDiff;
+ minCount = 1;
+ primerStart = j;
+ primerEnd = primerStart + alnLength;
+ }else if(numDiff == minDiff){ minCount++; }
+ }
+ }
+ }
+
+ if (alignment != NULL) { delete alignment; }
+
+ if(minDiff > pdiffs) { primerStart = 0; primerEnd = 0; return false; } //no good matches
+ else if(minCount > 1) { primerStart = 0; primerEnd = 0; return false; } //can't tell the difference between multiple primers
+ else{ return true; }
+ }
+
+ primerStart = 0; primerEnd = 0;
+ return false;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "TrimOligos", "stripForward");
+ exit(1);
+ }
+}
+//******************************************************************/
+bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
+ try {
+
+ string rawSequence = seq.getUnaligned();
+
+ for(int i=0;i<revPrimer.size();i++){
+ string oligo = revPrimer[i];
+ if(rawSequence.length() < oligo.length()) { break; }
+
+ //search for primer
+ int olength = oligo.length();
+ for (int j = rawSequence.length()-olength; j >= 0; j--){
+ if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
+ string rawChunk = rawSequence.substr(j, olength);
+
+ if(compareDNASeq(oligo, rawChunk)) {
+ primerStart = j;
+ primerEnd = primerStart + olength;
+ return true;
+ }
+
+ }
+ }
+
+ primerStart = 0; primerEnd = 0;
+ return false;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PcrSeqsCommand", "findReverse");
+ exit(1);
+ }
+}
//*******************************************************************/
+
int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
try {
bool stripSpacer(Sequence&);
bool stripSpacer(Sequence&, QualityScores&);
+
+ //seq, primerStart, primerEnd
+ bool findForward(Sequence&, int&, int&);
+ bool findReverse(Sequence&, int&, int&);
+
private: