}
}
-/***********************************************************************/
\ No newline at end of file
+/***********************************************************************/
Alignment();
virtual ~Alignment();
virtual void align(string, string) = 0;
+ virtual void alignPrimer(string, string) {}
// float getAlignmentScore();
int start = time(NULL);
m->mothurOutEndLine();
m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t"); cout.flush();
- bool aligned = false;
+ //bool aligned = false;
int tempLength = 0;
#ifdef USE_MPI
public:
Cluster(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string);
+ virtual ~Cluster() {}
virtual void update(double&);
virtual string getTag() = 0;
virtual void setMapWanted(bool m);
-#endif
\ No newline at end of file
+#endif
}else {
-
+
InputData* input = new InputData(listfile, "list");
ListVector* list = input->getListVector();
outSummary << "OTU#\tPositioninAlignment\tA\tT\tG\tC\tGap\tNumberofSeqs\tConsensusBase" << endl;
+ string snumBins = toString(list->getNumBins());
for (int i = 0; i < list->getNumBins(); i++) {
if (m->control_pressed) { outSummary.close(); outName.close(); outFasta.close(); return 0; }
string bin = list->get(i);
string consSeq = getConsSeq(bin, outSummary, i);
+
+ string seqName = "Otu";
+ string sbinNumber = toString(i+1);
+ if (sbinNumber.length() < snumBins.length()) {
+ int diff = snumBins.length() - sbinNumber.length();
+ for (int h = 0; h < diff; h++) { seqName += "0"; }
+ }
+ seqName += sbinNumber;
- outFasta << ">seq" << (i+1) << endl << consSeq << endl;
- outName << "seq" << (i+1) << '\t' << "seq" << (i+1) << "," << bin << endl;
+ outFasta << ">" << seqName << endl << consSeq << endl;
+ outName << seqName << '\t' << seqName << "," << bin << endl;
}
outSummary.close(); outName.close(); outFasta.close();
// levelProbabilityOutput << tree[indices[i][maxIndex[i]]]->getName() << '(' << setprecision(6) << pLevel_X[i] << ");";
}
else{
- taxonProbabilityString += "unclassified" + '(' + toString(confidenceScore) + ");";
+ taxonProbabilityString += "unclassified(" + toString(confidenceScore) + ");";
// levelProbabilityOutput << "unclassified" << '(' << setprecision(6) << pLevel_X[i] << ");";
simpleTax += "unclassified;";
}
}
//**********************************************************************************************************************
//**********************************************************************************************************************
-//**********************************************************************************************************************
\ No newline at end of file
+//**********************************************************************************************************************
+
public:
Libshuff(FullMatrix*, int, float, float);
+ virtual ~Libshuff() {}
virtual vector<vector<double> > evaluateAll() = 0;
virtual float evaluatePair(int,int) = 0;
void randomizeGroups(int, int);
}else { //bad alloc
if (object == "cluster"){
mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. There are two common causes for this, file size and format.\n\nFile Size:\nThe cluster command loads your distance matrix into RAM, and your distance file is most likely too large to fit in RAM. There are two options to help with this. The first is to use a cutoff. By using a cutoff mothur will only load distances that are below the cutoff. If that is still not enough, there is a command called cluster.split, http://www.mothur.org/wiki/cluster.split which divides the distance matrix, and clusters the smaller pieces separately. You may also be able to reduce the size of the original distance matrix by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. \n\nWrong Format:\nThis error can be caused by trying to read a column formatted distance matrix using the phylip parameter. By default, the dist.seqs command generates a column formatted distance matrix. To make a phylip formatted matrix set the dist.seqs command parameter output to lt. \n\nIf you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
+ }else if (object == "shhh.flows"){
+ mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. The shhh.flows command is very memory intensive. This error is most commonly caused by trying to process a dataset too large, using multiple processors, or failing to run trim.flows before shhh.flows. If you are running our 32bit version, your memory usage is limited to 4G. If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Running trim.flows with an oligos file, and then shhh.flows with the file option may also resolve the issue. If for some reason you are unable to run shhh.flows with your data, a good alternative is to use the trim.seqs command using a 50-bp sliding window and to trim the sequence when the average quality score over that window drops below 35. Our results suggest that the sequencing error rates by this method are very good, but not quite as good as by shhh.flows and that the resulting sequences tend to be a bit shorter. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry. ");
}else {
mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. This is most commonly caused by trying to process a dataset too large, using multiple processors, or a file format issue. If you are running our 32bit version, your memory usage is limited to 4G. If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Also, you may be able to reduce the size of your dataset by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
}
}
}
+/**************************************************************************************************/
+
+void NeedlemanOverlap::alignPrimer(string A, string B){
+ try {
+
+ seqA = ' ' + A; lA = seqA.length(); // algorithm requires a dummy space at the beginning of each string
+ seqB = ' ' + B; lB = seqB.length(); // algorithm requires a dummy space at the beginning of each string
+
+ if (lA > nRows) { m->mothurOut("One of your candidate sequences is longer than you longest template sequence. Your longest template sequence is " + toString(nRows) + ". Your candidate is " + toString(lA) + "."); m->mothurOutEndLine(); }
+
+ for(int i=1;i<lB;i++){ // This code was largely translated from Perl code provided in Ex 3.1
+
+ for(int j=1;j<lA;j++){ // of the O'Reilly BLAST book. I found that the example output had a
+
+ // number of errors
+ float diagonal;
+ if(isEquivalent(seqB[i],seqA[j])) { diagonal = alignment[i-1][j-1].cValue + match; }
+ else { diagonal = alignment[i-1][j-1].cValue + mismatch; }
+
+ float up = alignment[i-1][j].cValue + gap;
+ float left = alignment[i][j-1].cValue + gap;
+
+ if(diagonal >= up){
+ if(diagonal >= left){
+ alignment[i][j].cValue = diagonal;
+ alignment[i][j].prevCell = 'd';
+ }
+ else{
+ alignment[i][j].cValue = left;
+ alignment[i][j].prevCell = 'l';
+ }
+ }
+ else{
+ if(up >= left){
+ alignment[i][j].cValue = up;
+ alignment[i][j].prevCell = 'u';
+ }
+ else{
+ alignment[i][j].cValue = left;
+ alignment[i][j].prevCell = 'l';
+ }
+ }
+ }
+ }
+
+ Overlap over;
+ over.setOverlap(alignment, lA, lB, 0); // Fix gaps at the beginning and end of the sequences
+ traceBack(); // Traceback the alignment to populate seqAaln and seqBaln
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "NeedlemanOverlap", "align");
+ exit(1);
+ }
+
+}
+//********************************************************************/
+bool NeedlemanOverlap::isEquivalent(char oligo, char seq){
+ try {
+
+ bool same = true;
+
+ oligo = toupper(oligo);
+ seq = toupper(seq);
+
+ if(oligo != seq){
+ if(oligo == 'A' && (seq != 'A' && seq != 'M' && seq != 'R' && seq != 'W' && seq != 'D' && seq != 'H' && seq != 'V')) { same = false; }
+ else if(oligo == 'C' && (seq != 'C' && seq != 'Y' && seq != 'M' && seq != 'S' && seq != 'B' && seq != 'H' && seq != 'V')) { same = false; }
+ else if(oligo == 'G' && (seq != 'G' && seq != 'R' && seq != 'K' && seq != 'S' && seq != 'B' && seq != 'D' && seq != 'V')) { same = false; }
+ else if(oligo == 'T' && (seq != 'T' && seq != 'Y' && seq != 'K' && seq != 'W' && seq != 'B' && seq != 'D' && seq != 'H')) { same = false; }
+ else if((oligo == '.' || oligo == '-')) { same = false; }
+ else if((oligo == 'N' || oligo == 'I') && (seq == 'N')) { same = false; }
+ else if(oligo == 'R' && (seq != 'A' && seq != 'G')) { same = false; }
+ else if(oligo == 'Y' && (seq != 'C' && seq != 'T')) { same = false; }
+ else if(oligo == 'M' && (seq != 'C' && seq != 'A')) { same = false; }
+ else if(oligo == 'K' && (seq != 'T' && seq != 'G')) { same = false; }
+ else if(oligo == 'W' && (seq != 'T' && seq != 'A')) { same = false; }
+ else if(oligo == 'S' && (seq != 'C' && seq != 'G')) { same = false; }
+ else if(oligo == 'B' && (seq != 'C' && seq != 'T' && seq != 'G')) { same = false; }
+ else if(oligo == 'D' && (seq != 'A' && seq != 'T' && seq != 'G')) { same = false; }
+ else if(oligo == 'H' && (seq != 'A' && seq != 'T' && seq != 'C')) { same = false; }
+ else if(oligo == 'V' && (seq != 'A' && seq != 'C' && seq != 'G')) { same = false; }
+ }
+
+
+ return same;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "TrimOligos", "countDiffs");
+ exit(1);
+ }
+}
/**************************************************************************************************/
NeedlemanOverlap(float, float, float, int);
~NeedlemanOverlap();
void align(string, string);
-
+ void alignPrimer(string, string);
private:
float gap;
float match;
float mismatch;
+ bool isEquivalent(char, char);
};
/**************************************************************************************************/
if (mismatch > pDataArray->diffs) { mismatch = length; break; } //to far to cluster
}
- if (mismatch <= diffs) {
+ if (mismatch <= pDataArray->diffs) {
//merge
alignSeqs[j].names += ',' + alignSeqs[i].names;
alignSeqs[j].numIdentical += alignSeqs[i].numIdentical;
m->errorOut(e, "RegularizedRandomForest", "updateGlobalOutOfBagEstimates");
exit(1);
}
-}
\ No newline at end of file
+}
double begClock = clock();
unsigned long long begTime;
+ string fileNameForOutput = filenames[i];
+
for (int g = 0; g < theseFlowFileNames.size(); g++) {
string flowFileName = theseFlowFileNames[g];
begTime = time(NULL);
- flowDistParentFork(numFlowCells, distFileName, numUniques, mapUniqueToSeq, mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);
+ flowDistParentFork(numFlowCells, distFileName, numUniques, mapUniqueToSeq, mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);
m->mothurOutEndLine();
m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
}
numCompleted++;
- m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
+ m->mothurOut("Total time to process " + fileNameForOutput + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
}
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
{
try {
//need to transpose so we can compare rows (row-major order)
- int tmpnrows = nrows;
+ //int tmpnrows = nrows;
vector<vector<int> > tmpmatrix;
vector<int> tmprow;
TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
try {
m = MothurOut::getInstance();
+ paired = false;
pdiffs = p;
bdiffs = b;
bdiffs = b;
ldiffs = l;
sdiffs = s;
+ paired = true;
maxFBarcodeLength = 0;
for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
barcodes = br;
primers = pr;
revPrimer = r;
+ paired = false;
maxFBarcodeLength = 0;
for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
string rawChunk = rawSequence.substr(j, olength+pdiffs);
//use needleman to align first primer.length()+numdiffs of sequence to each barcode
- alignment->align(oligo, rawChunk);
+ alignment->alignPrimer(oligo, rawChunk);
oligo = alignment->getSeqAAln();
string temp = alignment->getSeqBAln();
int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
try {
+ if (paired) { int success = stripPairedBarcode(seq, qual, group); return success; }
+
string rawSequence = seq.getUnaligned();
int success = bdiffs + 1; //guilty until proven innocent
}
//use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
+ alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
oligo = alignment->getSeqAAln();
string temp = alignment->getSeqBAln();
}
//cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
//use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->align(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
+ alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
oligo = alignment->getSeqAAln();
string temp = alignment->getSeqBAln();
}
//use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->align(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
+ alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
oligo = alignment->getSeqAAln();
string temp = alignment->getSeqBAln();
}
//cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
//use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->align(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
+ alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
oligo = alignment->getSeqAAln();
string temp = alignment->getSeqBAln();
}
//use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->align(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
+ alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
oligo = alignment->getSeqAAln();
string temp = alignment->getSeqBAln();
}
}
+//*******************************************************************/
+int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& group){
+ try {
+ //look for forward barcode
+ string rawSeq = seq.getUnaligned();
+ int success = bdiffs + 1; //guilty until proven innocent
+ //cout << seq.getName() << endl;
+ //can you find the forward barcode
+ for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
+ string foligo = it->second.forward;
+ string roligo = it->second.reverse;
+ //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
+ //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
+ if(rawSeq.length() < foligo.length()){ //let's just assume that the barcodes are the same length
+ success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
+ break;
+ }
+ if(rawSeq.length() < roligo.length()){ //let's just assume that the barcodes are the same length
+ success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
+ break;
+ }
+
+ if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
+ group = it->first;
+ string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
+ seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
+ if(qual.getName() != ""){
+ qual.trimQScores(-1, rawSeq.length()-roligo.length());
+ qual.trimQScores(foligo.length(), -1);
+ }
+ success = 0;
+ break;
+ }
+ }
+ //cout << "success=" << success << endl;
+ //if you found the barcode or if you don't want to allow for diffs
+ if ((bdiffs == 0) || (success == 0)) { return success; }
+ else { //try aligning and see if you can find it
+ Alignment* alignment;
+ if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
+ else{ alignment = NULL; }
+
+ //can you find the barcode
+ int minDiff = 1e6;
+ int minCount = 1;
+ vector< vector<int> > minFGroup;
+ vector<int> minFPos;
+
+ //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
+ /*
+ 1 Sarah Westcott
+ 2 John Westcott
+ 3 Anna Westcott
+ 4 Sarah Schloss
+ 5 Pat Schloss
+ 6 Gail Brown
+ 7 Pat Moore
+
+ only want to look for forward = Sarah, John, Anna, Pat, Gail
+ reverse = Westcott, Schloss, Brown, Moore
+
+ but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
+ */
+ //cout << endl << forwardSeq.getName() << endl;
+ for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
+ string oligo = it->first;
+
+ if(rawSeq.length() < maxFBarcodeLength){ //let's just assume that the barcodes are the same length
+ success = bdiffs + 10;
+ break;
+ }
+ //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
+ //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+ alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+bdiffs));
+ oligo = alignment->getSeqAAln();
+ string temp = alignment->getSeqBAln();
+
+ int alnLength = oligo.length();
+
+ for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
+ oligo = oligo.substr(0,alnLength);
+ temp = temp.substr(0,alnLength);
+ int numDiff = countDiffs(oligo, temp);
+
+ if (alnLength == 0) { numDiff = bdiffs + 100; }
+ //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
+
+ if(numDiff < minDiff){
+ minDiff = numDiff;
+ minCount = 1;
+ minFGroup.clear();
+ minFGroup.push_back(it->second);
+ int tempminFPos = 0;
+ minFPos.clear();
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminFPos++;
+ }
+ }
+ minFPos.push_back(tempminFPos);
+ }else if(numDiff == minDiff){
+ minFGroup.push_back(it->second);
+ int tempminFPos = 0;
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminFPos++;
+ }
+ }
+ minFPos.push_back(tempminFPos);
+ }
+ }
+
+ //cout << minDiff << '\t' << minCount << '\t' << endl;
+ if(minDiff > bdiffs) { success = minDiff; } //no good matches
+ else{
+ //check for reverse match
+ if (alignment != NULL) { delete alignment; }
+
+ if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
+ else{ alignment = NULL; }
+
+ //can you find the barcode
+ minDiff = 1e6;
+ minCount = 1;
+ vector< vector<int> > minRGroup;
+ vector<int> minRPos;
+
+ string rawRSequence = reverseOligo(seq.getUnaligned());
+ //cout << irbarcodes.size() << '\t' << maxRBarcodeLength << endl;
+ for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
+ string oligo = reverseOligo(it->first);
+ //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+bdiffs)) << endl;
+ if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length
+ success = bdiffs + 10;
+ break;
+ }
+
+ //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+ alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
+ oligo = alignment->getSeqAAln();
+ string temp = alignment->getSeqBAln();
+
+ int alnLength = oligo.length();
+ for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
+ oligo = oligo.substr(0,alnLength);
+ temp = temp.substr(0,alnLength);
+ int numDiff = countDiffs(oligo, temp);
+ if (alnLength == 0) { numDiff = bdiffs + 100; }
+
+ //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
+ if(numDiff < minDiff){
+ minDiff = numDiff;
+ minCount = 1;
+ minRGroup.clear();
+ minRGroup.push_back(it->second);
+ int tempminRPos = 0;
+ minRPos.clear();
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminRPos++;
+ }
+ }
+ minRPos.push_back(tempminRPos);
+ }else if(numDiff == minDiff){
+ int tempminRPos = 0;
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminRPos++;
+ }
+ }
+ minRPos.push_back(tempminRPos);
+ minRGroup.push_back(it->second);
+ }
+
+ }
+
+ /*cout << minDiff << '\t' << minCount << '\t' << endl;
+ for (int i = 0; i < minFGroup.size(); i++) {
+ cout << i << '\t';
+ for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
+ cout << endl;
+ }
+ cout << endl;
+ for (int i = 0; i < minRGroup.size(); i++) {
+ cout << i << '\t';
+ for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
+ cout << endl;
+ }
+ cout << endl;*/
+ if(minDiff > bdiffs) { success = minDiff; } //no good matches
+ else {
+ bool foundMatch = false;
+ vector<int> matches;
+ for (int i = 0; i < minFGroup.size(); i++) {
+ for (int j = 0; j < minFGroup[i].size(); j++) {
+ for (int k = 0; k < minRGroup.size(); k++) {
+ if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
+ }
+ }
+ }
+
+ int fStart = 0;
+ int rStart = 0;
+ if (matches.size() == 1) {
+ foundMatch = true;
+ group = matches[0];
+ fStart = minFPos[0];
+ rStart = rawSeq.length() - minRPos[0];
+ }
+
+ //we have an acceptable match for the forward and reverse, but do they match?
+ if (foundMatch) {
+ string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
+ seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
+ if(qual.getName() != ""){
+ qual.trimQScores(-1, rStart);
+ qual.trimQScores(fStart, -1);
+ }
+ success = minDiff;
+ //cout << "barcode = " << ipbarcodes[group].forward << '\t' << ipbarcodes[group].reverse << endl;
+ }else { success = bdiffs + 100; }
+ }
+ }
+
+ if (alignment != NULL) { delete alignment; }
+ }
+
+ return success;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "TrimOligos", "stripPairedBarcode");
+ exit(1);
+ }
+
+}
+//*******************************************************************/
+int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
+ try {
+ //look for forward
+ string rawSeq = seq.getUnaligned();
+ int success = pdiffs + 1; //guilty until proven innocent
+ //cout << seq.getName() << endl;
+ //can you find the forward
+ for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
+ string foligo = it->second.forward;
+ string roligo = it->second.reverse;
+
+ //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
+ //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
+ if(rawSeq.length() < foligo.length()){ //let's just assume that the barcodes are the same length
+ success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
+ break;
+ }
+ if(rawSeq.length() < roligo.length()){ //let's just assume that the barcodes are the same length
+ success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
+ break;
+ }
+
+ if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
+ group = it->first;
+ if (!keepForward) {
+ string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
+ seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
+ if(qual.getName() != ""){
+ qual.trimQScores(-1, rawSeq.length()-roligo.length());
+ qual.trimQScores(foligo.length(), -1);
+ }
+ }
+ success = 0;
+ break;
+ }
+ }
+ //cout << "success=" << success << endl;
+ //if you found the barcode or if you don't want to allow for diffs
+ if ((pdiffs == 0) || (success == 0)) { return success; }
+ else { //try aligning and see if you can find it
+ Alignment* alignment;
+ if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
+ else{ alignment = NULL; }
+
+ //can you find the barcode
+ int minDiff = 1e6;
+ int minCount = 1;
+ vector< vector<int> > minFGroup;
+ vector<int> minFPos;
+
+ //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
+ /*
+ 1 Sarah Westcott
+ 2 John Westcott
+ 3 Anna Westcott
+ 4 Sarah Schloss
+ 5 Pat Schloss
+ 6 Gail Brown
+ 7 Pat Moore
+
+ only want to look for forward = Sarah, John, Anna, Pat, Gail
+ reverse = Westcott, Schloss, Brown, Moore
+
+ but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
+ */
+ //cout << endl << forwardSeq.getName() << endl;
+ for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
+ string oligo = it->first;
+
+ if(rawSeq.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
+ success = pdiffs + 10;
+ break;
+ }
+ //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
+ //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+ alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs));
+ oligo = alignment->getSeqAAln();
+ string temp = alignment->getSeqBAln();
+
+ int alnLength = oligo.length();
+
+ for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
+ oligo = oligo.substr(0,alnLength);
+ temp = temp.substr(0,alnLength);
+ int numDiff = countDiffs(oligo, temp);
+
+ if (alnLength == 0) { numDiff = pdiffs + 100; }
+ //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
+
+ if(numDiff < minDiff){
+ minDiff = numDiff;
+ minCount = 1;
+ minFGroup.clear();
+ minFGroup.push_back(it->second);
+ int tempminFPos = 0;
+ minFPos.clear();
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminFPos++;
+ }
+ }
+ minFPos.push_back(tempminFPos);
+ }else if(numDiff == minDiff){
+ minFGroup.push_back(it->second);
+ int tempminFPos = 0;
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminFPos++;
+ }
+ }
+ minFPos.push_back(tempminFPos);
+ }
+ }
+
+ //cout << minDiff << '\t' << minCount << '\t' << endl;
+ if(minDiff > pdiffs) { success = minDiff; } //no good matches
+ else{
+ //check for reverse match
+ if (alignment != NULL) { delete alignment; }
+
+ if (irprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
+ else{ alignment = NULL; }
+
+ //can you find the barcode
+ minDiff = 1e6;
+ minCount = 1;
+ vector< vector<int> > minRGroup;
+ vector<int> minRPos;
+
+ string rawRSequence = reverseOligo(seq.getUnaligned());
+
+ for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
+ string oligo = reverseOligo(it->first);
+ //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+pdiffs)) << endl;
+ if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length
+ success = pdiffs + 10;
+ break;
+ }
+
+ //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+ alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
+ oligo = alignment->getSeqAAln();
+ string temp = alignment->getSeqBAln();
+
+ int alnLength = oligo.length();
+ for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
+ oligo = oligo.substr(0,alnLength);
+ temp = temp.substr(0,alnLength);
+ int numDiff = countDiffs(oligo, temp);
+ if (alnLength == 0) { numDiff = pdiffs + 100; }
+
+ //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
+ if(numDiff < minDiff){
+ minDiff = numDiff;
+ minCount = 1;
+ minRGroup.clear();
+ minRGroup.push_back(it->second);
+ int tempminRPos = 0;
+ minRPos.clear();
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminRPos++;
+ }
+ }
+ minRPos.push_back(tempminRPos);
+ }else if(numDiff == minDiff){
+ int tempminRPos = 0;
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminRPos++;
+ }
+ }
+ minRPos.push_back(tempminRPos);
+ minRGroup.push_back(it->second);
+ }
+
+ }
+
+ /*cout << minDiff << '\t' << minCount << '\t' << endl;
+ for (int i = 0; i < minFGroup.size(); i++) {
+ cout << i << '\t';
+ for (int j = 0; j < minFGroup[i].size(); j++) { cout << minFGroup[i][j] << " "; }
+ cout << endl;
+ }
+ cout << endl;
+ for (int i = 0; i < minRGroup.size(); i++) {
+ cout << i << '\t';
+ for (int j = 0; j < minRGroup[i].size(); j++) { cout << minRGroup[i][j] << " "; }
+ cout << endl;
+ }
+ cout << endl;*/
+ if(minDiff > pdiffs) { success = minDiff; } //no good matches
+ else {
+ bool foundMatch = false;
+ vector<int> matches;
+ for (int i = 0; i < minFGroup.size(); i++) {
+ for (int j = 0; j < minFGroup[i].size(); j++) {
+ for (int k = 0; k < minRGroup.size(); k++) {
+ if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
+ }
+ }
+ }
+
+ int fStart = 0;
+ int rStart = 0;
+ if (matches.size() == 1) {
+ foundMatch = true;
+ group = matches[0];
+ fStart = minFPos[0];
+ rStart = rawSeq.length() - minRPos[0];
+ }
+
+ //we have an acceptable match for the forward and reverse, but do they match?
+ if (foundMatch) {
+ if (!keepForward) {
+ string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
+ seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
+ if(qual.getName() != ""){
+ qual.trimQScores(-1, rStart);
+ qual.trimQScores(fStart, -1);
+ }
+ }
+ success = minDiff;
+ //cout << "primer = " << ipprimers[group].forward << '\t' << ipprimers[group].reverse << endl;
+ }else { success = pdiffs + 100; }
+ }
+ }
+
+ if (alignment != NULL) { delete alignment; }
+ }
+
+ return success;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "TrimOligos", "stripPairedPrimers");
+ exit(1);
+ }
+
+}
+
//*******************************************************************/
int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
try {
}
//cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
//use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->align(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
+ alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
oligo = alignment->getSeqAAln();
string temp = alignment->getSeqBAln();
}
//use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->align(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
+ alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
oligo = alignment->getSeqAAln();
string temp = alignment->getSeqBAln();
}
//cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
//use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->align(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
+ alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
oligo = alignment->getSeqAAln();
string temp = alignment->getSeqBAln();
}
//use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->align(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
+ alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
oligo = alignment->getSeqAAln();
string temp = alignment->getSeqBAln();
}
//use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
+ alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
oligo = alignment->getSeqAAln();
string temp = alignment->getSeqBAln();
}
//use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
+ alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
oligo = alignment->getSeqAAln();
string temp = alignment->getSeqBAln();
//*******************************************************************/
int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
try {
+
+ if (paired) { int success = stripPairedPrimers(seq, qual, group, keepForward); return success; }
+
string rawSequence = seq.getUnaligned();
int success = pdiffs + 1; //guilty until proven innocent
}
//use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
+ alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
oligo = alignment->getSeqAAln();
string temp = alignment->getSeqBAln();
}
//use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->align(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
+ alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
oligo = alignment->getSeqAAln();
string temp = alignment->getSeqBAln();
}
//use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->align(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
+ alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
oligo = alignment->getSeqAAln();
string temp = alignment->getSeqBAln();
}
//use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->align(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
+ alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
oligo = alignment->getSeqAAln();
string temp = alignment->getSeqBAln();
}
//use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->align(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
+ alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
oligo = alignment->getSeqAAln();
string temp = alignment->getSeqBAln();
exit(1);
}
}
+//********************************************************************/
+string TrimOligos::reverseOligo(string oligo){
+ try {
+ string reverse = "";
+
+ for(int i=oligo.length()-1;i>=0;i--){
+
+ if(oligo[i] == 'A') { reverse += 'T'; }
+ else if(oligo[i] == 'T'){ reverse += 'A'; }
+ else if(oligo[i] == 'U'){ reverse += 'A'; }
+
+ else if(oligo[i] == 'G'){ reverse += 'C'; }
+ else if(oligo[i] == 'C'){ reverse += 'G'; }
+
+ else if(oligo[i] == 'R'){ reverse += 'Y'; }
+ else if(oligo[i] == 'Y'){ reverse += 'R'; }
+
+ else if(oligo[i] == 'M'){ reverse += 'K'; }
+ else if(oligo[i] == 'K'){ reverse += 'M'; }
+
+ else if(oligo[i] == 'W'){ reverse += 'W'; }
+ else if(oligo[i] == 'S'){ reverse += 'S'; }
+
+ else if(oligo[i] == 'B'){ reverse += 'V'; }
+ else if(oligo[i] == 'V'){ reverse += 'B'; }
+
+ else if(oligo[i] == 'D'){ reverse += 'H'; }
+ else if(oligo[i] == 'H'){ reverse += 'D'; }
+
+ else if(oligo[i] == '-'){ reverse += '-'; }
+ else { reverse += 'N'; }
+ }
+
+
+ return reverse;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "TrimOligos", "reverseOligo");
+ exit(1);
+ }
+}
+
/********************************************************************/
bool findForward(Sequence&, int&, int&);
bool findReverse(Sequence&, int&, int&);
+ string reverseOligo(string);
private:
int pdiffs, bdiffs, ldiffs, sdiffs;
+ bool paired;
map<string, int> barcodes;
map<string, int> primers;
MothurOut* m;
bool compareDNASeq(string, string);
- int countDiffs(string, string);
+ int countDiffs(string, string);
+
+ int stripPairedBarcode(Sequence& seq, QualityScores& qual, int& group);
+ int stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool);
};
#endif
CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","name",false,false,true); parameters.push_back(pname);
CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","count",false,false,true); parameters.push_back(pcount);
CommandParameter pflip("flip", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pflip);
+ CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig);
CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxhomop);
CommandParameter pminlength("minlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pminlength);
string helpString = "";
helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
- helpString += "The trim.seqs command parameters are fasta, name, count, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
+ helpString += "The trim.seqs command parameters are fasta, name, count, flip, checkorient, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
helpString += "The fasta parameter is required.\n";
helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
+ helpString += "The checkorient parameter will check the reverse compliment of the sequence if the barcodes and primers cannot be found in the forward. The default is false.\n";
helpString += "The oligos parameter allows you to provide an oligos file.\n";
helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
helpString += "The count parameter allows you to provide a count file with your fasta file.\n";
TrimSeqsCommand::TrimSeqsCommand(){
try {
- abort = true; calledHelp = true;
+ abort = true; calledHelp = true;
setParameters();
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
temp = validParameter.validFile(parameters, "keepforward", false); if (temp == "not found") { temp = "F"; }
keepforward = m->isTrue(temp);
+
+ temp = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; }
+ reorient = m->isTrue(temp);
temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
m->setProcessors(temp);
if (abort == true) { if (calledHelp) { return 0; } return 2; }
+ pairedOligos = false;
numFPrimers = 0; //this needs to be initialized
numRPrimers = 0;
numSpacers = 0;
}
}
+ if (m->control_pressed) { return 0; }
+
//fills lines and qlines
setLines(fastaFile, qFileName);
int count = 0;
bool moreSeqs = 1;
- TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
-
+ int numBarcodes = barcodes.size();
+ TrimOligos* trimOligos = NULL;
+ if (pairedOligos) { trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, pairedPrimers, pairedBarcodes); numBarcodes = pairedBarcodes.size(); }
+ else { trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer); }
+
+ TrimOligos* rtrimOligos = NULL;
+ if (reorient) {
+ //create reoriented primer and barcode pairs
+ map<int, oligosPair> rpairedPrimers, rpairedBarcodes;
+ for (map<int, oligosPair>::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) {
+ cout << "primer " << (it->second).forward << '\t' << (it->second).reverse << '\t' << primerNameVector[it->first] << endl;
+ cout << "rprimer " << trimOligos->reverseOligo((it->second).reverse) << '\t' << (trimOligos->reverseOligo((it->second).forward)) << endl;
+ oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
+ rpairedPrimers[it->first] = tempPair;
+ }
+ for (map<int, oligosPair>::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) {
+ cout << "barcode " << (it->second).forward << '\t' << (it->second).reverse << '\t' << barcodeNameVector[it->first] << endl;
+ cout << "rbarcode " << trimOligos->reverseOligo((it->second).reverse) << '\t' << (trimOligos->reverseOligo((it->second).forward)) << endl;
+ oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
+ rpairedBarcodes[it->first] = tempPair;
+ }
+ rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
+ }
+
while (moreSeqs) {
- if (m->control_pressed) {
+ if (m->control_pressed) {
+ delete trimOligos; if (reorient) { delete rtrimOligos; }
inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
if ((createGroup) && (countfile == "")) { outGroupsFile.close(); }
if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
int primerIndex = 0;
if(numLinkers != 0){
- success = trimOligos.stripLinker(currSeq, currQual);
+ success = trimOligos->stripLinker(currSeq, currQual);
if(success > ldiffs) { trashCode += 'k'; }
else{ currentSeqsDiffs += success; }
}
- if(barcodes.size() != 0){
- success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
+ if(numBarcodes != 0){
+ success = trimOligos->stripBarcode(currSeq, currQual, barcodeIndex);
if(success > bdiffs) { trashCode += 'b'; }
else{ currentSeqsDiffs += success; }
}
if(numSpacers != 0){
- success = trimOligos.stripSpacer(currSeq, currQual);
+ success = trimOligos->stripSpacer(currSeq, currQual);
if(success > sdiffs) { trashCode += 's'; }
else{ currentSeqsDiffs += success; }
}
if(numFPrimers != 0){
- success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
+ success = trimOligos->stripForward(currSeq, currQual, primerIndex, keepforward);
if(success > pdiffs) { trashCode += 'f'; }
else{ currentSeqsDiffs += success; }
}
if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
if(numRPrimers != 0){
- success = trimOligos.stripReverse(currSeq, currQual);
+ success = trimOligos->stripReverse(currSeq, currQual);
if(!success) { trashCode += 'r'; }
}
-
+
+ if (reorient && (trashCode != "")) { //if you failed and want to check the reverse
+ int thisSuccess = 0;
+ string thisTrashCode = "";
+ int thisCurrentSeqsDiffs = 0;
+
+ int thisBarcodeIndex = 0;
+ int thisPrimerIndex = 0;
+
+ if(numBarcodes != 0){
+ thisSuccess = rtrimOligos->stripBarcode(currSeq, currQual, thisBarcodeIndex);
+ if(thisSuccess > bdiffs) { thisTrashCode += 'b'; }
+ else{ thisCurrentSeqsDiffs += thisSuccess; }
+ }
+
+ if(numFPrimers != 0){
+ thisSuccess = rtrimOligos->stripForward(currSeq, currQual, thisPrimerIndex, keepforward);
+ if(thisSuccess > pdiffs) { thisTrashCode += 'f'; }
+ else{ thisCurrentSeqsDiffs += thisSuccess; }
+ }
+
+ if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; }
+
+ if (thisTrashCode == "") {
+ trashCode = thisTrashCode;
+ success = thisSuccess;
+ currentSeqsDiffs = thisCurrentSeqsDiffs;
+ barcodeIndex = thisBarcodeIndex;
+ primerIndex = thisPrimerIndex;
+ currSeq.reverseComplement();
+ if(qFileName != ""){
+ currQual.flipQScores();
+ }
+ }
+ }
+
if(keepFirst != 0){
success = keepFirstTrim(currSeq, currQual);
}
if(trashCode.length() == 0){
string thisGroup = "";
if (createGroup) {
- if(barcodes.size() != 0){
+ if(numBarcodes != 0){
thisGroup = barcodeNameVector[barcodeIndex];
- if (primers.size() != 0) {
+ if (numFPrimers != 0) {
if (primerNameVector[primerIndex] != "") {
if(thisGroup != "") {
thisGroup += "." + primerNameVector[primerIndex];
}
if (createGroup) {
- if(barcodes.size() != 0){
+ if(numBarcodes != 0){
if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
//report progress
if((count) % 1000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
-
+ delete trimOligos;
+ if (reorient) { delete rtrimOligos; }
inFASTA.close();
trimFASTAFile.close();
scrapFASTAFile.close();
tempPrimerQualFileNames,
tempNameFileNames,
lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m,
- pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer,
+ pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, pairedBarcodes, pairedPrimers, pairedOligos,
primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
- minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap, nameCount);
+ minLength, maxAmbig, maxHomoP, maxLength, flip, reorient, nameMap, nameCount);
pDataArray.push_back(tempTrim);
hThreadArray[h] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);
string sname = ""; nameStream >> sname;
sname = sname.substr(1);
+ for (int i = 0; i < sname.length(); i++) {
+ if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; }
+ }
+
map<string, int>::iterator it = firstSeqNames.find(sname);
if(it != firstSeqNames.end()) { //this is the start of a new chunk
ofstream test;
- string type, oligo, group;
+ string type, oligo, roligo, group;
+ bool hasPrimer = false; bool hasPairedBarcodes = false;
int indexPrimer = 0;
int indexBarcode = 0;
+ int indexPairedPrimer = 0;
+ int indexPairedBarcode = 0;
+ set<string> uniquePrimers;
+ set<string> uniqueBarcodes;
while(!inOligos.eof()){
primers[oligo]=indexPrimer; indexPrimer++;
primerNameVector.push_back(group);
}
+ else if (type == "PRIMER"){
+ m->gobble(inOligos);
+
+ inOligos >> roligo;
+
+ for(int i=0;i<roligo.length();i++){
+ roligo[i] = toupper(roligo[i]);
+ if(roligo[i] == 'U') { roligo[i] = 'T'; }
+ }
+ roligo = reverseOligo(roligo);
+
+ group = "";
+
+ // get rest of line in case there is a primer name
+ while (!inOligos.eof()) {
+ char c = inOligos.get();
+ if (c == 10 || c == 13){ break; }
+ else if (c == 32 || c == 9){;} //space or tab
+ else { group += c; }
+ }
+
+ oligosPair newPrimer(oligo, roligo);
+
+ //check for repeat barcodes
+ string tempPair = oligo+roligo;
+ if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); }
+ else { uniquePrimers.insert(tempPair); }
+
+ if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } }
+
+ pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
+ primerNameVector.push_back(group);
+ hasPrimer = true;
+ }
else if(type == "REVERSE"){
//Sequence oligoRC("reverse", oligo);
//oligoRC.reverseComplement();
}
else if(type == "BARCODE"){
inOligos >> group;
-
- //check for repeat barcodes
- map<string, int>::iterator itBar = barcodes.find(oligo);
- if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
- barcodes[oligo]=indexBarcode; indexBarcode++;
- barcodeNameVector.push_back(group);
+ //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs
+ //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info
+
+ string temp = "";
+ while (!inOligos.eof()) {
+ char c = inOligos.get();
+ if (c == 10 || c == 13){ break; }
+ else if (c == 32 || c == 9){;} //space or tab
+ else { temp += c; }
+ }
+
+ //then this is illumina data with 4 columns
+ if (temp != "") {
+ hasPairedBarcodes = true;
+ string reverseBarcode = group; //reverseOligo(group); //reverse barcode
+ group = temp;
+
+ for(int i=0;i<reverseBarcode.length();i++){
+ reverseBarcode[i] = toupper(reverseBarcode[i]);
+ if(reverseBarcode[i] == 'U') { reverseBarcode[i] = 'T'; }
+ }
+
+ oligosPair newPair(oligo, reverseOligo(reverseBarcode));
+
+ if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
+
+ //check for repeat barcodes
+ string tempPair = oligo+reverseBarcode;
+ if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); }
+ else { uniqueBarcodes.insert(tempPair); }
+
+ pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
+ barcodeNameVector.push_back(group);
+ }else {
+ //check for repeat barcodes
+ map<string, int>::iterator itBar = barcodes.find(oligo);
+ if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
+
+ barcodes[oligo]=indexBarcode; indexBarcode++;
+ barcodeNameVector.push_back(group);
+ }
}else if(type == "LINKER"){
linker.push_back(oligo);
}else if(type == "SPACER"){
}
inOligos.close();
+ if (hasPairedBarcodes || hasPrimer) {
+ pairedOligos = true;
+ if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine(); return 0; }
+ }
+
if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0; }
//add in potential combos
if(allFiles){
set<string> uniqueNames; //used to cleanup outputFileNames
- for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
- for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
-
- string primerName = primerNameVector[itPrimer->second];
- string barcodeName = barcodeNameVector[itBar->second];
-
- if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
- else {
- string comboGroupName = "";
- string fastaFileName = "";
- string qualFileName = "";
- string nameFileName = "";
- string countFileName = "";
+ if (pairedOligos) {
+ for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
+ for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
- if(primerName == ""){
- comboGroupName = barcodeNameVector[itBar->second];
- }
- else{
- if(barcodeName == ""){
- comboGroupName = primerNameVector[itPrimer->second];
+ string primerName = primerNameVector[itPrimer->first];
+ string barcodeName = barcodeNameVector[itBar->first];
+
+ if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+ else {
+ string comboGroupName = "";
+ string fastaFileName = "";
+ string qualFileName = "";
+ string nameFileName = "";
+ string countFileName = "";
+
+ if(primerName == ""){
+ comboGroupName = barcodeNameVector[itBar->first];
}
else{
- comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
+ if(barcodeName == ""){
+ comboGroupName = primerNameVector[itPrimer->first];
+ }
+ else{
+ comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
+ }
+ }
+
+
+ ofstream temp;
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
+ variables["[tag]"] = comboGroupName;
+ fastaFileName = getOutputFileName("fasta", variables);
+ if (uniqueNames.count(fastaFileName) == 0) {
+ outputNames.push_back(fastaFileName);
+ outputTypes["fasta"].push_back(fastaFileName);
+ uniqueNames.insert(fastaFileName);
+ }
+
+ fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
+ m->openOutputFile(fastaFileName, temp); temp.close();
+
+ if(qFileName != ""){
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
+ qualFileName = getOutputFileName("qfile", variables);
+ if (uniqueNames.count(qualFileName) == 0) {
+ outputNames.push_back(qualFileName);
+ outputTypes["qfile"].push_back(qualFileName);
+ }
+
+ qualFileNames[itBar->first][itPrimer->first] = qualFileName;
+ m->openOutputFile(qualFileName, temp); temp.close();
+ }
+
+ if(nameFile != ""){
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
+ nameFileName = getOutputFileName("name", variables);
+ if (uniqueNames.count(nameFileName) == 0) {
+ outputNames.push_back(nameFileName);
+ outputTypes["name"].push_back(nameFileName);
+ }
+
+ nameFileNames[itBar->first][itPrimer->first] = nameFileName;
+ m->openOutputFile(nameFileName, temp); temp.close();
}
}
+ }
+ }
+ }else {
+ for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
+ for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
+ string primerName = primerNameVector[itPrimer->second];
+ string barcodeName = barcodeNameVector[itBar->second];
- ofstream temp;
- map<string, string> variables;
- variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
- variables["[tag]"] = comboGroupName;
- fastaFileName = getOutputFileName("fasta", variables);
- if (uniqueNames.count(fastaFileName) == 0) {
- outputNames.push_back(fastaFileName);
- outputTypes["fasta"].push_back(fastaFileName);
- uniqueNames.insert(fastaFileName);
- }
-
- fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
- m->openOutputFile(fastaFileName, temp); temp.close();
-
- if(qFileName != ""){
- variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
- qualFileName = getOutputFileName("qfile", variables);
- if (uniqueNames.count(qualFileName) == 0) {
- outputNames.push_back(qualFileName);
- outputTypes["qfile"].push_back(qualFileName);
+ if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+ else {
+ string comboGroupName = "";
+ string fastaFileName = "";
+ string qualFileName = "";
+ string nameFileName = "";
+ string countFileName = "";
+
+ if(primerName == ""){
+ comboGroupName = barcodeNameVector[itBar->second];
+ }
+ else{
+ if(barcodeName == ""){
+ comboGroupName = primerNameVector[itPrimer->second];
+ }
+ else{
+ comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
+ }
}
- qualFileNames[itBar->second][itPrimer->second] = qualFileName;
- m->openOutputFile(qualFileName, temp); temp.close();
- }
-
- if(nameFile != ""){
- variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
- nameFileName = getOutputFileName("name", variables);
- if (uniqueNames.count(nameFileName) == 0) {
- outputNames.push_back(nameFileName);
- outputTypes["name"].push_back(nameFileName);
+
+ ofstream temp;
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
+ variables["[tag]"] = comboGroupName;
+ fastaFileName = getOutputFileName("fasta", variables);
+ if (uniqueNames.count(fastaFileName) == 0) {
+ outputNames.push_back(fastaFileName);
+ outputTypes["fasta"].push_back(fastaFileName);
+ uniqueNames.insert(fastaFileName);
}
- nameFileNames[itBar->second][itPrimer->second] = nameFileName;
- m->openOutputFile(nameFileName, temp); temp.close();
+ fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
+ m->openOutputFile(fastaFileName, temp); temp.close();
+
+ if(qFileName != ""){
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
+ qualFileName = getOutputFileName("qfile", variables);
+ if (uniqueNames.count(qualFileName) == 0) {
+ outputNames.push_back(qualFileName);
+ outputTypes["qfile"].push_back(qualFileName);
+ }
+
+ qualFileNames[itBar->second][itPrimer->second] = qualFileName;
+ m->openOutputFile(qualFileName, temp); temp.close();
+ }
+
+ if(nameFile != ""){
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
+ nameFileName = getOutputFileName("name", variables);
+ if (uniqueNames.count(nameFileName) == 0) {
+ outputNames.push_back(nameFileName);
+ outputTypes["name"].push_back(nameFileName);
+ }
+
+ nameFileNames[itBar->second][itPrimer->second] = nameFileName;
+ m->openOutputFile(nameFileName, temp); temp.close();
+ }
}
}
- }
- }
+ }
+ }
}
numFPrimers = primers.size();
+ if (pairedOligos) { numFPrimers = pairedPrimers.size(); }
numRPrimers = revPrimer.size();
numLinkers = linker.size();
numSpacers = spacer.size();
bool abort, createGroup;
string fastaFile, oligoFile, qFileName, groupfile, nameFile, countfile, outputDir;
- bool flip, allFiles, qtrim, keepforward;
+ bool flip, allFiles, qtrim, keepforward, pairedOligos, reorient;
int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts;
int qWindowSize, qWindowStep, keepFirst, removeLast;
double qRollAverage, qThreshold, qWindowAverage, qAverage;
vector<string> revPrimer, outputNames;
set<string> filesToRemove;
+ map<int, oligosPair> pairedBarcodes;
+ map<int, oligosPair> pairedPrimers;
map<string, int> barcodes;
vector<string> groupVector;
map<string, int> primers;
vector<vector<string> > qualFileNames;
vector<vector<string> > nameFileNames;
unsigned long long lineStart, lineEnd, qlineStart, qlineEnd;
- bool flip, allFiles, qtrim, keepforward, createGroup;
+ bool flip, allFiles, qtrim, keepforward, createGroup, pairedOligos, reorient;
int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs;
int qWindowSize, qWindowStep, keepFirst, removeLast, count;
double qRollAverage, qThreshold, qWindowAverage, qAverage;
map<string, int> groupCounts;
map<string, string> nameMap;
map<string, string> groupMap;
+ map<int, oligosPair> pairedBarcodes;
+ map<int, oligosPair> pairedPrimers;
trimData(){}
trimData(string fn, string qn, string nf, string cf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string tcn, string scn,string gn, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<vector<string> > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend, MothurOut* mout,
- int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, vector<string> revP, vector<string> li, vector<string> spa,
+ int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, vector<string> revP, vector<string> li, vector<string> spa, map<int, oligosPair> pbr, map<int, oligosPair> ppr, bool po,
vector<string> priNameVector, vector<string> barNameVector, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL,
int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage,
- int minL, int maxA, int maxH, int maxL, bool fli, map<string, string> nm, map<string, int> ncount) {
+ int minL, int maxA, int maxH, int maxL, bool fli, bool reo, map<string, string> nm, map<string, int> ncount) {
filename = fn;
qFileName = qn;
nameFile = nf;
sdiffs = sd;
tdiffs = td;
barcodes = bar;
+ pairedPrimers = ppr;
+ pairedBarcodes = pbr;
+ pairedOligos = po;
primers = pri; numFPrimers = primers.size();
revPrimer = revP; numRPrimers = revPrimer.size();
linker = li; numLinkers = linker.size();
maxHomoP = maxH;
maxLength = maxL;
flip = fli;
+ reorient = reo;
nameMap = nm;
count = 0;
}
}
}
-
- TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer, pDataArray->linker, pDataArray->spacer);
+ TrimOligos* trimOligos = NULL;
+ int numBarcodes = pDataArray->barcodes.size();
+ if (pDataArray->pairedOligos) { trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->pairedPrimers, pDataArray->pairedBarcodes); numBarcodes = pDataArray->pairedBarcodes.size(); }
+ else { trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer, pDataArray->linker, pDataArray->spacer); }
+
+ TrimOligos* rtrimOligos = NULL;
+ if (pDataArray->reorient) {
+ //create reoriented primer and barcode pairs
+ map<int, oligosPair> rpairedPrimers, rpairedBarcodes;
+ for (map<int, oligosPair>::iterator it = pDataArray->pairedPrimers.begin(); it != pDataArray->pairedPrimers.end(); it++) {
+ oligosPair tempPair(trimOligos->reverseOligo((it->second).reverse), (trimOligos->reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
+ rpairedPrimers[it->first] = tempPair;
+ }
+ for (map<int, oligosPair>::iterator it = pDataArray->pairedBarcodes.begin(); it != pDataArray->pairedBarcodes.end(); it++) {
+ oligosPair tempPair(trimOligos->reverseOligo((it->second).reverse), (trimOligos->reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
+ rpairedBarcodes[it->first] = tempPair;
+ }
+ rtrimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
+ }
pDataArray->count = 0;
for(int i = 0; i < pDataArray->lineEnd; i++){ //end is the number of sequences to process
- if (pDataArray->m->control_pressed) {
+ if (pDataArray->m->control_pressed) {
+ delete trimOligos;
inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
if ((pDataArray->createGroup) && (pDataArray->countfile == "")) { outGroupsFile.close(); }
if(pDataArray->qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
int primerIndex = 0;
if(pDataArray->numLinkers != 0){
- success = trimOligos.stripLinker(currSeq, currQual);
+ success = trimOligos->stripLinker(currSeq, currQual);
if(success > pDataArray->ldiffs) { trashCode += 'k'; }
else{ currentSeqsDiffs += success; }
}
- if(pDataArray->barcodes.size() != 0){
- success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
+ if(numBarcodes != 0){
+ success = trimOligos->stripBarcode(currSeq, currQual, barcodeIndex);
if(success > pDataArray->bdiffs) { trashCode += 'b'; }
else{ currentSeqsDiffs += success; }
}
if(pDataArray->numSpacers != 0){
- success = trimOligos.stripSpacer(currSeq, currQual);
+ success = trimOligos->stripSpacer(currSeq, currQual);
if(success > pDataArray->sdiffs) { trashCode += 's'; }
else{ currentSeqsDiffs += success; }
}
if(pDataArray->numFPrimers != 0){
- success = trimOligos.stripForward(currSeq, currQual, primerIndex, pDataArray->keepforward);
+ success = trimOligos->stripForward(currSeq, currQual, primerIndex, pDataArray->keepforward);
if(success > pDataArray->pdiffs) { trashCode += 'f'; }
else{ currentSeqsDiffs += success; }
}
if (currentSeqsDiffs > pDataArray->tdiffs) { trashCode += 't'; }
if(pDataArray->numRPrimers != 0){
- success = trimOligos.stripReverse(currSeq, currQual);
+ success = trimOligos->stripReverse(currSeq, currQual);
if(!success) { trashCode += 'r'; }
}
+ if (pDataArray->reorient && (trashCode != "")) { //if you failed and want to check the reverse
+ int thisSuccess = 0;
+ string thisTrashCode = "";
+ int thisCurrentSeqsDiffs = 0;
+
+ int thisBarcodeIndex = 0;
+ int thisPrimerIndex = 0;
+
+ if(numBarcodes != 0){
+ thisSuccess = rtrimOligos->stripBarcode(currSeq, currQual, thisBarcodeIndex);
+ if(thisSuccess > pDataArray->bdiffs) { thisTrashCode += 'b'; }
+ else{ thisCurrentSeqsDiffs += thisSuccess; }
+ }
+
+ if(pDataArray->numFPrimers != 0){
+ thisSuccess = rtrimOligos->stripForward(currSeq, currQual, thisPrimerIndex, pDataArray->keepforward);
+ if(thisSuccess > pDataArray->pdiffs) { thisTrashCode += 'f'; }
+ else{ thisCurrentSeqsDiffs += thisSuccess; }
+ }
+
+ if (thisCurrentSeqsDiffs > pDataArray->tdiffs) { thisTrashCode += 't'; }
+
+ if (thisTrashCode == "") {
+ trashCode = thisTrashCode;
+ success = thisSuccess;
+ currentSeqsDiffs = thisCurrentSeqsDiffs;
+ barcodeIndex = thisBarcodeIndex;
+ primerIndex = thisPrimerIndex;
+ }
+ }
+
+
if(pDataArray->keepFirst != 0){
//success = keepFirstTrim(currSeq, currQual);
success = 1;
if(trashCode.length() == 0){
string thisGroup = "";
if (pDataArray->createGroup) {
- if(pDataArray->barcodes.size() != 0){
+ if(numBarcodes != 0){
thisGroup = pDataArray->barcodeNameVector[barcodeIndex];
- if (pDataArray->primers.size() != 0) {
+ if (pDataArray->numFPrimers != 0) {
if (pDataArray->primerNameVector[primerIndex] != "") {
if(thisGroup != "") {
thisGroup += "." + pDataArray->primerNameVector[primerIndex];
}
if (pDataArray->createGroup) {
- if(pDataArray->barcodes.size() != 0){
+ if(numBarcodes != 0){
if (pDataArray->countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
else { pDataArray->groupMap[currSeq.getName()] = thisGroup; }
//report progress
if((pDataArray->count) % 1000 != 0){ pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine(); }
-
+ delete trimOligos;
inFASTA.close();
trimFASTAFile.close();
scrapFASTAFile.close();