37E5F3E20F29FD4200F8D827 /* treenode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = treenode.cpp; sourceTree = SOURCE_ROOT; };
37E5F4900F2A3DA800F8D827 /* readtreecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readtreecommand.h; sourceTree = SOURCE_ROOT; };
37E5F4910F2A3DA800F8D827 /* readtreecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readtreecommand.cpp; sourceTree = SOURCE_ROOT; };
- 7E09C5120FDA79C5002ECAE5 /* reversecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = reversecommand.h; sourceTree = "<group>"; };
- 7E09C5130FDA79C5002ECAE5 /* reversecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = reversecommand.cpp; sourceTree = "<group>"; };
- 7E09C5340FDA7F65002ECAE5 /* trimseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = trimseqscommand.h; sourceTree = "<group>"; };
- 7E09C5350FDA7F65002ECAE5 /* trimseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = trimseqscommand.cpp; sourceTree = "<group>"; };
+ 7E09C5120FDA79C5002ECAE5 /* reversecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = reversecommand.h; sourceTree = SOURCE_ROOT; };
+ 7E09C5130FDA79C5002ECAE5 /* reversecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = reversecommand.cpp; sourceTree = SOURCE_ROOT; };
+ 7E09C5340FDA7F65002ECAE5 /* trimseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = trimseqscommand.h; sourceTree = SOURCE_ROOT; };
+ 7E09C5350FDA7F65002ECAE5 /* trimseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = trimseqscommand.cpp; sourceTree = SOURCE_ROOT; };
7E412F420F8D213C00381DD0 /* libshuff.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = libshuff.h; sourceTree = SOURCE_ROOT; };
7E412F470F8D21B600381DD0 /* slibshuff.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = slibshuff.h; sourceTree = SOURCE_ROOT; };
7E412F480F8D21B600381DD0 /* slibshuff.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = slibshuff.cpp; sourceTree = SOURCE_ROOT; };
TrimSeqsCommand::TrimSeqsCommand(){
try {
- oligos = 0;
- totalBarcodeCount = 0;
- matchBarcodeCount = 0;
-
globaldata = GlobalData::getInstance();
+
+ oligos = 0;
+
if(globaldata->getFastaFile() == ""){
cout << "you need to at least enter a fasta file name" << endl;
}
- if(isTrue(globaldata->getFlip())) { flip = 1; }
- if(globaldata->getOligosFile() != "") { oligos = 1; }
- if(!flip && !oligos) { cout << "huh?" << endl; }
+ if(isTrue(globaldata->getFlip())) { flip = 1; }
+ if(globaldata->getOligosFile() != "") { oligos = 1; }
+
+ if(globaldata->getMaxAmbig() != "-1") { maxAmbig = atoi(globaldata->getMaxAmbig().c_str()); }
+ else { maxAmbig = -1; }
+
+ if(globaldata->getMaxHomoPolymer() != "-1") { maxHomoP = atoi(globaldata->getMaxHomoPolymer().c_str()); }
+ else { maxHomoP = 0; }
+
+ if(globaldata->getMinLength() != "-1") { minLength = atoi(globaldata->getMinLength().c_str()); }
+ else { minLength = 0; }
+
+ if(globaldata->getMaxLength() != "-1") { maxLength = atoi(globaldata->getMaxLength().c_str()); }
+ else { maxLength = 0; }
+
+ if(!flip && !oligos && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP ){ cout << "huh?" << endl; }
}
catch(exception& e) {
success = stripReverse(currSeq);
if(!success){ trashCode += 'r'; }
}
+ if(minLength > 0 || maxLength > 0){
+ success = cullLength(currSeq);
+ if(!success){ trashCode += 'l'; }
+ }
+ if(maxHomoP > 0){
+ success = cullHomoP(currSeq);
+ if(!success){ trashCode += 'h'; }
+ }
+ if(maxAmbig != -1){
+ success = cullAmbigs(currSeq);
+ if(!success){ trashCode += 'n'; }
+ }
+
if(flip){ currSeq.reverseComplement(); } // should go last
if(trashCode.length() == 0){
if (rawSequence.compare(0,oligo.length(),oligo) == 0){
group = it->second;
seq.setUnaligned(rawSequence.substr(oligo.length()));
- matchBarcodeCount++;
success = 1;
break;
}
}
- totalBarcodeCount++;
return success;
}
if (rawSequence.compare(0,oligo.length(),oligo) == 0){
seq.setUnaligned(rawSequence.substr(oligo.length()));
- matchFPrimerCount++;
success = 1;
break;
}
}
- totalFPrimerCount++;
return success;
}
if(rawSequence.compare(rawSequence.length()-oligo.length(),oligo.length(),oligo) == 0){
seq.setUnaligned(rawSequence.substr(rawSequence.length()-oligo.length()));
- matchRPrimerCount++;
success = 1;
break;
}
}
+ return success;
+
+}
+
+//***************************************************************************************************************
+
+bool TrimSeqsCommand::cullLength(Sequence& seq){
+
+ int length = seq.getNumBases();
+ bool success = 0; //guilty until proven innocent
+
+ if(length >= minLength && maxLength == 0) { success = 1; }
+ else if(length >= minLength && length <= maxLength) { success = 1; }
+ else { success = 0; }
+
+ return success;
+
+}
+
+//***************************************************************************************************************
+
+bool TrimSeqsCommand::cullHomoP(Sequence& seq){
+
+ int longHomoP = seq.getLongHomoPolymer();
+ bool success = 0; //guilty until proven innocent
+
+ if(longHomoP <= maxHomoP){ success = 1; }
+ else { success = 0; }
+
+ return success;
+
+}
+
+//***************************************************************************************************************
+
+bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
+
+ int numNs = seq.getAmbigBases();
+ bool success = 0; //guilty until proven innocent
+
+ if(numNs <= maxAmbig){ success = 1; }
+ else { success = 0; }
- totalRPrimerCount++;
return success;
}
bool stripBarcode(Sequence&, string&);
bool stripForward(Sequence&);
bool stripReverse(Sequence&);
+ bool cullLength(Sequence&);
+ bool cullHomoP(Sequence&);
+ bool cullAmbigs(Sequence&);
GlobalData* globaldata;
- int totalBarcodeCount, matchBarcodeCount; // to be removed
- int totalFPrimerCount, matchFPrimerCount; // to be removed
- int totalRPrimerCount, matchRPrimerCount; // to be removed
-
bool oligos, flip;
- int numFPrimers, numRPrimers;
+ int numFPrimers, numRPrimers, maxAmbig, maxHomoP, minLength, maxLength;
vector<string> forPrimer, revPrimer;
map<string, string> barcodes;
};
string reverseseqsArray[] = {"fasta"};
commandParameters["reverse.seqs"] = addParameters(reverseseqsArray, sizeof(reverseseqsArray)/sizeof(string));
- string trimseqsArray[] = {"fasta", "flip", "oligos"};
+ string trimseqsArray[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength"};
commandParameters["trim.seqs"] = addParameters(trimseqsArray, sizeof(trimseqsArray)/sizeof(string));
string vennArray[] = {"groups","line","label","calc"};