//**********************************************************************************************************************
vector<string> PcrSeqsCommand::setParameters(){
try {
- CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
- CommandParameter poligos("oligos", "InputTypes", "", "", "ecolioligos", "none", "none",false,false); parameters.push_back(poligos);
- CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
- CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
- CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
- CommandParameter ptax("taxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(ptax);
- CommandParameter pecoli("ecoli", "InputTypes", "", "", "ecolioligos", "none", "none",false,false); parameters.push_back(pecoli);
- CommandParameter pstart("start", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pstart);
- CommandParameter pend("end", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pend);
- CommandParameter pnomatch("nomatch", "Multiple", "reject-keep", "reject", "", "", "",false,false); parameters.push_back(pnomatch);
- CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
- CommandParameter pkeepprimer("keepprimer", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepprimer);
- CommandParameter pkeepdots("keepdots", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pkeepdots);
- CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
- CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+ CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
+ CommandParameter poligos("oligos", "InputTypes", "", "", "ecolioligos", "none", "none","",false,false,true); parameters.push_back(poligos);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","count",false,false,true); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false,true); parameters.push_back(pgroup);
+ CommandParameter ptax("taxonomy", "InputTypes", "", "", "none", "none", "none","taxonomy",false,false,true); parameters.push_back(ptax);
+ CommandParameter pecoli("ecoli", "InputTypes", "", "", "ecolioligos", "none", "none","",false,false); parameters.push_back(pecoli);
+ CommandParameter pstart("start", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pstart);
+ CommandParameter pend("end", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pend);
+ CommandParameter pnomatch("nomatch", "Multiple", "reject-keep", "reject", "", "", "","",false,false); parameters.push_back(pnomatch);
+ CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
+
+ CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+ CommandParameter pkeepprimer("keepprimer", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepprimer);
+ CommandParameter pkeepdots("keepdots", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pkeepdots);
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
vector<string> myArray;
for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
try {
string helpString = "";
helpString += "The pcr.seqs command reads a fasta file.\n";
- helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, count, taxonomy, ecoli, start, end, nomatch, processors, keepprimer and keepdots.\n";
+ helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, count, taxonomy, ecoli, start, end, nomatch, pdiffs, processors, keepprimer and keepdots.\n";
helpString += "The ecoli parameter is used to provide a fasta file containing a single reference sequence (e.g. for e. coli) this must be aligned. Mothur will trim to the start and end positions of the reference sequence.\n";
helpString += "The start parameter allows you to provide a starting position to trim to.\n";
helpString += "The end parameter allows you to provide a ending position to trim from.\n";
helpString += "The processors parameter allows you to use multiple processors.\n";
helpString += "The keepprimer parameter allows you to keep the primer, default=false.\n";
helpString += "The keepdots parameter allows you to keep the leading and trailing .'s, default=true.\n";
+ helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Pcr.seqs .\n";
return helpString;
exit(1);
}
}
-
//**********************************************************************************************************************
-string PcrSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
- try {
- string outputFileName = "";
- map<string, vector<string> >::iterator it;
+string PcrSeqsCommand::getOutputPattern(string type) {
+ try {
+ string pattern = "";
- //is this a type this command creates
- it = outputTypes.find(type);
- if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
- else {
- if (type == "fasta") { outputFileName = "pcr.fasta"; }
- else if (type == "taxonomy") { outputFileName = "pcr" + m->getExtension(inputName); }
- else if (type == "group") { outputFileName = "pcr" + m->getExtension(inputName); }
- else if (type == "name") { outputFileName = "pcr" + m->getExtension(inputName); }
- else if (type == "count") { outputFileName = "pcr" + m->getExtension(inputName); }
- else if (type == "accnos") { outputFileName = "bad.accnos"; }
- else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
- }
- return outputFileName;
- }
- catch(exception& e) {
- m->errorOut(e, "PcrSeqsCommand", "getOutputFileNameTag");
- exit(1);
- }
+ if (type == "fasta") { pattern = "[filename],pcr,[extension]-[filename],[tag],pcr,[extension]"; }
+ else if (type == "taxonomy") { pattern = "[filename],pcr,[extension]"; }
+ else if (type == "name") { pattern = "[filename],pcr,[extension]"; }
+ else if (type == "group") { pattern = "[filename],pcr,[extension]"; }
+ else if (type == "count") { pattern = "[filename],pcr,[extension]"; }
+ else if (type == "accnos") { pattern = "[filename],bad.accnos"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
+
+ return pattern;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PcrSeqsCommand", "getOutputPattern");
+ exit(1);
+ }
}
//**********************************************************************************************************************
temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
m->setProcessors(temp);
m->mothurConvert(temp, processors);
+
+ temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
+ m->mothurConvert(temp, pdiffs);
nomatch = validParameter.validFile(parameters, "nomatch", false); if (nomatch == "not found") { nomatch = "reject"; }
if (abort == true) { if (calledHelp) { return 0; } return 2; }
int start = time(NULL);
+ fileAligned = true;
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
- string trimSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta");
+ map<string, string> variables;
+ variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
+ variables["[extension]"] = m->getExtension(fastafile);
+ string trimSeqFile = getOutputFileName("fasta",variables);
outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
-
- string badSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "scrap." + getOutputFileNameTag("fasta");
+ variables["[tag]"] = "scrap";
+ string badSeqFile = getOutputFileName("fasta",variables);
length = 0;
- if(oligosfile != ""){ readOligos(); } if (m->control_pressed) { return 0; }
+ if(oligosfile != ""){ readOligos(); if (m->debug) { m->mothurOut("[DEBUG]: read oligos file. numprimers = " + toString(primers.size()) + ", revprimers = " + toString(revPrimer.size()) + ".\n"); } } if (m->control_pressed) { return 0; }
if(ecolifile != "") { readEcoli(); } if (m->control_pressed) { return 0; }
vector<unsigned long long> positions;
if (m->control_pressed) { return 0; }
set<string> badNames;
- if(processors == 1) { numFastaSeqs = driverPcr(fastafile, trimSeqFile, badSeqFile, badNames, lines[0]); }
- else { numFastaSeqs = createProcesses(fastafile, trimSeqFile, badSeqFile, badNames); }
+ numFastaSeqs = createProcesses(fastafile, trimSeqFile, badSeqFile, badNames);
if (m->control_pressed) { return 0; }
vector<int> processIDS;
int process = 1;
int num = 0;
+ int pstart = -1; int pend = -1;
+ bool adjustNeeded = false;
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- num = driverPcr(filename, goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", badSeqNames, lines[process]);
+ string locationsFile = toString(getpid()) + ".temp";
+ num = driverPcr(filename, goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", locationsFile, badSeqNames, lines[process], pstart, adjustNeeded);
//pass numSeqs to parent
ofstream out;
string tempFile = filename + toString(getpid()) + ".num.temp";
m->openOutputFile(tempFile, out);
+ out << pstart << '\t' << adjustNeeded << endl;
out << num << '\t' << badSeqNames.size() << endl;
for (set<string>::iterator it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
out << (*it) << endl;
}
}
- num = driverPcr(filename, goodFileName, badFileName, badSeqNames, lines[0]);
+ string locationsFile = toString(getpid()) + ".temp";
+ num = driverPcr(filename, goodFileName, badFileName, locationsFile, badSeqNames, lines[0], pstart, adjustNeeded);
//force parent to wait until all the processes are done
for (int i=0;i<processIDS.size();i++) {
string tempFile = filename + toString(processIDS[i]) + ".num.temp";
m->openInputFile(tempFile, in);
int numBadNames = 0; string name = "";
- if (!in.eof()) { int tempNum = 0; in >> tempNum >> numBadNames; num += tempNum; m->gobble(in); }
+ int tpstart = -1; bool tempAdjust = false;
+
+ if (!in.eof()) {
+ in >> tpstart >> tempAdjust; m->gobble(in);
+
+ if (tempAdjust) { adjustNeeded = true; }
+ if (tpstart != -1) {
+ if (tpstart != pstart) { adjustNeeded = true; }
+ if (tpstart < pstart) { pstart = tpstart; } //smallest start
+ }
+ int tempNum = 0; in >> tempNum >> numBadNames; num += tempNum; m->gobble(in);
+ }
for (int j = 0; j < numBadNames; j++) {
in >> name; m->gobble(in);
badSeqNames.insert(name);
m->appendFiles((badFileName + toString(processIDS[i]) + ".temp"), badFileName);
m->mothurRemove((badFileName + toString(processIDS[i]) + ".temp"));
+
+ m->appendFiles((toString(processIDS[i]) + ".temp"), locationsFile);
+ m->mothurRemove((toString(processIDS[i]) + ".temp"));
}
#else
DWORD dwThreadIdArray[processors-1];
HANDLE hThreadArray[processors-1];
+ string locationsFile = "locationsFile.txt";
+ m->mothurRemove(locationsFile);
+ m->mothurRemove(goodFileName);
+ m->mothurRemove(badFileName);
+
//Create processor worker threads.
for( int i=0; i<processors-1; i++ ){
if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
// Allocate memory for thread data.
- pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, lines[i].start, lines[i].end);
+ pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, locationsFile+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, pdiffs, lines[i].start, lines[i].end);
pDataArray.push_back(tempPcr);
//default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
}
//do your part
- num = driverPcr(filename, (goodFileName+toString(processors-1)+".temp"), (badFileName+toString(processors-1)+".temp"),badSeqNames, lines[processors-1]);
+ num = driverPcr(filename, (goodFileName+toString(processors-1)+".temp"), (badFileName+toString(processors-1)+".temp"), (locationsFile+toString(processors-1)+".temp"), badSeqNames, lines[processors-1], pstart, adjustNeeded);
processIDS.push_back(processors-1);
//Wait until all threads have terminated.
//Close all thread handles and free memory allocations.
for(int i=0; i < pDataArray.size(); i++){
num += pDataArray[i]->count;
+ if (pDataArray[i]->count != pDataArray[i]->fend) {
+ m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->fend) + " sequences assigned to it, quitting. \n"); m->control_pressed = true;
+ }
+ if (pDataArray[i]->adjustNeeded) { adjustNeeded = true; }
+ if (pDataArray[i]->pstart != -1) {
+ if (pDataArray[i]->pstart != pstart) { adjustNeeded = true; }
+ if (pDataArray[i]->pstart < pstart) { pstart = pDataArray[i]->pstart; }
+ } //smallest start
+
for (set<string>::iterator it = pDataArray[i]->badSeqNames.begin(); it != pDataArray[i]->badSeqNames.end(); it++) { badSeqNames.insert(*it); }
CloseHandle(hThreadArray[i]);
delete pDataArray[i];
m->appendFiles((badFileName + toString(processIDS[i]) + ".temp"), badFileName);
m->mothurRemove((badFileName + toString(processIDS[i]) + ".temp"));
+
+ m->appendFiles((locationsFile+toString(processIDS[i]) + ".temp"), locationsFile);
+ m->mothurRemove((locationsFile+toString(processIDS[i]) + ".temp"));
}
#endif
+
+
+ if (fileAligned) {
+ //find pend - pend is the biggest ending value, but we must account for when we adjust the start. That adjustment may make the "new" end larger then the largest end. So lets find out what that "new" end will be.
+ ifstream inLocations;
+ m->openInputFile(locationsFile, inLocations);
+
+ while(!inLocations.eof()) {
+
+ if (m->control_pressed) { break; }
+
+ string name = "";
+ int thisStart = -1; int thisEnd = -1;
+ if (primers.size() != 0) { inLocations >> name >> thisStart; m->gobble(inLocations); }
+ if (revPrimer.size() != 0) { inLocations >> name >> thisEnd; m->gobble(inLocations); }
+ else { pend = -1; break; }
+
+ int myDiff = 0;
+ if (pstart != -1) {
+ if (thisStart != -1) {
+ if (thisStart != pstart) { myDiff += (thisStart - pstart); }
+ }
+ }
+
+ int myEnd = thisEnd + myDiff;
+ //cout << name << '\t' << thisStart << '\t' << thisEnd << " diff = " << myDiff << '\t' << myEnd << endl;
+
+ if (thisEnd != -1) {
+ if (myEnd > pend) { pend = myEnd; }
+ }
+
+ }
+ inLocations.close();
+
+ adjustDots(goodFileName, locationsFile, pstart, pend);
+ }
+
return num;
}
}
//**********************************************************************************************************************
-int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta, set<string>& badSeqNames, linePair filePos){
+int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta, string locationsName, set<string>& badSeqNames, linePair filePos, int& pstart, bool& adjustNeeded){
try {
ofstream goodFile;
m->openOutputFile(goodFasta, goodFile);
ofstream badFile;
m->openOutputFile(badFasta, badFile);
+
+ ofstream locationsFile;
+ m->openOutputFile(locationsName, locationsFile);
ifstream inFASTA;
m->openInputFile(filename, inFASTA);
bool done = false;
int count = 0;
set<int> lengths;
+ set<int> locations; //locations[0] = beginning locations,
+
+ //pdiffs, bdiffs, primers, barcodes, revPrimers
+ map<string, int> faked;
+ TrimOligos trim(pdiffs, 0, primers, faked, revPrimer);
while (!done) {
Sequence currSeq(inFASTA); m->gobble(inFASTA);
+ if (fileAligned) { //assume aligned until proven otherwise
+ lengths.insert(currSeq.getAligned().length());
+ if (lengths.size() > 1) { fileAligned = false; }
+ }
+
string trashCode = "";
+ string locationsString = "";
+ int thisPStart = -1;
+ int thisPEnd = -1;
if (currSeq.getName() != "") {
+ if (m->debug) { m->mothurOut("[DEBUG]: seq name = " + currSeq.getName() + ".\n"); }
+
bool goodSeq = true;
if (oligosfile != "") {
map<int, int> mapAligned;
//process primers
if (primers.size() != 0) {
int primerStart = 0; int primerEnd = 0;
- bool good = findForward(currSeq, primerStart, primerEnd);
+ bool good = trim.findForward(currSeq, primerStart, primerEnd);
if(!good){ if (nomatch == "reject") { goodSeq = false; } trashCode += "f"; }
else{
//are you aligned
if (aligned) {
- if (!keepprimer) {
- if (keepdots) { currSeq.filterToPos(mapAligned[primerEnd]); }
- else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd])); }
+ if (!keepprimer) {
+ if (keepdots) { currSeq.filterToPos(mapAligned[primerEnd-1]+1); } //mapAligned[primerEnd-1] is the location of the last base in the primer. we want to trim to the space just after that. The -1 & +1 ensures if the primer is followed by gaps they are not trimmed causing an aligned sequence dataset to become unaligned.
+ else {
+ currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd-1]+1));
+ if (fileAligned) {
+ thisPStart = mapAligned[primerEnd-1]+1; //locations[0].insert(mapAligned[primerEnd-1]+1);
+ locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n";
+ }
+ }
}
else {
if (keepdots) { currSeq.filterToPos(mapAligned[primerStart]); }
- else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart])); }
+ else {
+ currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));
+ if (fileAligned) {
+ thisPStart = mapAligned[primerStart]; //locations[0].insert(mapAligned[primerStart]);
+ locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n";
+ }
+ }
}
+ isAligned(currSeq.getAligned(), mapAligned);
}else {
if (!keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); }
else { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); }
//process reverse primers
if (revPrimer.size() != 0) {
int primerStart = 0; int primerEnd = 0;
- bool good = findReverse(currSeq, primerStart, primerEnd);
+ bool good = trim.findReverse(currSeq, primerStart, primerEnd);
if(!good){ if (nomatch == "reject") { goodSeq = false; } trashCode += "r"; }
- else{
+ else{
//are you aligned
if (aligned) {
if (!keepprimer) {
if (keepdots) { currSeq.filterFromPos(mapAligned[primerStart]); }
- else { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart])); }
+ else {
+ currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart]));
+ if (fileAligned) {
+ thisPEnd = mapAligned[primerStart]; //locations[1].insert(mapAligned[primerStart]);
+ locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n";
+ }
+ }
}
else {
- if (keepdots) { currSeq.filterFromPos(mapAligned[primerEnd]); }
- else { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd])); }
+ if (keepdots) { currSeq.filterFromPos(mapAligned[primerEnd-1]+1); }
+ else {
+ currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd-1]+1));
+ if (fileAligned) {
+ thisPEnd = mapAligned[primerEnd-1]+1; //locations[1].insert(mapAligned[primerEnd-1]+1);
+ locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n";
+ }
+ }
}
}
else {
}
}else if (ecolifile != "") {
//make sure the seqs are aligned
- lengths.insert(currSeq.getAligned().length());
- if (lengths.size() > 1) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
+ if (!fileAligned) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
else if (currSeq.getAligned().length() != length) {
m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); m->control_pressed = true; break;
}else {
}
}else{ //using start and end to trim
//make sure the seqs are aligned
- lengths.insert(currSeq.getAligned().length());
- if (lengths.size() > 1) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
+ if (!fileAligned) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
else {
if (end != -1) {
if (end > currSeq.getAligned().length()) { m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); m->control_pressed = true; break; }
//trimming removed all bases
if (currSeq.getUnaligned() == "") { goodSeq = false; }
- if(goodSeq == 1) { currSeq.printSequence(goodFile); }
+ if(goodSeq == 1) {
+ currSeq.printSequence(goodFile);
+ if (m->debug) { m->mothurOut("[DEBUG]: " + locationsString + "\n"); }
+ if (thisPStart != -1) { locations.insert(thisPStart); }
+ if (locationsString != "") { locationsFile << locationsString; }
+ }
else {
badSeqNames.insert(currSeq.getName());
currSeq.setName(currSeq.getName() + '|' + trashCode);
#endif
//report progress
- if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
+ if((count) % 100 == 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count)+"\n"); }
}
//report progress
- if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
+ if((count) % 100 != 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count)+"\n"); }
badFile.close();
goodFile.close();
inFASTA.close();
-
+ locationsFile.close();
+
+ if (m->debug) { m->mothurOut("[DEBUG]: fileAligned = " + toString(fileAligned) +'\n'); }
+
+ if (fileAligned && !keepdots) { //print out smallest start value and largest end value
+ if (locations.size() > 1) { adjustNeeded = true; }
+ if (primers.size() != 0) { set<int>::iterator it = locations.begin(); pstart = *it; }
+ }
+
return count;
}
catch(exception& e) {
}
}
//********************************************************************/
-bool PcrSeqsCommand::findForward(Sequence& seq, int& primerStart, int& primerEnd){
- try {
-
- string rawSequence = seq.getUnaligned();
-
- for(int j=0;j<primers.size();j++){
- string oligo = primers[j];
-
- if(rawSequence.length() < oligo.length()) { break; }
-
- //search for primer
- int olength = oligo.length();
- for (int j = 0; j < rawSequence.length()-olength; j++){
- if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
- string rawChunk = rawSequence.substr(j, olength);
- if(compareDNASeq(oligo, rawChunk)) {
- primerStart = j;
- primerEnd = primerStart + olength;
- return true;
- }
-
- }
- }
-
- primerStart = 0; primerEnd = 0;
- return false;
-
- }
- catch(exception& e) {
- m->errorOut(e, "TrimOligos", "stripForward");
- exit(1);
- }
-}
-//******************************************************************/
-bool PcrSeqsCommand::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
- try {
-
- string rawSequence = seq.getUnaligned();
-
- for(int i=0;i<revPrimer.size();i++){
- string oligo = revPrimer[i];
- if(rawSequence.length() < oligo.length()) { break; }
-
- //search for primer
- int olength = oligo.length();
- for (int j = rawSequence.length()-olength; j >= 0; j--){
- if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
- string rawChunk = rawSequence.substr(j, olength);
-
- if(compareDNASeq(oligo, rawChunk)) {
- primerStart = j;
- primerEnd = primerStart + olength;
- return true;
- }
-
- }
- }
-
- primerStart = 0; primerEnd = 0;
- return false;
- }
- catch(exception& e) {
- m->errorOut(e, "PcrSeqsCommand", "findReverse");
- exit(1);
- }
-}
-//********************************************************************/
bool PcrSeqsCommand::isAligned(string seq, map<int, int>& aligned){
try {
+ aligned.clear();
bool isAligned = false;
int countBases = 0;
exit(1);
}
}
+//**********************************************************************************************************************
+int PcrSeqsCommand::adjustDots(string goodFasta, string locations, int pstart, int pend){
+ try {
+ ifstream inFasta;
+ m->openInputFile(goodFasta, inFasta);
+
+ ifstream inLocations;
+ m->openInputFile(locations, inLocations);
+
+ ofstream out;
+ m->openOutputFile(goodFasta+".temp", out);
+
+ set<int> lengths;
+ //cout << pstart << '\t' << pend << endl;
+ //if (pstart > pend) { //swap them
+
+ while(!inFasta.eof()) {
+ if(m->control_pressed) { break; }
+
+ Sequence seq(inFasta); m->gobble(inFasta);
+
+ string name = "";
+ int thisStart = -1; int thisEnd = -1;
+ if (primers.size() != 0) { inLocations >> name >> thisStart; m->gobble(inLocations); }
+ if (revPrimer.size() != 0) { inLocations >> name >> thisEnd; m->gobble(inLocations); }
+
+
+ //cout << seq.getName() << '\t' << thisStart << '\t' << thisEnd << '\t' << seq.getAligned().length() << endl;
+ //cout << seq.getName() << '\t' << pstart << '\t' << pend << endl;
+
+ if (name != seq.getName()) { m->mothurOut("[ERROR]: name mismatch in pcr.seqs.\n"); }
+ else {
+ if (pstart != -1) {
+ if (thisStart != -1) {
+ if (thisStart != pstart) {
+ string dots = "";
+ for (int i = pstart; i < thisStart; i++) { dots += "."; }
+ thisEnd += dots.length();
+ dots += seq.getAligned();
+ seq.setAligned(dots);
+ }
+ }
+ }
+
+ if (pend != -1) {
+ if (thisEnd != -1) {
+ if (thisEnd != pend) {
+ string dots = seq.getAligned();
+ for (int i = thisEnd; i < pend; i++) { dots += "."; }
+ seq.setAligned(dots);
+ }
+ }
+ }
+ lengths.insert(seq.getAligned().length());
+ }
+
+ seq.printSequence(out);
+ }
+ inFasta.close();
+ inLocations.close();
+ out.close();
+ m->mothurRemove(locations);
+ m->mothurRemove(goodFasta);
+ m->renameFile(goodFasta+".temp", goodFasta);
+
+ //cout << "final lengths = \n";
+ //for (set<int>::iterator it = lengths.begin(); it != lengths.end(); it++) {
+ //cout << *it << endl;
+ // cout << lengths.count(*it) << endl;
+ // }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PcrSeqsCommand", "adjustDots");
+ exit(1);
+ }
+}
//********************************************************************/
string PcrSeqsCommand::reverseOligo(string oligo){
try {
m->openInputFile(oligosfile, inOligos);
string type, oligo, group;
+ int primerCount = 0;
while(!inOligos.eof()){
// get rest of line in case there is a primer name
while (!inOligos.eof()) {
char c = inOligos.get();
- if (c == 10 || c == 13){ break; }
+ if (c == 10 || c == 13 || c == -1){ break; }
else if (c == 32 || c == 9){;} //space or tab
}
- primers.push_back(oligo);
+ primers[oligo] = primerCount; primerCount++;
+ //cout << "for oligo = " << oligo << endl;
}else if(type == "REVERSE"){
string oligoRC = reverseOligo(oligo);
revPrimer.push_back(oligoRC);
- //cout << "oligo = " << oligo << " reverse = " << oligoRC << endl;
+ //cout << "rev oligo = " << oligo << " reverse = " << oligoRC << endl;
}else if(type == "BARCODE"){
- inOligos >> group;
+ inOligos >> group;
+ }else if(type == "PRIMER"){
+ m->gobble(inOligos);
+ primers[oligo] = primerCount; primerCount++;
+
+ string roligo="";
+ inOligos >> roligo;
+
+ for(int i=0;i<roligo.length();i++){
+ roligo[i] = toupper(roligo[i]);
+ if(roligo[i] == 'U') { roligo[i] = 'T'; }
+ }
+ revPrimer.push_back(reverseOligo(roligo));
+
+ // get rest of line in case there is a primer name
+ while (!inOligos.eof()) {
+ char c = inOligos.get();
+ if (c == 10 || c == 13 || c == -1){ break; }
+ else if (c == 32 || c == 9){;} //space or tab
+ }
+ //cout << "prim oligo = " << oligo << " reverse = " << roligo << endl;
}else if((type == "LINKER")||(type == "SPACER")) {;}
- else{ m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, linker, spacer and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+ else{ m->mothurOut(type + " is not recognized as a valid type. Choices are primer, forward, reverse, linker, spacer and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); m->control_pressed = true; }
}
m->gobble(inOligos);
}
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("accnos");
+ map<string, string> variables;
+ variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
+ string outputFileName = getOutputFileName("accnos",variables);
outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName);
ofstream out;
exit(1);
}
-}
-//******************************************************************/
-bool PcrSeqsCommand::compareDNASeq(string oligo, string seq){
- try {
- bool success = 1;
- int length = oligo.length();
-
- for(int i=0;i<length;i++){
-
- if(oligo[i] != seq[i]){
- if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C') { success = 0; }
- else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { success = 0; }
- else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { success = 0; }
- else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { success = 0; }
- else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { success = 0; }
- else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
- else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { success = 0; }
- else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
- else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
- else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
- else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { success = 0; }
- else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
-
- if(success == 0) { break; }
- }
- else{
- success = 1;
- }
- }
-
- return success;
- }
- catch(exception& e) {
- m->errorOut(e, "PcrSeqsCommand", "compareDNASeq");
- exit(1);
- }
-
}
//***************************************************************************************************************
int PcrSeqsCommand::readName(set<string>& names){
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(namefile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
+ map<string, string> variables;
+ variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile));
+ variables["[extension]"] = m->getExtension(namefile);
+ string outputFileName = getOutputFileName("name", variables);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
+ map<string, string> variables;
+ variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
+ variables["[extension]"] = m->getExtension(groupfile);
+ string outputFileName = getOutputFileName("group", variables);
ofstream out;
m->openOutputFile(outputFileName, out);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); }
- string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
+ map<string, string> variables;
+ variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(taxfile));
+ variables["[extension]"] = m->getExtension(taxfile);
+ string outputFileName = getOutputFileName("taxonomy", variables);
+
ofstream out;
m->openOutputFile(outputFileName, out);
m->openInputFile(countfile, in);
set<string>::iterator it;
- string goodCountFile = outputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
+ variables["[extension]"] = m->getExtension(countfile);
+ string goodCountFile = getOutputFileName("count", variables);
+
outputNames.push_back(goodCountFile); outputTypes["count"].push_back(goodCountFile);
ofstream goodCountOut; m->openOutputFile(goodCountFile, goodCountOut);
//check for groups that have been eliminated
CountTable ct;
if (ct.testGroups(goodCountFile)) {
- ct.readTable(goodCountFile);
+ ct.readTable(goodCountFile, true, false);
ct.printTable(goodCountFile);
}