if (abort == true) { if (calledHelp) { return 0; } return 2; }
int start = time(NULL);
+ fileAligned = true;
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
if (m->control_pressed) { return 0; }
set<string> badNames;
- if(processors == 1) { numFastaSeqs = driverPcr(fastafile, trimSeqFile, badSeqFile, badNames, lines[0]); }
- else { numFastaSeqs = createProcesses(fastafile, trimSeqFile, badSeqFile, badNames); }
+ numFastaSeqs = createProcesses(fastafile, trimSeqFile, badSeqFile, badNames);
if (m->control_pressed) { return 0; }
vector<int> processIDS;
int process = 1;
int num = 0;
+ int pstart = -1; int pend = -1;
+ bool adjustNeeded = false;
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- num = driverPcr(filename, goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", badSeqNames, lines[process]);
+ string locationsFile = toString(getpid()) + ".temp";
+ num = driverPcr(filename, goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", locationsFile, badSeqNames, lines[process], pstart, adjustNeeded);
//pass numSeqs to parent
ofstream out;
string tempFile = filename + toString(getpid()) + ".num.temp";
m->openOutputFile(tempFile, out);
+ out << pstart << '\t' << adjustNeeded << endl;
out << num << '\t' << badSeqNames.size() << endl;
for (set<string>::iterator it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
out << (*it) << endl;
}
}
- num = driverPcr(filename, goodFileName, badFileName, badSeqNames, lines[0]);
+ string locationsFile = toString(getpid()) + ".temp";
+ num = driverPcr(filename, goodFileName, badFileName, locationsFile, badSeqNames, lines[0], pstart, adjustNeeded);
//force parent to wait until all the processes are done
for (int i=0;i<processIDS.size();i++) {
string tempFile = filename + toString(processIDS[i]) + ".num.temp";
m->openInputFile(tempFile, in);
int numBadNames = 0; string name = "";
- if (!in.eof()) { int tempNum = 0; in >> tempNum >> numBadNames; num += tempNum; m->gobble(in); }
+ int tpstart = -1; bool tempAdjust = false;
+
+ if (!in.eof()) {
+ in >> tpstart >> tempAdjust; m->gobble(in);
+
+ if (tempAdjust) { adjustNeeded = true; }
+ if (tpstart != -1) {
+ if (tpstart != pstart) { adjustNeeded = true; }
+ if (tpstart < pstart) { pstart = tpstart; } //smallest start
+ }
+ int tempNum = 0; in >> tempNum >> numBadNames; num += tempNum; m->gobble(in);
+ }
for (int j = 0; j < numBadNames; j++) {
in >> name; m->gobble(in);
badSeqNames.insert(name);
m->appendFiles((badFileName + toString(processIDS[i]) + ".temp"), badFileName);
m->mothurRemove((badFileName + toString(processIDS[i]) + ".temp"));
+
+ m->appendFiles((toString(processIDS[i]) + ".temp"), locationsFile);
+ m->mothurRemove((toString(processIDS[i]) + ".temp"));
}
#else
DWORD dwThreadIdArray[processors-1];
HANDLE hThreadArray[processors-1];
+ string locationsFile = "locationsFile.txt";
+ m->mothurRemove(locationsFile);
+ m->mothurRemove(goodFileName);
+ m->mothurRemove(badFileName);
+
//Create processor worker threads.
for( int i=0; i<processors-1; i++ ){
if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
// Allocate memory for thread data.
- pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, pdiffs, lines[i].start, lines[i].end);
+ pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, locationsFile+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, pdiffs, lines[i].start, lines[i].end);
pDataArray.push_back(tempPcr);
//default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
}
//do your part
- num = driverPcr(filename, (goodFileName+toString(processors-1)+".temp"), (badFileName+toString(processors-1)+".temp"),badSeqNames, lines[processors-1]);
+ num = driverPcr(filename, (goodFileName+toString(processors-1)+".temp"), (badFileName+toString(processors-1)+".temp"), (locationsFile+toString(processors-1)+".temp"), badSeqNames, lines[processors-1], pstart, adjustNeeded);
processIDS.push_back(processors-1);
//Wait until all threads have terminated.
if (pDataArray[i]->count != pDataArray[i]->fend) {
m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->fend) + " sequences assigned to it, quitting. \n"); m->control_pressed = true;
}
+ if (pDataArray[i]->adjustNeeded) { adjustNeeded = true; }
+ if (pDataArray[i]->pstart != -1) {
+ if (pDataArray[i]->pstart != pstart) { adjustNeeded = true; }
+ if (pDataArray[i]->pstart < pstart) { pstart = pDataArray[i]->pstart; }
+ } //smallest start
+
for (set<string>::iterator it = pDataArray[i]->badSeqNames.begin(); it != pDataArray[i]->badSeqNames.end(); it++) { badSeqNames.insert(*it); }
CloseHandle(hThreadArray[i]);
delete pDataArray[i];
m->appendFiles((badFileName + toString(processIDS[i]) + ".temp"), badFileName);
m->mothurRemove((badFileName + toString(processIDS[i]) + ".temp"));
+
+ m->appendFiles((locationsFile+toString(processIDS[i]) + ".temp"), locationsFile);
+ m->mothurRemove((locationsFile+toString(processIDS[i]) + ".temp"));
}
#endif
+
+
+ if (fileAligned) {
+ //find pend - pend is the biggest ending value, but we must account for when we adjust the start. That adjustment may make the "new" end larger than the largest end, so let's find out what that "new" end will be.
+ ifstream inLocations;
+ m->openInputFile(locationsFile, inLocations);
+
+ while(!inLocations.eof()) {
+
+ if (m->control_pressed) { break; }
+
+ string name = "";
+ int thisStart = -1; int thisEnd = -1;
+ if (primers.size() != 0) { inLocations >> name >> thisStart; m->gobble(inLocations); }
+ if (revPrimer.size() != 0) { inLocations >> name >> thisEnd; m->gobble(inLocations); }
+ else { pend = -1; break; }
+
+ int myDiff = 0;
+ if (pstart != -1) {
+ if (thisStart != -1) {
+ if (thisStart != pstart) { myDiff += (thisStart - pstart); }
+ }
+ }
+
+ int myEnd = thisEnd + myDiff;
+ //cout << name << '\t' << thisStart << '\t' << thisEnd << " diff = " << myDiff << '\t' << myEnd << endl;
+
+ if (thisEnd != -1) {
+ if (myEnd > pend) { pend = myEnd; }
+ }
+
+ }
+ inLocations.close();
+
+ adjustDots(goodFileName, locationsFile, pstart, pend);
+ }
+
return num;
}
}
//**********************************************************************************************************************
-int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta, set<string>& badSeqNames, linePair filePos){
+int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta, string locationsName, set<string>& badSeqNames, linePair filePos, int& pstart, bool& adjustNeeded){
try {
ofstream goodFile;
m->openOutputFile(goodFasta, goodFile);
ofstream badFile;
m->openOutputFile(badFasta, badFile);
+
+ ofstream locationsFile;
+ m->openOutputFile(locationsName, locationsFile);
ifstream inFASTA;
m->openInputFile(filename, inFASTA);
bool done = false;
int count = 0;
set<int> lengths;
+ set<int> locations; //locations[0] = beginning locations,
//pdiffs, bdiffs, primers, barcodes, revPrimers
map<string, int> faked;
Sequence currSeq(inFASTA); m->gobble(inFASTA);
+ if (fileAligned) { //assume aligned until proven otherwise
+ lengths.insert(currSeq.getAligned().length());
+ if (lengths.size() > 1) { fileAligned = false; }
+ }
+
string trashCode = "";
+ string locationsString = "";
+ int thisPStart = -1;
+ int thisPEnd = -1;
if (currSeq.getName() != "") {
if (m->debug) { m->mothurOut("[DEBUG]: seq name = " + currSeq.getName() + ".\n"); }
else{
//are you aligned
if (aligned) {
- if (!keepprimer) {
- if (keepdots) { currSeq.filterToPos(mapAligned[primerEnd]); }
- else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd])); }
+ if (!keepprimer) {
+ if (keepdots) { currSeq.filterToPos(mapAligned[primerEnd-1]+1); } //mapAligned[primerEnd-1] is the location of the last base in the primer. we want to trim to the space just after that. The -1 & +1 ensures if the primer is followed by gaps they are not trimmed causing an aligned sequence dataset to become unaligned.
+ else {
+ currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd-1]+1));
+ if (fileAligned) {
+ thisPStart = mapAligned[primerEnd-1]+1; //locations[0].insert(mapAligned[primerEnd-1]+1);
+ locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n";
+ }
+ }
}
else {
if (keepdots) { currSeq.filterToPos(mapAligned[primerStart]); }
- else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart])); }
+ else {
+ currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));
+ if (fileAligned) {
+ thisPStart = mapAligned[primerStart]; //locations[0].insert(mapAligned[primerStart]);
+ locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n";
+ }
+ }
}
isAligned(currSeq.getAligned(), mapAligned);
}else {
int primerStart = 0; int primerEnd = 0;
bool good = trim.findReverse(currSeq, primerStart, primerEnd);
if(!good){ if (nomatch == "reject") { goodSeq = false; } trashCode += "r"; }
- else{
- //are you aligned
+ else{
+ //are you aligned
if (aligned) {
if (!keepprimer) {
if (keepdots) { currSeq.filterFromPos(mapAligned[primerStart]); }
- else { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart])); }
+ else {
+ currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart]));
+ if (fileAligned) {
+ thisPEnd = mapAligned[primerStart]; //locations[1].insert(mapAligned[primerStart]);
+ locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n";
+ }
+ }
}
else {
- if (keepdots) { currSeq.filterFromPos(mapAligned[primerEnd]); }
- else { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd])); }
+ if (keepdots) { currSeq.filterFromPos(mapAligned[primerEnd-1]+1); }
+ else {
+ currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd-1]+1));
+ if (fileAligned) {
+ thisPEnd = mapAligned[primerEnd-1]+1; //locations[1].insert(mapAligned[primerEnd-1]+1);
+ locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n";
+ }
+ }
}
}
else {
}
}else if (ecolifile != "") {
//make sure the seqs are aligned
- lengths.insert(currSeq.getAligned().length());
- if (lengths.size() > 1) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
+ if (!fileAligned) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
else if (currSeq.getAligned().length() != length) {
m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); m->control_pressed = true; break;
}else {
}
}else{ //using start and end to trim
//make sure the seqs are aligned
- lengths.insert(currSeq.getAligned().length());
- if (lengths.size() > 1) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
+ if (!fileAligned) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
else {
if (end != -1) {
if (end > currSeq.getAligned().length()) { m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); m->control_pressed = true; break; }
//trimming removed all bases
if (currSeq.getUnaligned() == "") { goodSeq = false; }
- if(goodSeq == 1) { currSeq.printSequence(goodFile); }
+ if(goodSeq == 1) {
+ currSeq.printSequence(goodFile);
+ if (m->debug) { m->mothurOut("[DEBUG]: " + locationsString + "\n"); }
+ if (thisPStart != -1) { locations.insert(thisPStart); }
+ if (locationsString != "") { locationsFile << locationsString; }
+ }
else {
badSeqNames.insert(currSeq.getName());
currSeq.setName(currSeq.getName() + '|' + trashCode);
#endif
//report progress
- if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
+ if((count) % 100 == 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count)+"\n"); }
}
//report progress
- if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
+ if((count) % 100 != 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count)+"\n"); }
badFile.close();
goodFile.close();
inFASTA.close();
-
+ locationsFile.close();
+
+ if (m->debug) { m->mothurOut("[DEBUG]: fileAligned = " + toString(fileAligned) +'\n'); }
+
+ if (fileAligned && !keepdots) { //print out smallest start value and largest end value
+ if (locations.size() > 1) { adjustNeeded = true; }
+ if (primers.size() != 0) { set<int>::iterator it = locations.begin(); pstart = *it; }
+ }
+
return count;
}
catch(exception& e) {
exit(1);
}
}
+//**********************************************************************************************************************
+int PcrSeqsCommand::adjustDots(string goodFasta, string locations, int pstart, int pend){ // Pads each sequence in goodFasta with '.' at the front (toward pstart) and at the back (toward pend), rewrites goodFasta with the result, and returns 0.
+ try { // anything thrown in here is handed to m->errorOut by the catch below, after which the process exits
+ ifstream inFasta;
+ m->openInputFile(goodFasta, inFasta);
+ // goodFasta: path of the FASTA file to adjust. locations: path of the per-sequence position file that is read alongside it.
+ ifstream inLocations;
+ m->openInputFile(locations, inLocations);
+ // pstart / pend: target start and end positions; a value of -1 switches off the corresponding padding step below.
+ ofstream out;
+ m->openOutputFile(goodFasta+".temp", out); // adjusted sequences are written to a temp file that takes goodFasta's place at the end
+ // 'lengths' collects the aligned length of every adjusted sequence; only the commented-out debug output near the return reads it.
+ set<int> lengths;
+ //cout << pstart << '\t' << pend << endl;
+ //if (pstart > pend) { //swap them
+ // Walk the FASTA file and the locations file in lockstep, one sequence per iteration.
+ while(!inFasta.eof()) {
+ if(m->control_pressed) { break; } // NOTE(review): the cleanup after the loop still runs on this path, so goodFasta is replaced by a partially written temp file - confirm that is intended
+
+ Sequence seq(inFasta); m->gobble(inFasta);
+ // Each locations record is read as "name position": one record for the start when 'primers' is non-empty, then one for the end when 'revPrimer' is non-empty.
+ string name = "";
+ int thisStart = -1; int thisEnd = -1; // -1 = no position read for this sequence
+ if (primers.size() != 0) { inLocations >> name >> thisStart; m->gobble(inLocations); }
+ if (revPrimer.size() != 0) { inLocations >> name >> thisEnd; m->gobble(inLocations); } // 'name' now holds the name from the last record read
+
+
+ //cout << seq.getName() << '\t' << thisStart << '\t' << thisEnd << '\t' << seq.getAligned().length() << endl;
+ //cout << seq.getName() << '\t' << pstart << '\t' << pend << endl;
+ // The record just read must belong to this sequence. On a mismatch the error is logged, no padding is applied, and processing continues.
+ if (name != seq.getName()) { m->mothurOut("[ERROR]: name mismatch in pcr.seqs.\n"); }
+ else {
+ if (pstart != -1) { // front padding requested
+ if (thisStart != -1) {
+ if (thisStart != pstart) {
+ string dots = "";
+ for (int i = pstart; i < thisStart; i++) { dots += "."; } // one '.' per position from pstart up to thisStart; nothing is added when thisStart < pstart
+ thisEnd += dots.length(); // shift the end position by the number of dots about to be prepended
+ dots += seq.getAligned();
+ seq.setAligned(dots);
+ }
+ }
+ }
+ // Back padding: append one '.' per position from thisEnd up to pend; nothing is appended when thisEnd >= pend.
+ if (pend != -1) {
+ if (thisEnd != -1) {
+ if (thisEnd != pend) {
+ string dots = seq.getAligned();
+ for (int i = thisEnd; i < pend; i++) { dots += "."; }
+ seq.setAligned(dots);
+ }
+ }
+ }
+ lengths.insert(seq.getAligned().length());
+ }
+ // The sequence is written out whether or not its name matched the locations record.
+ seq.printSequence(out);
+ }
+ inFasta.close();
+ inLocations.close();
+ out.close();
+ m->mothurRemove(locations); // presumably deletes the locations file now that it has been consumed - helper is defined elsewhere
+ m->mothurRemove(goodFasta); // drop the unadjusted FASTA ...
+ m->renameFile(goodFasta+".temp", goodFasta); // ... and move the adjusted copy into its place
+
+ //cout << "final lengths = \n";
+ //for (set<int>::iterator it = lengths.begin(); it != lengths.end(); it++) {
+ //cout << *it << endl;
+ // cout << lengths.count(*it) << endl;
+ // }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PcrSeqsCommand", "adjustDots");
+ exit(1);
+ }
+}
//********************************************************************/
string PcrSeqsCommand::reverseOligo(string oligo){
try {
// get rest of line in case there is a primer name
while (!inOligos.eof()) {
char c = inOligos.get();
- if (c == 10 || c == 13){ break; }
+ if (c == 10 || c == 13 || c == -1){ break; }
else if (c == 32 || c == 9){;} //space or tab
}
primers[oligo] = primerCount; primerCount++;
+ //cout << "for oligo = " << oligo << endl;
}else if(type == "REVERSE"){
string oligoRC = reverseOligo(oligo);
revPrimer.push_back(oligoRC);
- //cout << "oligo = " << oligo << " reverse = " << oligoRC << endl;
+ //cout << "rev oligo = " << oligo << " reverse = " << oligoRC << endl;
}else if(type == "BARCODE"){
- inOligos >> group;
+ inOligos >> group;
+ }else if(type == "PRIMER"){
+ m->gobble(inOligos);
+ primers[oligo] = primerCount; primerCount++;
+
+ string roligo="";
+ inOligos >> roligo;
+
+ for(int i=0;i<roligo.length();i++){
+ roligo[i] = toupper(roligo[i]);
+ if(roligo[i] == 'U') { roligo[i] = 'T'; }
+ }
+ revPrimer.push_back(reverseOligo(roligo));
+
+ // get rest of line in case there is a primer name
+ while (!inOligos.eof()) {
+ char c = inOligos.get();
+ if (c == 10 || c == 13 || c == -1){ break; }
+ else if (c == 32 || c == 9){;} //space or tab
+ }
+ //cout << "prim oligo = " << oligo << " reverse = " << roligo << endl;
}else if((type == "LINKER")||(type == "SPACER")) {;}
- else{ m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, linker, spacer and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+ else{ m->mothurOut(type + " is not recognized as a valid type. Choices are primer, forward, reverse, linker, spacer and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); m->control_pressed = true; }
}
m->gobble(inOligos);
}
//check for groups that have been eliminated
CountTable ct;
if (ct.testGroups(goodCountFile)) {
- ct.readTable(goodCountFile);
+ ct.readTable(goodCountFile, true, false);
ct.printTable(goodCountFile);
}