if (abort == true) { if (calledHelp) { return 0; } return 2; }
int start = time(NULL);
- fileAligned = true;
+ fileAligned = true; pairedOligos = false;
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
length = 0;
- if(oligosfile != ""){ readOligos(); if (m->debug) { m->mothurOut("[DEBUG]: read oligos file. numprimers = " + toString(primers.size()) + ", revprimers = " + toString(revPrimer.size()) + ".\n"); } } if (m->control_pressed) { return 0; }
+ if(oligosfile != ""){ readOligos(); if (m->debug) { m->mothurOut("[DEBUG]: read oligos file. numprimers = " + toString(numFPrimers) + ", revprimers = " + toString(numRPrimers) + ".\n"); } } if (m->control_pressed) { return 0; }
if(ecolifile != "") { readEcoli(); } if (m->control_pressed) { return 0; }
vector<unsigned long long> positions;
//loop through and create all the processes you want
while (process != processors) {
- int pid = fork();
+ pid_t pid = fork();
if (pid > 0) {
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- string locationsFile = toString(getpid()) + ".temp";
- num = driverPcr(filename, goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", locationsFile, badSeqNames, lines[process], pstart, pend, adjustNeeded);
+ string locationsFile = m->mothurGetpid(process) + ".temp";
+ num = driverPcr(filename, goodFileName + m->mothurGetpid(process) + ".temp", badFileName + m->mothurGetpid(process) + ".temp", locationsFile, badSeqNames, lines[process], pstart, adjustNeeded);
//pass numSeqs to parent
ofstream out;
- string tempFile = filename + toString(getpid()) + ".num.temp";
+ string tempFile = filename + m->mothurGetpid(process) + ".num.temp";
m->openOutputFile(tempFile, out);
- out << pstart << '\t' << pend << '\t' << adjustNeeded << endl;
+ out << pstart << '\t' << adjustNeeded << endl;
out << num << '\t' << badSeqNames.size() << endl;
for (set<string>::iterator it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
out << (*it) << endl;
}
}
- string locationsFile = toString(getpid()) + ".temp";
- num = driverPcr(filename, goodFileName, badFileName, locationsFile, badSeqNames, lines[0], pstart, pend, adjustNeeded);
+ string locationsFile = m->mothurGetpid(process) + ".temp";
+ num = driverPcr(filename, goodFileName, badFileName, locationsFile, badSeqNames, lines[0], pstart, adjustNeeded);
//force parent to wait until all the processes are done
for (int i=0;i<processIDS.size();i++) {
string tempFile = filename + toString(processIDS[i]) + ".num.temp";
m->openInputFile(tempFile, in);
int numBadNames = 0; string name = "";
- int tpstart = -1; int tpend = -1; bool tempAdjust = false;
+ int tpstart = -1; bool tempAdjust = false;
if (!in.eof()) {
- in >> tpstart >> tpend >> tempAdjust; m->gobble(in);
+ in >> tpstart >> tempAdjust; m->gobble(in);
if (tempAdjust) { adjustNeeded = true; }
if (tpstart != -1) {
if (tpstart != pstart) { adjustNeeded = true; }
if (tpstart < pstart) { pstart = tpstart; } //smallest start
}
- if (tpend != -1) {
- if (tpend != pend) { adjustNeeded = true; }
- if (tpend > pend) { pend = tpend; } //largest end
- }
int tempNum = 0; in >> tempNum >> numBadNames; num += tempNum; m->gobble(in);
}
for (int j = 0; j < numBadNames; j++) {
if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
// Allocate memory for thread data.
- pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, locationsFile+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, pdiffs, lines[i].start, lines[i].end);
+ pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, locationsFile+extension, m, oligosfile, ecolifile, nomatch, keepprimer, keepdots, start, end, length, pdiffs, lines[i].start, lines[i].end);
pDataArray.push_back(tempPcr);
//default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
}
//do your part
- num = driverPcr(filename, (goodFileName+toString(processors-1)+".temp"), (badFileName+toString(processors-1)+".temp"), (locationsFile+toString(processors-1)+".temp"), badSeqNames, lines[processors-1], pstart, pend, adjustNeeded);
+ num = driverPcr(filename, (goodFileName+toString(processors-1)+".temp"), (badFileName+toString(processors-1)+".temp"), (locationsFile+toString(processors-1)+".temp"), badSeqNames, lines[processors-1], pstart, adjustNeeded);
processIDS.push_back(processors-1);
//Wait until all threads have terminated.
if (pDataArray[i]->pstart != pstart) { adjustNeeded = true; }
if (pDataArray[i]->pstart < pstart) { pstart = pDataArray[i]->pstart; }
} //smallest start
- if (pDataArray[i]->pend != -1) {
- if (pDataArray[i]->pend != pend) { adjustNeeded = true; }
- if (pDataArray[i]->pend > pend) { pend = pDataArray[i]->pend; }
- } //largest end
-
+
for (set<string>::iterator it = pDataArray[i]->badSeqNames.begin(); it != pDataArray[i]->badSeqNames.end(); it++) { badSeqNames.insert(*it); }
CloseHandle(hThreadArray[i]);
delete pDataArray[i];
}
#endif
- if (fileAligned && adjustNeeded) { adjustDots(goodFileName, locationsFile, pstart, pend); }
+
+
+
+ if (fileAligned && adjustNeeded) {
+ //find pend - pend is the biggest ending value, but we must account for when we adjust the start. That adjustment may make the "new" end larger than the largest end. So let's find out what that "new" end will be.
+ ifstream inLocations;
+ m->openInputFile(locationsFile, inLocations);
+
+ while(!inLocations.eof()) {
+
+ if (m->control_pressed) { break; }
+
+ string name = "";
+ int thisStart = -1; int thisEnd = -1;
+ if (numFPrimers != 0) { inLocations >> name >> thisStart; m->gobble(inLocations); }
+ if (numRPrimers != 0) { inLocations >> name >> thisEnd; m->gobble(inLocations); }
+ else { pend = -1; break; }
+
+ int myDiff = 0;
+ if (pstart != -1) {
+ if (thisStart != -1) {
+ if (thisStart != pstart) { myDiff += (thisStart - pstart); }
+ }
+ }
+
+ int myEnd = thisEnd + myDiff;
+ //cout << name << '\t' << thisStart << '\t' << thisEnd << " diff = " << myDiff << '\t' << myEnd << endl;
+
+ if (thisEnd != -1) {
+ if (myEnd > pend) { pend = myEnd; }
+ }
+
+ }
+ inLocations.close();
+
+ adjustDots(goodFileName, locationsFile, pstart, pend);
+ }else { m->mothurRemove(locationsFile); }
return num;
}
//**********************************************************************************************************************
-int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta, string locationsName, set<string>& badSeqNames, linePair filePos, int& pstart, int& pend, bool& adjustNeeded){
+int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta, string locationsName, set<string>& badSeqNames, linePair filePos, int& pstart, bool& adjustNeeded){
try {
ofstream goodFile;
m->openOutputFile(goodFasta, goodFile);
bool done = false;
int count = 0;
set<int> lengths;
- vector< set<int> > locations; //locations[0] = beginning locations, locations[1] = ending locations
- locations.resize(2);
+ set<int> locations; //beginning locations (thisPStart) recorded for the good sequences
//pdiffs, bdiffs, primers, barcodes, revPrimers
- map<string, int> faked;
- TrimOligos trim(pdiffs, 0, primers, faked, revPrimer);
+ map<string, int> primers;
+ map<string, int> barcodes; //not used
+ vector<string> revPrimer;
+ if (pairedOligos) {
+ map<int, oligosPair> primerPairs = oligos.getPairedPrimers();
+ for (map<int, oligosPair>::iterator it = primerPairs.begin(); it != primerPairs.end(); it++) {
+ primers[(it->second).forward] = it->first;
+ revPrimer.push_back((it->second).reverse);
+ }
+ if (pdiffs != 0) { m->mothurOut("[WARNING]: Pcr.seqs is only designed to allow diffs in forward primers. Reverse primers must be an exact match.\n"); }
+ }else{
+ primers = oligos.getPrimers();
+ revPrimer = oligos.getReversePrimers();
+ }
+
+ TrimOligos trim(pdiffs, 0, primers, barcodes, revPrimer);
while (!done) {
if(goodSeq == 1) {
currSeq.printSequence(goodFile);
if (m->debug) { m->mothurOut("[DEBUG]: " + locationsString + "\n"); }
+ if (thisPStart != -1) { locations.insert(thisPStart); }
if (locationsString != "") { locationsFile << locationsString; }
- if (thisPStart != -1) { locations[0].insert(thisPStart); }
- if (thisPEnd != -1) { locations[1].insert(thisPEnd); }
}
else {
badSeqNames.insert(currSeq.getName());
if (m->debug) { m->mothurOut("[DEBUG]: fileAligned = " + toString(fileAligned) +'\n'); }
if (fileAligned && !keepdots) { //print out smallest start value and largest end value
- if ((locations[0].size() > 1) || (locations[1].size() > 1)) { adjustNeeded = true; }
- if (primers.size() != 0) { set<int>::iterator it = locations[0].begin(); pstart = *it; }
- if (revPrimer.size() != 0) { set<int>::reverse_iterator it2 = locations[1].rbegin(); pend = *it2; }
+ if (locations.size() > 1) { adjustNeeded = true; }
+ if (primers.size() != 0) { set<int>::iterator it = locations.begin(); pstart = *it; }
}
return count;
set<int> lengths;
//cout << pstart << '\t' << pend << endl;
+ //if (pstart > pend) { //swap them
while(!inFasta.eof()) {
if(m->control_pressed) { break; }
string name = "";
int thisStart = -1; int thisEnd = -1;
- if (primers.size() != 0) { inLocations >> name >> thisStart; m->gobble(inLocations); }
- if (revPrimer.size() != 0) { inLocations >> name >> thisEnd; m->gobble(inLocations); }
+ if (numFPrimers != 0) { inLocations >> name >> thisStart; m->gobble(inLocations); }
+ if (numRPrimers != 0) { inLocations >> name >> thisEnd; m->gobble(inLocations); }
+
+
//cout << seq.getName() << '\t' << thisStart << '\t' << thisEnd << '\t' << seq.getAligned().length() << endl;
//cout << seq.getName() << '\t' << pstart << '\t' << pend << endl;
//cout << "final lengths = \n";
//for (set<int>::iterator it = lengths.begin(); it != lengths.end(); it++) {
- // cout << *it << endl;
+ //cout << *it << endl;
+ // cout << lengths.count(*it) << endl;
// }
return 0;
exit(1);
}
}
-//********************************************************************/
-string PcrSeqsCommand::reverseOligo(string oligo){
- try {
- string reverse = "";
-
- for(int i=oligo.length()-1;i>=0;i--){
-
- if(oligo[i] == 'A') { reverse += 'T'; }
- else if(oligo[i] == 'T'){ reverse += 'A'; }
- else if(oligo[i] == 'U'){ reverse += 'A'; }
-
- else if(oligo[i] == 'G'){ reverse += 'C'; }
- else if(oligo[i] == 'C'){ reverse += 'G'; }
-
- else if(oligo[i] == 'R'){ reverse += 'Y'; }
- else if(oligo[i] == 'Y'){ reverse += 'R'; }
-
- else if(oligo[i] == 'M'){ reverse += 'K'; }
- else if(oligo[i] == 'K'){ reverse += 'M'; }
-
- else if(oligo[i] == 'W'){ reverse += 'W'; }
- else if(oligo[i] == 'S'){ reverse += 'S'; }
-
- else if(oligo[i] == 'B'){ reverse += 'V'; }
- else if(oligo[i] == 'V'){ reverse += 'B'; }
-
- else if(oligo[i] == 'D'){ reverse += 'H'; }
- else if(oligo[i] == 'H'){ reverse += 'D'; }
-
- else { reverse += 'N'; }
- }
-
-
- return reverse;
- }
- catch(exception& e) {
- m->errorOut(e, "PcrSeqsCommand", "reverseOligo");
- exit(1);
- }
-}
-
-//***************************************************************************************************************
-bool PcrSeqsCommand::readOligos(){
- try {
- ifstream inOligos;
- m->openInputFile(oligosfile, inOligos);
-
- string type, oligo, group;
- int primerCount = 0;
-
- while(!inOligos.eof()){
-
- inOligos >> type;
-
- if(type[0] == '#'){ //ignore
- while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
- m->gobble(inOligos);
- }else{
- m->gobble(inOligos);
- //make type case insensitive
- for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
-
- inOligos >> oligo;
-
- for(int i=0;i<oligo.length();i++){
- oligo[i] = toupper(oligo[i]);
- if(oligo[i] == 'U') { oligo[i] = 'T'; }
- }
-
- if(type == "FORWARD"){
- // get rest of line in case there is a primer name
- while (!inOligos.eof()) {
- char c = inOligos.get();
- if (c == 10 || c == 13 || c == -1){ break; }
- else if (c == 32 || c == 9){;} //space or tab
- }
- primers[oligo] = primerCount; primerCount++;
- //cout << "for oligo = " << oligo << endl;
- }else if(type == "REVERSE"){
- string oligoRC = reverseOligo(oligo);
- revPrimer.push_back(oligoRC);
- //cout << "rev oligo = " << oligo << " reverse = " << oligoRC << endl;
- }else if(type == "BARCODE"){
- inOligos >> group;
- }else if(type == "PRIMER"){
- m->gobble(inOligos);
- primers[oligo] = primerCount; primerCount++;
-
- string roligo="";
- inOligos >> roligo;
-
- for(int i=0;i<roligo.length();i++){
- roligo[i] = toupper(roligo[i]);
- if(roligo[i] == 'U') { roligo[i] = 'T'; }
- }
- revPrimer.push_back(reverseOligo(roligo));
-
- // get rest of line in case there is a primer name
- while (!inOligos.eof()) {
- char c = inOligos.get();
- if (c == 10 || c == 13 || c == -1){ break; }
- else if (c == 32 || c == 9){;} //space or tab
- }
- //cout << "prim oligo = " << oligo << " reverse = " << roligo << endl;
- }else if((type == "LINKER")||(type == "SPACER")) {;}
- else{ m->mothurOut(type + " is not recognized as a valid type. Choices are primer, forward, reverse, linker, spacer and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); m->control_pressed = true; }
- }
- m->gobble(inOligos);
- }
- inOligos.close();
-
- if ((primers.size() == 0) && (revPrimer.size() == 0)) {
- m->mothurOut("[ERROR]: your oligos file does not contain valid primers or reverse primers. Please correct."); m->mothurOutEndLine();
- m->control_pressed = true;
- return false;
- }
-
- return true;
-
- }catch(exception& e) {
- m->errorOut(e, "PcrSeqsCommand", "readOligos");
- exit(1);
- }
-}
//***************************************************************************************************************
bool PcrSeqsCommand::readEcoli(){
try {
//check for groups that have been eliminated
CountTable ct;
if (ct.testGroups(goodCountFile)) {
- ct.readTable(goodCountFile, true);
+ ct.readTable(goodCountFile, true, false);
ct.printTable(goodCountFile);
}
exit(1);
}
}
+//***************************************************************************************************************
+// Reads oligosfile through the oligos member and caches pairedOligos, numFPrimers and numRPrimers. Returns true on success, false when m->control_pressed is set after the read.
+int PcrSeqsCommand::readOligos(){ // NOTE(review): declared int but returns bool literals (converted to 1/0) - confirm against the header declaration
+ try {
+ oligos.read(oligosfile); //the read itself is delegated to the oligos member
+
+ if (m->control_pressed) { return false; } //error in reading oligos
+ //paired files take the forward count from the primer pairs, otherwise from the plain forward primers
+ if (oligos.hasPairedBarcodes()) { // NOTE(review): pairing is decided by hasPairedBarcodes() while the count below comes from getPairedPrimers() - confirm this is the intended predicate
+ pairedOligos = true;
+ numFPrimers = oligos.getPairedPrimers().size();
+ }else {
+ pairedOligos = false;
+ numFPrimers = oligos.getPrimers().size();
+ }
+ numRPrimers = oligos.getReversePrimers().size(); //taken from getReversePrimers() in both branches - NOTE(review): confirm this is non-zero for paired files
+ //linkers and spacers are only reported with a warning; nothing else is done with them here
+ if (oligos.getLinkers().size() != 0) { m->mothurOut("[WARNING]: pcr.seqs is not setup to remove linkers, ignoring.\n"); }
+ if (oligos.getSpacers().size() != 0) { m->mothurOut("[WARNING]: pcr.seqs is not setup to remove spacers, ignoring.\n"); }
+ // NOTE(review): nothing here rejects a file with zero forward and zero reverse primers - confirm oligos.read() reports that case
+ return true;
+
+ }
+ catch(exception& e) { //report through m->errorOut, then terminate the process
+ m->errorOut(e, "PcrSeqsCommand", "readOligos");
+ exit(1);
+ }
+}
+
/**************************************************************************************/