From db2df3a6a0e6354a666472ede1de74642cfeb06b Mon Sep 17 00:00:00 2001 From: westcott Date: Tue, 28 Jul 2009 17:17:20 +0000 Subject: [PATCH] working on chimeras --- decalc.cpp | 23 +-- decalc.h | 7 +- pintail.cpp | 491 ++++++++++++++++++++++++++++++++++++++-------------- pintail.h | 5 +- 4 files changed, 383 insertions(+), 143 deletions(-) diff --git a/decalc.cpp b/decalc.cpp index d57f435..1a73494 100644 --- a/decalc.cpp +++ b/decalc.cpp @@ -51,11 +51,11 @@ void DeCalculator::runMask(Sequence* seq) { } //*************************************************************************************************************** //num is query's spot in querySeqs -void DeCalculator::trimSeqs(Sequence* query, Sequence subject, map& trim) { +void DeCalculator::trimSeqs(Sequence* query, Sequence* subject, map& trim) { try { string q = query->getAligned(); - string s = subject.getAligned(); + string s = subject->getAligned(); int front = 0; for (int i = 0; i < q.length(); i++) { @@ -131,7 +131,7 @@ vector DeCalculator::findWindows(Sequence* query, int front, int back, int } //*************************************************************************************************************** -vector DeCalculator::calcObserved(Sequence* query, Sequence subject, vector window, int size) { +vector DeCalculator::calcObserved(Sequence* query, Sequence* subject, vector window, int size) { try { vector temp; @@ -139,7 +139,7 @@ vector DeCalculator::calcObserved(Sequence* query, Sequence subject, vect for (int m = 0; m < window.size(); m++) { string seqFrag = query->getAligned().substr(window[m], size); - string seqFragsub = subject.getAligned().substr(window[m], size); + string seqFragsub = subject->getAligned().substr(window[m], size); //cout << "start point = " << window[m] << " end point = " << window[m]+size << endl; int diff = 0; for (int b = 0; b < seqFrag.length(); b++) { @@ -162,7 +162,7 @@ vector DeCalculator::calcObserved(Sequence* query, Sequence subject, vect } } //*************************************************************************************************************** -float DeCalculator::calcDist(Sequence* query, Sequence subject, int front, int back) { +float DeCalculator::calcDist(Sequence* query, Sequence* subject, int front, int back) { try { //so you only look at the trimmed part of the sequence @@ -170,7 +170,7 @@ float DeCalculator::calcDist(Sequence* query, Sequence subject, int front, int b //from first startpoint with length back-front string seqFrag = query->getAligned().substr(front, cutoff); - string seqFragsub = subject.getAligned().substr(front, cutoff); + string seqFragsub = subject->getAligned().substr(front, cutoff); int diff = 0; for (int b = 0; b < seqFrag.length(); b++) { @@ -234,7 +234,7 @@ vector DeCalculator::calcFreq(vector seqs, string filename) { try { vector prob; - string freqfile = getRootName(filename) + "prob"; + string freqfile = getRootName(filename) + "freq"; ofstream outFreq; openOutputFile(freqfile, outFreq); @@ -273,9 +273,12 @@ vector DeCalculator::calcFreq(vector seqs, string filename) { if (Pi < 0) { Pi = 0.0; } //saves this for later - outFreq << i+1 << '\t' << Pi << endl; + outFreq << i+1 << '\t' << highFreq << endl; - prob.push_back(Pi); + if (h.count(i) > 0) { + cout << i+1 << '\t' << highFreq << endl; + prob.push_back(Pi); + } } outFreq.close(); @@ -341,7 +344,7 @@ vector< vector > DeCalculator::getQuantiles(vector seqs, vecto //compare to every other sequence in template for(int j = 0; j < i; j++){ - Sequence subject = *(seqs[j]); + Sequence* subject = seqs[j]; map trim; map::iterator it; diff --git a/decalc.h b/decalc.h index dbdf316..9309c37 100644 --- a/decalc.h +++ b/decalc.h @@ -27,16 +27,17 @@ class DeCalculator { DeCalculator() {}; ~DeCalculator() {}; + set getPos() { return h; } void setMask(string m); void runMask(Sequence*); - void trimSeqs(Sequence*, Sequence, map&); + void trimSeqs(Sequence*, Sequence*, map&); vector calcFreq(vector, string); vector findWindows(Sequence*, int, int, int&, int); - vector calcObserved(Sequence*, Sequence, vector, int); + vector calcObserved(Sequence*, Sequence*, vector, int); vector calcExpected(vector, float); vector findQav(vector, int, vector); float calcDE(vector, vector); - float calcDist(Sequence*, Sequence, int, int); + float calcDist(Sequence*, Sequence*, int, int); float getCoef(vector, vector); vector< vector > getQuantiles(vector, vector, int, vector, int, int, int); diff --git a/pintail.cpp b/pintail.cpp index bcdc679..b7e5b5f 100644 --- a/pintail.cpp +++ b/pintail.cpp @@ -19,6 +19,8 @@ Pintail::~Pintail() { try { for (int i = 0; i < querySeqs.size(); i++) { delete querySeqs[i]; } for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; } + + if (processors != 1) { for (int i = 0; i < bestfit.size(); i++) { delete bestfit[i]; } } } catch(exception& e) { errorOut(e, "Pintail", "~Pintail"); @@ -121,9 +123,49 @@ void Pintail::getChimeras() { decalc->setMask(seqMask); + //find pairs + if (processors == 1) { + mothurOut("Finding closest sequence in template to each sequence... "); cout.flush(); + bestfit = findPairs(lines[0]->start, lines[0]->end); + mothurOut("Done."); mothurOutEndLine(); + }else { createProcessesPairs(); } + + + for (int j = 0; j < bestfit.size(); j++) { + //chops off beginning and end of sequences so they both start and end with a base + ofstream out; + string s = querySeqs[j]->getName(); + + openOutputFile(s, out); + out << ">" << querySeqs[j]->getName() << endl; + out << querySeqs[j]->getAligned() << endl; + out.close(); + + string t =querySeqs[j]->getName() + ".ref"; + openOutputFile(t, out); + out << ">" << bestfit[j]->getName() << endl; + out << bestfit[j]->getAligned() << endl; + out.close(); + } + + + //find P + mothurOut("Getting conservation... "); cout.flush(); + if (consfile == "") { + mothurOut("Calculating probability of conservation for your template sequences. This can take a while... I will output the frequency of the highest base in each position to a .freq file so that you can input them using the conservation parameter next time you run this command. Providing the .freq file will improve speed. "); cout.flush(); + probabilityProfile = decalc->calcFreq(templateSeqs, templateFile); + mothurOut("Done."); mothurOutEndLine(); + }else { probabilityProfile = readFreq(); } + + //make P into Q + for (int i = 0; i < probabilityProfile.size(); i++) { probabilityProfile[i] = 1 - probabilityProfile[i]; cout << i << '\t' << probabilityProfile[i] << endl; } + mothurOut("Done."); mothurOutEndLine(); + //mask querys for (int i = 0; i < querySeqs.size(); i++) { + //cout << querySeqs[i]->getName() << " before mask = " << querySeqs[i]->getAligned() << endl << endl; decalc->runMask(querySeqs[i]); + //cout << querySeqs[i]->getName() << " after mask = " << querySeqs[i]->getAligned() << endl << endl; } //mask templates @@ -131,46 +173,20 @@ void Pintail::getChimeras() { decalc->runMask(templateSeqs[i]); } -for (int i = 0; i < lines.size(); i++) { cout << "line pair " << i << " = " << lines[i]->start << '\t' << lines[i]->end << endl; } +//for (int i = 0; i < lines.size(); i++) { cout << "line pair " << i << " = " << lines[i]->start << '\t' << lines[i]->end << endl; } if (processors == 1) { - mothurOut("Finding closest sequence in template to each sequence... "); cout.flush(); - bestfit = findPairs(lines[0]->start, lines[0]->end); - - //ex.align matches from wigeon -for (int m = 0; m < templateSeqs.size(); m++) { - if (templateSeqs[m]->getName() == "159481") { bestfit[17] = *(templateSeqs[m]); } - if (templateSeqs[m]->getName() == "100137") { bestfit[16] = *(templateSeqs[m]); } - if (templateSeqs[m]->getName() == "112956") { bestfit[15] = *(templateSeqs[m]); } - if (templateSeqs[m]->getName() == "102326") { bestfit[14] = *(templateSeqs[m]); } - if (templateSeqs[m]->getName() == "66229") { bestfit[13] = *(templateSeqs[m]); } - if (templateSeqs[m]->getName() == "206276") { bestfit[12] = *(templateSeqs[m]); } - if (templateSeqs[m]->getName() == "63607") { bestfit[11] = *(templateSeqs[m]); } - if (templateSeqs[m]->getName() == "7056") { bestfit[10] = *(templateSeqs[m]); } - if (templateSeqs[m]->getName() == "7088") { bestfit[9] = *(templateSeqs[m]); } - if (templateSeqs[m]->getName() == "17553") { bestfit[8] = *(templateSeqs[m]); } - if (templateSeqs[m]->getName() == "131723") { bestfit[7] = *(templateSeqs[m]); } - if (templateSeqs[m]->getName() == "69013") { bestfit[6] = *(templateSeqs[m]); } - if (templateSeqs[m]->getName() == "24543") { bestfit[5] = *(templateSeqs[m]); } - if (templateSeqs[m]->getName() == "27824") { bestfit[4] = *(templateSeqs[m]); } - if (templateSeqs[m]->getName() == "1456") { bestfit[3] = *(templateSeqs[m]); } - if (templateSeqs[m]->getName() == "1456") { bestfit[2] = *(templateSeqs[m]); } - if (templateSeqs[m]->getName() == "141312") { bestfit[1] = *(templateSeqs[m]); } - if (templateSeqs[m]->getName() == "141312") { bestfit[0] = *(templateSeqs[m]); } - - -} - + for (int j = 0; j < bestfit.size(); j++) { - //chops off beginning and end of sequences so they both start and end with a base + cout << querySeqs[j]->getName() << " after mask = " << querySeqs[j]->getAligned() << endl << endl; + cout << bestfit[j]->getName() << " after mask = " << bestfit[j]->getAligned() << endl << endl; decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]); } - mothurOut("Done."); mothurOutEndLine(); mothurOut("Finding window breaks... "); cout.flush(); for (int i = lines[0]->start; i < lines[0]->end; i++) { it = trimmed[i].begin(); -//cout << "trimmed = " << it->first << '\t' << it->second << endl; +cout << i << '\t' << "trimmed = " << it->first << '\t' << it->second << endl; vector win = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment); windowsForeachQuery[i] = win; } @@ -178,24 +194,18 @@ for (int m = 0; m < templateSeqs.size(); m++) { }else { createProcessesSpots(); } - //find P - mothurOut("Getting conservation... "); cout.flush(); - if (consfile == "") { - mothurOut("Calculating probability of conservation for your template sequences. This can take a while... I will output the quantiles to a .prob file so that you can input them using the conservation parameter next time you run this command. Providing the .prob file will dramatically improve speed. "); cout.flush(); - probabilityProfile = decalc->calcFreq(templateSeqs, templateFile); - mothurOut("Done."); mothurOutEndLine(); - }else { probabilityProfile = readFreq(); } - - //make P into Q - for (int i = 0; i < probabilityProfile.size(); i++) { probabilityProfile[i] = 1 - probabilityProfile[i]; } - mothurOut("Done."); mothurOutEndLine(); if (processors == 1) { mothurOut("Calculating observed distance... "); cout.flush(); for (int i = lines[0]->start; i < lines[0]->end; i++) { - //cout << querySeqs[i]->getName() << '\t' << bestfit[i].getName() << " windows = " << windowsForeachQuery[i].size() << " size = " << windowSizes[i] << endl; + cout << querySeqs[i]->getName() << '\t' << bestfit[i]->getName() << " windows = " << windowsForeachQuery[i].size() << " size = " << windowSizes[i] << endl; vector obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]); + + for (int j = 0; j < obsi.size(); j++) { + cout << obsi[j] << '\t'; + } + cout << endl; obsDistance[i] = obsi; } mothurOut("Done."); mothurOutEndLine(); @@ -206,11 +216,11 @@ for (int m = 0; m < templateSeqs.size(); m++) { vector q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile); Qav[i] = q; -//cout << i+1 << endl; -//for (int j = 0; j < Qav[i].size(); j++) { - //cout << Qav[i][j] << '\t'; -//} -//cout << endl << endl; +cout << i+1 << endl; +for (int j = 0; j < Qav[i].size(); j++) { + cout << Qav[i][j] << '\t'; +} +cout << endl << endl; } mothurOut("Done."); mothurOutEndLine(); @@ -219,7 +229,7 @@ for (int m = 0; m < templateSeqs.size(); m++) { mothurOut("Calculating alpha... "); cout.flush(); for (int i = lines[0]->start; i < lines[0]->end; i++) { float alpha = decalc->getCoef(obsDistance[i], Qav[i]); -//cout << i+1 << "\tcoef = " << alpha << endl; +cout << i+1 << "\tcoef = " << alpha << endl; seqCoef[i] = alpha; } mothurOut("Done."); mothurOutEndLine(); @@ -237,9 +247,10 @@ for (int m = 0; m < templateSeqs.size(); m++) { for (int i = lines[0]->start; i < lines[0]->end; i++) { float de = decalc->calcDE(obsDistance[i], expectedDistance[i]); DE[i] = de; - +cout << querySeqs[i]->getName() << '\t' << "de value = " << de << endl; it = trimmed[i].begin(); float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); +cout << querySeqs[i]->getName() << '\t' << "dist value = " << dist << endl; deviation[i] = dist; } mothurOut("Done."); mothurOutEndLine(); @@ -324,6 +335,7 @@ vector Pintail::readFreq() { openInputFile(consfile, in); vector prob; + set h = decalc->getPos(); //positions of bases in masking sequence //read in probabilities and store in vector int pos; float num; @@ -332,8 +344,16 @@ vector Pintail::readFreq() { in >> pos >> num; - //do you want this spot - prob.push_back(num); + if (h.count(pos-1) > 0) { + float Pi; + Pi = (num - 0.25) / 0.75; + + //cannot have probability less than 0. + if (Pi < 0) { Pi = 0.0; } + + //do you want this spot + prob.push_back(Pi); + } gobble(in); } @@ -389,16 +409,16 @@ vector< vector > Pintail::readQuantiles() { } //*************************************************************************************************************** //calculate the distances from each query sequence to all sequences in the template to find the closest sequence -vector Pintail::findPairs(int start, int end) { +vector Pintail::findPairs(int start, int end) { try { - vector seqsMatches; + vector seqsMatches; for(int i = start; i < end; i++){ float smallest = 10000.0; Sequence query = *(querySeqs[i]); - Sequence match; + Sequence* match; for(int j = 0; j < templateSeqs.size(); j++){ @@ -408,7 +428,7 @@ vector Pintail::findPairs(int start, int end) { float dist = distcalculator->getDist(); if (dist < smallest) { - match = *(templateSeqs[j]); + match = templateSeqs[j]; smallest = dist; } } @@ -442,19 +462,13 @@ void Pintail::createProcessesSpots() { process++; }else if (pid == 0){ - mothurOut("Finding pairs for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine(); - bestfit = findPairs(lines[process]->start, lines[process]->end); - mothurOut("Done finding pairs for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine(); - - int count = lines[process]->start; - for (int j = 0; j < bestfit.size(); j++) { + for (int j = lines[process]->start; j < lines[process]->end; j++) { //chops off beginning and end of sequences so they both start and end with a base map trim; - decalc->trimSeqs(querySeqs[count], bestfit[j], trim); - trimmed[count] = trim; + decalc->trimSeqs(querySeqs[j], bestfit[j], trim); + trimmed[j] = trim; - count++; } mothurOut("Finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine(); @@ -466,31 +480,30 @@ void Pintail::createProcessesSpots() { //write out data to file so parent can read it ofstream out; - string s = toString(pid) + ".temp"; + string s = toString(getpid()) + ".temp"; openOutputFile(s, out); - //output range and size - out << bestfit.size() << endl; - - //output pairs - for (int i = 0; i < bestfit.size(); i++) { - out << ">" << bestfit[i].getName() << endl << bestfit[i].getAligned() << endl; - } - //output windowsForeachQuery - for (int i = 0; i < windowsForeachQuery.size(); i++) { + for (int i = lines[process]->start; i < lines[process]->end; i++) { out << windowsForeachQuery[i].size() << '\t'; for (int j = 0; j < windowsForeachQuery[i].size(); j++) { out << windowsForeachQuery[i][j] << '\t'; } out << endl; } - + //output windowSizes - for (int i = 0; i < windowSizes.size(); i++) { + for (int i = lines[process]->start; i < lines[process]->end; i++) { out << windowSizes[i] << '\t'; } - out << endl; + out << endl; + + //output trimmed values + for (int i = lines[process]->start; i < lines[process]->end; i++) { + it = trimmed[i].begin(); + + out << it->first << '\t' << it->second << endl; + } out.close(); exit(0); @@ -509,40 +522,27 @@ void Pintail::createProcessesSpots() { string s = toString(processIDS[i]) + ".temp"; openInputFile(s, in); - int size; - in >> size; gobble(in); - - //get pairs + int size = lines[i]->end - lines[i]->start; + int count = lines[i]->start; - for (int m = 0; m < size; m++) { - Sequence temp(in); - bestfit[count] = temp; - - count++; - gobble(in); - } - - gobble(in); - - count = lines[i]->start; for (int m = 0; m < size; m++) { int num; in >> num; - + vector win; int w; for (int j = 0; j < num; j++) { in >> w; win.push_back(w); } - + windowsForeachQuery[count] = win; count++; gobble(in); } - + gobble(in); count = lines[i]->start; - for (int i = 0; i < size; i++) { + for (int m = 0; m < size; m++) { int num; in >> num; @@ -550,15 +550,33 @@ void Pintail::createProcessesSpots() { count++; } + gobble(in); + + count = lines[i]->start; + for (int m = 0; m < size; m++) { + int front, back; + in >> front >> back; + + map t; + + t[front] = back; + + trimmed[count] = t; + count++; + + gobble(in); + } + + in.close(); + remove(s.c_str()); } - + #else - bestfit = findPairs(lines[0]->start, lines[0]->end); for (int j = 0; j < bestfit.size(); j++) { - //chops off beginning and end of sequences so they both start and end with a base - decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]); + //chops off beginning and end of sequences so they both start and end with a base + decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]); } for (int i = lines[0]->start; i < lines[0]->end; i++) { @@ -574,21 +592,91 @@ void Pintail::createProcessesSpots() { exit(1); } } - - /**************************************************************************************************/ -void Pintail::createProcesses() { +void Pintail::createProcessesPairs() { try { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) int process = 0; vector processIDS; - vector< vector > exp; exp.resize(querySeqs.size()); - vector de; de.resize(querySeqs.size()); - vector< vector > obs; obs.resize(querySeqs.size()); - vector dev; dev.resize(querySeqs.size()); + //loop through and create all the processes you want + while (process != processors) { + int pid = fork(); + + if (pid > 0) { + processIDS.push_back(pid); + process++; + }else if (pid == 0){ + + mothurOut("Finding pairs for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine(); + bestfit = findPairs(lines[process]->start, lines[process]->end); + mothurOut("Done finding pairs for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine(); + + //write out data to file so parent can read it + ofstream out; + string s = toString(getpid()) + ".temp"; + openOutputFile(s, out); + + //output range and size + out << bestfit.size() << endl; + + //output pairs + for (int i = 0; i < bestfit.size(); i++) { + out << ">" << bestfit[i]->getName() << endl << bestfit[i]->getAligned() << endl; + } + out.close(); + + exit(0); + }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); } + } + + //force parent to wait until all the processes are done + for (int i=0;i> size; gobble(in); + + //get pairs + int count = lines[i]->start; + for (int m = 0; m < size; m++) { + Sequence* temp = new Sequence(in); + bestfit[count] = temp; + + count++; + gobble(in); + } + + in.close(); + remove(s.c_str()); + } + + +#else + bestfit = findPairs(lines[0]->start, lines[0]->end); +#endif + } + catch(exception& e) { + errorOut(e, "Pintail", "createProcessesPairs"); + exit(1); + } +} +/**************************************************************************************************/ + +void Pintail::createProcesses() { + try { +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + int process = 0; + vector processIDS; //loop through and create all the processes you want while (process != processors) { @@ -603,7 +691,7 @@ void Pintail::createProcesses() { for (int i = lines[process]->start; i < lines[process]->end; i++) { vector obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]); - obs[i] = obsi; + obsDistance[i] = obsi; //calc Qav vector q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile); @@ -617,14 +705,56 @@ void Pintail::createProcesses() { //get de and deviation float dei = decalc->calcDE(obsi, exp); - de[i] = dei; + DE[i] = dei; it = trimmed[i].begin(); float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); - dev[i] = dist; + deviation[i] = dist; } mothurOut("Done calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine(); + //write out data to file so parent can read it + ofstream out; + string s = toString(getpid()) + ".temp"; + openOutputFile(s, out); + + int size = lines[process]->end - lines[process]->start; + out << size << endl; + + //output observed distances + for (int i = lines[process]->start; i < lines[process]->end; i++) { + out << obsDistance[i].size() << '\t'; + for (int j = 0; j < obsDistance[i].size(); j++) { + out << obsDistance[i][j] << '\t'; + } + out << endl; + } + + + //output expected distances + for (int i = lines[process]->start; i < lines[process]->end; i++) { + out << expectedDistance[i].size() << '\t'; + for (int j = 0; j < expectedDistance[i].size(); j++) { + out << expectedDistance[i][j] << '\t'; + } + out << endl; + } + + + //output de values + for (int i = lines[process]->start; i < lines[process]->end; i++) { + out << DE[i] << '\t'; + } + out << endl; + + //output de values + for (int i = lines[process]->start; i < lines[process]->end; i++) { + out << deviation[i] << '\t'; + } + out << endl; + + out.close(); + exit(0); }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); } } @@ -635,11 +765,78 @@ void Pintail::createProcesses() { wait(&temp); } - obsDistance = obs; - expectedDistance = exp; - DE = de; - deviation = dev; - + //get data created by processes + for (int i=0;i> size; gobble(in); + + //get observed distances + int count = lines[i]->start; + for (int m = 0; m < size; m++) { + int num; + in >> num; + + vector obs; float w; + for (int j = 0; j < num; j++) { + in >> w; + obs.push_back(w); + } + + obsDistance[count] = obs; + count++; + gobble(in); + } + + gobble(in); + + //get expected distances + count = lines[i]->start; + for (int m = 0; m < size; m++) { + int num; + in >> num; + + vector exp; float w; + for (int j = 0; j < num; j++) { + in >> w; + exp.push_back(w); + } + + expectedDistance[count] = exp; + count++; + gobble(in); + } + + gobble(in); + + count = lines[i]->start; + for (int m = 0; m < size; m++) { + float num; + in >> num; + + DE[count] = num; + count++; + } + + gobble(in); + + count = lines[i]->start; + for (int m = 0; m < size; m++) { + float num; + in >> num; + + deviation[count] = num; + count++; + } + + in.close(); + remove(s.c_str()); + } + + #else mothurOut("Calculating observed distance... "); cout.flush(); for (int i = lines[0]->start; i < lines[0]->end; i++) { @@ -704,7 +901,6 @@ void Pintail::createProcessesQuan() { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) int process = 0; vector processIDS; - vector< vector > quan; quan.resize(100); //loop through and create all the processes you want while (process != processors) { @@ -715,19 +911,25 @@ void Pintail::createProcessesQuan() { process++; }else if (pid == 0){ - vector< vector > q = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, templateLines[process]->start, templateLines[process]->end); + quantiles = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, templateLines[process]->start, templateLines[process]->end); - for (int i = 0; i < q.size(); i++) { - //put all values of q[i] into quan[i] - quan[i].insert(quan[i].begin(), q[i].begin(), q[i].end()); - } + //write out data to file so parent can read it + ofstream out; + string s = toString(getpid()) + ".temp"; + openOutputFile(s, out); - for (int i = 0; i < quan.size(); i++) { - cout << i+1 << '\t'; - for (int j = 0; j < quan[i].size(); j++) { cout << quan[i][j] << '\t'; } - cout << endl; + + //output observed distances + for (int i = 0; i < quantiles.size(); i++) { + out << quantiles[i].size() << '\t'; + for (int j = 0; j < quantiles[i].size(); j++) { + out << quantiles[i][j] << '\t'; + } + out << endl; } - + + out.close(); + exit(0); }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); } } @@ -738,7 +940,40 @@ void Pintail::createProcessesQuan() { wait(&temp); } - quantiles = quan; + //get data created by processes + for (int i=0;i > quan; quan.resize(100); + + //get quantiles + for (int m = 0; m < quan.size(); m++) { + int num; + in >> num; + + vector q; float w; + for (int j = 0; j < num; j++) { + in >> w; + q.push_back(w); + } + + quan[m] = q; + gobble(in); + } + + + //save quan in quantiles + for (int i = 0; i < quan.size(); i++) { + //put all values of q[i] into quan[i] + quantiles[i].insert(quantiles[i].begin(), quan[i].begin(), quan[i].end()); + } + + in.close(); + remove(s.c_str()); + } + #else quantiles = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, 0, templateSeqs.size()); #endif diff --git a/pintail.h b/pintail.h index 8afc754..2e84147 100644 --- a/pintail.h +++ b/pintail.h @@ -53,7 +53,7 @@ class Pintail : public Chimera { vector querySeqs; vector templateSeqs; - vector bestfit; //bestfit[0] matches queryseqs[0]... + vector bestfit; //bestfit[0] matches queryseqs[0]... vector< vector > obsDistance; //obsDistance[0] is the vector of observed distances for queryseqs[0]... vector< vector > expectedDistance; //expectedDistance[0] is the vector of expected distances for queryseqs[0]... @@ -77,9 +77,10 @@ class Pintail : public Chimera { vector readFreq(); vector< vector > readQuantiles(); - vector findPairs(int, int); + vector findPairs(int, int); void createProcessesSpots(); + void createProcessesPairs(); void createProcesses(); void createProcessesQuan(); -- 2.39.2