5 * Created by Sarah Westcott on 7/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
11 #include "ignoregaps.h"
13 //***************************************************************************************************************
15 Pintail::Pintail(string filename, string temp) { fastafile = filename; templateFile = temp; }
16 //***************************************************************************************************************
20 for (int i = 0; i < querySeqs.size(); i++) { delete querySeqs[i]; }
21 for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; }
24 errorOut(e, "Pintail", "~Pintail");
28 //***************************************************************************************************************
29 void Pintail::print(ostream& out) {
32 for (int i = 0; i < querySeqs.size(); i++) {
34 int index = ceil(deviation[i]);
36 //is your DE value higher than the 95%
38 if (DE[i] > quantiles[index][4]) { chimera = "Yes"; }
39 else { chimera = "No"; }
41 out << querySeqs[i]->getName() << '\t' << "div: " << deviation[i] << "\tstDev: " << DE[i] << "\tchimera flag: " << chimera << endl;
44 for (int j = 0; j < obsDistance[i].size(); j++) { out << obsDistance[i][j] << '\t'; }
49 for (int m = 0; m < expectedDistance[i].size(); m++) { out << expectedDistance[i][m] << '\t'; }
55 errorOut(e, "Pintail", "print");
60 //***************************************************************************************************************
61 void Pintail::getChimeras() {
64 //read in query sequences and subject sequences
65 mothurOut("Reading sequences and template file... "); cout.flush();
66 querySeqs = readSeqs(fastafile);
67 templateSeqs = readSeqs(templateFile);
68 mothurOut("Done."); mothurOutEndLine();
70 int numSeqs = querySeqs.size();
72 obsDistance.resize(numSeqs);
73 expectedDistance.resize(numSeqs);
74 seqCoef.resize(numSeqs);
77 bestfit.resize(numSeqs);
78 deviation.resize(numSeqs);
79 trimmed.resize(numSeqs);
80 windowSizes.resize(numSeqs, window);
81 windowSizesTemplate.resize(templateSeqs.size(), window);
82 windowsForeachQuery.resize(numSeqs);
83 quantiles.resize(100); //one for every percent mismatch
85 //break up file if needed
86 int linesPerProcess = processors / numSeqs;
88 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
89 //find breakup of sequences for all times we will Parallelize
90 if (processors == 1) { lines.push_back(new linePair(0, numSeqs)); }
93 for (int i = 0; i < (processors-1); i++) {
94 lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
96 //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
97 int i = processors - 1;
98 lines.push_back(new linePair((i*linesPerProcess), numSeqs));
101 //find breakup of templatefile for quantiles
102 if (processors == 1) { templateLines.push_back(new linePair(0, templateSeqs.size())); }
104 for (int i = 0; i < processors; i++) {
105 templateLines.push_back(new linePair());
106 templateLines[i]->start = int (sqrt(float(i)/float(processors)) * templateSeqs.size());
107 templateLines[i]->end = int (sqrt(float(i+1)/float(processors)) * templateSeqs.size());
111 lines.push_back(new linePair(0, numSeqs));
112 templateLines.push_back(new linePair(0, templateSeqs.size()));
115 distcalculator = new ignoreGaps();
118 if (processors == 1) {
119 mothurOut("Finding closest sequence in template to each sequence... "); cout.flush();
120 bestfit = findPairs(lines[0]->start, lines[0]->end);
122 //ex.align matches from wigeon
123 /*for (int m = 0; m < templateSeqs.size(); m++) {
124 if (templateSeqs[m]->getName() == "159481") { bestfit[17] = *(templateSeqs[m]); }
125 if (templateSeqs[m]->getName() == "100137") { bestfit[16] = *(templateSeqs[m]); }
126 if (templateSeqs[m]->getName() == "112956") { bestfit[15] = *(templateSeqs[m]); }
127 if (templateSeqs[m]->getName() == "102326") { bestfit[14] = *(templateSeqs[m]); }
128 if (templateSeqs[m]->getName() == "66229") { bestfit[13] = *(templateSeqs[m]); }
129 if (templateSeqs[m]->getName() == "206276") { bestfit[12] = *(templateSeqs[m]); }
130 if (templateSeqs[m]->getName() == "63607") { bestfit[11] = *(templateSeqs[m]); }
131 if (templateSeqs[m]->getName() == "7056") { bestfit[10] = *(templateSeqs[m]); }
132 if (templateSeqs[m]->getName() == "7088") { bestfit[9] = *(templateSeqs[m]); }
133 if (templateSeqs[m]->getName() == "17553") { bestfit[8] = *(templateSeqs[m]); }
134 if (templateSeqs[m]->getName() == "131723") { bestfit[7] = *(templateSeqs[m]); }
135 if (templateSeqs[m]->getName() == "69013") { bestfit[6] = *(templateSeqs[m]); }
136 if (templateSeqs[m]->getName() == "24543") { bestfit[5] = *(templateSeqs[m]); }
137 if (templateSeqs[m]->getName() == "27824") { bestfit[4] = *(templateSeqs[m]); }
138 if (templateSeqs[m]->getName() == "1456") { bestfit[3] = *(templateSeqs[m]); }
139 if (templateSeqs[m]->getName() == "1456") { bestfit[2] = *(templateSeqs[m]); }
140 if (templateSeqs[m]->getName() == "141312") { bestfit[1] = *(templateSeqs[m]); }
141 if (templateSeqs[m]->getName() == "141312") { bestfit[0] = *(templateSeqs[m]); }
146 for (int j = 0; j < bestfit.size(); j++) {
147 //chops off beginning and end of sequences so they both start and end with a base
148 trimSeqs(querySeqs[j], bestfit[j], trimmed[j]);
150 mothurOut("Done."); mothurOutEndLine();
152 mothurOut("Finding window breaks... "); cout.flush();
153 for (int i = lines[0]->start; i < lines[0]->end; i++) {
154 it = trimmed[i].begin();
155 cout << "trimmed = " << it->first << '\t' << it->second << endl;
156 vector<int> win = findWindows(querySeqs[i], it->first, it->second, windowSizes[i]);
157 windowsForeachQuery[i] = win;
159 mothurOut("Done."); mothurOutEndLine();
161 }else { createProcessesSpots(); }
164 mothurOut("Getting conservation... "); cout.flush();
165 if (consfile == "") {
166 mothurOut("Calculating probability of conservation for your template sequences. This can take a while... I will output the quantiles to a .prob file so that you can input them using the conservation parameter next time you run this command. Providing the .prob file will dramatically improve speed. "); cout.flush();
167 probabilityProfile = calcFreq(templateSeqs);
168 mothurOut("Done."); mothurOutEndLine();
169 }else { probabilityProfile = readFreq(); }
172 for (int i = 0; i < probabilityProfile.size(); i++) { probabilityProfile[i] = 1 - probabilityProfile[i]; }
173 mothurOut("Done."); mothurOutEndLine();
175 if (processors == 1) {
177 mothurOut("Calculating observed distance... "); cout.flush();
178 for (int i = lines[0]->start; i < lines[0]->end; i++) {
179 cout << querySeqs[i]->getName() << '\t' << bestfit[i].getName() << " windows = " << windowsForeachQuery[i].size() << " size = " << windowSizes[i] << endl;
180 vector<float> obsi = calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
181 obsDistance[i] = obsi;
183 mothurOut("Done."); mothurOutEndLine();
186 mothurOut("Finding variability... "); cout.flush();
187 for (int i = lines[0]->start; i < lines[0]->end; i++) {
188 vector<float> q = findQav(windowsForeachQuery[i], windowSizes[i]);
191 mothurOut("Done."); mothurOutEndLine();
194 mothurOut("Calculating alpha... "); cout.flush();
195 for (int i = lines[0]->start; i < lines[0]->end; i++) {
196 float alpha = getCoef(obsDistance[i], Qav[i]);
197 seqCoef.push_back(alpha);
199 mothurOut("Done."); mothurOutEndLine();
202 mothurOut("Calculating expected distance... "); cout.flush();
203 for (int i = lines[0]->start; i < lines[0]->end; i++) {
204 vector<float> exp = calcExpected(Qav[i], seqCoef[i]);
205 expectedDistance[i] = exp;
207 mothurOut("Done."); mothurOutEndLine();
210 mothurOut("Finding deviation... "); cout.flush();
211 for (int i = lines[0]->start; i < lines[0]->end; i++) {
212 float de = calcDE(obsDistance[i], expectedDistance[i]);
215 it = trimmed[i].begin();
216 float dist = calcDist(querySeqs[i], bestfit[i], it->first, it->second);
219 mothurOut("Done."); mothurOutEndLine();
222 else { createProcesses(); }
225 //quantiles are used to determine whether the de values found indicate a chimera
226 //if you have to calculate them, its time intensive because you are finding the de and deviation values for each
227 //combination of sequences in the template
228 if (quanfile != "") { quantiles = readQuantiles(); }
231 mothurOut("Calculating quantiles for your template. This can take a while... I will output the quantiles to a .quan file that you can input them using the quantiles parameter next time you run this command. Providing the .quan file will dramatically improve speed. "); cout.flush();
232 if (processors == 1) {
233 quantiles = getQuantiles(0, templateSeqs.size());
234 }else { createProcessesQuan(); }
237 string o = getRootName(templateFile) + "quan";
239 openOutputFile(o, out4);
242 for (int i = 0; i < quantiles.size(); i++) {
243 if (quantiles[i].size() == 0) {
244 //in case this is not a distance found in your template files
245 for (int g = 0; g < 6; g++) {
246 quantiles[i].push_back(0.0);
250 sort(quantiles[i].begin(), quantiles[i].end());
254 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.10)]);
256 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.25)]);
258 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.5)]);
260 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.75)]);
262 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.95)]);
264 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.99)]);
271 for (int u = 0; u < quantiles[i].size(); u++) { out4 << quantiles[i][u] << '\t'; }
276 mothurOut("Done."); mothurOutEndLine();
280 for (int i = 0; i < lines.size(); i++) { delete lines[i]; }
281 for (int i = 0; i < templateLines.size(); i++) { delete templateLines[i]; }
283 delete distcalculator;
285 catch(exception& e) {
286 errorOut(e, "Pintail", "getChimeras");
290 //***************************************************************************************************************
291 //num is query's spot in querySeqs
292 void Pintail::trimSeqs(Sequence* query, Sequence subject, map<int, int>& trim) {
295 string q = query->getAligned();
296 string s = subject.getAligned();
299 for (int i = 0; i < q.length(); i++) {
300 if (isalpha(q[i]) && isalpha(s[i])) { front = i; break; }
304 for (int i = q.length(); i >= 0; i--) {
305 if (isalpha(q[i]) && isalpha(s[i])) { back = i; break; }
311 catch(exception& e) {
312 errorOut(e, "Pintail", "trimSeqs");
317 //***************************************************************************************************************
319 vector<float> Pintail::readFreq() {
323 openInputFile(consfile, in);
327 //read in probabilities and store in vector
334 //do you want this spot
344 catch(exception& e) {
345 errorOut(e, "Pintail", "readFreq");
350 //***************************************************************************************************************
352 vector< vector<float> > Pintail::readQuantiles() {
356 openInputFile(quanfile, in);
358 vector< vector<float> > quan;
360 int num; float ten, twentyfive, fifty, seventyfive, ninetyfive, ninetynine;
364 in >> num >> ten >> twentyfive >> fifty >> seventyfive >> ninetyfive >> ninetynine;
369 temp.push_back(twentyfive);
370 temp.push_back(fifty);
371 temp.push_back(seventyfive);
372 temp.push_back(ninetyfive);
373 temp.push_back(ninetynine);
375 quan.push_back(temp);
384 catch(exception& e) {
385 errorOut(e, "Pintail", "readQuantiles");
389 //***************************************************************************************************************
390 //calculate the distances from each query sequence to all sequences in the template to find the closest sequence
391 vector<Sequence> Pintail::findPairs(int start, int end) {
394 vector<Sequence> seqsMatches; seqsMatches.resize(end-start);
396 for(int i = start; i < end; i++){
398 float smallest = 10000.0;
399 Sequence query = *(querySeqs[i]);
401 for(int j = 0; j < templateSeqs.size(); j++){
403 Sequence temp = *(templateSeqs[j]);
405 distcalculator->calcDist(query, temp);
406 float dist = distcalculator->getDist();
408 if (dist < smallest) {
409 seqsMatches[i] = *(templateSeqs[j]);
418 catch(exception& e) {
419 errorOut(e, "Pintail", "findPairs");
424 //***************************************************************************************************************
425 //find the window breaks for each sequence - this is so you can move ahead by bases.
426 vector<int> Pintail::findWindows(Sequence* query, int front, int back, int& size) {
431 int cutoff = back - front; //back - front
433 //if window is set to default
434 if (size == 0) { if (cutoff > 1200) { size = 300; }
435 else{ size = (cutoff / 4); } }
436 else if (size > (cutoff / 4)) {
437 mothurOut("You have selected to large a window size for sequence " + query->getName() + ". I will choose an appropriate window size."); mothurOutEndLine();
441 string seq = query->getAligned().substr(front, cutoff);
445 for (int l = 0; l < seq.length(); l++) { if (isalpha(seq[l])) { numBases++; } }
448 win.push_back(front);
450 //move ahead increment bases at a time until all bases are in a window
452 int totalBases = 0; //used to eliminate window of blanks at end of sequence
454 seq = query->getAligned();
455 for (int m = front; m < (back - size) ; m++) {
457 //count number of bases you see
458 if (isalpha(seq[m])) { countBases++; totalBases++; }
460 //if you have seen enough bases to make a new window
461 if (countBases >= increment) {
462 win.push_back(m); //save spot in alignment
463 countBases = 0; //reset bases you've seen in this window
466 //no need to continue if all your bases are in a window
467 if (totalBases == numBases) { break; }
473 catch(exception& e) {
474 errorOut(e, "Pintail", "findWindows");
479 //***************************************************************************************************************
480 vector<float> Pintail::calcObserved(Sequence* query, Sequence subject, vector<int> window, int size) {
484 //cout << "query length = " << query->getAligned().length() << '\t' << " subject length = " << subject.getAligned().length() << endl;
485 for (int m = 0; m < window.size(); m++) {
487 string seqFrag = query->getAligned().substr(window[m], size);
488 string seqFragsub = subject.getAligned().substr(window[m], size);
489 //cout << "start point = " << window[m] << " end point = " << window[m]+size << endl;
491 for (int b = 0; b < seqFrag.length(); b++) {
492 if (seqFrag[b] != seqFragsub[b]) { diff++; }
495 //percentage of mismatched bases
497 dist = diff / (float) seqFrag.length() * 100;
499 temp.push_back(dist);
504 catch(exception& e) {
505 errorOut(e, "Pintail", "calcObserved");
509 //***************************************************************************************************************
510 float Pintail::calcDist(Sequence* query, Sequence subject, int front, int back) {
513 //so you only look at the trimmed part of the sequence
514 int cutoff = back - front;
516 //from first startpoint with length back-front
517 string seqFrag = query->getAligned().substr(front, cutoff);
518 string seqFragsub = subject.getAligned().substr(front, cutoff);
521 for (int b = 0; b < seqFrag.length(); b++) {
522 if (seqFrag[b] != seqFragsub[b]) { diff++; }
525 //percentage of mismatched bases
526 float dist = diff / (float) seqFrag.length() * 100;
530 catch(exception& e) {
531 errorOut(e, "Pintail", "calcDist");
536 //***************************************************************************************************************
537 vector<float> Pintail::calcExpected(vector<float> qav, float coef) {
541 vector<float> queryExpected;
543 for (int m = 0; m < qav.size(); m++) {
545 float expected = qav[m] * coef;
547 queryExpected.push_back(expected);
550 return queryExpected;
553 catch(exception& e) {
554 errorOut(e, "Pintail", "calcExpected");
558 //***************************************************************************************************************
559 float Pintail::calcDE(vector<float> obs, vector<float> exp) {
563 float sum = 0.0; //sum = sum from 1 to m of (oi-ei)^2
564 for (int m = 0; m < obs.size(); m++) { sum += ((obs[m] - exp[m]) * (obs[m] - exp[m])); }
566 float de = sqrt((sum / (obs.size() - 1)));
570 catch(exception& e) {
571 errorOut(e, "Pintail", "calcDE");
576 //***************************************************************************************************************
578 vector<float> Pintail::calcFreq(vector<Sequence*> seqs) {
582 string freqfile = getRootName(templateFile) + "prob";
585 openOutputFile(freqfile, outFreq);
587 //at each position in the sequence
588 for (int i = 0; i < seqs[0]->getAligned().length(); i++) {
590 vector<int> freq; freq.resize(4,0);
593 //find the frequency of each nucleotide
594 for (int j = 0; j < seqs.size(); j++) {
596 char value = seqs[j]->getAligned()[i];
598 if(toupper(value) == 'A') { freq[0]++; }
599 else if(toupper(value) == 'T' || toupper(value) == 'U') { freq[1]++; }
600 else if(toupper(value) == 'G') { freq[2]++; }
601 else if(toupper(value) == 'C') { freq[3]++; }
605 //find base with highest frequency
607 for (int m = 0; m < freq.size(); m++) { if (freq[m] > highest) { highest = freq[m]; } }
610 //subtract gaps to "ignore them"
611 if ( (seqs.size() - gaps) == 0 ) { highFreq = 1.0; }
612 else { highFreq = highest / (float) (seqs.size() - gaps); }
615 Pi = (highFreq - 0.25) / 0.75;
617 //cannot have probability less than 0.
618 if (Pi < 0) { Pi = 0.0; }
620 //saves this for later
621 outFreq << i+1 << '\t' << Pi << endl;
631 catch(exception& e) {
632 errorOut(e, "Pintail", "calcFreq");
636 //***************************************************************************************************************
637 vector<float> Pintail::findQav(vector<int> window, int size) {
639 vector<float> averages;
641 //for each window find average
642 for (int m = 0; m < window.size(); m++) {
646 //while you are in the window for this sequence
647 for (int j = window[m]; j < (window[m]+size); j++) { average += probabilityProfile[j]; }
649 average = average / size;
651 //save this windows average
652 averages.push_back(average);
657 catch(exception& e) {
658 errorOut(e, "Pintail", "findQav");
663 //***************************************************************************************************************
664 vector< vector<float> > Pintail::getQuantiles(int start, int end) {
666 vector< vector<float> > quan;
668 //percentage of mismatched pairs 1 to 100
673 for(int i = start; i < end; i++){
675 mothurOut("Processing template sequence " + toString(i)); mothurOutEndLine();
676 Sequence* query = templateSeqs[i];
678 //compare to every other sequence in template
679 for(int j = 0; j < i; j++){
681 Sequence subject = *(templateSeqs[j]);
684 trimSeqs(query, subject, trim);
687 int front = it->first; int back = it->second;
689 //reset window for each new comparison
690 windowSizesTemplate[i] = window;
692 vector<int> win = findWindows(query, front, back, windowSizesTemplate[i]);
694 vector<float> obsi = calcObserved(query, subject, win, windowSizesTemplate[i]);
696 vector<float> q = findQav(win, windowSizesTemplate[i]);
698 float alpha = getCoef(obsi, q);
700 vector<float> exp = calcExpected(q, alpha);
702 float de = calcDE(obsi, exp);
704 float dist = calcDist(query, subject, front, back);
708 //dist-1 because vector indexes start at 0.
709 quan[dist-1].push_back(de);
717 catch(exception& e) {
718 errorOut(e, "Pintail", "findQav");
723 //***************************************************************************************************************
724 float Pintail::getCoef(vector<float> obs, vector<float> qav) {
727 //find average prob for this seqs windows
728 float probAverage = 0.0;
729 for (int j = 0; j < qav.size(); j++) { probAverage += qav[j]; }
730 probAverage = probAverage / (float) qav.size();
732 //find observed average
733 float obsAverage = 0.0;
734 for (int j = 0; j < obs.size(); j++) { obsAverage += obs[j]; }
735 obsAverage = obsAverage / (float) obs.size();
738 float coef = obsAverage / probAverage;
742 catch(exception& e) {
743 errorOut(e, "Pintail", "getCoef");
747 /**************************************************************************************************/
749 void Pintail::createProcessesSpots() {
751 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
753 vector<int> processIDS;
754 vector< vector<int> > win; win.resize(querySeqs.size());
756 //loop through and create all the processes you want
757 while (process != processors) {
761 processIDS.push_back(pid);
765 mothurOut("Finding pairs for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
766 vector<Sequence> tempbest;
767 tempbest = findPairs(lines[process]->start, lines[process]->end);
769 for (int i = lines[process]->start; i < lines[process]->end; i++) {
770 bestfit[i] = tempbest[count];
772 //chops off beginning and end of sequences so they both start and end with a base
773 trimSeqs(querySeqs[i], bestfit[i], trimmed[i]);
776 mothurOut("Done finding pairs for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
778 mothurOut("Finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
779 for (int i = lines[process]->start; i < lines[process]->end; i++) {
780 vector<int> temp = findWindows(querySeqs[i], it->first, it->second, windowSizes[i]);
783 mothurOut("Done finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
786 }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
789 //force parent to wait until all the processes are done
790 for (int i=0;i<processors;i++) {
791 int temp = processIDS[i];
795 windowsForeachQuery = win;
797 bestfit = findPairs(lines[0]->start, lines[0]->end);
798 for (int j = 0; j < bestfit.size(); j++) {
799 //chops off beginning and end of sequences so they both start and end with a base
800 trimSeqs(querySeqs[j], bestfit[j], j);
803 for (int i = lines[0]->start; i < lines[0]->end; i++) {
804 it = trimmed[i].begin();
805 map<int, int> win = findWindows(querySeqs[i], it->first, it->second, windowSizes[i]);
811 catch(exception& e) {
812 errorOut(e, "Pintail", "createProcessesSpots");
818 /**************************************************************************************************/
820 void Pintail::createProcesses() {
822 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
824 vector<int> processIDS;
826 vector< vector<float> > exp; exp.resize(querySeqs.size());
827 vector<float> de; de.resize(querySeqs.size());
828 vector< vector<float> > obs; obs.resize(querySeqs.size());
829 vector<float> dev; dev.resize(querySeqs.size());
832 //loop through and create all the processes you want
833 while (process != processors) {
837 processIDS.push_back(pid);
841 mothurOut("Calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
842 for (int i = lines[process]->start; i < lines[process]->end; i++) {
844 vector<float> obsi = calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
848 vector<float> q = findQav(windowsForeachQuery[i], windowSizes[i]);
851 float alpha = getCoef(obsDistance[i], q);
854 vector<float> exp = calcExpected(q, alpha);
855 expectedDistance[i] = exp;
857 //get de and deviation
858 float dei = calcDE(obsi, exp);
861 it = trimmed[i].begin();
862 float dist = calcDist(querySeqs[i], bestfit[i], it->first, it->second);
865 mothurOut("Done calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
868 }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
871 //force parent to wait until all the processes are done
872 for (int i=0;i<processors;i++) {
873 int temp = processIDS[i];
878 expectedDistance = exp;
883 mothurOut("Calculating observed distance... "); cout.flush();
884 for (int i = lines[0]->start; i < lines[0]->end; i++) {
885 vector<float> obsi = calcObserved(querySeqs[i], bestfit[i], windows[i], windowSizes[i]);
886 obsDistance[i] = obsi;
888 mothurOut("Done."); mothurOutEndLine();
892 mothurOut("Finding variability... "); cout.flush();
893 for (int i = lines[0]->start; i < lines[0]->end; i++) {
894 vector<float> q = findQav(windows[i], windowSizes[i]);
897 mothurOut("Done."); mothurOutEndLine();
901 mothurOut("Calculating alpha... "); cout.flush();
902 for (int i = lines[0]->start; i < lines[0]->end; i++) {
903 float alpha = getCoef(obsDistance[i], Qav[i]);
904 seqCoef.push_back(alpha);
906 mothurOut("Done."); mothurOutEndLine();
910 mothurOut("Calculating expected distance... "); cout.flush();
911 for (int i = lines[0]->start; i < lines[0]->end; i++) {
912 vector<float> exp = calcExpected(Qav[i], seqCoef[i]);
913 expectedDistance[i] = exp;
915 mothurOut("Done."); mothurOutEndLine();
919 mothurOut("Finding deviation... "); cout.flush();
920 for (int i = lines[0]->start; i < lines[0]->end; i++) {
921 float de = calcDE(obsDistance[i], expectedDistance[i]);
924 it = trimmed[i].begin();
925 float dist = calcDist(querySeqs[i], bestfit[i], it->first, it->second);
928 mothurOut("Done."); mothurOutEndLine();
932 catch(exception& e) {
933 errorOut(e, "Pintail", "createProcesses");
939 /**************************************************************************************************/
941 void Pintail::createProcessesQuan() {
943 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
945 vector<int> processIDS;
946 vector< vector<float> > quan; quan.resize(100);
948 //loop through and create all the processes you want
949 while (process != processors) {
953 processIDS.push_back(pid);
957 vector< vector<float> > q = getQuantiles(templateLines[process]->start, templateLines[process]->end);
959 for (int i = 0; i < q.size(); i++) {
960 //put all values of q[i] into quan[i]
961 quan[i].insert(quan[i].begin(), q[i].begin(), q[i].end());
964 for (int i = 0; i < quan.size(); i++) {
966 for (int j = 0; j < quan[i].size(); j++) { cout << quan[i][j] << '\t'; }
971 }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
974 //force parent to wait until all the processes are done
975 for (int i=0;i<processors;i++) {
976 int temp = processIDS[i];
982 quantiles = getQuantiles(0, templateSeqs.size());
985 catch(exception& e) {
986 errorOut(e, "Pintail", "createProcessesQuan");
992 //***************************************************************************************************************