5 * Created by westcott on 8/24/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
11 #include "ignoregaps.h"
12 #include "eachgapdist.h"
15 //***************************************************************************************************************
16 Ccode::Ccode(string filename, string temp, string o) { fastafile = filename; templateFile = temp; outputDir = o; }
17 //***************************************************************************************************************
21 for (int i = 0; i < querySeqs.size(); i++) { delete querySeqs[i]; }
22 for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; }
25 errorOut(e, "Ccode", "~Ccode");
29 //***************************************************************************************************************
30 void Ccode::print(ostream& out) {
35 string mapInfo = outputDir + getRootName(getSimpleName(fastafile)) + "mapinfo";
37 openOutputFile(mapInfo, out2);
39 out2 << "Place in masked, filtered and trimmed sequence\tPlace in original alignment" << endl;
41 for (int j = 0; j < querySeqs.size(); j++) {
42 out2 << querySeqs[j]->getName() << endl;
43 for (it = spotMap[j].begin(); it!= spotMap[j].end(); it++) {
44 out2 << it->first << '\t' << it->second << endl;
49 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
51 out << "For full window mapping info refer to " << mapInfo << endl << endl;
53 for (int i = 0; i < querySeqs.size(); i++) {
55 out << querySeqs[i]->getName() << endl << endl << "Reference sequences used and distance to query:" << endl;
57 for (int j = 0; j < closest[i].size(); j++) {
58 out << setprecision(3) << closest[i][j].seq->getName() << '\t' << closest[i][j].dist << endl;
63 //window mapping info.
64 out << "Mapping information: ";
65 //you mask and did not filter
66 if ((seqMask != "") && (!filter)) { out << "mask and trim."; }
68 //you filtered and did not mask
69 if ((seqMask == "") && (filter)) { out << "filter and trim."; }
71 //you masked and filtered
72 if ((seqMask != "") && (filter)) { out << "mask, filter and trim."; }
74 out << endl << "Window\tStartPos\tEndPos" << endl;
77 for (int k = 0; k < windows[i].size()-1; k++) {
78 out << k+1 << '\t' << spotMap[i][windows[i][k]-it->first] << '\t' << spotMap[i][windows[i][k]-it->first+windowSizes[i]] << endl;
81 out << windows[i].size() << '\t' << spotMap[i][windows[i][windows[i].size()-1]-it->first] << '\t' << spotMap[i][it->second-it->first-1] << endl;
84 out << "Window\tAvgQ\t(sdQ)\tAvgR\t(sdR)\tRatio\tAnova" << endl;
85 for (int k = 0; k < windows[i].size(); k++) {
86 float ds = averageQuery[i][k] / averageRef[i][k];
87 out << k+1 << '\t' << averageQuery[i][k] << '\t' << sdQuery[i][k] << '\t' << averageRef[i][k] << '\t'<< sdRef[i][k] << '\t' << ds << '\t' << anova[i][k] << endl;
93 /* F test for differences among variances.
94 * varQuery is expected to be higher or similar than varRef */
95 //float fs = varQuery[query][i] / varRef[query][i]; /* F-Snedecor, test for differences of variances */
99 //confidence limit, t - Student, anova
100 out << "Window\tConfidenceLimit\tt-Student\tAnova" << endl;
102 for (int k = 0; k < windows[i].size(); k++) {
104 if (isChimericConfidence[i][k]) { temp += "*\t"; }
105 else { temp += "\t"; }
107 if (isChimericTStudent[i][k]) { temp += "*\t"; }
108 else { temp += "\t"; }
110 if (isChimericANOVA[i][k]) { temp += "*\t"; }
111 else { temp += "\t"; }
113 out << k+1 << '\t' << temp << endl;
115 if (temp == "*\t*\t*\t") { results = true; }
120 mothurOut(querySeqs[i]->getName() + " was found have at least one chimeric window."); mothurOutEndLine();
124 catch(exception& e) {
125 errorOut(e, "Ccode", "print");
130 //***************************************************************************************************************
131 int Ccode::getChimeras() {
134 //read in query sequences and subject sequences
135 mothurOut("Reading sequences and template file... "); cout.flush();
136 querySeqs = readSeqs(fastafile);
137 templateSeqs = readSeqs(templateFile);
138 mothurOut("Done."); mothurOutEndLine();
140 int numSeqs = querySeqs.size();
142 if (unaligned) { mothurOut("Your sequences need to be aligned when you use the bellerophon ccode."); mothurOutEndLine(); return 1; }
144 closest.resize(numSeqs);
146 refCombo.resize(numSeqs, 0);
147 sumRef.resize(numSeqs);
148 varRef.resize(numSeqs);
149 varQuery.resize(numSeqs);
150 sdRef.resize(numSeqs);
151 sdQuery.resize(numSeqs);
152 sumQuery.resize(numSeqs);
153 sumSquaredRef.resize(numSeqs);
154 sumSquaredQuery.resize(numSeqs);
155 averageRef.resize(numSeqs);
156 averageQuery.resize(numSeqs);
157 anova.resize(numSeqs);
158 isChimericConfidence.resize(numSeqs);
159 isChimericTStudent.resize(numSeqs);
160 isChimericANOVA.resize(numSeqs);
161 trim.resize(numSeqs);
162 spotMap.resize(numSeqs);
163 windowSizes.resize(numSeqs, window);
164 windows.resize(numSeqs);
166 //break up file if needed
167 int linesPerProcess = numSeqs / processors ;
169 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
170 //find breakup of sequences for all times we will Parallelize
171 if (processors == 1) { lines.push_back(new linePair(0, numSeqs)); }
174 for (int i = 0; i < (processors-1); i++) {
175 lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
177 //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
178 int i = processors - 1;
179 lines.push_back(new linePair((i*linesPerProcess), numSeqs));
183 lines.push_back(new linePair(0, numSeqs));
186 distCalc = new eachGapDist();
187 decalc = new DeCalculator();
190 if (processors == 1) {
191 mothurOut("Finding top matches for sequences... "); cout.flush();
192 closest = findClosest(lines[0]->start, lines[0]->end, numWanted);
193 mothurOut("Done."); mothurOutEndLine();
194 }else { createProcessesClosest(); }
197 for (int j = 0; j < numSeqs; j++) {
198 for (int i = 0; i < querySeqs[0]->getAligned().length(); i++) {
203 //mask sequences if the user wants to
205 decalc->setMask(seqMask);
208 for (int i = 0; i < querySeqs.size(); i++) {
209 decalc->runMask(querySeqs[i]);
213 for (int i = 0; i < templateSeqs.size(); i++) {
214 decalc->runMask(templateSeqs[i]);
217 for (int i = 0; i < numSeqs; i++) {
218 spotMap[i] = decalc->getMaskMap();
223 vector<Sequence*> temp = templateSeqs;
224 for (int i = 0; i < querySeqs.size(); i++) { temp.push_back(querySeqs[i]); }
228 runFilter(querySeqs);
229 runFilter(templateSeqs);
232 map<int, int> newMap;
236 for (int i = 0; i < filterString.length(); i++) {
237 if (filterString[i] == '1') {
239 newMap[spot] = spotMap[j][i];
244 for (int i = 0; i < numSeqs; i++) {
249 //trim sequences - this follows ccodes remove_extra_gaps
250 for (int i = 0; i < querySeqs.size(); i++) {
254 //windows are equivalent to words - ccode paper recommends windows are between 5% and 20% on alignment length().
255 //Our default will be 10% and we will warn if user tries to use a window above or below these recommendations
256 for (int i = 0; i < querySeqs.size(); i++) {
257 windows[i] = findWindows(i);
260 //remove sequences that are more than 20% different and less than 0.5% different - may want to allow user to specify this later
261 if (processors == 1) {
262 for (int i = 0; i < closest.size(); i++) {
263 removeBadReferenceSeqs(closest[i], i);
265 }else { createProcessesRemoveBad(); }
268 if (processors == 1) {
269 //find the averages for each querys references
270 for (int i = 0; i < numSeqs; i++) {
271 getAverageRef(closest[i], i); //fills sumRef[i], averageRef[i], sumSquaredRef[i] and refCombo[i].
274 //find the averages for the query
275 for (int i = 0; i < numSeqs; i++) {
276 getAverageQuery(closest[i], i); //fills sumQuery[i], averageQuery[i], sumSquaredQuery[i].
278 }else { createProcessesAverages(); }
280 if (processors == 1) {
281 //find the averages for each querys references
282 for (int i = 0; i < numSeqs; i++) {
283 findVarianceRef(i); //fills varRef[i] and sdRef[i] also sets minimum error rate to 0.001 to avoid divide by 0.
286 //find the averages for the query
287 for (int i = 0; i < numSeqs; i++) {
288 findVarianceQuery(i); //fills varQuery[i] and sdQuery[i] also sets minimum error rate to 0.001 to avoid divide by 0.
290 }else { createProcessesVariances(); }
292 if (processors == 1) {
293 for (int i = 0; i < numSeqs; i++) {
294 determineChimeras(i); //fills anova, isChimericConfidence[i], isChimericTStudent[i] and isChimericANOVA[i].
296 }else { createProcessesDetermine(); }
299 for (int i = 0; i < lines.size(); i++) { delete lines[i]; }
305 catch(exception& e) {
306 errorOut(e, "Ccode", "getChimeras");
310 /***************************************************************************************************************/
311 //ccode algo says it does this to "Removes the initial and final gaps to avoid biases due to incomplete sequences."
312 void Ccode::trimSequences(int query) {
315 int frontPos = 0; //should contain first position in all seqs that is not a gap character
316 int rearPos = querySeqs[query]->getAligned().length();
318 //********find first position in closest seqs that is a non gap character***********//
319 //find first position all query seqs that is a non gap character
320 for (int i = 0; i < closest[query].size(); i++) {
322 string aligned = closest[query][i].seq->getAligned();
325 //find first spot in this seq
326 for (int j = 0; j < aligned.length(); j++) {
327 if (isalpha(aligned[j])) {
333 //save this spot if it is the farthest
334 if (pos > frontPos) { frontPos = pos; }
337 //find first position all querySeq[query] that is a non gap character
338 string aligned = querySeqs[query]->getAligned();
341 //find first spot in this seq
342 for (int j = 0; j < aligned.length(); j++) {
343 if (isalpha(aligned[j])) {
349 //save this spot if it is the farthest
350 if (pos > frontPos) { frontPos = pos; }
353 //********find last position in closest seqs that is a non gap character***********//
354 for (int i = 0; i < closest[query].size(); i++) {
356 string aligned = closest[query][i].seq->getAligned();
357 int pos = aligned.length();
359 //find first spot in this seq
360 for (int j = aligned.length()-1; j >= 0; j--) {
361 if (isalpha(aligned[j])) {
367 //save this spot if it is the farthest
368 if (pos < rearPos) { rearPos = pos; }
371 //find last position all querySeqs[query] that is a non gap character
372 aligned = querySeqs[query]->getAligned();
373 pos = aligned.length();
375 //find first spot in this seq
376 for (int j = aligned.length()-1; j >= 0; j--) {
377 if (isalpha(aligned[j])) {
383 //save this spot if it is the farthest
384 if (pos < rearPos) { rearPos = pos; }
387 //check to make sure that is not whole seq
388 if ((rearPos - frontPos - 1) <= 0) { mothurOut("Error, when I trim your sequences, the entire sequence is trimmed."); mothurOutEndLine(); exit(1); }
390 map<int, int> tempTrim;
391 tempTrim[frontPos] = rearPos;
393 //save trimmed locations
394 trim[query] = tempTrim;
397 map<int, int> newMap;
400 for (int i = frontPos; i < rearPos; i++) {
402 //cout << query << '\t' << i << '\t' << spotMap[query][i] << endl;
403 newMap[spot] = spotMap[query][i];
407 //for (it = newMap.begin(); it!=newMap.end(); it++) {
408 //cout << query << '\t' << it->first << '\t' << it->second << endl;
411 spotMap[query] = newMap;
415 catch(exception& e) {
416 errorOut(e, "Ccode", "trimSequences");
421 /***************************************************************************************************************/
422 vector<int> Ccode::findWindows(int query) {
426 it = trim[query].begin();
428 int length = it->second - it->first;
430 //default is wanted = 10% of total length
431 if (windowSizes[query] > length) {
432 mothurOut("You have slected a window larger than your sequence length after all filters, masks and trims have been done. I will use the default 10% of sequence length.");
433 windowSizes[query] = length / 10;
434 }else if (windowSizes[query] == 0) { windowSizes[query] = length / 10; }
435 else if (windowSizes[query] > (length * 0.20)) {
436 mothurOut("You have selected a window that is larger than 20% of your sequence length. This is not recommended, but I will continue anyway."); mothurOutEndLine();
437 }else if (windowSizes[query] < (length * 0.05)) {
438 mothurOut("You have selected a window that is smaller than 5% of your sequence length. This is not recommended, but I will continue anyway."); mothurOutEndLine();
441 //save starting points of each window
442 for (int m = it->first; m < (it->second-windowSizes[query]); m+=windowSizes[query]) { win.push_back(m); }
445 if (win[win.size()-1] < (it->first+length)) {
446 win.push_back(win[win.size()-1]+windowSizes[query]); // ex. string length is 115, window is 25, without this you would get 0, 25, 50, 75
447 } //with this you would get 1,25,50,75,100
453 catch(exception& e) {
454 errorOut(e, "Ccode", "findWindows");
458 //***************************************************************************************************************
459 int Ccode::getDiff(string seqA, string seqB) {
464 for (int i = 0; i < seqA.length(); i++) {
465 //if you are both not gaps
466 //if (isalpha(seqA[i]) && isalpha(seqA[i])) {
468 if (seqA[i] != seqB[i]) {
469 int ok; /* ok=1 means equivalent base. Checks for degenerate bases */
471 /* the char in base_a and base_b have been checked and they are different */
472 if ((seqA[i] == 'N') && (seqB[i] != '-')) ok = 1;
473 else if ((seqB[i] == 'N') && (seqA[i] != '-')) ok = 1;
474 else if ((seqA[i] == 'Y') && ((seqB[i] == 'C') || (seqB[i] == 'T'))) ok = 1;
475 else if ((seqB[i] == 'Y') && ((seqA[i] == 'C') || (seqA[i] == 'T'))) ok = 1;
476 else if ((seqA[i] == 'R') && ((seqB[i] == 'G') || (seqB[i] == 'A'))) ok = 1;
477 else if ((seqB[i] == 'R') && ((seqA[i] == 'G') || (seqA[i] == 'A'))) ok = 1;
478 else if ((seqA[i] == 'S') && ((seqB[i] == 'C') || (seqB[i] == 'G'))) ok = 1;
479 else if ((seqB[i] == 'S') && ((seqA[i] == 'C') || (seqA[i] == 'G'))) ok = 1;
480 else if ((seqA[i] == 'W') && ((seqB[i] == 'T') || (seqB[i] == 'A'))) ok = 1;
481 else if ((seqB[i] == 'W') && ((seqA[i] == 'T') || (seqA[i] == 'A'))) ok = 1;
482 else if ((seqA[i] == 'M') && ((seqB[i] == 'A') || (seqB[i] == 'C'))) ok = 1;
483 else if ((seqB[i] == 'M') && ((seqA[i] == 'A') || (seqA[i] == 'C'))) ok = 1;
484 else if ((seqA[i] == 'K') && ((seqB[i] == 'T') || (seqB[i] == 'G'))) ok = 1;
485 else if ((seqB[i] == 'K') && ((seqA[i] == 'T') || (seqA[i] == 'G'))) ok = 1;
486 else if ((seqA[i] == 'V') && ((seqB[i] == 'C') || (seqB[i] == 'A') || (seqB[i] == 'G'))) ok = 1;
487 else if ((seqB[i] == 'V') && ((seqA[i] == 'C') || (seqA[i] == 'A') || (seqA[i] == 'G'))) ok = 1;
488 else if ((seqA[i] == 'H') && ((seqB[i] == 'T') || (seqB[i] == 'A') || (seqB[i] == 'C'))) ok = 1;
489 else if ((seqB[i] == 'H') && ((seqA[i] == 'T') || (seqA[i] == 'A') || (seqA[i] == 'C'))) ok = 1;
490 else if ((seqA[i] == 'D') && ((seqB[i] == 'T') || (seqB[i] == 'A') || (seqB[i] == 'G'))) ok = 1;
491 else if ((seqB[i] == 'D') && ((seqA[i] == 'T') || (seqA[i] == 'A') || (seqA[i] == 'G'))) ok = 1;
492 else if ((seqA[i] == 'B') && ((seqB[i] == 'C') || (seqB[i] == 'T') || (seqB[i] == 'G'))) ok = 1;
493 else if ((seqB[i] == 'B') && ((seqA[i] == 'C') || (seqA[i] == 'T') || (seqA[i] == 'G'))) ok = 1;
494 else ok = 0; /* the bases are different and not equivalent */
496 //check if they are both blanks
497 if ((seqA[i] == '.') && (seqB[i] == '-')) ok = 1;
498 else if ((seqB[i] == '.') && (seqA[i] == '-')) ok = 1;
500 if (ok == 0) { numDiff++; }
508 catch(exception& e) {
509 errorOut(e, "Ccode", "getDiff");
513 //***************************************************************************************************************
514 //tried to make this look most like ccode original implementation
515 void Ccode::removeBadReferenceSeqs(vector<SeqDist>& seqs, int query) {
518 vector< vector<int> > numDiffBases;
519 numDiffBases.resize(seqs.size());
521 for (int i = 0; i < numDiffBases.size(); i++) { numDiffBases[i].resize(seqs.size(),0); }
523 it = trim[query].begin();
524 int length = it->second - it->first;
526 //calc differences from each sequence to everyother seq in the set
527 for (int i = 0; i < seqs.size(); i++) {
529 string seqA = seqs[i].seq->getAligned().substr(it->first, length);
531 //so you don't calc i to j and j to i since they are the same
532 for (int j = 0; j < i; j++) {
534 string seqB = seqs[j].seq->getAligned().substr(it->first, length);
537 int numDiff = getDiff(seqA, seqB);
539 numDiffBases[i][j] = numDiff;
540 numDiffBases[j][i] = numDiff;
544 //initailize remove to 0
545 vector<int> remove; remove.resize(seqs.size(), 0);
546 float top = ((20*length) / (float) 100);
547 float bottom = ((0.5*length) / (float) 100);
549 //check each numDiffBases and if any are higher than threshold set remove to 1 so you can remove those seqs from the closest set
550 for (int i = 0; i < numDiffBases.size(); i++) {
551 for (int j = 0; j < i; j++) {
552 //are you more than 20% different
553 if (numDiffBases[i][j] > top) { remove[j] = 1; }
554 //are you less than 0.5% different
555 if (numDiffBases[i][j] < bottom) { remove[j] = 1; }
561 //count seqs that are not going to be removed
562 for (int i = 0; i < remove.size(); i++) {
563 if (remove[i] == 0) { numSeqsLeft++; }
566 //if you have enough then remove bad ones
567 if (numSeqsLeft >= 3) {
568 vector<SeqDist> goodSeqs;
570 for (int i = 0; i < remove.size(); i++) {
571 if (remove[i] == 0) {
572 goodSeqs.push_back(seqs[i]);
578 }else { //warn, but dont remove any
579 mothurOut(querySeqs[query]->getName() + " does not have an adaquate number of reference sequences that are within 20% and 0.5% similarity. I will continue, but please check."); mothurOutEndLine();
584 catch(exception& e) {
585 errorOut(e, "Ccode", "removeBadReferenceSeqs");
589 //***************************************************************************************************************
590 vector< vector<SeqDist> > Ccode::findClosest(int start, int end, int numWanted) {
593 vector< vector<SeqDist> > topMatches; topMatches.resize(querySeqs.size());
595 float smallestOverall, smallestLeft, smallestRight;
596 smallestOverall = 1000; smallestLeft = 1000; smallestRight = 1000;
598 //for each sequence in querySeqs - find top matches to use as reference
599 for(int j = start; j < end; j++){
601 Sequence query = *(querySeqs[j]);
603 vector<SeqDist> distances;
605 //calc distance to each sequence in template seqs
606 for (int i = 0; i < templateSeqs.size(); i++) {
608 Sequence ref = *(templateSeqs[i]);
611 distCalc->calcDist(query, ref);
612 float dist = distCalc->getDist();
616 temp.seq = templateSeqs[i];
619 distances.push_back(temp);
622 sort(distances.begin(), distances.end(), compareSeqDist);
624 //save the number of top matches wanted
625 for (int h = 0; h < numWanted; h++) {
626 topMatches[j].push_back(distances[h]);
633 catch(exception& e) {
634 errorOut(e, "Ccode", "findClosestSides");
638 /**************************************************************************************************/
639 //find the distances from each reference sequence to every other reference sequence for each window for this query
640 void Ccode::getAverageRef(vector<SeqDist> ref, int query) {
643 vector< vector< vector<int> > > diffs; //diffs[0][1][2] is the number of differences between ref seq 0 and ref seq 1 at window 2.
645 //initialize diffs vector
646 diffs.resize(ref.size());
647 for (int i = 0; i < diffs.size(); i++) {
648 diffs[i].resize(ref.size());
649 for (int j = 0; j < diffs[i].size(); j++) {
650 diffs[i][j].resize(windows[query].size(), 0);
654 it = trim[query].begin();
656 //find the distances from each reference sequence to every other reference sequence for each window for this query
657 for (int i = 0; i < ref.size(); i++) {
659 string refI = ref[i].seq->getAligned();
661 //j<i, so you don't find distances from i to j and then j to i.
662 for (int j = 0; j < i; j++) {
664 string refJ = ref[j].seq->getAligned();
666 for (int k = 0; k < windows[query].size(); k++) {
668 string refIWindowk, refJWindowk;
670 if (k < windows[query].size()-1) {
672 refIWindowk = refI.substr(windows[query][k], windowSizes[query]);
673 refJWindowk = refJ.substr(windows[query][k], windowSizes[query]);
674 }else { //last window may be smaller than rest - see findwindows
676 refIWindowk = refI.substr(windows[query][k], (it->second-windows[query][k]));
677 refJWindowk = refJ.substr(windows[query][k], (it->second-windows[query][k]));
681 int diff = getDiff(refIWindowk, refJWindowk);
682 //cout << i << '\t' << j << '\t' << k << '\t' << diff << endl;
683 //save differences in [i][j][k] and [j][i][k] since they are the same
684 diffs[i][j][k] = diff;
685 diffs[j][i][k] = diff;
693 //initialize sumRef for this query
694 sumRef[query].resize(windows[query].size(), 0);
695 sumSquaredRef[query].resize(windows[query].size(), 0);
696 averageRef[query].resize(windows[query].size(), 0);
698 //find the sum of the differences for hte reference sequences
699 for (int i = 0; i < diffs.size(); i++) {
700 for (int j = 0; j < i; j++) {
702 //increment this querys reference sequences combos
705 for (int k = 0; k < diffs[i][j].size(); k++) {
706 sumRef[query][k] += diffs[i][j][k];
707 sumSquaredRef[query][k] += (diffs[i][j][k]*diffs[i][j][k]);
714 //find the average of the differences for the references for each window
715 for (int i = 0; i < windows[query].size(); i++) {
716 averageRef[query][i] = sumRef[query][i] / (float) refCombo[query];
720 catch(exception& e) {
721 errorOut(e, "Ccode", "getAverageRef");
725 /**************************************************************************************************/
726 void Ccode::getAverageQuery (vector<SeqDist> ref, int query) {
729 vector< vector<int> > diffs; //diffs[1][2] is the number of differences between querySeqs[query] and ref seq 1 at window 2.
731 //initialize diffs vector
732 diffs.resize(ref.size());
733 for (int j = 0; j < diffs.size(); j++) {
734 diffs[j].resize(windows[query].size(), 0);
737 it = trim[query].begin();
739 string refQuery = querySeqs[query]->getAligned();
741 //j<i, so you don't find distances from i to j and then j to i.
742 for (int j = 0; j < ref.size(); j++) {
744 string refJ = ref[j].seq->getAligned();
746 for (int k = 0; k < windows[query].size(); k++) {
748 string QueryWindowk, refJWindowk;
750 if (k < windows[query].size()-1) {
752 QueryWindowk = refQuery.substr(windows[query][k], windowSizes[query]);
753 refJWindowk = refJ.substr(windows[query][k], windowSizes[query]);
754 }else { //last window may be smaller than rest - see findwindows
756 QueryWindowk = refQuery.substr(windows[query][k], (it->second-windows[query][k]));
757 refJWindowk = refJ.substr(windows[query][k], (it->second-windows[query][k]));
761 int diff = getDiff(QueryWindowk, refJWindowk);
762 //cout << j << '\t' << k << '\t' << diff << endl;
770 //initialize sumRef for this query
771 sumQuery[query].resize(windows[query].size(), 0);
772 sumSquaredQuery[query].resize(windows[query].size(), 0);
773 averageQuery[query].resize(windows[query].size(), 0);
775 //find the sum of the differences
776 for (int j = 0; j < diffs.size(); j++) {
777 for (int k = 0; k < diffs[j].size(); k++) {
778 sumQuery[query][k] += diffs[j][k];
779 sumSquaredQuery[query][k] += (diffs[j][k]*diffs[j][k]);
784 //find the average of the differences for the references for each window
785 for (int i = 0; i < windows[query].size(); i++) {
786 averageQuery[query][i] = sumQuery[query][i] / (float) ref.size();
791 catch(exception& e) {
792 errorOut(e, "Ccode", "getAverageQuery");
796 /**************************************************************************************************/
797 void Ccode::findVarianceRef (int query) {
800 varRef[query].resize(windows[query].size(), 0);
801 sdRef[query].resize(windows[query].size(), 0);
804 for (int i = 0; i < windows[query].size(); i++) {
805 varRef[query][i] = (sumSquaredRef[query][i] - ((sumRef[query][i]*sumRef[query][i])/(float)refCombo[query])) / (float)(refCombo[query]-1);
806 sdRef[query][i] = sqrt(varRef[query][i]);
808 //set minimum error rate to 0.001 - to avoid potential divide by zero - not sure if this is necessary but it follows ccode implementation
809 if (averageRef[query][i] < 0.001) { averageRef[query][i] = 0.001; }
810 if (sumRef[query][i] < 0.001) { sumRef[query][i] = 0.001; }
811 if (varRef[query][i] < 0.001) { varRef[query][i] = 0.001; }
812 if (sumSquaredRef[query][i] < 0.001) { sumSquaredRef[query][i] = 0.001; }
813 if (sdRef[query][i] < 0.001) { sdRef[query][i] = 0.001; }
817 catch(exception& e) {
818 errorOut(e, "Ccode", "findVarianceRef");
822 /**************************************************************************************************/
823 void Ccode::findVarianceQuery (int query) {
825 varQuery[query].resize(windows[query].size(), 0);
826 sdQuery[query].resize(windows[query].size(), 0);
829 for (int i = 0; i < windows[query].size(); i++) {
830 varQuery[query][i] = (sumSquaredQuery[query][i] - ((sumQuery[query][i]*sumQuery[query][i])/(float) closest[query].size())) / (float) (closest[query].size()-1);
831 sdQuery[query][i] = sqrt(varQuery[query][i]);
833 //set minimum error rate to 0.001 - to avoid potential divide by zero - not sure if this is necessary but it follows ccode implementation
834 if (averageQuery[query][i] < 0.001) { averageQuery[query][i] = 0.001; }
835 if (sumQuery[query][i] < 0.001) { sumQuery[query][i] = 0.001; }
836 if (varQuery[query][i] < 0.001) { varQuery[query][i] = 0.001; }
837 if (sumSquaredQuery[query][i] < 0.001) { sumSquaredQuery[query][i] = 0.001; }
838 if (sdQuery[query][i] < 0.001) { sdQuery[query][i] = 0.001; }
842 catch(exception& e) {
843 errorOut(e, "Ccode", "findVarianceQuery");
847 /**************************************************************************************************/
848 void Ccode::determineChimeras (int query) {
851 isChimericConfidence[query].resize(windows[query].size(), false);
852 isChimericTStudent[query].resize(windows[query].size(), false);
853 isChimericANOVA[query].resize(windows[query].size(), false);
854 anova[query].resize(windows[query].size());
858 for (int i = 0; i < windows[query].size(); i++) {
860 //get confidence limits
861 float t = getT(closest[query].size()-1); //how many seqs you are comparing to this querySeq
862 float dsUpper = (averageQuery[query][i] + (t * sdQuery[query][i])) / averageRef[query][i];
863 float dsLower = (averageQuery[query][i] - (t * sdQuery[query][i])) / averageRef[query][i];
864 //cout << t << '\t' << "ds upper = " << dsUpper << " dsLower = " << dsLower << endl;
865 if ((dsUpper > 1.0) && (dsLower > 1.0) && (averageQuery[query][i] > averageRef[query][i])) { /* range does not include 1 */
866 isChimericConfidence[query][i] = true; /* significantly higher at P<0.05 */
867 //cout << i << " made it here" << endl;
871 int degreeOfFreedom = refCombo[query] + closest[query].size() - 2;
872 float denomForT = (((refCombo[query]-1) * varQuery[query][i] + (closest[query].size() - 1) * varRef[query][i]) / (float) degreeOfFreedom) * ((refCombo[query] + closest[query].size()) / (float) (refCombo[query] * closest[query].size())); /* denominator, without sqrt(), for ts calculations */
874 float ts = fabs((averageQuery[query][i] - averageRef[query][i]) / (sqrt(denomForT))); /* value of ts for t-student test */
875 t = getT(degreeOfFreedom);
876 //cout << i << '\t' << t << '\t' << ts << endl;
877 if ((ts >= t) && (averageQuery[query][i] > averageRef[query][i])) {
878 isChimericTStudent[query][i] = true; /* significantly higher at P<0.05 */
882 float value1 = sumQuery[query][i] + sumRef[query][i];
883 float value2 = sumSquaredQuery[query][i] + sumSquaredRef[query][i];
884 float value3 = ((sumQuery[query][i]*sumQuery[query][i]) / (float) (closest[query].size())) + ((sumRef[query][i] * sumRef[query][i]) / (float) refCombo[query]);
885 float value4 = (value1 * value1) / ( (float) (closest[query].size() + refCombo[query]) );
886 float value5 = value2 - value4;
887 float value6 = value3 - value4;
888 float value7 = value5 - value6;
889 float value8 = value7 / ((float) degreeOfFreedom);
890 float anovaValue = value6 / value8;
892 float f = getF(degreeOfFreedom);
894 if ((anovaValue >= f) && (averageQuery[query][i] > averageRef[query][i])) {
895 isChimericANOVA[query][i] = true; /* significant P<0.05 */
898 if (isnan(anovaValue) || isinf(anovaValue)) { anovaValue = 0.0; }
900 anova[query][i] = anovaValue;
904 catch(exception& e) {
905 errorOut(e, "Ccode", "determineChimeras");
909 /**************************************************************************************************/
910 float Ccode::getT(int numseq) {
915 /* t-student critical values for different degrees of freedom and alpha 0.1 in one-tail tests (equivalent to 0.05) */
916 if (numseq > 120) tvalue = 1.645;
917 else if (numseq > 60) tvalue = 1.658;
918 else if (numseq > 40) tvalue = 1.671;
919 else if (numseq > 30) tvalue = 1.684;
920 else if (numseq > 29) tvalue = 1.697;
921 else if (numseq > 28) tvalue = 1.699;
922 else if (numseq > 27) tvalue = 1.701;
923 else if (numseq > 26) tvalue = 1.703;
924 else if (numseq > 25) tvalue = 1.706;
925 else if (numseq > 24) tvalue = 1.708;
926 else if (numseq > 23) tvalue = 1.711;
927 else if (numseq > 22) tvalue = 1.714;
928 else if (numseq > 21) tvalue = 1.717;
929 else if (numseq > 20) tvalue = 1.721;
930 else if (numseq > 19) tvalue = 1.725;
931 else if (numseq > 18) tvalue = 1.729;
932 else if (numseq > 17) tvalue = 1.734;
933 else if (numseq > 16) tvalue = 1.740;
934 else if (numseq > 15) tvalue = 1.746;
935 else if (numseq > 14) tvalue = 1.753;
936 else if (numseq > 13) tvalue = 1.761;
937 else if (numseq > 12) tvalue = 1.771;
938 else if (numseq > 11) tvalue = 1.782;
939 else if (numseq > 10) tvalue = 1.796;
940 else if (numseq > 9) tvalue = 1.812;
941 else if (numseq > 8) tvalue = 1.833;
942 else if (numseq > 7) tvalue = 1.860;
943 else if (numseq > 6) tvalue = 1.895;
944 else if (numseq > 5) tvalue = 1.943;
945 else if (numseq > 4) tvalue = 2.015;
946 else if (numseq > 3) tvalue = 2.132;
947 else if (numseq > 2) tvalue = 2.353;
948 else if (numseq > 1) tvalue = 2.920;
949 else if (numseq <= 1) {
950 mothurOut("Two or more reference sequences are required, your data will be flawed.\n"); mothurOutEndLine();
955 catch(exception& e) {
956 errorOut(e, "Ccode", "getT");
960 /**************************************************************************************************/
961 float Ccode::getF(int numseq) {
966 /* F-Snedecor critical values for v1=1 and different degrees of freedom v2 and alpha 0.05 */
967 if (numseq > 120) fvalue = 3.84;
968 else if (numseq > 60) fvalue = 3.92;
969 else if (numseq > 40) fvalue = 4.00;
970 else if (numseq > 30) fvalue = 4.08;
971 else if (numseq > 29) fvalue = 4.17;
972 else if (numseq > 28) fvalue = 4.18;
973 else if (numseq > 27) fvalue = 4.20;
974 else if (numseq > 26) fvalue = 4.21;
975 else if (numseq > 25) fvalue = 4.23;
976 else if (numseq > 24) fvalue = 4.24;
977 else if (numseq > 23) fvalue = 4.26;
978 else if (numseq > 22) fvalue = 4.28;
979 else if (numseq > 21) fvalue = 4.30;
980 else if (numseq > 20) fvalue = 4.32;
981 else if (numseq > 19) fvalue = 4.35;
982 else if (numseq > 18) fvalue = 4.38;
983 else if (numseq > 17) fvalue = 4.41;
984 else if (numseq > 16) fvalue = 4.45;
985 else if (numseq > 15) fvalue = 4.49;
986 else if (numseq > 14) fvalue = 4.54;
987 else if (numseq > 13) fvalue = 4.60;
988 else if (numseq > 12) fvalue = 4.67;
989 else if (numseq > 11) fvalue = 4.75;
990 else if (numseq > 10) fvalue = 4.84;
991 else if (numseq > 9) fvalue = 4.96;
992 else if (numseq > 8) fvalue = 5.12;
993 else if (numseq > 7) fvalue = 5.32;
994 else if (numseq > 6) fvalue = 5.59;
995 else if (numseq > 5) fvalue = 5.99;
996 else if (numseq > 4) fvalue = 6.61;
997 else if (numseq > 3) fvalue = 7.71;
998 else if (numseq > 2) fvalue = 10.1;
999 else if (numseq > 1) fvalue = 18.5;
1000 else if (numseq > 0) fvalue = 161;
1001 else if (numseq <= 0) {
1002 mothurOut("Two or more reference sequences are required, your data will be flawed.\n"); mothurOutEndLine();
1007 catch(exception& e) {
1008 errorOut(e, "Ccode", "getF");
1013 /**************************************************************************************************/
1014 void Ccode::createProcessesClosest() {
1016 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1018 vector<int> processIDS;
1020 //loop through and create all the processes you want
1021 while (process != processors) {
1025 processIDS.push_back(pid);
1027 }else if (pid == 0){
1029 mothurOut("Finding top matches for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
1030 closest = findClosest(lines[process]->start, lines[process]->end, numWanted);
1031 mothurOut("Done finding top matches for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
1033 //write out data to file so parent can read it
1035 string s = toString(getpid()) + ".temp";
1036 openOutputFile(s, out);
1039 for (int i = lines[process]->start; i < lines[process]->end; i++) {
1040 for (int j = 0; j < closest[i].size(); j++) {
1041 closest[i][j].seq->printSequence(out);
1044 out << ">" << endl; //to stop sequence read
1046 for (int i = lines[process]->start; i < lines[process]->end; i++) {
1047 for (int j = 0; j < closest[i].size(); j++) {
1048 out << closest[i][j].dist << '\t';
1054 }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
1057 //force parent to wait until all the processes are done
1058 for (int i=0;i<processors;i++) {
1059 int temp = processIDS[i];
1063 //get data created by processes
1064 for (int i=0;i<processors;i++) {
1066 string s = toString(processIDS[i]) + ".temp";
1067 openInputFile(s, in);
1069 vector< vector<Sequence*> > tempClosest; tempClosest.resize(querySeqs.size());
1071 for (int k = lines[i]->start; k < lines[i]->end; k++) {
1072 vector<Sequence*> tempVector;
1074 for (int j = 0; j < numWanted; j++) {
1076 Sequence* temp = new Sequence(in);
1079 tempVector.push_back(temp);
1082 tempClosest[k] = tempVector;
1086 in >> junk; gobble(in); // to get ">"
1088 vector< vector<float> > dists; dists.resize(querySeqs.size());
1090 for (int k = lines[i]->start; k < lines[i]->end; k++) {
1091 dists[k].resize(numWanted);
1092 for (int j = 0; j < numWanted; j++) {
1099 for (int k = lines[i]->start; k < lines[i]->end; k++) {
1100 closest[k].resize(numWanted);
1101 for (int j = 0; j < closest[k].size(); j++) {
1102 closest[k][j].seq = tempClosest[k][j];
1103 closest[k][j].dist = dists[k][j];
1113 closest = findClosest(lines[0]->start, lines[0]->end, numWanted);
1117 catch(exception& e) {
1118 errorOut(e, "Ccode", "createProcessesClosest");
1123 //***************************************************************************************************************
1124 void Ccode::createProcessesRemoveBad() {
1126 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1128 vector<int> processIDS;
1130 //loop through and create all the processes you want
1131 while (process != processors) {
1135 processIDS.push_back(pid);
1137 }else if (pid == 0){
1139 for (int i = lines[process]->start; i < lines[process]->end; i++) {
1140 removeBadReferenceSeqs(closest[i], i);
1143 //write out data to file so parent can read it
1145 string s = toString(getpid()) + ".temp";
1146 openOutputFile(s, out);
1149 for (int i = lines[process]->start; i < lines[process]->end; i++) {
1150 out << closest[i].size() << endl;
1151 for (int j = 0; j < closest[i].size(); j++) {
1152 closest[i][j].seq->printSequence(out);
1154 out << ">" << endl; //to stop sequence read
1157 for (int i = lines[process]->start; i < lines[process]->end; i++) {
1158 for (int j = 0; j < closest[i].size(); j++) {
1159 out << closest[i][j].dist << '\t';
1167 }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
1170 //force parent to wait until all the processes are done
1171 for (int i=0;i<processors;i++) {
1172 int temp = processIDS[i];
1176 //get data created by processes
1177 for (int i=0;i<processors;i++) {
1179 string s = toString(processIDS[i]) + ".temp";
1180 openInputFile(s, in);
1182 vector< vector<Sequence*> > tempClosest; tempClosest.resize(querySeqs.size());
1185 for (int k = lines[i]->start; k < lines[i]->end; k++) {
1189 sizes.push_back(num);
1192 vector<Sequence*> tempVector;
1194 for (int j = 0; j < num; j++) {
1196 Sequence* temp = new Sequence(in);
1199 tempVector.push_back(temp);
1202 in >> junk; gobble(in); // to get ">"
1204 tempClosest[k] = tempVector;
1207 vector< vector<float> > dists; dists.resize(querySeqs.size());
1209 for (int k = lines[i]->start; k < lines[i]->end; k++) {
1210 dists[k].resize(sizes[count]);
1211 for (int j = 0; j < sizes[count]; j++) {
1219 for (int k = lines[i]->start; k < lines[i]->end; k++) {
1220 for (int j = 0; j < sizes[count]; j++) {
1221 closest[k][j].seq = tempClosest[k][j];
1222 closest[k][j].dist = dists[k][j];
1231 for (int i = 0; i < closest.size(); i++) {
1232 removeBadReferenceSeqs(closest[i], i);
1237 catch(exception& e) {
1238 errorOut(e, "Ccode", "createProcessesRemoveBad");
1242 //***************************************************************************************************************
1243 void Ccode::createProcessesAverages() {
1245 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1247 vector<int> processIDS;
1249 //loop through and create all the processes you want
1250 while (process != processors) {
1254 processIDS.push_back(pid);
1256 }else if (pid == 0){
1258 //find the averages for each querys references
1259 for (int i = lines[process]->start; i < lines[process]->end; i++) {
1260 getAverageRef(closest[i], i); //fills sumRef[i], averageRef[i], sumSquaredRef[i] and refCombo[i].
1263 //find the averages for the query
1264 for (int i = lines[process]->start; i < lines[process]->end; i++) {
1265 getAverageQuery(closest[i], i); //fills sumQuery[i], averageQuery[i], sumSquaredQuery[i].
1269 //write out data to file so parent can read it
1271 string s = toString(getpid()) + ".temp";
1272 openOutputFile(s, out);
1275 for (int i = lines[process]->start; i < lines[process]->end; i++) {
1276 for (int j = 0; j < windows[i].size(); j++) {
1277 out << sumRef[i][j] << '\t';
1280 for (int j = 0; j < windows[i].size(); j++) {
1281 out << averageRef[i][j] << '\t';
1284 for (int j = 0; j < windows[i].size(); j++) {
1285 out << sumSquaredRef[i][j] << '\t';
1288 for (int j = 0; j < windows[i].size(); j++) {
1289 out << sumQuery[i][j] << '\t';
1292 for (int j = 0; j < windows[i].size(); j++) {
1293 out << averageQuery[i][j] << '\t';
1296 for (int j = 0; j < windows[i].size(); j++) {
1297 out << sumSquaredQuery[i][j] << '\t';
1300 out << refCombo[i] << endl;
1306 }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
1309 //force parent to wait until all the processes are done
1310 for (int i=0;i<processors;i++) {
1311 int temp = processIDS[i];
1315 //get data created by processes
1316 for (int i=0;i<processors;i++) {
1318 string s = toString(processIDS[i]) + ".temp";
1319 openInputFile(s, in);
1322 for (int k = lines[i]->start; k < lines[i]->end; k++) {
1323 sumRef[k].resize(windows[k].size());
1324 averageRef[k].resize(windows[k].size());
1325 sumSquaredRef[k].resize(windows[k].size());
1326 averageQuery[k].resize(windows[k].size());
1327 sumQuery[k].resize(windows[k].size());
1328 sumSquaredQuery[k].resize(windows[k].size());
1330 for (int j = 0; j < windows[k].size(); j++) {
1334 for (int j = 0; j < windows[k].size(); j++) {
1335 in >> averageRef[k][j];
1338 for (int j = 0; j < windows[k].size(); j++) {
1339 in >> sumSquaredRef[k][j];
1342 for (int j = 0; j < windows[k].size(); j++) {
1343 in >> sumQuery[k][j];
1346 for (int j = 0; j < windows[k].size(); j++) {
1347 in >> averageQuery[k][j];
1350 for (int j = 0; j < windows[k].size(); j++) {
1351 in >> sumSquaredQuery[k][j];
1362 //find the averages for each querys references
1363 for (int i = 0; i < querySeqs.size(); i++) {
1364 getAverageRef(closest[i], i); //fills sumRef[i], averageRef[i], sumSquaredRef[i] and refCombo[i].
1367 //find the averages for the query
1368 for (int i = 0; i < querySeqs.size(); i++) {
1369 getAverageQuery(closest[i], i); //fills sumQuery[i], averageQuery[i], sumSquaredQuery[i].
1375 catch(exception& e) {
1376 errorOut(e, "Ccode", "createProcessesAverages");
1380 //***************************************************************************************************************
1381 void Ccode::createProcessesVariances() {
1383 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1385 vector<int> processIDS;
1387 //loop through and create all the processes you want
1388 while (process != processors) {
1392 processIDS.push_back(pid);
1394 }else if (pid == 0){
1396 //find the averages for each querys references
1397 for (int i = lines[process]->start; i < lines[process]->end; i++) {
1398 findVarianceRef(i); //fills varRef[i] and sdRef[i] also sets minimum error rate to 0.001 to avoid divide by 0.
1401 //find the averages for the query
1402 for (int i = lines[process]->start; i < lines[process]->end; i++) {
1403 findVarianceQuery(i); //fills varQuery[i] and sdQuery[i] also sets minimum error rate to 0.001 to avoid divide by 0.
1407 //write out data to file so parent can read it
1409 string s = toString(getpid()) + ".temp";
1410 openOutputFile(s, out);
1413 for (int i = lines[process]->start; i < lines[process]->end; i++) {
1414 for (int j = 0; j < windows[i].size(); j++) {
1415 out << varRef[i][j] << '\t';
1418 for (int j = 0; j < windows[i].size(); j++) {
1419 out << sdRef[i][j] << '\t';
1422 for (int j = 0; j < windows[i].size(); j++) {
1423 out << varQuery[i][j] << '\t';
1426 for (int j = 0; j < windows[i].size(); j++) {
1427 out << sdQuery[i][j] << '\t';
1435 }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
1438 //force parent to wait until all the processes are done
1439 for (int i=0;i<processors;i++) {
1440 int temp = processIDS[i];
1444 //get data created by processes
1445 for (int i=0;i<processors;i++) {
1447 string s = toString(processIDS[i]) + ".temp";
1448 openInputFile(s, in);
1451 for (int k = lines[i]->start; k < lines[i]->end; k++) {
1452 varRef[k].resize(windows[k].size());
1453 sdRef[k].resize(windows[k].size());
1454 varQuery[k].resize(windows[k].size());
1455 sdQuery[k].resize(windows[k].size());
1457 for (int j = 0; j < windows[k].size(); j++) {
1461 for (int j = 0; j < windows[k].size(); j++) {
1465 for (int j = 0; j < windows[k].size(); j++) {
1466 in >> varQuery[k][j];
1469 for (int j = 0; j < windows[k].size(); j++) {
1470 in >> sdQuery[k][j];
1479 //find the averages for each querys references
1480 for (int i = 0; i < querySeqs.size(); i++) {
1481 findVarianceRef(i); //fills varRef[i] and sdRef[i] also sets minimum error rate to 0.001 to avoid divide by 0.
1484 //find the averages for the query
1485 for (int i = 0; i < querySeqs.size(); i++) {
1486 findVarianceQuery(i); //fills v arQuery[i] and sdQuery[i] also sets minimum error rate to 0.001 to avoid divide by 0.
1490 catch(exception& e) {
1491 errorOut(e, "Ccode", "createProcessesVariances");
1495 //***************************************************************************************************************
1496 void Ccode::createProcessesDetermine() {
1498 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1500 vector<int> processIDS;
1502 //loop through and create all the processes you want
1503 while (process != processors) {
1507 processIDS.push_back(pid);
1509 }else if (pid == 0){
1511 //find the averages for each querys references
1512 for (int i = lines[process]->start; i < lines[process]->end; i++) {
1513 determineChimeras(i); //fills anova, isChimericConfidence[i], isChimericTStudent[i] and isChimericANOVA[i].
1516 //write out data to file so parent can read it
1518 string s = toString(getpid()) + ".temp";
1519 openOutputFile(s, out);
1522 for (int i = lines[process]->start; i < lines[process]->end; i++) {
1523 for (int j = 0; j < windows[i].size(); j++) {
1524 out << anova[i][j] << '\t';
1527 for (int j = 0; j < windows[i].size(); j++) {
1528 out << isChimericConfidence[i][j] << '\t';
1531 for (int j = 0; j < windows[i].size(); j++) {
1532 out << isChimericTStudent[i][j] << '\t';
1535 for (int j = 0; j < windows[i].size(); j++) {
1536 out << isChimericANOVA[i][j] << '\t';
1544 }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
1547 //force parent to wait until all the processes are done
1548 for (int i=0;i<processors;i++) {
1549 int temp = processIDS[i];
1553 //get data created by processes
1554 for (int i=0;i<processors;i++) {
1556 string s = toString(processIDS[i]) + ".temp";
1557 openInputFile(s, in);
1560 for (int k = lines[i]->start; k < lines[i]->end; k++) {
1561 anova[k].resize(windows[k].size());
1562 isChimericConfidence[k].resize(windows[k].size(), false);
1563 isChimericTStudent[k].resize(windows[k].size(), false);
1564 isChimericANOVA[k].resize(windows[k].size(), false);
1567 for (int j = 0; j < windows[k].size(); j++) {
1571 for (int j = 0; j < windows[k].size(); j++) {
1573 if (tempBool == 1) { isChimericConfidence[k][j] = true; }
1576 for (int j = 0; j < windows[k].size(); j++) {
1578 if (tempBool == 1) { isChimericTStudent[k][j] = true; }
1581 for (int j = 0; j < windows[k].size(); j++) {
1583 if (tempBool == 1) { isChimericANOVA[k][j] = true; }
1592 for (int i = 0; i < querySeqs.size(); i++) {
1593 determineChimeras(i); //fills anova, isChimericConfidence[i], isChimericTStudent[i] and isChimericANOVA[i].
1598 catch(exception& e) {
1599 errorOut(e, "Ccode", "createProcessesDetermine");
1603 //***************************************************************************************************************