-
-//***************************************************************************************************************
-//find the window breaks for each sequence - this is so you can move ahead by bases.
-vector<int> Pintail::findWindows(Sequence* query, int front, int back, int& size) {
- try {
-
- vector<int> win;
-
- int cutoff = back - front; //back - front
-
- //if window is set to default
- if (size == 0) { if (cutoff > 1200) { size = 300; }
- else{ size = (cutoff / 4); } }
- else if (size > (cutoff / 4)) {
- mothurOut("You have selected to large a window size for sequence " + query->getName() + ". I will choose an appropriate window size."); mothurOutEndLine();
- size = (cutoff / 4);
- }
-
- string seq = query->getAligned().substr(front, cutoff);
-
- //count bases
- int numBases = 0;
- for (int l = 0; l < seq.length(); l++) { if (isalpha(seq[l])) { numBases++; } }
-
- //save start of seq
- win.push_back(front);
-
- //move ahead increment bases at a time until all bases are in a window
- int countBases = 0;
- int totalBases = 0; //used to eliminate window of blanks at end of sequence
-
- seq = query->getAligned();
- for (int m = front; m < (back - size) ; m++) {
-
- //count number of bases you see
- if (isalpha(seq[m])) { countBases++; totalBases++; }
-
- //if you have seen enough bases to make a new window
- if (countBases >= increment) {
- win.push_back(m); //save spot in alignment
- countBases = 0; //reset bases you've seen in this window
- }
-
- //no need to continue if all your bases are in a window
- if (totalBases == numBases) { break; }
- }
-
- return win;
-
- }
- catch(exception& e) {
- errorOut(e, "Pintail", "findWindows");
- exit(1);
- }
-}
-
-//***************************************************************************************************************
-vector<float> Pintail::calcObserved(Sequence* query, Sequence subject, vector<int> window, int size) {
- try {
-
- vector<float> temp;
-
- int startpoint = 0;
- for (int m = 0; m < windows.size(); m++) {
-
- string seqFrag = query->getAligned().substr(window[startpoint], size);
- string seqFragsub = subject.getAligned().substr(window[startpoint], size);
-
- int diff = 0;
- for (int b = 0; b < seqFrag.length(); b++) {
- if (seqFrag[b] != seqFragsub[b]) { diff++; }
- }
-
- //percentage of mismatched bases
- float dist;
- dist = diff / (float) seqFrag.length() * 100;
-
- temp.push_back(dist);
-
- startpoint++;
- }
-
- return temp;
- }
- catch(exception& e) {
- errorOut(e, "Pintail", "calcObserved");
- exit(1);
- }
-}
-//***************************************************************************************************************
-float Pintail::calcDist(Sequence* query, Sequence subject, int front, int back) {
- try {
-
- //so you only look at the trimmed part of the sequence
- int cutoff = back - front;
-
- //from first startpoint with length back-front
- string seqFrag = query->getAligned().substr(front, cutoff);
- string seqFragsub = subject.getAligned().substr(front, cutoff);
-
- int diff = 0;
- for (int b = 0; b < seqFrag.length(); b++) {
- if (seqFrag[b] != seqFragsub[b]) { diff++; }
- }
-
- //percentage of mismatched bases
- float dist = diff / (float) seqFrag.length() * 100;
-
- return dist;
- }
- catch(exception& e) {
- errorOut(e, "Pintail", "calcDist");
- exit(1);
- }
-}
-
-//***************************************************************************************************************
-vector<float> Pintail::calcExpected(vector<float> qav, float coef) {
- try {
-
- //for each window
- vector<float> queryExpected;
-
- for (int m = 0; m < qav.size(); m++) {
-
- float expected = qav[m] * coef;
-
- queryExpected.push_back(expected);
- }
-
- return queryExpected;
-
- }
- catch(exception& e) {
- errorOut(e, "Pintail", "calcExpected");
- exit(1);
- }
-}
-//***************************************************************************************************************
-float Pintail::calcDE(vector<float> obs, vector<float> exp) {
- try {
-
- //for each window
- float sum = 0.0; //sum = sum from 1 to m of (oi-ei)^2
- for (int m = 0; m < obsDistance.size(); m++) { sum += ((obs[m] - exp[m]) * (obs[m] - exp[m])); }
-
- float de = sqrt((sum / (obsDistance.size() - 1)));
-
- return de;
- }
- catch(exception& e) {
- errorOut(e, "Pintail", "calcDE");
- exit(1);
- }
-}
-
-//***************************************************************************************************************
-
-vector<float> Pintail::calcFreq(vector<Sequence*> seqs) {
- try {
-
- vector<float> prob;
- string freqfile = getRootName(templateFile) + "probability";
- ofstream outFreq;
-
- openOutputFile(freqfile, outFreq);
-
- //at each position in the sequence
- for (int i = 0; i < seqs[0]->getAligned().length(); i++) {
-
- vector<int> freq; freq.resize(4,0);
- int gaps = 0;
-
- //find the frequency of each nucleotide
- for (int j = 0; j < seqs.size(); j++) {
-
- char value = seqs[j]->getAligned()[i];
-
- if(toupper(value) == 'A') { freq[0]++; }
- else if(toupper(value) == 'T' || toupper(value) == 'U') { freq[1]++; }
- else if(toupper(value) == 'G') { freq[2]++; }
- else if(toupper(value) == 'C') { freq[3]++; }
- else { gaps++; }
- }
-
- //find base with highest frequency
- int highest = 0;
- for (int m = 0; m < freq.size(); m++) { if (freq[m] > highest) { highest = freq[m]; } }
-
- float highFreq;
- if ( (seqs.size() - gaps) == 0 ) { highFreq = 1.0; }
- else { highFreq = highest / (float) (seqs.size() - gaps); }
- //highFreq = highest / (float) seqs.size();
- //cout << i << '\t' << highFreq << endl;
-
- float Pi;
- Pi = (highFreq - 0.25) / 0.75;
-
- //cannot have probability less than 0.
- if (Pi < 0) { Pi = 0.0; }
-
- //saves this for later
- outFreq << i+1 << '\t' << Pi << endl;
-
- prob.push_back(Pi);
- }
-
- outFreq.close();
-
- return prob;
-
- }
- catch(exception& e) {
- errorOut(e, "Pintail", "calcFreq");
- exit(1);
- }
-}
-//***************************************************************************************************************
-vector<float> Pintail::findQav(vector<int> window, int size) {
- try {
- vector<float> averages;
-
- //for each window find average
- for (int m = 0; m < windows.size(); m++) {
-
- float average = 0.0;
-
- //while you are in the window for this sequence
- for (int j = window[m]; j < (window[m]+size); j++) { average += probabilityProfile[j]; }
-
- average = average / size;
-
- //save this windows average
- averages.push_back(average);
- }
-
- return averages;
- }
- catch(exception& e) {
- errorOut(e, "Pintail", "findQav");
- exit(1);
- }
-}
-
-//***************************************************************************************************************
-vector< vector<float> > Pintail::getQuantiles(int start, int end) {
- try {
- vector< vector<float> > quan;
-
- //for each sequence
- for(int i = start; i < end; i++){
-
- Sequence* query = templateSeqs[i];
-
- //compare to every other sequence in template
- for(int j = 0; j < i; j++){
-
- Sequence subject = *(templateSeqs[j]);
-
- map<int, int> trim = trimSeqs(query, subject, -1);
-
-
-
-
-
- }
-
-
-
- }
- return quan;
-
- }
- catch(exception& e) {
- errorOut(e, "Pintail", "findQav");
- exit(1);
- }
-}
-
-//***************************************************************************************************************
-float Pintail::getCoef(vector<float> obs, vector<float> qav) {
- try {
-
- //find average prob for this seqs windows
- float probAverage = 0.0;
- for (int j = 0; j < qav.size(); j++) { probAverage += qav[j]; }
- probAverage = probAverage / (float) Qav.size();
-
- //find observed average
- float obsAverage = 0.0;
- for (int j = 0; j < obs.size(); j++) { obsAverage += obs[j]; }
- obsAverage = obsAverage / (float) obs.size();
-
-
- float coef = obsAverage / probAverage;
-
- return coef;
- }
- catch(exception& e) {
- errorOut(e, "Pintail", "getCoef");
- exit(1);
- }
-}