]> git.donarmstrong.com Git - mothur.git/blob - chimeracheckrdp.cpp
working on chimeras
[mothur.git] / chimeracheckrdp.cpp
1 /*
2  *  chimeracheckrdp.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/8/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeracheckrdp.h"
11                 
12 //***************************************************************************************************************
13 ChimeraCheckRDP::ChimeraCheckRDP(string filename,  string o) {  fastafile = filename;  outputDir = o;  }
14 //***************************************************************************************************************
15
16 ChimeraCheckRDP::~ChimeraCheckRDP() {
17         try {
18                 delete templateDB;
19                 delete kmer;
20         }
21         catch(exception& e) {
22                 errorOut(e, "ChimeraCheckRDP", "~AlignSim");
23                 exit(1);
24         }
25 }       
26 //***************************************************************************************************************
27 void ChimeraCheckRDP::print(ostream& out) {
28         try {
29                 
30                 mothurOut("Processing: " + querySeq->getName()); mothurOutEndLine();
31                 
32                 out << querySeq->getName() << endl;
33                 out << "IS scores: " << '\t';
34                         
35                 for (int k = 0; k < IS.size(); k++) {
36                         out << IS[k].score << '\t'; 
37                 }
38                 out << endl;
39                 
40                 if (svg) {
41                         if (name != "") { //if user has specific names
42                                 map<string, string>::iterator it = names.find(querySeq->getName());
43                                 
44                                 if (it != names.end()) { //user wants pic of this
45                                         makeSVGpic(IS);  //zeros out negative results
46                                 }
47                         }else{//output them all
48                                 makeSVGpic(IS);  //zeros out negative results
49                         }
50                 }
51         }
52         catch(exception& e) {
53                 errorOut(e, "ChimeraCheckRDP", "print");
54                 exit(1);
55         }
56 }
57 //***************************************************************************************************************
58 void ChimeraCheckRDP::doPrep() {
59         try {
60                 templateDB = new AlignmentDB(templateFileName, "kmer", kmerSize, 0.0,0.0,0.0,0.0);
61                 mothurOutEndLine();
62                 
63                 kmer = new Kmer(kmerSize);
64                 
65                 if (name != "") { 
66                         readName(name);  //fills name map with names of seqs the user wants to have .svg for.  
67                 }
68         }
69         catch(exception& e) {
70                 errorOut(e, "ChimeraCheckRDP", "doPrep");
71                 exit(1);
72         }
73 }
74 //***************************************************************************************************************
75 int ChimeraCheckRDP::getChimeras(Sequence* query) {
76         try {
77                 
78                 IS.clear();
79                                 
80                 querySeq = query;
81                         
82                 closest = templateDB->findClosestSequence(query);  
83         
84                 IS = findIS(); 
85                                         
86                 //determine chimera report cutoff - window score above 95%
87                 //getCutoff();  - not very acurate predictor
88                 
89                 return 0;
90         }
91         catch(exception& e) {
92                 errorOut(e, "ChimeraCheckRDP", "getChimeras");
93                 exit(1);
94         }
95 }
96 //***************************************************************************************************************
97 vector<sim> ChimeraCheckRDP::findIS() {
98         try {
99                 
100                 
101                 vector< map<int, int> > queryKmerInfo;  //vector of maps - each entry in the vector is a map of the kmers up to that spot in the unaligned seq
102                                                                                                 //example:  seqKmerInfo[50] = map containing the kmers found in the first 50 + kmersize characters of ecoli.
103                                                                                                 //i chose to store the kmers numbers in a map so you wouldn't have to check for dupilcate entries and could easily find the 
104                                                                                                 //kmers 2 seqs had in common.  There may be a better way to do this thats why I am leaving so many comments...
105                 vector< map<int, int> > subjectKmerInfo;
106                 
107                 vector<sim>  isValues;
108                 string queryName = querySeq->getName();
109                 string seq = querySeq->getUnaligned();
110                 
111                 queryKmerInfo = kmer->getKmerCounts(seq);
112                 subjectKmerInfo = kmer->getKmerCounts(closest.getUnaligned());
113                 
114                 //find total kmers you have in common with closest[query] by looking at the last entry in the vector of maps for each
115                 int nTotal = calcKmers(queryKmerInfo[(queryKmerInfo.size()-1)], subjectKmerInfo[(subjectKmerInfo.size()-1)]);
116
117                 //you don't want the starting point to be virtually at hte end so move it in 10%
118                 int start = seq.length() / 10;
119                         
120                 //for each window
121                 for (int m = start; m < (seq.length() - start); m+=increment) {
122                         
123                         if ((m - kmerSize) < 0)  { mothurOut("Your sequence is too short for your kmerSize."); mothurOutEndLine(); exit(1); }
124                         
125                         sim temp;
126                         
127                         string fragLeft = seq.substr(0, m);  //left side of breakpoint
128                         string fragRight = seq.substr(m);  //right side of breakpoint
129                         
130                         //make a sequence of the left side and right side
131                         Sequence* left = new Sequence(queryName, fragLeft);
132                         Sequence* right = new Sequence(queryName, fragRight);
133                         
134                         //find seqs closest to each fragment
135                         Sequence closestLeft = templateDB->findClosestSequence(left); 
136         
137                         Sequence closestRight = templateDB->findClosestSequence(right); 
138                 
139                         //get kmerinfo for the closest left
140                         vector< map<int, int> > closeLeftKmerInfo = kmer->getKmerCounts(closestLeft.getUnaligned());
141                         
142                         //get kmerinfo for the closest right
143                         vector< map<int, int> > closeRightKmerInfo = kmer->getKmerCounts(closestRight.getUnaligned());
144                         
145                         //right side is tricky - since the counts grow on eachother to find the correct counts of only the right side you must subtract the counts of the left side
146                         //iterate through left sides map to subtract the number of times you saw things before you got the the right side
147                         map<int, int> rightside = queryKmerInfo[queryKmerInfo.size()-1];
148                         for (map<int, int>::iterator itleft = queryKmerInfo[m-kmerSize].begin(); itleft != queryKmerInfo[m-kmerSize].end(); itleft++) {
149                                 int howManyTotal = queryKmerInfo[queryKmerInfo.size()-1][itleft->first];   //times that kmer was seen in total
150
151                                 //itleft->second is times it was seen in left side, so howmanytotal - leftside should give you right side
152                                 int howmanyright = howManyTotal - itleft->second;
153                                 
154                                 //if any were seen just on the left erase
155                                 if (howmanyright == 0) {
156                                         rightside.erase(itleft->first);
157                                 }
158                         }
159                         
160                         map<int, int> closerightside = closeRightKmerInfo[closeRightKmerInfo.size()-1];
161                         for (map<int, int>::iterator itright = closeRightKmerInfo[m-kmerSize].begin(); itright != closeRightKmerInfo[m-kmerSize].end(); itright++) {
162                                 int howManyTotal = closeRightKmerInfo[(closeRightKmerInfo.size()-1)][itright->first];   //times that kmer was seen in total
163
164                                 //itleft->second is times it was seen in left side, so howmanytotal - leftside should give you right side
165                                 int howmanyright = howManyTotal - itright->second;
166                                 
167                                 //if any were seen just on the left erase
168                                 if (howmanyright == 0) {
169                                         closerightside.erase(itright->first);
170                                 }
171                         }
172
173                         
174                         int nLeft = calcKmers(closeLeftKmerInfo[m-kmerSize], queryKmerInfo[m-kmerSize]);
175
176                         int nRight = calcKmers(closerightside, rightside);
177
178                         int is = nLeft + nRight - nTotal;
179
180                         //save IS, leftparent, rightparent, breakpoint
181                         temp.leftParent = closestLeft.getName();
182                         temp.rightParent = closestRight.getName();
183                         temp.score = is;
184                         temp.midpoint = m;
185                         
186                         isValues.push_back(temp);
187                         
188                         delete left;
189                         delete right;
190                 }
191                 
192                 return isValues;
193         
194         }
195         catch(exception& e) {
196                 errorOut(e, "ChimeraCheckRDP", "findIS");
197                 exit(1);
198         }
199 }
200 //***************************************************************************************************************
201 void ChimeraCheckRDP::readName(string namefile) {
202         try{
203                 ifstream in;
204                 openInputFile(namefile, in);
205                 string name;
206                 
207                 while (!in.eof()) {
208                         
209                         in >> name;
210                         
211                         names[name] = name;
212                         
213                         gobble(in);
214                 }
215         
216         }
217         catch(exception& e) {
218                 errorOut(e, "ChimeraCheckRDP", "readName");
219                 exit(1);
220         }
221 }
222
223 //***************************************************************************************************************
224 //find the smaller map and iterate through it and count kmers in common
225 int ChimeraCheckRDP::calcKmers(map<int, int> query, map<int, int> subject) {
226         try{
227                 
228                 int common = 0;
229                 map<int, int>::iterator small;
230                 map<int, int>::iterator large;
231                 
232                 if (query.size() < subject.size()) {
233                 
234                         for (small = query.begin(); small != query.end(); small++) {
235                                 large = subject.find(small->first);
236                                 
237                                 //if you found it they have that kmer in common
238                                 if (large != subject.end()) {   common++;       }
239                         }
240                         
241                 }else { 
242                  
243                         for (small = subject.begin(); small != subject.end(); small++) {
244                                 large = query.find(small->first);
245                                 
246                                 //if you found it they have that kmer in common
247                                 if (large != query.end()) {             common++;        }
248                         }
249                 }
250                 
251                 return common;
252                 
253         }
254         catch(exception& e) {
255                 errorOut(e, "ChimeraCheckRDP", "calcKmers");
256                 exit(1);
257         }
258 }
259
260 //***************************************************************************************************************
261 void ChimeraCheckRDP::makeSVGpic(vector<sim> info) {
262         try{
263                 
264                 string file = outputDir + querySeq->getName() + ".chimeracheck.svg";
265                 ofstream outsvg;
266                 openOutputFile(file, outsvg);
267                 
268                 int width = (info.size()*5) + 150;
269                 
270                 outsvg << "<svg xmlns:svg=\"http://www.w3.org/2000/svg\" xmlns=\"http://www.w3.org/2000/svg\" width=\"100%\" height=\"100%\" viewBox=\"0 0 700 " + toString(width) + "\">\n";
271                 outsvg << "<g>\n";
272                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"25\">Plotted IS values for " + querySeq->getName() + "</text>\n";
273                 
274                 outsvg <<  "<line x1=\"75\" y1=\"600\" x2=\"" + toString((info.size()*5) + 75) + "\" y2=\"600\" stroke=\"black\" stroke-width=\"2\"/>\n";  
275                 outsvg <<  "<line x1=\"75\" y1=\"600\" x2=\"75\" y2=\"125\" stroke=\"black\" stroke-width=\"2\"/>\n";
276                 
277                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"80\" y=\"620\">" + toString(info[0].midpoint) + "</text>\n";
278                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString((info.size()*5) + 75) + "\" y=\"620\">" + toString(info[info.size()-1].midpoint) + "</text>\n";
279                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"650\">Base Positions</text>\n";
280                 
281                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"580\">0</text>\n";
282                 
283                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"350\">IS</text>\n";
284                 
285                 
286                 //find max is score
287                 float biggest = 0.0;
288                 for (int i = 0; i < info.size(); i++) {
289                         if (info[i].score > biggest)  {
290                                 biggest = info[i].score;
291                         }
292                 }
293                 
294                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"135\">" + toString(biggest) + "</text>\n";
295                 
296                 int scaler2 = 500 / biggest;
297                 
298                 
299                 outsvg << "<polyline fill=\"none\" stroke=\"red\" stroke-width=\"2\" points=\"";
300                 //160,200 180,230 200,210 234,220\"/> "; 
301                 for (int i = 0; i < info.size(); i++) {
302                         if(info[i].score < 0) { info[i].score = 0; }
303                         outsvg << ((i*5) + 75) << "," << (600 - (info[i].score * scaler2)) << " ";
304                 }
305                 
306                 outsvg << "\"/> ";
307                 outsvg << "</g>\n</svg>\n";
308                 
309                 outsvg.close();
310
311         }
312         catch(exception& e) {
313                 errorOut(e, "ChimeraCheckRDP", "makeSVGpic");
314                 exit(1);
315         }
316 }
317 //***************************************************************************************************************
318
319