]> git.donarmstrong.com Git - mothur.git/blob - chimeracheckrdp.cpp
added checks for ^C to quit command instead of program
[mothur.git] / chimeracheckrdp.cpp
1 /*
2  *  chimeracheckrdp.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/8/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeracheckrdp.h"
11                 
12 //***************************************************************************************************************
13 ChimeraCheckRDP::ChimeraCheckRDP(string filename,  string o) {  fastafile = filename;  outputDir = o;  }
14 //***************************************************************************************************************
15
16 ChimeraCheckRDP::~ChimeraCheckRDP() {
17         try {
18                 delete templateDB;
19                 delete kmer;
20         }
21         catch(exception& e) {
22                 m->errorOut(e, "ChimeraCheckRDP", "~AlignSim");
23                 exit(1);
24         }
25 }       
26 //***************************************************************************************************************
27 int ChimeraCheckRDP::print(ostream& out, ostream& outAcc) {
28         try {
29                 
30                 m->mothurOut("Processing: " + querySeq->getName()); m->mothurOutEndLine();
31                 
32                 out << querySeq->getName() << endl;
33                 out << "IS scores: " << '\t';
34                         
35                 for (int k = 0; k < IS.size(); k++) {
36                         out << IS[k].score << '\t'; 
37                 }
38                 out << endl;
39                 
40                 if (svg) {
41                         if (name != "") { //if user has specific names
42                                 map<string, string>::iterator it = names.find(querySeq->getName());
43                                 
44                                 if (it != names.end()) { //user wants pic of this
45                                         makeSVGpic(IS);  //zeros out negative results
46                                 }
47                         }else{//output them all
48                                 makeSVGpic(IS);  //zeros out negative results
49                         }
50                 }
51                 
52                 return 0;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "ChimeraCheckRDP", "print");
56                 exit(1);
57         }
58 }
59 //***************************************************************************************************************
60 int ChimeraCheckRDP::doPrep() {
61         try {
62                 templateDB = new AlignmentDB(templateFileName, "kmer", kmerSize, 0.0,0.0,0.0,0.0);
63                 m->mothurOutEndLine();
64                 
65                 kmer = new Kmer(kmerSize);
66                 
67                 if (name != "") { 
68                         readName(name);  //fills name map with names of seqs the user wants to have .svg for.  
69                 }
70                 
71                 return 0;
72         }
73         catch(exception& e) {
74                 m->errorOut(e, "ChimeraCheckRDP", "doPrep");
75                 exit(1);
76         }
77 }
78 //***************************************************************************************************************
79 int ChimeraCheckRDP::getChimeras(Sequence* query) {
80         try {
81                 
82                 IS.clear();
83                                 
84                 querySeq = query;
85                         
86                 closest = templateDB->findClosestSequence(query);  
87         
88                 IS = findIS(); 
89                                         
90                 //determine chimera report cutoff - window score above 95%
91                 //getCutoff();  - not very acurate predictor
92                 
93                 return 0;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "ChimeraCheckRDP", "getChimeras");
97                 exit(1);
98         }
99 }
100 //***************************************************************************************************************
101 vector<sim> ChimeraCheckRDP::findIS() {
102         try {
103                 
104                 
105                 vector< map<int, int> > queryKmerInfo;  //vector of maps - each entry in the vector is a map of the kmers up to that spot in the unaligned seq
106                                                                                                 //example:  seqKmerInfo[50] = map containing the kmers found in the first 50 + kmersize characters of ecoli.
107                                                                                                 //i chose to store the kmers numbers in a map so you wouldn't have to check for dupilcate entries and could easily find the 
108                                                                                                 //kmers 2 seqs had in common.  There may be a better way to do this thats why I am leaving so many comments...
109                 vector< map<int, int> > subjectKmerInfo;
110                 
111                 vector<sim>  isValues;
112                 string queryName = querySeq->getName();
113                 string seq = querySeq->getUnaligned();
114                 
115                 queryKmerInfo = kmer->getKmerCounts(seq);
116                 subjectKmerInfo = kmer->getKmerCounts(closest.getUnaligned());
117                 
118                 //find total kmers you have in common with closest[query] by looking at the last entry in the vector of maps for each
119                 int nTotal = calcKmers(queryKmerInfo[(queryKmerInfo.size()-1)], subjectKmerInfo[(subjectKmerInfo.size()-1)]);
120
121                 //you don't want the starting point to be virtually at hte end so move it in 10%
122                 int start = seq.length() / 10;
123                         
124                 //for each window
125                 for (int f = start; f < (seq.length() - start); f+=increment) {
126                         
127                         if ((f - kmerSize) < 0)  { m->mothurOut("Your sequence is too short for your kmerSize."); m->mothurOutEndLine(); exit(1); }
128                         
129                         sim temp;
130                         
131                         string fragLeft = seq.substr(0, f);  //left side of breakpoint
132                         string fragRight = seq.substr(f);  //right side of breakpoint
133                         
134                         //make a sequence of the left side and right side
135                         Sequence* left = new Sequence(queryName, fragLeft);
136                         Sequence* right = new Sequence(queryName, fragRight);
137                         
138                         //find seqs closest to each fragment
139                         Sequence closestLeft = templateDB->findClosestSequence(left); 
140         
141                         Sequence closestRight = templateDB->findClosestSequence(right); 
142                 
143                         //get kmerinfo for the closest left
144                         vector< map<int, int> > closeLeftKmerInfo = kmer->getKmerCounts(closestLeft.getUnaligned());
145                         
146                         //get kmerinfo for the closest right
147                         vector< map<int, int> > closeRightKmerInfo = kmer->getKmerCounts(closestRight.getUnaligned());
148                         
149                         //right side is tricky - since the counts grow on eachother to find the correct counts of only the right side you must subtract the counts of the left side
150                         //iterate through left sides map to subtract the number of times you saw things before you got the the right side
151                         map<int, int> rightside = queryKmerInfo[queryKmerInfo.size()-1];
152                         for (map<int, int>::iterator itleft = queryKmerInfo[f-kmerSize].begin(); itleft != queryKmerInfo[f-kmerSize].end(); itleft++) {
153                                 int howManyTotal = queryKmerInfo[queryKmerInfo.size()-1][itleft->first];   //times that kmer was seen in total
154
155                                 //itleft->second is times it was seen in left side, so howmanytotal - leftside should give you right side
156                                 int howmanyright = howManyTotal - itleft->second;
157                                 
158                                 //if any were seen just on the left erase
159                                 if (howmanyright == 0) {
160                                         rightside.erase(itleft->first);
161                                 }
162                         }
163                         
164                         map<int, int> closerightside = closeRightKmerInfo[closeRightKmerInfo.size()-1];
165                         for (map<int, int>::iterator itright = closeRightKmerInfo[f-kmerSize].begin(); itright != closeRightKmerInfo[f-kmerSize].end(); itright++) {
166                                 int howManyTotal = closeRightKmerInfo[(closeRightKmerInfo.size()-1)][itright->first];   //times that kmer was seen in total
167
168                                 //itleft->second is times it was seen in left side, so howmanytotal - leftside should give you right side
169                                 int howmanyright = howManyTotal - itright->second;
170                                 
171                                 //if any were seen just on the left erase
172                                 if (howmanyright == 0) {
173                                         closerightside.erase(itright->first);
174                                 }
175                         }
176
177                         
178                         int nLeft = calcKmers(closeLeftKmerInfo[f-kmerSize], queryKmerInfo[f-kmerSize]);
179
180                         int nRight = calcKmers(closerightside, rightside);
181
182                         int is = nLeft + nRight - nTotal;
183
184                         //save IS, leftparent, rightparent, breakpoint
185                         temp.leftParent = closestLeft.getName();
186                         temp.rightParent = closestRight.getName();
187                         temp.score = is;
188                         temp.midpoint = f;
189                         
190                         isValues.push_back(temp);
191                         
192                         delete left;
193                         delete right;
194                 }
195                 
196                 return isValues;
197         
198         }
199         catch(exception& e) {
200                 m->errorOut(e, "ChimeraCheckRDP", "findIS");
201                 exit(1);
202         }
203 }
204 //***************************************************************************************************************
205 void ChimeraCheckRDP::readName(string namefile) {
206         try{
207                 ifstream in;
208                 openInputFile(namefile, in);
209                 string name;
210                 
211                 while (!in.eof()) {
212                         
213                         in >> name;
214                         
215                         names[name] = name;
216                         
217                         gobble(in);
218                 }
219         
220         }
221         catch(exception& e) {
222                 m->errorOut(e, "ChimeraCheckRDP", "readName");
223                 exit(1);
224         }
225 }
226
227 //***************************************************************************************************************
228 //find the smaller map and iterate through it and count kmers in common
229 int ChimeraCheckRDP::calcKmers(map<int, int> query, map<int, int> subject) {
230         try{
231                 
232                 int common = 0;
233                 map<int, int>::iterator small;
234                 map<int, int>::iterator large;
235                 
236                 if (query.size() < subject.size()) {
237                 
238                         for (small = query.begin(); small != query.end(); small++) {
239                                 large = subject.find(small->first);
240                                 
241                                 //if you found it they have that kmer in common
242                                 if (large != subject.end()) {   common++;       }
243                         }
244                         
245                 }else { 
246                  
247                         for (small = subject.begin(); small != subject.end(); small++) {
248                                 large = query.find(small->first);
249                                 
250                                 //if you found it they have that kmer in common
251                                 if (large != query.end()) {             common++;        }
252                         }
253                 }
254                 
255                 return common;
256                 
257         }
258         catch(exception& e) {
259                 m->errorOut(e, "ChimeraCheckRDP", "calcKmers");
260                 exit(1);
261         }
262 }
263
264 //***************************************************************************************************************
265 void ChimeraCheckRDP::makeSVGpic(vector<sim> info) {
266         try{
267                 
268                 string file = outputDir + querySeq->getName() + ".chimeracheck.svg";
269                 ofstream outsvg;
270                 openOutputFile(file, outsvg);
271                 
272                 int width = (info.size()*5) + 150;
273                 
274                 outsvg << "<svg xmlns:svg=\"http://www.w3.org/2000/svg\" xmlns=\"http://www.w3.org/2000/svg\" width=\"100%\" height=\"100%\" viewBox=\"0 0 700 " + toString(width) + "\">\n";
275                 outsvg << "<g>\n";
276                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"25\">Plotted IS values for " + querySeq->getName() + "</text>\n";
277                 
278                 outsvg <<  "<line x1=\"75\" y1=\"600\" x2=\"" + toString((info.size()*5) + 75) + "\" y2=\"600\" stroke=\"black\" stroke-width=\"2\"/>\n";  
279                 outsvg <<  "<line x1=\"75\" y1=\"600\" x2=\"75\" y2=\"125\" stroke=\"black\" stroke-width=\"2\"/>\n";
280                 
281                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"80\" y=\"620\">" + toString(info[0].midpoint) + "</text>\n";
282                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString((info.size()*5) + 75) + "\" y=\"620\">" + toString(info[info.size()-1].midpoint) + "</text>\n";
283                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"650\">Base Positions</text>\n";
284                 
285                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"580\">0</text>\n";
286                 
287                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"350\">IS</text>\n";
288                 
289                 
290                 //find max is score
291                 float biggest = 0.0;
292                 for (int i = 0; i < info.size(); i++) {
293                         if (info[i].score > biggest)  {
294                                 biggest = info[i].score;
295                         }
296                 }
297                 
298                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"135\">" + toString(biggest) + "</text>\n";
299                 
300                 int scaler2 = 500 / biggest;
301                 
302                 
303                 outsvg << "<polyline fill=\"none\" stroke=\"red\" stroke-width=\"2\" points=\"";
304                 //160,200 180,230 200,210 234,220\"/> "; 
305                 for (int i = 0; i < info.size(); i++) {
306                         if(info[i].score < 0) { info[i].score = 0; }
307                         outsvg << ((i*5) + 75) << "," << (600 - (info[i].score * scaler2)) << " ";
308                 }
309                 
310                 outsvg << "\"/> ";
311                 outsvg << "</g>\n</svg>\n";
312                 
313                 outsvg.close();
314
315         }
316         catch(exception& e) {
317                 m->errorOut(e, "ChimeraCheckRDP", "makeSVGpic");
318                 exit(1);
319         }
320 }
321 //***************************************************************************************************************
322
323