]> git.donarmstrong.com Git - mothur.git/blob - chimeracheckrdp.cpp
a few modifications for 1.9
[mothur.git] / chimeracheckrdp.cpp
1 /*
2  *  chimeracheckrdp.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/8/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeracheckrdp.h"
11                 
12 //***************************************************************************************************************
13 ChimeraCheckRDP::ChimeraCheckRDP(string filename, string temp, string n, bool s, int inc, int k, string o) : Chimera() { 
14         try {
15                 fastafile = filename; 
16                 templateFileName = temp;  
17                 name = n;
18                 svg = s;
19                 increment = inc;
20                 kmerSize = k;
21                 outputDir = o; 
22                 
23                 templateDB = new AlignmentDB(templateFileName, "kmer", kmerSize, 0.0,0.0,0.0,0.0);
24                 m->mothurOutEndLine();
25                 
26                 kmer = new Kmer(kmerSize);
27                 
28                 if (name != "") { 
29                         readName(name);  //fills name map with names of seqs the user wants to have .svg for.  
30                 }
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "ChimeraCheckRDP", "ChimeraCheckRDP");
34                 exit(1);
35         }
36 }
37 //***************************************************************************************************************
38
39 ChimeraCheckRDP::~ChimeraCheckRDP() {
40         try {
41                 delete templateDB;
42                 delete kmer;
43         }
44         catch(exception& e) {
45                 m->errorOut(e, "ChimeraCheckRDP", "~ChimeraCheckRDP");
46                 exit(1);
47         }
48 }       
49 //***************************************************************************************************************
50 int ChimeraCheckRDP::print(ostream& out, ostream& outAcc) {
51         try {
52                 
53                 m->mothurOut("Processing: " + querySeq->getName()); m->mothurOutEndLine();
54                 
55                 out << querySeq->getName() << endl;
56                 out << "IS scores: " << '\t';
57                         
58                 for (int k = 0; k < IS.size(); k++) {
59                         out << IS[k].score << '\t'; 
60                 }
61                 out << endl;
62                 
63                 if (svg) {
64                         if (name != "") { //if user has specific names
65                                 map<string, string>::iterator it = names.find(querySeq->getName());
66                                 
67                                 if (it != names.end()) { //user wants pic of this
68                                         makeSVGpic(IS);  //zeros out negative results
69                                 }
70                         }else{//output them all
71                                 makeSVGpic(IS);  //zeros out negative results
72                         }
73                 }
74                 
75                 return 0;
76         }
77         catch(exception& e) {
78                 m->errorOut(e, "ChimeraCheckRDP", "print");
79                 exit(1);
80         }
81 }
82 #ifdef USE_MPI
83 //***************************************************************************************************************
84 int ChimeraCheckRDP::print(MPI_File& out, MPI_File& outAcc) {
85         try {
86                 
87                 cout << "Processing: " << querySeq->getName() << endl; 
88                 
89                 string outString = "";
90                 
91                 outString += querySeq->getName() + "\nIS scores: \t";
92                         
93                 for (int k = 0; k < IS.size(); k++) {
94                         outString += toString(IS[k].score)  + "\t"; 
95                 }
96                 outString += "\n";
97                 
98                 MPI_Status status;
99                 int length = outString.length();
100                 char* buf = new char[length];
101                 memcpy(buf, outString.c_str(), length);
102                                 
103                 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
104                 delete buf;
105
106                 if (svg) {
107                         if (name != "") { //if user has specific names
108                                 map<string, string>::iterator it = names.find(querySeq->getName());
109                                 
110                                 if (it != names.end()) { //user wants pic of this
111                                         makeSVGpic(IS);  //zeros out negative results
112                                 }
113                         }else{//output them all
114                                 makeSVGpic(IS);  //zeros out negative results
115                         }
116                 }
117                 
118                 return 0;
119         }
120         catch(exception& e) {
121                 m->errorOut(e, "ChimeraCheckRDP", "print");
122                 exit(1);
123         }
124 }
125 #endif
126 //***************************************************************************************************************
127 int ChimeraCheckRDP::getChimeras(Sequence* query) {
128         try {
129                 
130                 IS.clear();
131                                 
132                 querySeq = query;
133                         
134                 closest = templateDB->findClosestSequence(query);  
135         
136                 IS = findIS(); 
137                                         
138                 //determine chimera report cutoff - window score above 95%
139                 //getCutoff();  - not very acurate predictor
140                 
141                 return 0;
142         }
143         catch(exception& e) {
144                 m->errorOut(e, "ChimeraCheckRDP", "getChimeras");
145                 exit(1);
146         }
147 }
148 //***************************************************************************************************************
149 vector<sim> ChimeraCheckRDP::findIS() {
150         try {
151                 
152                 
153                 vector< map<int, int> > queryKmerInfo;  //vector of maps - each entry in the vector is a map of the kmers up to that spot in the unaligned seq
154                                                                                                 //example:  seqKmerInfo[50] = map containing the kmers found in the first 50 + kmersize characters of ecoli.
155                                                                                                 //i chose to store the kmers numbers in a map so you wouldn't have to check for dupilcate entries and could easily find the 
156                                                                                                 //kmers 2 seqs had in common.  There may be a better way to do this thats why I am leaving so many comments...
157                 vector< map<int, int> > subjectKmerInfo;
158                 
159                 vector<sim>  isValues;
160                 string queryName = querySeq->getName();
161                 string seq = querySeq->getUnaligned();
162                 
163                 queryKmerInfo = kmer->getKmerCounts(seq);
164                 subjectKmerInfo = kmer->getKmerCounts(closest.getUnaligned());
165                 
166                 //find total kmers you have in common with closest[query] by looking at the last entry in the vector of maps for each
167                 int nTotal = calcKmers(queryKmerInfo[(queryKmerInfo.size()-1)], subjectKmerInfo[(subjectKmerInfo.size()-1)]);
168
169                 //you don't want the starting point to be virtually at hte end so move it in 10%
170                 int start = seq.length() / 10;
171                         
172                 //for each window
173                 for (int f = start; f < (seq.length() - start); f+=increment) {
174                 
175                         if (m->control_pressed) { return isValues; }
176                         
177                         if ((f - kmerSize) < 0)  { m->mothurOut("Your sequence is too short for your kmerSize."); m->mothurOutEndLine(); exit(1); }
178                         
179                         sim temp;
180                         
181                         string fragLeft = seq.substr(0, f);  //left side of breakpoint
182                         string fragRight = seq.substr(f);  //right side of breakpoint
183                         
184                         //make a sequence of the left side and right side
185                         Sequence* left = new Sequence(queryName, fragLeft);
186                         Sequence* right = new Sequence(queryName, fragRight);
187                         
188                         //find seqs closest to each fragment
189                         Sequence closestLeft = templateDB->findClosestSequence(left); 
190         
191                         Sequence closestRight = templateDB->findClosestSequence(right); 
192                 
193                         //get kmerinfo for the closest left
194                         vector< map<int, int> > closeLeftKmerInfo = kmer->getKmerCounts(closestLeft.getUnaligned());
195                         
196                         //get kmerinfo for the closest right
197                         vector< map<int, int> > closeRightKmerInfo = kmer->getKmerCounts(closestRight.getUnaligned());
198                         
199                         //right side is tricky - since the counts grow on eachother to find the correct counts of only the right side you must subtract the counts of the left side
200                         //iterate through left sides map to subtract the number of times you saw things before you got the the right side
201                         map<int, int> rightside = queryKmerInfo[queryKmerInfo.size()-1];
202                         for (map<int, int>::iterator itleft = queryKmerInfo[f-kmerSize].begin(); itleft != queryKmerInfo[f-kmerSize].end(); itleft++) {
203                                 int howManyTotal = queryKmerInfo[queryKmerInfo.size()-1][itleft->first];   //times that kmer was seen in total
204
205                                 //itleft->second is times it was seen in left side, so howmanytotal - leftside should give you right side
206                                 int howmanyright = howManyTotal - itleft->second;
207                                 
208                                 //if any were seen just on the left erase
209                                 if (howmanyright == 0) {
210                                         rightside.erase(itleft->first);
211                                 }
212                         }
213                         
214                         map<int, int> closerightside = closeRightKmerInfo[closeRightKmerInfo.size()-1];
215                         for (map<int, int>::iterator itright = closeRightKmerInfo[f-kmerSize].begin(); itright != closeRightKmerInfo[f-kmerSize].end(); itright++) {
216                                 int howManyTotal = closeRightKmerInfo[(closeRightKmerInfo.size()-1)][itright->first];   //times that kmer was seen in total
217
218                                 //itleft->second is times it was seen in left side, so howmanytotal - leftside should give you right side
219                                 int howmanyright = howManyTotal - itright->second;
220                                 
221                                 //if any were seen just on the left erase
222                                 if (howmanyright == 0) {
223                                         closerightside.erase(itright->first);
224                                 }
225                         }
226
227                         
228                         int nLeft = calcKmers(closeLeftKmerInfo[f-kmerSize], queryKmerInfo[f-kmerSize]);
229
230                         int nRight = calcKmers(closerightside, rightside);
231
232                         int is = nLeft + nRight - nTotal;
233
234                         //save IS, leftparent, rightparent, breakpoint
235                         temp.leftParent = closestLeft.getName();
236                         temp.rightParent = closestRight.getName();
237                         temp.score = is;
238                         temp.midpoint = f;
239                         
240                         isValues.push_back(temp);
241                         
242                         delete left;
243                         delete right;
244                 }
245                 
246                 return isValues;
247         
248         }
249         catch(exception& e) {
250                 m->errorOut(e, "ChimeraCheckRDP", "findIS");
251                 exit(1);
252         }
253 }
254 //***************************************************************************************************************
255 void ChimeraCheckRDP::readName(string namefile) {
256         try{
257         
258                 string name;
259
260         #ifdef USE_MPI
261                 
262                 MPI_File inMPI;
263                 MPI_Offset size;
264                 MPI_Status status;
265
266                 //char* inFileName = new char[namefile.length()];
267                 //memcpy(inFileName, namefile.c_str(), namefile.length());
268                 
269                 char inFileName[1024];
270                 strcpy(inFileName, namefile.c_str());
271
272                 MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  
273                 MPI_File_get_size(inMPI, &size);
274
275                 //delete inFileName;
276
277                 char* buffer = new char[size];
278                 MPI_File_read(inMPI, buffer, size, MPI_CHAR, &status);
279
280                 string tempBuf = buffer;
281                 if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size);  }
282                 istringstream iss (tempBuf,istringstream::in);
283                 delete buffer;
284                 
285                 while(!iss.eof()) {
286                         iss >> name; gobble(iss);
287                         names[name] = name;
288                 }
289         
290                 MPI_File_close(&inMPI);
291                 
292         #else   
293         
294                 ifstream in;
295                 openInputFile(namefile, in);
296                                 
297                 while (!in.eof()) {
298                         in >> name; gobble(in);
299                         names[name] = name;
300                 }
301                 in.close();
302         
303         #endif
304         
305         }
306         catch(exception& e) {
307                 m->errorOut(e, "ChimeraCheckRDP", "readName");
308                 exit(1);
309         }
310 }
311
312 //***************************************************************************************************************
313 //find the smaller map and iterate through it and count kmers in common
314 int ChimeraCheckRDP::calcKmers(map<int, int> query, map<int, int> subject) {
315         try{
316                 
317                 int common = 0;
318                 map<int, int>::iterator small;
319                 map<int, int>::iterator large;
320                 
321                 if (query.size() < subject.size()) {
322                 
323                         for (small = query.begin(); small != query.end(); small++) {
324                                 large = subject.find(small->first);
325                                 
326                                 //if you found it they have that kmer in common
327                                 if (large != subject.end()) {   common++;       }
328                         }
329                         
330                 }else { 
331                  
332                         for (small = subject.begin(); small != subject.end(); small++) {
333                                 large = query.find(small->first);
334                                 
335                                 //if you found it they have that kmer in common
336                                 if (large != query.end()) {             common++;        }
337                         }
338                 }
339                 
340                 return common;
341                 
342         }
343         catch(exception& e) {
344                 m->errorOut(e, "ChimeraCheckRDP", "calcKmers");
345                 exit(1);
346         }
347 }
348 #ifdef USE_MPI
349 //***************************************************************************************************************
350 void ChimeraCheckRDP::makeSVGpic(vector<sim> info) {
351         try{
352                 
353                 string file = outputDir + querySeq->getName() + ".chimeracheck.svg";
354                 
355                 MPI_File outSVG;
356                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
357
358                 //char* FileName = new char[file.length()];
359                 //memcpy(FileName, file.c_str(), file.length());
360                 
361                 char FileName[1024];
362                 strcpy(FileName, file.c_str());
363
364                 MPI_File_open(MPI_COMM_SELF, FileName, outMode, MPI_INFO_NULL, &outSVG);  //comm, filename, mode, info, filepointer
365                 
366                 //delete FileName;
367
368                 int width = (info.size()*5) + 150;
369                 
370                 string outString = "";
371                 
372                 outString += "<svg xmlns:svg=\"http://www.w3.org/2000/svg\" xmlns=\"http://www.w3.org/2000/svg\" width=\"100%\" height=\"100%\" viewBox=\"0 0 700 " + toString(width) + "\">\n";
373                 outString += "<g>\n";
374                 outString += "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"25\">Plotted IS values for " + querySeq->getName() + "</text>\n";
375                 
376                 outString +=  "<line x1=\"75\" y1=\"600\" x2=\"" + toString((info.size()*5) + 75) + "\" y2=\"600\" stroke=\"black\" stroke-width=\"2\"/>\n";  
377                 outString +=  "<line x1=\"75\" y1=\"600\" x2=\"75\" y2=\"125\" stroke=\"black\" stroke-width=\"2\"/>\n";
378                 
379                 outString += "<text fill=\"black\" class=\"seri\" x=\"80\" y=\"620\">" + toString(info[0].midpoint) + "</text>\n";
380                 outString += "<text fill=\"black\" class=\"seri\" x=\"" + toString((info.size()*5) + 75) + "\" y=\"620\">" + toString(info[info.size()-1].midpoint) + "</text>\n";
381                 outString += "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"650\">Base Positions</text>\n";
382                 
383                 outString += "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"580\">0</text>\n";
384                 
385                 outString += "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"350\">IS</text>\n";
386                 
387                 
388                 //find max is score
389                 float biggest = 0.0;
390                 for (int i = 0; i < info.size(); i++) {
391                         if (info[i].score > biggest)  {
392                                 biggest = info[i].score;
393                         }
394                 }
395                 
396                 outString += "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"135\">" + toString(biggest) + "</text>\n";
397                 
398                 int scaler2 = 500 / biggest;
399                 
400                 
401                 outString += "<polyline fill=\"none\" stroke=\"red\" stroke-width=\"2\" points=\"";
402                 //160,200 180,230 200,210 234,220\"/> "; 
403                 for (int i = 0; i < info.size(); i++) {
404                         if(info[i].score < 0) { info[i].score = 0; }
405                         outString += toString(((i*5) + 75)) + "," + toString((600 - (info[i].score * scaler2))) + " ";
406                 }
407                 
408                 outString += "\"/> ";
409                 outString += "</g>\n</svg>\n";
410                 
411                 MPI_Status status;
412                 int length = outString.length();
413                 char* buf2 = new char[length];
414                 memcpy(buf2, outString.c_str(), length);
415                                 
416                 MPI_File_write(outSVG, buf2, length, MPI_CHAR, &status);
417                 delete buf2;
418                 
419                 MPI_File_close(&outSVG);
420
421         }
422         catch(exception& e) {
423                 m->errorOut(e, "ChimeraCheckRDP", "makeSVGpic");
424                 exit(1);
425         }
426 }
427 #else
428 //***************************************************************************************************************
429 void ChimeraCheckRDP::makeSVGpic(vector<sim> info) {
430         try{
431                 
432                 string file = outputDir + querySeq->getName() + ".chimeracheck.svg";
433                 ofstream outsvg;
434                 openOutputFile(file, outsvg);
435                 
436                 int width = (info.size()*5) + 150;
437                 
438                 outsvg << "<svg xmlns:svg=\"http://www.w3.org/2000/svg\" xmlns=\"http://www.w3.org/2000/svg\" width=\"100%\" height=\"100%\" viewBox=\"0 0 700 " + toString(width) + "\">\n";
439                 outsvg << "<g>\n";
440                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"25\">Plotted IS values for " + querySeq->getName() + "</text>\n";
441                 
442                 outsvg <<  "<line x1=\"75\" y1=\"600\" x2=\"" + toString((info.size()*5) + 75) + "\" y2=\"600\" stroke=\"black\" stroke-width=\"2\"/>\n";  
443                 outsvg <<  "<line x1=\"75\" y1=\"600\" x2=\"75\" y2=\"125\" stroke=\"black\" stroke-width=\"2\"/>\n";
444                 
445                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"80\" y=\"620\">" + toString(info[0].midpoint) + "</text>\n";
446                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString((info.size()*5) + 75) + "\" y=\"620\">" + toString(info[info.size()-1].midpoint) + "</text>\n";
447                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"650\">Base Positions</text>\n";
448                 
449                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"580\">0</text>\n";
450                 
451                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"350\">IS</text>\n";
452                 
453                 
454                 //find max is score
455                 float biggest = 0.0;
456                 for (int i = 0; i < info.size(); i++) {
457                         if (info[i].score > biggest)  {
458                                 biggest = info[i].score;
459                         }
460                 }
461                 
462                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"135\">" + toString(biggest) + "</text>\n";
463                 
464                 int scaler2 = 500 / biggest;
465                 
466                 
467                 outsvg << "<polyline fill=\"none\" stroke=\"red\" stroke-width=\"2\" points=\"";
468                 //160,200 180,230 200,210 234,220\"/> "; 
469                 for (int i = 0; i < info.size(); i++) {
470                         if(info[i].score < 0) { info[i].score = 0; }
471                         outsvg << ((i*5) + 75) << "," << (600 - (info[i].score * scaler2)) << " ";
472                 }
473                 
474                 outsvg << "\"/> ";
475                 outsvg << "</g>\n</svg>\n";
476                 
477                 outsvg.close();
478
479         }
480         catch(exception& e) {
481                 m->errorOut(e, "ChimeraCheckRDP", "makeSVGpic");
482                 exit(1);
483         }
484 }
485 #endif
486 //***************************************************************************************************************/
487
488