]> git.donarmstrong.com Git - mothur.git/blob - chimeracheckrdp.cpp
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / chimeracheckrdp.cpp
1 /*
2  *  chimeracheckrdp.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/8/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeracheckrdp.h"
11                 
12 //***************************************************************************************************************
13 ChimeraCheckRDP::ChimeraCheckRDP(string filename, string temp, string n, bool s, int inc, int k, string o) : Chimera() { 
14         try {
15                 fastafile = filename; 
16                 templateFileName = temp;  
17                 name = n;
18                 svg = s;
19                 increment = inc;
20                 kmerSize = k;
21                 outputDir = o; 
22                 
23                 templateDB = new AlignmentDB(templateFileName, "kmer", kmerSize, 0.0,0.0,0.0,0.0, rand());
24                 m->mothurOutEndLine();
25                 
26                 kmer = new Kmer(kmerSize);
27                 
28                 if (name != "") { 
29                         readName(name);  //fills name map with names of seqs the user wants to have .svg for.  
30                 }
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "ChimeraCheckRDP", "ChimeraCheckRDP");
34                 exit(1);
35         }
36 }
37 //***************************************************************************************************************
38
39 ChimeraCheckRDP::~ChimeraCheckRDP() {
40         try {
41                 delete templateDB;
42                 delete kmer;
43         }
44         catch(exception& e) {
45                 m->errorOut(e, "ChimeraCheckRDP", "~ChimeraCheckRDP");
46                 exit(1);
47         }
48 }       
49 //***************************************************************************************************************
50 Sequence ChimeraCheckRDP::print(ostream& out, ostream& outAcc) {
51         try {
52                 
53                 m->mothurOut("Processing: " + querySeq->getName()); m->mothurOutEndLine();
54                 
55                 out << querySeq->getName() << endl;
56                 out << "IS scores: " << '\t';
57                         
58                 for (int k = 0; k < IS.size(); k++) {
59                         out << IS[k].score << '\t'; 
60                 }
61                 out << endl;
62                 
63                 if (svg) {
64                         if (name != "") { //if user has specific names
65                                 map<string, string>::iterator it = names.find(querySeq->getName());
66                                 
67                                 if (it != names.end()) { //user wants pic of this
68                                         makeSVGpic(IS);  //zeros out negative results
69                                 }
70                         }else{//output them all
71                                 makeSVGpic(IS);  //zeros out negative results
72                         }
73                 }
74                 
75                 return *querySeq;
76         }
77         catch(exception& e) {
78                 m->errorOut(e, "ChimeraCheckRDP", "print");
79                 exit(1);
80         }
81 }
82 #ifdef USE_MPI
83 //***************************************************************************************************************
84 Sequence ChimeraCheckRDP::print(MPI_File& out, MPI_File& outAcc) {
85         try {
86                 
87                 cout << "Processing: " << querySeq->getName() << endl; 
88                 
89                 string outString = "";
90                 
91                 outString += querySeq->getName() + "\nIS scores: \t";
92                         
93                 for (int k = 0; k < IS.size(); k++) {
94                         outString += toString(IS[k].score)  + "\t"; 
95                 }
96                 outString += "\n";
97                 
98                 MPI_Status status;
99                 int length = outString.length();
100                 char* buf = new char[length];
101                 memcpy(buf, outString.c_str(), length);
102                                 
103                 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
104                 delete buf;
105
106                 if (svg) {
107                         if (name != "") { //if user has specific names
108                                 map<string, string>::iterator it = names.find(querySeq->getName());
109                                 
110                                 if (it != names.end()) { //user wants pic of this
111                                         makeSVGpic(IS);  //zeros out negative results
112                                 }
113                         }else{//output them all
114                                 makeSVGpic(IS);  //zeros out negative results
115                         }
116                 }
117                 
118                 return *querySeq;
119         }
120         catch(exception& e) {
121                 m->errorOut(e, "ChimeraCheckRDP", "print");
122                 exit(1);
123         }
124 }
125 #endif
126 //***************************************************************************************************************
127 int ChimeraCheckRDP::getChimeras(Sequence* query) {
128         try {
129                 
130                 IS.clear();
131                                 
132                 querySeq = query;
133                         
134                 closest = templateDB->findClosestSequence(query);  
135         
136                 IS = findIS(); 
137                                         
138                 //determine chimera report cutoff - window score above 95%
139                 //getCutoff();  - not very acurate predictor
140                 
141                 return 0;
142         }
143         catch(exception& e) {
144                 m->errorOut(e, "ChimeraCheckRDP", "getChimeras");
145                 exit(1);
146         }
147 }
148 //***************************************************************************************************************
149 vector<sim> ChimeraCheckRDP::findIS() {
150         try {
151                 
152                 
153                 vector< map<int, int> > queryKmerInfo;  //vector of maps - each entry in the vector is a map of the kmers up to that spot in the unaligned seq
154                                                                                                 //example:  seqKmerInfo[50] = map containing the kmers found in the first 50 + kmersize characters of ecoli.
155                                                                                                 //i chose to store the kmers numbers in a map so you wouldn't have to check for dupilcate entries and could easily find the 
156                                                                                                 //kmers 2 seqs had in common.  There may be a better way to do this thats why I am leaving so many comments...
157                 vector< map<int, int> > subjectKmerInfo;
158                 
159                 vector<sim>  isValues;
160                 string queryName = querySeq->getName();
161                 string seq = querySeq->getUnaligned();
162                 
163                 queryKmerInfo = kmer->getKmerCounts(seq);
164                 subjectKmerInfo = kmer->getKmerCounts(closest.getUnaligned());
165                 
166                 //find total kmers you have in common with closest[query] by looking at the last entry in the vector of maps for each
167                 int nTotal = calcKmers(queryKmerInfo[(queryKmerInfo.size()-1)], subjectKmerInfo[(subjectKmerInfo.size()-1)]);
168
169                 //you don't want the starting point to be virtually at hte end so move it in 10%
170                 int start = seq.length() / 10;
171                         
172                 //for each window
173                 for (int f = start; f < (seq.length() - start); f+=increment) {
174                 
175                         if (m->control_pressed) { return isValues; }
176                         
177                         if ((f - kmerSize) < 0)  { m->mothurOut("Your sequence is too short for your kmerSize."); m->mothurOutEndLine(); exit(1); }
178                         
179                         sim temp;
180                         
181                         string fragLeft = seq.substr(0, f);  //left side of breakpoint
182                         string fragRight = seq.substr(f);  //right side of breakpoint
183                         
184                         //make a sequence of the left side and right side
185                         Sequence* left = new Sequence(queryName, fragLeft);
186                         Sequence* right = new Sequence(queryName, fragRight);
187                         
188                         //find seqs closest to each fragment
189                         Sequence closestLeft = templateDB->findClosestSequence(left); 
190         
191                         Sequence closestRight = templateDB->findClosestSequence(right); 
192                 
193                         //get kmerinfo for the closest left
194                         vector< map<int, int> > closeLeftKmerInfo = kmer->getKmerCounts(closestLeft.getUnaligned());
195                         
196                         //get kmerinfo for the closest right
197                         vector< map<int, int> > closeRightKmerInfo = kmer->getKmerCounts(closestRight.getUnaligned());
198                         
199                         //right side is tricky - since the counts grow on eachother to find the correct counts of only the right side you must subtract the counts of the left side
200                         //iterate through left sides map to subtract the number of times you saw things before you got the the right side
201                         map<int, int> rightside = queryKmerInfo[queryKmerInfo.size()-1];
202                         for (map<int, int>::iterator itleft = queryKmerInfo[f-kmerSize].begin(); itleft != queryKmerInfo[f-kmerSize].end(); itleft++) {
203                                 int howManyTotal = queryKmerInfo[queryKmerInfo.size()-1][itleft->first];   //times that kmer was seen in total
204
205                                 //itleft->second is times it was seen in left side, so howmanytotal - leftside should give you right side
206                                 int howmanyright = howManyTotal - itleft->second;
207                                 
208                                 //if any were seen just on the left erase
209                                 if (howmanyright == 0) {
210                                         rightside.erase(itleft->first);
211                                 }
212                         }
213                         
214                         map<int, int> closerightside = closeRightKmerInfo[closeRightKmerInfo.size()-1];
215                         for (map<int, int>::iterator itright = closeRightKmerInfo[f-kmerSize].begin(); itright != closeRightKmerInfo[f-kmerSize].end(); itright++) {
216                                 int howManyTotal = closeRightKmerInfo[(closeRightKmerInfo.size()-1)][itright->first];   //times that kmer was seen in total
217
218                                 //itleft->second is times it was seen in left side, so howmanytotal - leftside should give you right side
219                                 int howmanyright = howManyTotal - itright->second;
220                                 
221                                 //if any were seen just on the left erase
222                                 if (howmanyright == 0) {
223                                         closerightside.erase(itright->first);
224                                 }
225                         }
226
227                         
228                         int nLeft = calcKmers(closeLeftKmerInfo[f-kmerSize], queryKmerInfo[f-kmerSize]);
229
230                         int nRight = calcKmers(closerightside, rightside);
231
232                         int is = nLeft + nRight - nTotal;
233
234                         //save IS, leftparent, rightparent, breakpoint
235                         temp.leftParent = closestLeft.getName();
236                         temp.rightParent = closestRight.getName();
237                         temp.score = is;
238                         temp.midpoint = f;
239                         
240                         isValues.push_back(temp);
241                         
242                         delete left;
243                         delete right;
244                 }
245                 
246                 return isValues;
247         
248         }
249         catch(exception& e) {
250                 m->errorOut(e, "ChimeraCheckRDP", "findIS");
251                 exit(1);
252         }
253 }
254 //***************************************************************************************************************
255 void ChimeraCheckRDP::readName(string namefile) {
256         try{
257         
258                 string name;
259
260         #ifdef USE_MPI
261                 
262                 MPI_File inMPI;
263                 MPI_Offset size;
264                 MPI_Status status;
265
266                 //char* inFileName = new char[namefile.length()];
267                 //memcpy(inFileName, namefile.c_str(), namefile.length());
268                 
269                 char inFileName[1024];
270                 strcpy(inFileName, namefile.c_str());
271
272                 MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  
273                 MPI_File_get_size(inMPI, &size);
274
275                 //delete inFileName;
276
277                 char* buffer = new char[size];
278                 MPI_File_read(inMPI, buffer, size, MPI_CHAR, &status);
279
280                 string tempBuf = buffer;
281                 if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size);  }
282                 istringstream iss (tempBuf,istringstream::in);
283                 delete buffer;
284                 
285                 while(!iss.eof()) {
286                         iss >> name; m->gobble(iss);
287                         names[name] = name;
288                 }
289         
290                 MPI_File_close(&inMPI);
291                 
292         #else   
293         
294                 ifstream in;
295                 m->openInputFile(namefile, in);
296                                 
297                 while (!in.eof()) {
298                         in >> name; m->gobble(in);
299                         names[name] = name;
300                 }
301                 in.close();
302         
303         #endif
304         
305         }
306         catch(exception& e) {
307                 m->errorOut(e, "ChimeraCheckRDP", "readName");
308                 exit(1);
309         }
310 }
311
312 //***************************************************************************************************************
313 //find the smaller map and iterate through it and count kmers in common
314 int ChimeraCheckRDP::calcKmers(map<int, int> query, map<int, int> subject) {
315         try{
316                 
317                 int common = 0;
318                 
319                 map<int, int>::iterator smallone;
320                 map<int, int>::iterator largeone;
321
322                 if (query.size() < subject.size()) {
323                 
324                         for (smallone = query.begin(); smallone != query.end(); smallone++) {
325                                 largeone = subject.find(smallone->first);
326                                 
327                                 //if you found it they have that kmer in common
328                                 if (largeone != subject.end()) {        common++;       }
329                         }
330                         
331                 }else { 
332                  
333                         for (smallone = subject.begin(); smallone != subject.end(); smallone++) {
334                                 largeone = query.find(smallone->first);
335                                 
336                                 //if you found it they have that kmer in common
337                                 if (largeone != query.end()) {          common++;        }
338                         }
339                 }
340                 
341                 return common;
342                 
343         }
344         catch(exception& e) {
345                 m->errorOut(e, "ChimeraCheckRDP", "calcKmers");
346                 exit(1);
347         }
348 }
349 #ifdef USE_MPI
350 //***************************************************************************************************************
351 void ChimeraCheckRDP::makeSVGpic(vector<sim> info) {
352         try{
353                 
354                 string file = outputDir + querySeq->getName() + ".chimeracheck.svg";
355                 
356                 MPI_File outSVG;
357                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
358
359                 //char* FileName = new char[file.length()];
360                 //memcpy(FileName, file.c_str(), file.length());
361                 
362                 char FileName[1024];
363                 strcpy(FileName, file.c_str());
364
365                 MPI_File_open(MPI_COMM_SELF, FileName, outMode, MPI_INFO_NULL, &outSVG);  //comm, filename, mode, info, filepointer
366                 
367                 //delete FileName;
368
369                 int width = (info.size()*5) + 150;
370                 
371                 string outString = "";
372                 
373                 outString += "<svg xmlns:svg=\"http://www.w3.org/2000/svg\" xmlns=\"http://www.w3.org/2000/svg\" width=\"100%\" height=\"100%\" viewBox=\"0 0 700 " + toString(width) + "\">\n";
374                 outString += "<g>\n";
375                 outString += "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"25\">Plotted IS values for " + querySeq->getName() + "</text>\n";
376                 
377                 outString +=  "<line x1=\"75\" y1=\"600\" x2=\"" + toString((info.size()*5) + 75) + "\" y2=\"600\" stroke=\"black\" stroke-width=\"2\"/>\n";  
378                 outString +=  "<line x1=\"75\" y1=\"600\" x2=\"75\" y2=\"125\" stroke=\"black\" stroke-width=\"2\"/>\n";
379                 
380                 outString += "<text fill=\"black\" class=\"seri\" x=\"80\" y=\"620\">" + toString(info[0].midpoint) + "</text>\n";
381                 outString += "<text fill=\"black\" class=\"seri\" x=\"" + toString((info.size()*5) + 75) + "\" y=\"620\">" + toString(info[info.size()-1].midpoint) + "</text>\n";
382                 outString += "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"650\">Base Positions</text>\n";
383                 
384                 outString += "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"580\">0</text>\n";
385                 
386                 outString += "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"350\">IS</text>\n";
387                 
388                 
389                 //find max is score
390                 float biggest = 0.0;
391                 for (int i = 0; i < info.size(); i++) {
392                         if (info[i].score > biggest)  {
393                                 biggest = info[i].score;
394                         }
395                 }
396                 
397                 outString += "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"135\">" + toString(biggest) + "</text>\n";
398                 
399                 int scaler2 = 500 / biggest;
400                 
401                 
402                 outString += "<polyline fill=\"none\" stroke=\"red\" stroke-width=\"2\" points=\"";
403                 //160,200 180,230 200,210 234,220\"/> "; 
404                 for (int i = 0; i < info.size(); i++) {
405                         if(info[i].score < 0) { info[i].score = 0; }
406                         outString += toString(((i*5) + 75)) + "," + toString((600 - (info[i].score * scaler2))) + " ";
407                 }
408                 
409                 outString += "\"/> ";
410                 outString += "</g>\n</svg>\n";
411                 
412                 MPI_Status status;
413                 int length = outString.length();
414                 char* buf2 = new char[length];
415                 memcpy(buf2, outString.c_str(), length);
416                                 
417                 MPI_File_write(outSVG, buf2, length, MPI_CHAR, &status);
418                 delete buf2;
419                 
420                 MPI_File_close(&outSVG);
421
422         }
423         catch(exception& e) {
424                 m->errorOut(e, "ChimeraCheckRDP", "makeSVGpic");
425                 exit(1);
426         }
427 }
428 #else
429 //***************************************************************************************************************
430 void ChimeraCheckRDP::makeSVGpic(vector<sim> info) {
431         try{
432                 
433                 string file = outputDir + querySeq->getName() + ".chimeracheck.svg";
434                 ofstream outsvg;
435                 m->openOutputFile(file, outsvg);
436                 
437                 int width = (info.size()*5) + 150;
438                 
439                 outsvg << "<svg xmlns:svg=\"http://www.w3.org/2000/svg\" xmlns=\"http://www.w3.org/2000/svg\" width=\"100%\" height=\"100%\" viewBox=\"0 0 700 " + toString(width) + "\">\n";
440                 outsvg << "<g>\n";
441                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"25\">Plotted IS values for " + querySeq->getName() + "</text>\n";
442                 
443                 outsvg <<  "<line x1=\"75\" y1=\"600\" x2=\"" + toString((info.size()*5) + 75) + "\" y2=\"600\" stroke=\"black\" stroke-width=\"2\"/>\n";  
444                 outsvg <<  "<line x1=\"75\" y1=\"600\" x2=\"75\" y2=\"125\" stroke=\"black\" stroke-width=\"2\"/>\n";
445                 
446                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"80\" y=\"620\">" + toString(info[0].midpoint) + "</text>\n";
447                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString((info.size()*5) + 75) + "\" y=\"620\">" + toString(info[info.size()-1].midpoint) + "</text>\n";
448                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"650\">Base Positions</text>\n";
449                 
450                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"580\">0</text>\n";
451                 
452                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"350\">IS</text>\n";
453                 
454                 
455                 //find max is score
456                 float biggest = 0.0;
457                 for (int i = 0; i < info.size(); i++) {
458                         if (info[i].score > biggest)  {
459                                 biggest = info[i].score;
460                         }
461                 }
462                 
463                 outsvg << "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"135\">" + toString(biggest) + "</text>\n";
464                 
465                 int scaler2 = 500 / biggest;
466                 
467                 
468                 outsvg << "<polyline fill=\"none\" stroke=\"red\" stroke-width=\"2\" points=\"";
469                 //160,200 180,230 200,210 234,220\"/> "; 
470                 for (int i = 0; i < info.size(); i++) {
471                         if(info[i].score < 0) { info[i].score = 0; }
472                         outsvg << ((i*5) + 75) << "," << (600 - (info[i].score * scaler2)) << " ";
473                 }
474                 
475                 outsvg << "\"/> ";
476                 outsvg << "</g>\n</svg>\n";
477                 
478                 outsvg.close();
479
480         }
481         catch(exception& e) {
482                 m->errorOut(e, "ChimeraCheckRDP", "makeSVGpic");
483                 exit(1);
484         }
485 }
486 #endif
487 //***************************************************************************************************************/
488
489