5 * Created by westcott on 9/8/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
10 #include "chimeracheckrdp.h"
12 //***************************************************************************************************************
// Constructs the chimera checker on top of the Chimera base class.
// Parameters are (filename, temp, n, s, inc, k, o); of these, the statements below
// use temp as the template file name.
// NOTE(review): templateDB and kmer are allocated with new - confirm the destructor frees them.
13 ChimeraCheckRDP::ChimeraCheckRDP(string filename, string temp, string n, bool s, int inc, int k, string o) : Chimera() {
16 templateFileName = temp;
// template database built from the template file; "kmer" and kmerSize are handed to AlignmentDB
// (the four 0.0 arguments are presumably scoring parameters that do not matter for a kmer search - TODO confirm)
23 templateDB = new AlignmentDB(templateFileName, "kmer", kmerSize, 0.0,0.0,0.0,0.0);
24 m->mothurOutEndLine();
// helper object whose getKmerCounts() is used by findIS()
26 kmer = new Kmer(kmerSize);
29 readName(name); //fills name map with names of seqs the user wants to have .svg for.
// report the error e together with the class and function name
33 m->errorOut(e, "ChimeraCheckRDP", "ChimeraCheckRDP");
37 //***************************************************************************************************************
// Destructor. The error e is reported through m->errorOut, tagged with the class and function name.
39 ChimeraCheckRDP::~ChimeraCheckRDP() {
45 m->errorOut(e, "ChimeraCheckRDP", "~ChimeraCheckRDP");
49 //***************************************************************************************************************
// Stream variant of print(): writes the IS scores of the current query sequence and,
// where requested, produces an SVG plot of them.
// out    - stream that receives the query name followed by the tab-separated IS scores
// outAcc - second output stream; not written by the statements below
50 int ChimeraCheckRDP::print(ostream& out, ostream& outAcc) {
// progress message goes through the mothur output object
53 m->mothurOut("Processing: " + querySeq->getName()); m->mothurOutEndLine();
// report layout: query name on its own line, then "IS scores: " and the scores
55 out << querySeq->getName() << endl;
56 out << "IS scores: " << '\t';
// one tab-terminated score per entry of IS
58 for (int k = 0; k < IS.size(); k++) {
59 out << IS[k].score << '\t';
64 if (name != "") { //if user has specific names
65 map<string, string>::iterator it = names.find(querySeq->getName());
67 if (it != names.end()) { //user wants pic of this
// NOTE(review): makeSVGpic takes its vector by value, so the zeroing happens on a copy and IS itself is unchanged
68 makeSVGpic(IS); //zeros out negative results
70 }else{//output them all
71 makeSVGpic(IS); //zeros out negative results
// report the error e together with the class and function name
78 m->errorOut(e, "ChimeraCheckRDP", "print");
83 //***************************************************************************************************************
// MPI variant of print(): writes the IS scores of the current query sequence through an
// MPI file handle and, where requested, produces an SVG plot of them.
// out    - MPI file that receives the query name and the tab-separated IS scores
// outAcc - second MPI file handle; not written by the statements below
84 int ChimeraCheckRDP::print(MPI_File& out, MPI_File& outAcc) {
// progress goes straight to cout here (the stream variant uses m->mothurOut)
87 cout << "Processing: " << querySeq->getName() << endl;
// the report text is assembled in memory first, with the same layout as the stream variant
89 string outString = "";
91 outString += querySeq->getName() + "\nIS scores: \t";
// one tab-terminated score per entry of IS
93 for (int k = 0; k < IS.size(); k++) {
94 outString += toString(IS[k].score) + "\t";
99 int length = outString.length();
// NOTE(review): strcpy copies length+1 bytes (the terminating NUL is included) - confirm buf is sized for that
101 strcpy(buf, outString.c_str());
// write exactly length chars using the file's shared file pointer
103 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
106 if (name != "") { //if user has specific names
107 map<string, string>::iterator it = names.find(querySeq->getName());
109 if (it != names.end()) { //user wants pic of this
// NOTE(review): makeSVGpic takes its vector by value, so the zeroing happens on a copy and IS itself is unchanged
110 makeSVGpic(IS); //zeros out negative results
112 }else{//output them all
113 makeSVGpic(IS); //zeros out negative results
119 catch(exception& e) {
120 m->errorOut(e, "ChimeraCheckRDP", "print");
125 //***************************************************************************************************************
// Looks up the template sequence closest to the query and keeps it in closest.
// query - the sequence being checked
126 int ChimeraCheckRDP::getChimeras(Sequence* query) {
// closest match for the whole query; findIS() uses it as the baseline for the total kmer count
133 closest = templateDB->findClosestSequence(query);
137 //determine chimera report cutoff - window score above 95%
138 //getCutoff(); - not a very accurate predictor
142 catch(exception& e) {
143 m->errorOut(e, "ChimeraCheckRDP", "getChimeras");
147 //***************************************************************************************************************
// Computes an IS value for a series of candidate breakpoints along the unaligned query.
// For every breakpoint f the query is split into a left and a right fragment, the closest
// template to each fragment is looked up, and
//     is = nLeft + nRight - nTotal
// where nLeft / nRight are the kmers each fragment shares with its own closest template and
// nTotal is the number of kmers the whole query shares with its overall closest template.
// Entries are collected in isValues; the function returns isValues early when m->control_pressed is set.
148 vector<sim> ChimeraCheckRDP::findIS() {
152 vector< map<int, int> > queryKmerInfo; //vector of maps - each entry in the vector is a map of the kmers up to that spot in the unaligned seq
153 //example: seqKmerInfo[50] = map containing the kmers found in the first 50 + kmersize characters of ecoli.
154 //I chose to store the kmer numbers in a map so you wouldn't have to check for duplicate entries and could easily find the
155 //kmers 2 seqs had in common. There may be a better way to do this, that's why I am leaving so many comments...
156 vector< map<int, int> > subjectKmerInfo;
158 vector<sim> isValues;
159 string queryName = querySeq->getName();
160 string seq = querySeq->getUnaligned();
// cumulative kmer maps for the query and for its overall closest template
162 queryKmerInfo = kmer->getKmerCounts(seq);
163 subjectKmerInfo = kmer->getKmerCounts(closest.getUnaligned());
165 //find total kmers you have in common with closest[query] by looking at the last entry in the vector of maps for each
166 int nTotal = calcKmers(queryKmerInfo[(queryKmerInfo.size()-1)], subjectKmerInfo[(subjectKmerInfo.size()-1)]);
168 //you don't want the starting point to be virtually at the end so move it in 10%
169 int start = seq.length() / 10;
// breakpoints run from 10% to 90% of the query length in steps of increment
172 for (int f = start; f < (seq.length() - start); f+=increment) {
174 if (m->control_pressed) { return isValues; }
// NOTE(review): this terminates the whole process via exit(1) rather than reporting an error to the caller
176 if ((f - kmerSize) < 0) { m->mothurOut("Your sequence is too short for your kmerSize."); m->mothurOutEndLine(); exit(1); }
180 string fragLeft = seq.substr(0, f); //left side of breakpoint
181 string fragRight = seq.substr(f); //right side of breakpoint
183 //make a sequence of the left side and right side
// NOTE(review): both objects are allocated with new on every iteration - confirm each one is deleted before the next pass
184 Sequence* left = new Sequence(queryName, fragLeft);
185 Sequence* right = new Sequence(queryName, fragRight);
187 //find seqs closest to each fragment
188 Sequence closestLeft = templateDB->findClosestSequence(left);
190 Sequence closestRight = templateDB->findClosestSequence(right);
192 //get kmerinfo for the closest left
193 vector< map<int, int> > closeLeftKmerInfo = kmer->getKmerCounts(closestLeft.getUnaligned());
195 //get kmerinfo for the closest right
196 vector< map<int, int> > closeRightKmerInfo = kmer->getKmerCounts(closestRight.getUnaligned());
198 //right side is tricky - since the counts grow on each other, to find the correct counts of only the right side you must subtract the counts of the left side
199 //iterate through the left side's map to subtract the number of times you saw things before you got to the right side
// start from the map for the whole query, then drop kmers that occur only left of the breakpoint
200 map<int, int> rightside = queryKmerInfo[queryKmerInfo.size()-1];
201 for (map<int, int>::iterator itleft = queryKmerInfo[f-kmerSize].begin(); itleft != queryKmerInfo[f-kmerSize].end(); itleft++) {
202 int howManyTotal = queryKmerInfo[queryKmerInfo.size()-1][itleft->first]; //times that kmer was seen in total
204 //itleft->second is times it was seen in left side, so howmanytotal - leftside should give you right side
205 int howmanyright = howManyTotal - itleft->second;
207 //if any were seen just on the left erase
208 if (howmanyright == 0) {
209 rightside.erase(itleft->first);
// same reduction for the template closest to the right fragment
// NOTE(review): f-kmerSize is an offset derived from the query; this assumes closeRightKmerInfo has at least that many entries - verify
213 map<int, int> closerightside = closeRightKmerInfo[closeRightKmerInfo.size()-1];
214 for (map<int, int>::iterator itright = closeRightKmerInfo[f-kmerSize].begin(); itright != closeRightKmerInfo[f-kmerSize].end(); itright++) {
215 int howManyTotal = closeRightKmerInfo[(closeRightKmerInfo.size()-1)][itright->first]; //times that kmer was seen in total
217 //itright->second is times it was seen in left side, so howmanytotal - leftside should give you right side
218 int howmanyright = howManyTotal - itright->second;
220 //if any were seen just on the left erase
221 if (howmanyright == 0) {
222 closerightside.erase(itright->first);
// kmers shared left of the breakpoint: closest-left template vs. query
// NOTE(review): same assumption as above - closeLeftKmerInfo must have at least f-kmerSize+1 entries
227 int nLeft = calcKmers(closeLeftKmerInfo[f-kmerSize], queryKmerInfo[f-kmerSize]);
// kmers shared right of the breakpoint: closest-right template vs. query
229 int nRight = calcKmers(closerightside, rightside);
// the two fragment matches together, minus the whole-sequence match
231 int is = nLeft + nRight - nTotal;
233 //save IS, leftparent, rightparent, breakpoint
234 temp.leftParent = closestLeft.getName();
235 temp.rightParent = closestRight.getName();
239 isValues.push_back(temp);
248 catch(exception& e) {
249 m->errorOut(e, "ChimeraCheckRDP", "findIS");
253 //***************************************************************************************************************
// Reads sequence names from namefile.
// Two read paths appear below: one that loads the file through MPI file I/O and parses the
// buffer with an istringstream, and one that opens the file with openInputFile and reads from
// the resulting stream.
// NOTE(review): which path is compiled is presumably chosen by a preprocessor switch - confirm.
// namefile - path of the file holding the names
254 void ChimeraCheckRDP::readName(string namefile) {
// NOTE(review): the array has room for namefile.length() chars, but strcpy also writes the
// terminating NUL - the buffer is one byte too small
265 char inFileName[namefile.length()];
266 strcpy(inFileName, namefile.c_str());
// MPI path: open read-only on MPI_COMM_WORLD, query the file size, read size chars into buffer
268 MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);
269 MPI_File_get_size(inMPI, &size);
272 MPI_File_read(inMPI, buffer, size, MPI_CHAR, &status);
// NOTE(review): constructing a string from buffer stops at the first NUL byte, so this relies on
// buffer being NUL-terminated; the next line trims anything picked up beyond size
274 string tempBuf = buffer;
275 if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size); }
276 istringstream iss (tempBuf,istringstream::in);
// read the next token into name; gobble() is presumably a whitespace skipper - TODO confirm
279 iss >> name; gobble(iss);
283 MPI_File_close(&inMPI);
// stream path: same token-by-token read from a regular input stream
288 openInputFile(namefile, in);
291 in >> name; gobble(in);
299 catch(exception& e) {
300 m->errorOut(e, "ChimeraCheckRDP", "readName");
305 //***************************************************************************************************************
306 //find the smaller map and iterate through it and count kmers in common
// Counts the kmers (map keys) that query and subject have in common.
// Only the keys are compared; the stored counts are not looked at. The smaller map is the
// one iterated, with a find() in the larger map for each of its keys.
// NOTE(review): both maps are taken by value, so each call copies them.
// query   - kmer map of the first sequence
// subject - kmer map of the second sequence
307 int ChimeraCheckRDP::calcKmers(map<int, int> query, map<int, int> subject) {
// small walks the map being iterated, large holds the lookup result in the other map
311 map<int, int>::iterator small;
312 map<int, int>::iterator large;
// query is the smaller map: iterate it and probe subject
314 if (query.size() < subject.size()) {
316 for (small = query.begin(); small != query.end(); small++) {
317 large = subject.find(small->first);
319 //if you found it they have that kmer in common
320 if (large != subject.end()) { common++; }
// otherwise iterate subject and probe query
325 for (small = subject.begin(); small != subject.end(); small++) {
326 large = query.find(small->first);
328 //if you found it they have that kmer in common
329 if (large != query.end()) { common++; }
336 catch(exception& e) {
337 m->errorOut(e, "ChimeraCheckRDP", "calcKmers");
342 //***************************************************************************************************************
// MPI variant of makeSVGpic(): plots the IS scores of the current query as an SVG polyline and
// writes the picture to <outputDir><query name>.chimeracheck.svg through MPI file I/O.
// The whole document is assembled in outString and written with a single MPI_File_write.
// info - the IS entries to plot; taken by value, so clamping negative scores to 0 below only
//        changes this local copy
// NOTE(review): info[0] and info[info.size()-1] are read unconditionally - an empty vector would
// be an out-of-bounds access; confirm callers never pass one.
343 void ChimeraCheckRDP::makeSVGpic(vector<sim> info) {
346 string file = outputDir + querySeq->getName() + ".chimeracheck.svg";
349 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
// NOTE(review): the array has room for file.length() chars, but strcpy also writes the
// terminating NUL - the buffer is one byte too small
351 char FileName[file.length()];
352 strcpy(FileName, file.c_str());
354 MPI_File_open(MPI_COMM_SELF, FileName, outMode, MPI_INFO_NULL, &outSVG); //comm, filename, mode, info, filepointer
// 5 units per plotted point plus 150 units of margin
356 int width = (info.size()*5) + 150;
358 string outString = "";
// NOTE(review): an SVG viewBox is "min-x min-y width height", so the computed width lands in the
// height slot while 700 is used as the width - confirm this is intended
360 outString += "<svg xmlns:svg=\"http://www.w3.org/2000/svg\" xmlns=\"http://www.w3.org/2000/svg\" width=\"100%\" height=\"100%\" viewBox=\"0 0 700 " + toString(width) + "\">\n";
361 outString += "<g>\n";
// title
362 outString += "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"25\">Plotted IS values for " + querySeq->getName() + "</text>\n";
// x axis (y = 600) and y axis (x = 75)
364 outString += "<line x1=\"75\" y1=\"600\" x2=\"" + toString((info.size()*5) + 75) + "\" y2=\"600\" stroke=\"black\" stroke-width=\"2\"/>\n";
365 outString += "<line x1=\"75\" y1=\"600\" x2=\"75\" y2=\"125\" stroke=\"black\" stroke-width=\"2\"/>\n";
// x axis labels: midpoint of the first and of the last entry, then the axis caption
367 outString += "<text fill=\"black\" class=\"seri\" x=\"80\" y=\"620\">" + toString(info[0].midpoint) + "</text>\n";
368 outString += "<text fill=\"black\" class=\"seri\" x=\"" + toString((info.size()*5) + 75) + "\" y=\"620\">" + toString(info[info.size()-1].midpoint) + "</text>\n";
369 outString += "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"650\">Base Positions</text>\n";
// y axis labels: 0 at the bottom, "IS" as caption, the largest score at the top
371 outString += "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"580\">0</text>\n";
373 outString += "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"350\">IS</text>\n";
// find the largest score; it labels the top of the y axis and sets the vertical scale
378 for (int i = 0; i < info.size(); i++) {
379 if (info[i].score > biggest) {
380 biggest = info[i].score;
384 outString += "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"135\">" + toString(biggest) + "</text>\n";
// NOTE(review): the result is stored in an int, so the scale factor is truncated (it becomes 0
// once biggest exceeds 500); a biggest of 0 makes this division invalid - confirm how biggest
// is initialised
386 int scaler2 = 500 / biggest;
389 outString += "<polyline fill=\"none\" stroke=\"red\" stroke-width=\"2\" points=\"";
390 //160,200 180,230 200,210 234,220\"/> ";
// one "x,y" pair per entry: x advances 5 units per entry, y is measured upwards from the x axis at 600
391 for (int i = 0; i < info.size(); i++) {
392 if(info[i].score < 0) { info[i].score = 0; }
393 outString += toString(((i*5) + 75)) + "," + toString((600 - (info[i].score * scaler2))) + " ";
// close the polyline element, then the group and the document
396 outString += "\"/> ";
397 outString += "</g>\n</svg>\n";
400 int length = outString.length();
// NOTE(review): strcpy copies length+1 bytes (the terminating NUL is included) - confirm buf2 is sized for that
402 strcpy(buf2, outString.c_str());
404 MPI_File_write(outSVG, buf2, length, MPI_CHAR, &status);
406 MPI_File_close(&outSVG);
409 catch(exception& e) {
410 m->errorOut(e, "ChimeraCheckRDP", "makeSVGpic");
415 //***************************************************************************************************************
// Stream variant of makeSVGpic(): plots the IS scores of the current query as an SVG polyline and
// writes the picture to <outputDir><query name>.chimeracheck.svg through the outsvg stream.
// info - the IS entries to plot; taken by value, so clamping negative scores to 0 below only
//        changes this local copy
// NOTE(review): info[0] and info[info.size()-1] are read unconditionally - an empty vector would
// be an out-of-bounds access; confirm callers never pass one.
416 void ChimeraCheckRDP::makeSVGpic(vector<sim> info) {
419 string file = outputDir + querySeq->getName() + ".chimeracheck.svg";
421 openOutputFile(file, outsvg);
// 5 units per plotted point plus 150 units of margin
423 int width = (info.size()*5) + 150;
// NOTE(review): an SVG viewBox is "min-x min-y width height", so the computed width lands in the
// height slot while 700 is used as the width - confirm this is intended
425 outsvg << "<svg xmlns:svg=\"http://www.w3.org/2000/svg\" xmlns=\"http://www.w3.org/2000/svg\" width=\"100%\" height=\"100%\" viewBox=\"0 0 700 " + toString(width) + "\">\n";
// title
427 outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"25\">Plotted IS values for " + querySeq->getName() + "</text>\n";
// x axis (y = 600) and y axis (x = 75)
429 outsvg << "<line x1=\"75\" y1=\"600\" x2=\"" + toString((info.size()*5) + 75) + "\" y2=\"600\" stroke=\"black\" stroke-width=\"2\"/>\n";
430 outsvg << "<line x1=\"75\" y1=\"600\" x2=\"75\" y2=\"125\" stroke=\"black\" stroke-width=\"2\"/>\n";
// x axis labels: midpoint of the first and of the last entry, then the axis caption
432 outsvg << "<text fill=\"black\" class=\"seri\" x=\"80\" y=\"620\">" + toString(info[0].midpoint) + "</text>\n";
433 outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString((info.size()*5) + 75) + "\" y=\"620\">" + toString(info[info.size()-1].midpoint) + "</text>\n";
434 outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"650\">Base Positions</text>\n";
// y axis labels: 0 at the bottom, "IS" as caption, the largest score at the top
436 outsvg << "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"580\">0</text>\n";
438 outsvg << "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"350\">IS</text>\n";
// find the largest score; it labels the top of the y axis and sets the vertical scale
443 for (int i = 0; i < info.size(); i++) {
444 if (info[i].score > biggest) {
445 biggest = info[i].score;
449 outsvg << "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"135\">" + toString(biggest) + "</text>\n";
// NOTE(review): the result is stored in an int, so the scale factor is truncated (it becomes 0
// once biggest exceeds 500); a biggest of 0 makes this division invalid - confirm how biggest
// is initialised
451 int scaler2 = 500 / biggest;
454 outsvg << "<polyline fill=\"none\" stroke=\"red\" stroke-width=\"2\" points=\"";
455 //160,200 180,230 200,210 234,220\"/> ";
// one "x,y" pair per entry: x advances 5 units per entry, y is measured upwards from the x axis at 600
456 for (int i = 0; i < info.size(); i++) {
457 if(info[i].score < 0) { info[i].score = 0; }
458 outsvg << ((i*5) + 75) << "," << (600 - (info[i].score * scaler2)) << " ";
// close the group and the document
462 outsvg << "</g>\n</svg>\n";
467 catch(exception& e) {
468 m->errorOut(e, "ChimeraCheckRDP", "makeSVGpic");
473 //***************************************************************************************************************/