X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=chimeracheckrdp.cpp;fp=chimeracheckrdp.cpp;h=0000000000000000000000000000000000000000;hb=4a877efa127e56e81a21f53cfdbbfd3bfbe8c4ff;hp=42b531282c5f020664277a9e907699a3bf227658;hpb=a6cf29fa4dac0909c7582cb1094151d34093ee76;p=mothur.git diff --git a/chimeracheckrdp.cpp b/chimeracheckrdp.cpp deleted file mode 100644 index 42b5312..0000000 --- a/chimeracheckrdp.cpp +++ /dev/null @@ -1,489 +0,0 @@ -/* - * chimeracheckrdp.cpp - * Mothur - * - * Created by westcott on 9/8/09. - * Copyright 2009 Schloss Lab. All rights reserved. - * - */ - -#include "chimeracheckrdp.h" - -//*************************************************************************************************************** -ChimeraCheckRDP::ChimeraCheckRDP(string filename, string temp, string n, bool s, int inc, int k, string o) : Chimera() { - try { - fastafile = filename; - templateFileName = temp; - name = n; - svg = s; - increment = inc; - kmerSize = k; - outputDir = o; - - templateDB = new AlignmentDB(templateFileName, "kmer", kmerSize, 0.0,0.0,0.0,0.0, rand()); - m->mothurOutEndLine(); - - kmer = new Kmer(kmerSize); - - if (name != "") { - readName(name); //fills name map with names of seqs the user wants to have .svg for. - } - } - catch(exception& e) { - m->errorOut(e, "ChimeraCheckRDP", "ChimeraCheckRDP"); - exit(1); - } -} -//*************************************************************************************************************** - -ChimeraCheckRDP::~ChimeraCheckRDP() { - try { - delete templateDB; - delete kmer; - } - catch(exception& e) { - m->errorOut(e, "ChimeraCheckRDP", "~ChimeraCheckRDP"); - exit(1); - } -} -//*************************************************************************************************************** -Sequence ChimeraCheckRDP::print(ostream& out, ostream& outAcc) { - try { - - m->mothurOut("Processing: " + querySeq->getName()); m->mothurOutEndLine(); - - out << querySeq->getName() << endl; - out << "IS scores: " << '\t'; - - for (int k = 0; k < IS.size(); k++) { - out << IS[k].score << '\t'; - } - out << endl; - - if (svg) { - if (name != "") { //if user has specific names - map::iterator it = names.find(querySeq->getName()); - - if (it != names.end()) { //user wants pic of this - makeSVGpic(IS); //zeros out negative results - } - }else{//output them all - makeSVGpic(IS); //zeros out negative results - } - } - - return *querySeq; - } - catch(exception& e) { - m->errorOut(e, "ChimeraCheckRDP", "print"); - exit(1); - } -} -#ifdef USE_MPI -//*************************************************************************************************************** -Sequence ChimeraCheckRDP::print(MPI_File& out, MPI_File& outAcc) { - try { - - cout << "Processing: " << querySeq->getName() << endl; - - string outString = ""; - - outString += querySeq->getName() + "\nIS scores: \t"; - - for (int k = 0; k < IS.size(); k++) { - outString += toString(IS[k].score) + "\t"; - } - outString += "\n"; - - MPI_Status status; - int length = outString.length(); - char* buf = new char[length]; - memcpy(buf, outString.c_str(), length); - - MPI_File_write_shared(out, buf, length, MPI_CHAR, &status); - delete buf; - - if (svg) { - if (name != "") { //if user has specific names - map::iterator it = names.find(querySeq->getName()); - - if (it != names.end()) { //user wants pic of this - makeSVGpic(IS); //zeros out negative results - } - }else{//output them all - makeSVGpic(IS); //zeros out negative results - } - } - - return *querySeq; - } - catch(exception& e) { - m->errorOut(e, "ChimeraCheckRDP", "print"); - exit(1); - } -} -#endif -//*************************************************************************************************************** -int ChimeraCheckRDP::getChimeras(Sequence* query) { - try { - - IS.clear(); - - querySeq = query; - - closest = templateDB->findClosestSequence(query); - - IS = findIS(); - - //determine chimera report cutoff - window score above 95% - //getCutoff(); - not very acurate predictor - - return 0; - } - catch(exception& e) { - m->errorOut(e, "ChimeraCheckRDP", "getChimeras"); - exit(1); - } -} -//*************************************************************************************************************** -vector ChimeraCheckRDP::findIS() { - try { - - - vector< map > queryKmerInfo; //vector of maps - each entry in the vector is a map of the kmers up to that spot in the unaligned seq - //example: seqKmerInfo[50] = map containing the kmers found in the first 50 + kmersize characters of ecoli. - //i chose to store the kmers numbers in a map so you wouldn't have to check for dupilcate entries and could easily find the - //kmers 2 seqs had in common. There may be a better way to do this thats why I am leaving so many comments... - vector< map > subjectKmerInfo; - - vector isValues; - string queryName = querySeq->getName(); - string seq = querySeq->getUnaligned(); - - queryKmerInfo = kmer->getKmerCounts(seq); - subjectKmerInfo = kmer->getKmerCounts(closest.getUnaligned()); - - //find total kmers you have in common with closest[query] by looking at the last entry in the vector of maps for each - int nTotal = calcKmers(queryKmerInfo[(queryKmerInfo.size()-1)], subjectKmerInfo[(subjectKmerInfo.size()-1)]); - - //you don't want the starting point to be virtually at hte end so move it in 10% - int start = seq.length() / 10; - - //for each window - for (int f = start; f < (seq.length() - start); f+=increment) { - - if (m->control_pressed) { return isValues; } - - if ((f - kmerSize) < 0) { m->mothurOut("Your sequence is too short for your kmerSize."); m->mothurOutEndLine(); exit(1); } - - sim temp; - - string fragLeft = seq.substr(0, f); //left side of breakpoint - string fragRight = seq.substr(f); //right side of breakpoint - - //make a sequence of the left side and right side - Sequence* left = new Sequence(queryName, fragLeft); - Sequence* right = new Sequence(queryName, fragRight); - - //find seqs closest to each fragment - Sequence closestLeft = templateDB->findClosestSequence(left); - - Sequence closestRight = templateDB->findClosestSequence(right); - - //get kmerinfo for the closest left - vector< map > closeLeftKmerInfo = kmer->getKmerCounts(closestLeft.getUnaligned()); - - //get kmerinfo for the closest right - vector< map > closeRightKmerInfo = kmer->getKmerCounts(closestRight.getUnaligned()); - - //right side is tricky - since the counts grow on eachother to find the correct counts of only the right side you must subtract the counts of the left side - //iterate through left sides map to subtract the number of times you saw things before you got the the right side - map rightside = queryKmerInfo[queryKmerInfo.size()-1]; - for (map::iterator itleft = queryKmerInfo[f-kmerSize].begin(); itleft != queryKmerInfo[f-kmerSize].end(); itleft++) { - int howManyTotal = queryKmerInfo[queryKmerInfo.size()-1][itleft->first]; //times that kmer was seen in total - - //itleft->second is times it was seen in left side, so howmanytotal - leftside should give you right side - int howmanyright = howManyTotal - itleft->second; - - //if any were seen just on the left erase - if (howmanyright == 0) { - rightside.erase(itleft->first); - } - } - - map closerightside = closeRightKmerInfo[closeRightKmerInfo.size()-1]; - for (map::iterator itright = closeRightKmerInfo[f-kmerSize].begin(); itright != closeRightKmerInfo[f-kmerSize].end(); itright++) { - int howManyTotal = closeRightKmerInfo[(closeRightKmerInfo.size()-1)][itright->first]; //times that kmer was seen in total - - //itleft->second is times it was seen in left side, so howmanytotal - leftside should give you right side - int howmanyright = howManyTotal - itright->second; - - //if any were seen just on the left erase - if (howmanyright == 0) { - closerightside.erase(itright->first); - } - } - - - int nLeft = calcKmers(closeLeftKmerInfo[f-kmerSize], queryKmerInfo[f-kmerSize]); - - int nRight = calcKmers(closerightside, rightside); - - int is = nLeft + nRight - nTotal; - - //save IS, leftparent, rightparent, breakpoint - temp.leftParent = closestLeft.getName(); - temp.rightParent = closestRight.getName(); - temp.score = is; - temp.midpoint = f; - - isValues.push_back(temp); - - delete left; - delete right; - } - - return isValues; - - } - catch(exception& e) { - m->errorOut(e, "ChimeraCheckRDP", "findIS"); - exit(1); - } -} -//*************************************************************************************************************** -void ChimeraCheckRDP::readName(string namefile) { - try{ - - string name; - - #ifdef USE_MPI - - MPI_File inMPI; - MPI_Offset size; - MPI_Status status; - - //char* inFileName = new char[namefile.length()]; - //memcpy(inFileName, namefile.c_str(), namefile.length()); - - char inFileName[1024]; - strcpy(inFileName, namefile.c_str()); - - MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); - MPI_File_get_size(inMPI, &size); - - //delete inFileName; - - char* buffer = new char[size]; - MPI_File_read(inMPI, buffer, size, MPI_CHAR, &status); - - string tempBuf = buffer; - if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size); } - istringstream iss (tempBuf,istringstream::in); - delete buffer; - - while(!iss.eof()) { - iss >> name; m->gobble(iss); - names[name] = name; - } - - MPI_File_close(&inMPI); - - #else - - ifstream in; - m->openInputFile(namefile, in); - - while (!in.eof()) { - in >> name; m->gobble(in); - names[name] = name; - } - in.close(); - - #endif - - } - catch(exception& e) { - m->errorOut(e, "ChimeraCheckRDP", "readName"); - exit(1); - } -} - -//*************************************************************************************************************** -//find the smaller map and iterate through it and count kmers in common -int ChimeraCheckRDP::calcKmers(map query, map subject) { - try{ - - int common = 0; - - map::iterator smallone; - map::iterator largeone; - - if (query.size() < subject.size()) { - - for (smallone = query.begin(); smallone != query.end(); smallone++) { - largeone = subject.find(smallone->first); - - //if you found it they have that kmer in common - if (largeone != subject.end()) { common++; } - } - - }else { - - for (smallone = subject.begin(); smallone != subject.end(); smallone++) { - largeone = query.find(smallone->first); - - //if you found it they have that kmer in common - if (largeone != query.end()) { common++; } - } - } - - return common; - - } - catch(exception& e) { - m->errorOut(e, "ChimeraCheckRDP", "calcKmers"); - exit(1); - } -} -#ifdef USE_MPI -//*************************************************************************************************************** -void ChimeraCheckRDP::makeSVGpic(vector info) { - try{ - - string file = outputDir + querySeq->getName() + ".chimeracheck.svg"; - - MPI_File outSVG; - int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; - - //char* FileName = new char[file.length()]; - //memcpy(FileName, file.c_str(), file.length()); - - char FileName[1024]; - strcpy(FileName, file.c_str()); - - MPI_File_open(MPI_COMM_SELF, FileName, outMode, MPI_INFO_NULL, &outSVG); //comm, filename, mode, info, filepointer - - //delete FileName; - - int width = (info.size()*5) + 150; - - string outString = ""; - - outString += "\n"; - outString += "\n"; - outString += "Plotted IS values for " + querySeq->getName() + "\n"; - - outString += "\n"; - outString += "\n"; - - outString += "" + toString(info[0].midpoint) + "\n"; - outString += "" + toString(info[info.size()-1].midpoint) + "\n"; - outString += "Base Positions\n"; - - outString += "0\n"; - - outString += "IS\n"; - - - //find max is score - float biggest = 0.0; - for (int i = 0; i < info.size(); i++) { - if (info[i].score > biggest) { - biggest = info[i].score; - } - } - - outString += "" + toString(biggest) + "\n"; - - int scaler2 = 500 / biggest; - - - outString += " "; - for (int i = 0; i < info.size(); i++) { - if(info[i].score < 0) { info[i].score = 0; } - outString += toString(((i*5) + 75)) + "," + toString((600 - (info[i].score * scaler2))) + " "; - } - - outString += "\"/> "; - outString += "\n\n"; - - MPI_Status status; - int length = outString.length(); - char* buf2 = new char[length]; - memcpy(buf2, outString.c_str(), length); - - MPI_File_write(outSVG, buf2, length, MPI_CHAR, &status); - delete buf2; - - MPI_File_close(&outSVG); - - } - catch(exception& e) { - m->errorOut(e, "ChimeraCheckRDP", "makeSVGpic"); - exit(1); - } -} -#else -//*************************************************************************************************************** -void ChimeraCheckRDP::makeSVGpic(vector info) { - try{ - - string file = outputDir + querySeq->getName() + ".chimeracheck.svg"; - ofstream outsvg; - m->openOutputFile(file, outsvg); - - int width = (info.size()*5) + 150; - - outsvg << "\n"; - outsvg << "\n"; - outsvg << "Plotted IS values for " + querySeq->getName() + "\n"; - - outsvg << "\n"; - outsvg << "\n"; - - outsvg << "" + toString(info[0].midpoint) + "\n"; - outsvg << "" + toString(info[info.size()-1].midpoint) + "\n"; - outsvg << "Base Positions\n"; - - outsvg << "0\n"; - - outsvg << "IS\n"; - - - //find max is score - float biggest = 0.0; - for (int i = 0; i < info.size(); i++) { - if (info[i].score > biggest) { - biggest = info[i].score; - } - } - - outsvg << "" + toString(biggest) + "\n"; - - int scaler2 = 500 / biggest; - - - outsvg << " "; - for (int i = 0; i < info.size(); i++) { - if(info[i].score < 0) { info[i].score = 0; } - outsvg << ((i*5) + 75) << "," << (600 - (info[i].score * scaler2)) << " "; - } - - outsvg << "\"/> "; - outsvg << "\n\n"; - - outsvg.close(); - - } - catch(exception& e) { - m->errorOut(e, "ChimeraCheckRDP", "makeSVGpic"); - exit(1); - } -} -#endif -//***************************************************************************************************************/ - -