#include "chimeracheckrdp.h"
//***************************************************************************************************************
-ChimeraCheckRDP::ChimeraCheckRDP(string filename, string temp, string o) { fastafile = filename; templateFile = temp; outputDir = o; }
+ChimeraCheckRDP::ChimeraCheckRDP(string filename, string temp, string n, bool s, int inc, int k, string o) : Chimera() {
+ try {
+ fastafile = filename;
+ templateFileName = temp;
+ name = n;
+ svg = s;
+ increment = inc;
+ kmerSize = k;
+ outputDir = o;
+
+ templateDB = new AlignmentDB(templateFileName, "kmer", kmerSize, 0.0,0.0,0.0,0.0);
+ m->mothurOutEndLine();
+
+ kmer = new Kmer(kmerSize);
+
+ if (name != "") {
+ readName(name); //fills name map with names of seqs the user wants to have .svg for.
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ChimeraCheckRDP", "ChimeraCheckRDP");
+ exit(1);
+ }
+}
//***************************************************************************************************************
ChimeraCheckRDP::~ChimeraCheckRDP() {
try {
- for (int i = 0; i < querySeqs.size(); i++) { delete querySeqs[i]; }
delete templateDB;
delete kmer;
}
catch(exception& e) {
- errorOut(e, "ChimeraCheckRDP", "~AlignSim");
+ m->errorOut(e, "ChimeraCheckRDP", "~ChimeraCheckRDP");
exit(1);
}
}
//***************************************************************************************************************
-void ChimeraCheckRDP::print(ostream& out) {
+int ChimeraCheckRDP::print(ostream& out, ostream& outAcc) {
try {
- mothurOutEndLine();
-
- //vector<bool> isChimeric; isChimeric.resize(querySeqs.size(), false);
+ m->mothurOut("Processing: " + querySeq->getName()); m->mothurOutEndLine();
- for (int i = 0; i < querySeqs.size(); i++) {
+ out << querySeq->getName() << endl;
+ out << "IS scores: " << '\t';
- out << querySeqs[i]->getName() << endl;
- out << "IS scores: " << '\t';
-
- //int lastChimericWindowFound = 0;
-
- for (int k = 0; k < IS[i].size(); k++) {
- out << IS[i][k].score << '\t';
- //if (IS[i][k].score > chimeraCutoff) { isChimeric[i] = true; lastChimericWindowFound = k; }
- }
- out << endl;
- //if (isChimeric[i]) {
- //mothurOut(querySeqs[i]->getName() + "\tIS: " + toString(IS[i][lastChimericWindowFound].score) + "\tbreakpoint: " + toString(IS[i][lastChimericWindowFound].midpoint) + "\tleft parent: " + IS[i][lastChimericWindowFound].leftParent + "\tright parent: " + IS[i][lastChimericWindowFound].rightParent); mothurOutEndLine();
- //out << endl << "chimera: YES" << endl;
- //}else{
- //out << endl << "chimera: NO" << endl;
- //}
+ for (int k = 0; k < IS.size(); k++) {
+ out << IS[k].score << '\t';
+ }
+ out << endl;
+
+ if (svg) {
+ if (name != "") { //if user has specific names
+ map<string, string>::iterator it = names.find(querySeq->getName());
- if (svg) {
-
- if (name != "") { //if user has specific names
- map<string, string>::iterator it = names.find(querySeqs[i]->getName());
-
- if (it != names.end()) { //user wants pic of this
- makeSVGpic(IS[i], i); //zeros out negative results
- }
- }else{//output them all
- makeSVGpic(IS[i], i); //zeros out negative results
- }
+ if (it != names.end()) { //user wants pic of this
+ makeSVGpic(IS); //zeros out negative results
}
+ }else{//output them all
+ makeSVGpic(IS); //zeros out negative results
+ }
}
- mothurOut("This method does not determine if a sequence is chimeric, but allows you to make that determination based on the IS values."); mothurOutEndLine();
+ return 0;
}
catch(exception& e) {
- errorOut(e, "ChimeraCheckRDP", "print");
+ m->errorOut(e, "ChimeraCheckRDP", "print");
exit(1);
}
}
-
+#ifdef USE_MPI
//***************************************************************************************************************
-int ChimeraCheckRDP::getChimeras() {
+int ChimeraCheckRDP::print(MPI_File& out, MPI_File& outAcc) {
try {
- //read in query sequences and subject sequences
- mothurOutEndLine();
- mothurOut("Reading query sequences... "); cout.flush();
- querySeqs = readSeqs(fastafile);
- mothurOut("Done.");
- //templateSeqs = readSeqs(templateFile);
- templateDB = new AlignmentDB(templateFile, "kmer", kmerSize, 0.0,0.0,0.0,0.0);
- mothurOutEndLine();
-
- int numSeqs = querySeqs.size();
-
- IS.resize(numSeqs);
- closest.resize(numSeqs);
-
- //break up file if needed
- int linesPerProcess = numSeqs / processors ;
-
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- //find breakup of sequences for all times we will Parallelize
- if (processors == 1) { lines.push_back(new linePair(0, numSeqs)); }
- else {
- //fill line pairs
- for (int i = 0; i < (processors-1); i++) {
- lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
- }
- //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
- int i = processors - 1;
- lines.push_back(new linePair((i*linesPerProcess), numSeqs));
- }
-
- #else
- lines.push_back(new linePair(0, numSeqs));
- #endif
+ cout << "Processing: " << querySeq->getName() << endl;
- kmer = new Kmer(kmerSize);
+ string outString = "";
- if (name != "") {
- readName(name); //fills name map with names of seqs the user wants to have .svg for.
+ outString += querySeq->getName() + "\nIS scores: \t";
+
+ for (int k = 0; k < IS.size(); k++) {
+ outString += toString(IS[k].score) + "\t";
}
+ outString += "\n";
- //find closest seq to each querySeq
- for (int i = 0; i < querySeqs.size(); i++) {
- closest[i] = templateDB->findClosestSequence(querySeqs[i]);
- }
+ MPI_Status status;
+ int length = outString.length();
+ char* buf = new char[length];
+ memcpy(buf, outString.c_str(), length);
+
+ MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
+ delete buf;
- //for each query find IS value
- if (processors == 1) {
- for (int i = 0; i < querySeqs.size(); i++) {
- IS[i] = findIS(i);
+ if (svg) {
+ if (name != "") { //if user has specific names
+ map<string, string>::iterator it = names.find(querySeq->getName());
+
+ if (it != names.end()) { //user wants pic of this
+ makeSVGpic(IS); //zeros out negative results
+ }
+ }else{//output them all
+ makeSVGpic(IS); //zeros out negative results
}
- }else { createProcessesIS(); }
+ }
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ChimeraCheckRDP", "print");
+ exit(1);
+ }
+}
+#endif
+//***************************************************************************************************************
+int ChimeraCheckRDP::getChimeras(Sequence* query) {
+ try {
+
+ IS.clear();
+
+ querySeq = query;
+
+ closest = templateDB->findClosestSequence(query);
+
+ IS = findIS();
+
//determine chimera report cutoff - window score above 95%
//getCutoff(); - not very acurate predictor
- //free memory
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; }
-
return 0;
}
catch(exception& e) {
- errorOut(e, "ChimeraCheckRDP", "getChimeras");
+ m->errorOut(e, "ChimeraCheckRDP", "getChimeras");
exit(1);
}
}
//***************************************************************************************************************
-vector<sim> ChimeraCheckRDP::findIS(int query) {
+vector<sim> ChimeraCheckRDP::findIS() {
try {
vector< map<int, int> > subjectKmerInfo;
vector<sim> isValues;
- string queryName = querySeqs[query]->getName();
- string seq = querySeqs[query]->getUnaligned();
-
- mothurOut("Finding IS values for sequence " + toString(query+1)); mothurOutEndLine();
+ string queryName = querySeq->getName();
+ string seq = querySeq->getUnaligned();
queryKmerInfo = kmer->getKmerCounts(seq);
- subjectKmerInfo = kmer->getKmerCounts(closest[query].getUnaligned());
+ subjectKmerInfo = kmer->getKmerCounts(closest.getUnaligned());
//find total kmers you have in common with closest[query] by looking at the last entry in the vector of maps for each
int nTotal = calcKmers(queryKmerInfo[(queryKmerInfo.size()-1)], subjectKmerInfo[(subjectKmerInfo.size()-1)]);
int start = seq.length() / 10;
//for each window
- for (int m = start; m < (seq.length() - start); m+=increment) {
+ for (int f = start; f < (seq.length() - start); f+=increment) {
+
+ if (m->control_pressed) { return isValues; }
- if ((m - kmerSize) < 0) { mothurOut("Your sequence is too short for your kmerSize."); mothurOutEndLine(); exit(1); }
+ if ((f - kmerSize) < 0) { m->mothurOut("Your sequence is too short for your kmerSize."); m->mothurOutEndLine(); exit(1); }
sim temp;
- string fragLeft = seq.substr(0, m); //left side of breakpoint
- string fragRight = seq.substr(m); //right side of breakpoint
+ string fragLeft = seq.substr(0, f); //left side of breakpoint
+ string fragRight = seq.substr(f); //right side of breakpoint
//make a sequence of the left side and right side
Sequence* left = new Sequence(queryName, fragLeft);
//right side is tricky - since the counts grow on eachother to find the correct counts of only the right side you must subtract the counts of the left side
//iterate through left sides map to subtract the number of times you saw things before you got the the right side
map<int, int> rightside = queryKmerInfo[queryKmerInfo.size()-1];
- for (map<int, int>::iterator itleft = queryKmerInfo[m-kmerSize].begin(); itleft != queryKmerInfo[m-kmerSize].end(); itleft++) {
+ for (map<int, int>::iterator itleft = queryKmerInfo[f-kmerSize].begin(); itleft != queryKmerInfo[f-kmerSize].end(); itleft++) {
int howManyTotal = queryKmerInfo[queryKmerInfo.size()-1][itleft->first]; //times that kmer was seen in total
//itleft->second is times it was seen in left side, so howmanytotal - leftside should give you right side
}
map<int, int> closerightside = closeRightKmerInfo[closeRightKmerInfo.size()-1];
- for (map<int, int>::iterator itright = closeRightKmerInfo[m-kmerSize].begin(); itright != closeRightKmerInfo[m-kmerSize].end(); itright++) {
+ for (map<int, int>::iterator itright = closeRightKmerInfo[f-kmerSize].begin(); itright != closeRightKmerInfo[f-kmerSize].end(); itright++) {
int howManyTotal = closeRightKmerInfo[(closeRightKmerInfo.size()-1)][itright->first]; //times that kmer was seen in total
//itleft->second is times it was seen in left side, so howmanytotal - leftside should give you right side
}
- int nLeft = calcKmers(closeLeftKmerInfo[m-kmerSize], queryKmerInfo[m-kmerSize]);
+ int nLeft = calcKmers(closeLeftKmerInfo[f-kmerSize], queryKmerInfo[f-kmerSize]);
int nRight = calcKmers(closerightside, rightside);
temp.leftParent = closestLeft.getName();
temp.rightParent = closestRight.getName();
temp.score = is;
- temp.midpoint = m;
+ temp.midpoint = f;
isValues.push_back(temp);
delete left;
delete right;
- }
+ }
return isValues;
}
catch(exception& e) {
- errorOut(e, "ChimeraCheckRDP", "findIS");
+ m->errorOut(e, "ChimeraCheckRDP", "findIS");
exit(1);
}
}
//***************************************************************************************************************
void ChimeraCheckRDP::readName(string namefile) {
try{
- ifstream in;
- openInputFile(namefile, in);
+
string name;
+
+ #ifdef USE_MPI
+
+ MPI_File inMPI;
+ MPI_Offset size;
+ MPI_Status status;
+
+ //char* inFileName = new char[namefile.length()];
+ //memcpy(inFileName, namefile.c_str(), namefile.length());
+ char inFileName[1024];
+ strcpy(inFileName, namefile.c_str());
+
+ MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);
+ MPI_File_get_size(inMPI, &size);
+
+ //delete inFileName;
+
+ char* buffer = new char[size];
+ MPI_File_read(inMPI, buffer, size, MPI_CHAR, &status);
+
+ string tempBuf = buffer;
+ if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size); }
+ istringstream iss (tempBuf,istringstream::in);
+ delete buffer;
+
+ while(!iss.eof()) {
+ iss >> name; gobble(iss);
+ names[name] = name;
+ }
+
+ MPI_File_close(&inMPI);
+
+ #else
+
+ ifstream in;
+ openInputFile(namefile, in);
+
while (!in.eof()) {
-
- in >> name;
-
+ in >> name; gobble(in);
names[name] = name;
-
- gobble(in);
}
+ in.close();
+
+ #endif
}
catch(exception& e) {
- errorOut(e, "ChimeraCheckRDP", "readName");
+ m->errorOut(e, "ChimeraCheckRDP", "readName");
exit(1);
}
}
}
catch(exception& e) {
- errorOut(e, "ChimeraCheckRDP", "calcKmers");
+ m->errorOut(e, "ChimeraCheckRDP", "calcKmers");
exit(1);
}
}
-
+#ifdef USE_MPI
//***************************************************************************************************************
-void ChimeraCheckRDP::getCutoff() {
+void ChimeraCheckRDP::makeSVGpic(vector<sim> info) {
try{
- vector<float> temp;
+ string file = outputDir + querySeq->getName() + ".chimeracheck.svg";
+
+ MPI_File outSVG;
+ int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
+
+ //char* FileName = new char[file.length()];
+ //memcpy(FileName, file.c_str(), file.length());
+
+ char FileName[1024];
+ strcpy(FileName, file.c_str());
+
+ MPI_File_open(MPI_COMM_SELF, FileName, outMode, MPI_INFO_NULL, &outSVG); //comm, filename, mode, info, filepointer
+
+ //delete FileName;
+
+ int width = (info.size()*5) + 150;
+
+ string outString = "";
+
+ outString += "<svg xmlns:svg=\"http://www.w3.org/2000/svg\" xmlns=\"http://www.w3.org/2000/svg\" width=\"100%\" height=\"100%\" viewBox=\"0 0 700 " + toString(width) + "\">\n";
+ outString += "<g>\n";
+ outString += "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"25\">Plotted IS values for " + querySeq->getName() + "</text>\n";
+
+ outString += "<line x1=\"75\" y1=\"600\" x2=\"" + toString((info.size()*5) + 75) + "\" y2=\"600\" stroke=\"black\" stroke-width=\"2\"/>\n";
+ outString += "<line x1=\"75\" y1=\"600\" x2=\"75\" y2=\"125\" stroke=\"black\" stroke-width=\"2\"/>\n";
+
+ outString += "<text fill=\"black\" class=\"seri\" x=\"80\" y=\"620\">" + toString(info[0].midpoint) + "</text>\n";
+ outString += "<text fill=\"black\" class=\"seri\" x=\"" + toString((info.size()*5) + 75) + "\" y=\"620\">" + toString(info[info.size()-1].midpoint) + "</text>\n";
+ outString += "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"650\">Base Positions</text>\n";
+
+ outString += "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"580\">0</text>\n";
- //store all is scores for all windows
- for (int i = 0; i < IS.size(); i++) {
- for (int j = 0; j < IS[i].size(); j++) {
- temp.push_back(IS[i][j].score);
+ outString += "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"350\">IS</text>\n";
+
+
+ //find max is score
+ float biggest = 0.0;
+ for (int i = 0; i < info.size(); i++) {
+ if (info[i].score > biggest) {
+ biggest = info[i].score;
}
}
- //sort them
- sort(temp.begin(), temp.end());
+ outString += "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"135\">" + toString(biggest) + "</text>\n";
+
+ int scaler2 = 500 / biggest;
+
+
+ outString += "<polyline fill=\"none\" stroke=\"red\" stroke-width=\"2\" points=\"";
+ //160,200 180,230 200,210 234,220\"/> ";
+ for (int i = 0; i < info.size(); i++) {
+ if(info[i].score < 0) { info[i].score = 0; }
+ outString += toString(((i*5) + 75)) + "," + toString((600 - (info[i].score * scaler2))) + " ";
+ }
+
+ outString += "\"/> ";
+ outString += "</g>\n</svg>\n";
+
+ MPI_Status status;
+ int length = outString.length();
+ char* buf2 = new char[length];
+ memcpy(buf2, outString.c_str(), length);
+
+ MPI_File_write(outSVG, buf2, length, MPI_CHAR, &status);
+ delete buf2;
- //get 95%
- chimeraCutoff = temp[int(temp.size() * 0.95)];
+ MPI_File_close(&outSVG);
}
catch(exception& e) {
- errorOut(e, "ChimeraCheckRDP", "getCutoff");
+ m->errorOut(e, "ChimeraCheckRDP", "makeSVGpic");
exit(1);
}
}
-
+#else
//***************************************************************************************************************
-void ChimeraCheckRDP::makeSVGpic(vector<sim> info, int query) {
+void ChimeraCheckRDP::makeSVGpic(vector<sim> info) {
try{
- string file = outputDir + querySeqs[query]->getName() + ".chimeracheck.svg";
+ string file = outputDir + querySeq->getName() + ".chimeracheck.svg";
ofstream outsvg;
openOutputFile(file, outsvg);
outsvg << "<svg xmlns:svg=\"http://www.w3.org/2000/svg\" xmlns=\"http://www.w3.org/2000/svg\" width=\"100%\" height=\"100%\" viewBox=\"0 0 700 " + toString(width) + "\">\n";
outsvg << "<g>\n";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"25\">Plotted IS values for " + querySeqs[query]->getName() + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"25\">Plotted IS values for " + querySeq->getName() + "</text>\n";
outsvg << "<line x1=\"75\" y1=\"600\" x2=\"" + toString((info.size()*5) + 75) + "\" y2=\"600\" stroke=\"black\" stroke-width=\"2\"/>\n";
outsvg << "<line x1=\"75\" y1=\"600\" x2=\"75\" y2=\"125\" stroke=\"black\" stroke-width=\"2\"/>\n";
}
catch(exception& e) {
- errorOut(e, "ChimeraCheckRDP", "makeSVGpic");
+ m->errorOut(e, "ChimeraCheckRDP", "makeSVGpic");
exit(1);
}
}
-//***************************************************************************************************************
-void ChimeraCheckRDP::createProcessesIS() {
- try {
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- int process = 0;
- vector<int> processIDS;
-
- //loop through and create all the processes you want
- while (process != processors) {
- int pid = fork();
-
- if (pid > 0) {
- processIDS.push_back(pid);
- process++;
- }else if (pid == 0){
-
- for (int i = lines[process]->start; i < lines[process]->end; i++) {
- IS[i] = findIS(i);
- }
-
- //write out data to file so parent can read it
- ofstream out;
- string s = toString(getpid()) + ".temp";
- openOutputFile(s, out);
-
- //output pairs
- for (int i = lines[process]->start; i < lines[process]->end; i++) {
- out << IS[i].size() << endl;
- for (int j = 0; j < IS[i].size(); j++) {
- out << IS[i][j].leftParent << '\t'<< IS[i][j].rightParent << '\t' << IS[i][j].midpoint << '\t' << IS[i][j].score << endl;
- }
- }
-
- out.close();
-
- exit(0);
- }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
- }
-
- //force parent to wait until all the processes are done
- for (int i=0;i<processors;i++) {
- int temp = processIDS[i];
- wait(&temp);
- }
-
- //get data created by processes
- for (int i=0;i<processors;i++) {
- ifstream in;
- string s = toString(processIDS[i]) + ".temp";
- openInputFile(s, in);
-
- //get pairs
- for (int k = lines[i]->start; k < lines[i]->end; k++) {
-
- int size;
- in >> size; gobble(in);
-
- string left, right;
- int mid;
- float score;
-
- IS[k].clear();
-
- for (int j = 0; j < size; j++) {
- in >> left >> right >> mid >> score; gobble(in);
-
- sim temp;
- temp.leftParent = left;
- temp.rightParent = right;
- temp.midpoint = mid;
- temp.score = score;
-
- IS[k].push_back(temp);
- }
- }
-
- in.close();
- remove(s.c_str());
- }
-#else
- for (int i = 0; i < querySeqs.size(); i++) {
- IS[i] = findIS(i);
- }
-#endif
- }
- catch(exception& e) {
- errorOut(e, "ChimeraCheckRDP", "createProcessesIS");
- exit(1);
- }
-}
-
-//***************************************************************************************************************
+#endif
+//***************************************************************************************************************/