//***************************************************************************************************************
ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div,
-int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
+int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : Chimera() {
try {
fastafile = file;
templateFileName = temp; templateSeqs = readSeqs(temp);
numWanted = numw;
realign = r;
trimChimera = trim;
+ numNoParents = 0;
+ blastlocation = blas;
+ threadID = tid;
- decalc = new DeCalculator();
-
doPrep();
}
catch(exception& e) {
}
}
//***************************************************************************************************************
+//template=self. The byGroup parameter is used by the MPI-enabled version to read the template with MPI_COMM_SELF instead of MPI_COMM_WORLD
ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
- int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
+ int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid, bool bg) : Chimera() {
try {
+ byGroup = bg;
fastafile = file; templateSeqs = readSeqs(fastafile);
templateFileName = temp;
searchMethod = mode;
realign = r;
trimChimera = trim;
priority = prior;
+ numNoParents = 0;
+ blastlocation = blas;
+ threadID = tid;
- decalc = new DeCalculator();
createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
- //run filter on template
- for (int i = 0; i < templateSeqs.size(); i++) { if (m->control_pressed) { break; } runFilter(templateSeqs[i]); }
-
+ if (searchMethod == "distance") {
+ //createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
+
+ //run filter on template copying templateSeqs into filteredTemplateSeqs
+ for (int i = 0; i < templateSeqs.size(); i++) {
+ if (m->control_pressed) { break; }
+
+ Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
+ runFilter(newSeq);
+ filteredTemplateSeqs.push_back(newSeq);
+ }
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
+ exit(1);
+ }
+}
+//***************************************************************************************************************
+//template=self
+ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
+ int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : Chimera() {
+ try {
+ fastafile = file; templateSeqs = readSeqs(fastafile);
+ templateFileName = temp;
+ searchMethod = mode;
+ kmerSize = k;
+ match = ms;
+ misMatch = mms;
+ window = win;
+ divR = div;
+ minSim = minsim;
+ minCov = mincov;
+ minBS = minbs;
+ minSNP = minsnp;
+ parents = par;
+ iters = it;
+ increment = inc;
+ numWanted = numw;
+ realign = r;
+ trimChimera = trim;
+ priority = prior;
+ numNoParents = 0;
+ blastlocation = blas;
+ threadID = tid;
+
+
+ createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
+ if (searchMethod == "distance") {
+ //createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
+
+ //run filter on template copying templateSeqs into filteredTemplateSeqs
+ for (int i = 0; i < templateSeqs.size(); i++) {
+ if (m->control_pressed) { break; }
+
+ Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
+ runFilter(newSeq);
+ filteredTemplateSeqs.push_back(newSeq);
+ }
+ }
}
catch(exception& e) {
m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
}else if (searchMethod == "blast") {
//generate blastdb
- databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
+ databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3, blastlocation, threadID);
+
+ if (m->control_pressed) { return 0; }
for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); }
databaseLeft->generateDB();
}
}
//***************************************************************************************************************
-vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q, vector<Sequence*>& userTemplateFiltered) {
+vector<Sequence*> ChimeraSlayer::getTemplate(Sequence q, vector<Sequence*>& userTemplateFiltered) {
try {
//when template=self, the query file is sorted from most abundant to least abundant
//userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
vector<Sequence*> userTemplate;
- int myAbund = priority[q->getName()];
+ int myAbund = priority[q.getName()];
for (int i = 0; i < templateSeqs.size(); i++) {
}
}
+ //avoids nuisance error from formatdb for making blank blast database
+ if (userTemplate.size() == 0) {
+ return userTemplate;
+ }
+
string kmerDBNameLeft;
string kmerDBNameRight;
}else if (searchMethod == "blast") {
//generate blastdb
- databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
+ databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3, blastlocation, threadID);
+
+ if (m->control_pressed) { return userTemplate; }
for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; } databaseLeft->addSequence(*userTemplate[i]); }
databaseLeft->generateDB();
//***************************************************************************************************************
ChimeraSlayer::~ChimeraSlayer() {
- delete decalc;
if (templateFileName != "self") {
if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
else if (searchMethod == "blast") { delete databaseLeft; }
out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
}
//***************************************************************************************************************
-Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
+Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc) {
try {
- Sequence* trim = NULL;
- if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
+ Sequence trim;
+ if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
if (chimeraFlags == "yes") {
string chimeraFlag = "no";
if (chimeraFlag == "yes") {
if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
- m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
- outAcc << querySeq->getName() << endl;
+ m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
+ outAcc << querySeq.getName() << endl;
- if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
+ if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
if (trimChimera) {
int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
- string newAligned = trim->getAligned();
+ string newAligned = trim.getAligned();
if (lengthLeft > lengthRight) { //trim right
for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
}else { //trim left
for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
}
- trim->setAligned(newAligned);
+ trim.setAligned(newAligned);
}
}
}
printBlock(chimeraResults[0], chimeraFlag, out);
out << endl;
}else {
- out << querySeq->getName() << "\tno" << endl;
+ out << querySeq.getName() << "\tno" << endl;
}
return trim;
}
}
//***************************************************************************************************************
-Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
+Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
try {
- Sequence* trim = NULL;
+ Sequence trim;
if (trimChimera) {
string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
- trim = new Sequence(leftPiece.trimQuery.getName(), aligned);
+ trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned);
}
if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
if (rightChimeric || leftChimeric) {
- m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
- outAcc << querySeq->getName() << endl;
+ m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
+ outAcc << querySeq.getName() << endl;
- if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
+ if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
if (trimChimera) {
- string newAligned = trim->getAligned();
+ string newAligned = trim.getAligned();
//right side is fine so keep that
if ((leftChimeric) && (!rightChimeric)) {
}
}
- trim->setAligned(newAligned);
+ trim.setAligned(newAligned);
}
}
printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
out << endl;
}else {
- out << querySeq->getName() << "\tno" << endl;
+ out << querySeq.getName() << "\tno" << endl;
}
return trim;
#ifdef USE_MPI
//***************************************************************************************************************
-Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
+Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
try {
MPI_Status status;
bool results = false;
string outAccString = "";
string outputString = "";
- Sequence* trim = NULL;
+ Sequence trim;
if (trimChimera) {
string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
- trim = new Sequence(leftPiece.trimQuery.getName(), aligned);
+ trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned);
}
bool rightChimeric = false;
bool leftChimeric = false;
+
+ cout << endl;
if (chimeraFlag == "yes") {
//which piece is chimeric, or are both
if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
if (rightChimeric || leftChimeric) {
- cout << querySeq->getName() << "\tyes" << endl;
- outAccString += querySeq->getName() + "\n";
+ cout << querySeq.getName() << "\tyes" << endl;
+ outAccString += querySeq.getName() + "\n";
results = true;
- if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
+ if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
//write to accnos file
int length = outAccString.length();
delete buf2;
if (trimChimera) {
- string newAligned = trim->getAligned();
+ string newAligned = trim.getAligned();
//right side is fine so keep that
if ((leftChimeric) && (!rightChimeric)) {
}
}
- trim->setAligned(newAligned);
+ trim.setAligned(newAligned);
}
}
delete buf;
}else {
- outputString += querySeq->getName() + "\tno\n";
+ outputString += querySeq.getName() + "\tno\n";
//write to output file
int length = outputString.length();
}
}
//***************************************************************************************************************
-Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
+Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
try {
MPI_Status status;
bool results = false;
string outAccString = "";
string outputString = "";
- Sequence* trim = NULL;
- if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
+ Sequence trim;
+ if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
if (chimeraFlags == "yes") {
string chimeraFlag = "no";
if (chimeraFlag == "yes") {
if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
- cout << querySeq->getName() << "\tyes" << endl;
- outAccString += querySeq->getName() + "\n";
+ cout << querySeq.getName() << "\tyes" << endl;
+ outAccString += querySeq.getName() + "\n";
results = true;
- if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
+ if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
//write to accnos file
int length = outAccString.length();
int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
- string newAligned = trim->getAligned();
+ string newAligned = trim.getAligned();
if (lengthLeft > lengthRight) { //trim right
for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
}else { //trim left
for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
}
- trim->setAligned(newAligned);
+ trim.setAligned(newAligned);
}
}
}
delete buf;
}else {
- outputString += querySeq->getName() + "\tno\n";
+ outputString += querySeq.getName() + "\tno\n";
//write to output file
int length = outputString.length();
chimeraFlags = "no";
printResults.flag = "no";
- querySeq = query;
+ querySeq = *query;
//you must create a template
vector<Sequence*> thisTemplate;
vector<Sequence*> thisFilteredTemplate;
if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
- else { thisTemplate = getTemplate(query, thisFilteredTemplate); } //fills this template and creates the databases
+ else { thisTemplate = getTemplate(*query, thisFilteredTemplate); } //fills this template and creates the databases
if (m->control_pressed) { return 0; }
-
if (thisTemplate.size() == 0) { return 0; } //not chimeric
//moved this out of maligner - 4/29/11
- vector<Sequence*> refSeqs = getRefSeqs(query, thisTemplate, thisFilteredTemplate);
+ vector<Sequence> refSeqs = getRefSeqs(*query, thisTemplate, thisFilteredTemplate);
Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov);
Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
if (m->control_pressed) { return 0; }
- string chimeraFlag = maligner.getResults(query, decalc);
+ string chimeraFlag = maligner.getResults(*query, decalc);
if (m->control_pressed) { return 0; }
vector<results> Results = maligner.getOutput();
- for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; }
+ //for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; }
if (chimeraFlag == "yes") {
-
+
if (realign) {
vector<string> parents;
for (int i = 0; i < Results.size(); i++) {
realigner.reAlign(query, parents);
}
-
+
+// cout << query->getAligned() << endl;
//get sequence that were given from maligner results
- vector<SeqDist> seqs;
+ vector<SeqCompare> seqs;
map<string, float> removeDups;
map<string, float>::iterator itDup;
map<string, string> parentNameSeq;
map<string, string>::iterator itSeq;
for (int j = 0; j < Results.size(); j++) {
+
float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
//only add if you are not a duplicate
- itDup = removeDups.find(Results[j].parent);
- if (itDup == removeDups.end()) { //this is not duplicate
- removeDups[Results[j].parent] = dist;
- parentNameSeq[Results[j].parent] = Results[j].parentAligned;
- }else if (dist > itDup->second) { //is this a stronger number for this parent
- removeDups[Results[j].parent] = dist;
- parentNameSeq[Results[j].parent] = Results[j].parentAligned;
+// cout << Results[j].parent << '\t' << Results[j].regionEnd << '\t' << Results[j].regionStart << '\t' << Results[j].regionEnd - Results[j].regionStart +1 << '\t' << Results[j].queryToParentLocal << '\t' << dist << endl;
+
+
+ if(Results[j].queryToParentLocal >= 90){ //local match has to be over 90% similarity
+
+ itDup = removeDups.find(Results[j].parent);
+ if (itDup == removeDups.end()) { //this is not duplicate
+ removeDups[Results[j].parent] = dist;
+ parentNameSeq[Results[j].parent] = Results[j].parentAligned;
+ }else if (dist > itDup->second) { //is this a stronger number for this parent
+ removeDups[Results[j].parent] = dist;
+ parentNameSeq[Results[j].parent] = Results[j].parentAligned;
+ }
+
}
+
}
for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
itSeq = parentNameSeq.find(itDup->first);
- Sequence* seq = new Sequence(itDup->first, itSeq->second);
+ Sequence seq(itDup->first, itSeq->second);
- SeqDist member;
+ SeqCompare member;
member.seq = seq;
member.dist = itDup->second;
-
seqs.push_back(member);
}
//limit number of parents to explore - default 3
if (Results.size() > parents) {
//sort by distance
- sort(seqs.begin(), seqs.end(), compareSeqDist);
+ sort(seqs.begin(), seqs.end(), compareSeqCompare);
//prioritize larger, more similar sequence fragments
reverse(seqs.begin(), seqs.end());
- for (int k = seqs.size()-1; k > (parents-1); k--) {
- delete seqs[k].seq;
- seqs.pop_back();
- }
+ //for (int k = seqs.size()-1; k > (parents-1); k--) {
+ // delete seqs[k].seq;
+ //seqs.pop_back();
+ //}
}
//put seqs into vector to send to slayer
- vector<Sequence*> seqsForSlayer;
- for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); }
- if (m->control_pressed) { for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } return 0; }
+// cout << query->getAligned() << endl;
+ vector<Sequence> seqsForSlayer;
+ for (int k = 0; k < seqs.size(); k++) {
+// cout << seqs[k].seq->getAligned() << endl;
+ seqsForSlayer.push_back(seqs[k].seq);
+// cout << seqs[k].seq->getName() << endl;
+ }
+
+ if (m->control_pressed) { return 0; }
//send to slayer
- chimeraFlags = slayer.getResults(query, seqsForSlayer);
+ chimeraFlags = slayer.getResults(*query, seqsForSlayer);
if (m->control_pressed) { return 0; }
chimeraResults = slayer.getOutput();
printResults.results = chimeraResults;
//free memory
- for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
+ //for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
}
-
+ //cout << endl << endl;
return 0;
}
catch(exception& e) {
//***************************************************************************************************************
void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
try {
- out << querySeq->getName() << '\t';
+ out << querySeq.getName() << '\t';
out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t';
out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
try {
if ((leftChimeric) && (!rightChimeric)) { //print left
- out << querySeq->getName() << '\t';
+ out << querySeq.getName() << '\t';
out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
}else if ((!leftChimeric) && (rightChimeric)) { //print right
- out << querySeq->getName() << '\t';
+ out << querySeq.getName() << '\t';
out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
}else { //print both results
if (leftdata.flag == "yes") {
- out << querySeq->getName() + "_LEFT" << '\t';
+ out << querySeq.getName() + "_LEFT" << '\t';
out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
if (rightdata.flag == "yes") {
if (leftdata.flag == "yes") { out << endl; }
- out << querySeq->getName() + "_RIGHT"<< '\t';
+ out << querySeq.getName() + "_RIGHT"<< '\t';
out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
string out = "";
if ((leftChimeric) && (!rightChimeric)) { //get left
- out += querySeq->getName() + "\t";
+ out += querySeq.getName() + "\t";
out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
}else if ((!leftChimeric) && (rightChimeric)) { //print right
- out += querySeq->getName() + "\t";
+ out += querySeq.getName() + "\t";
out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
}else { //print both results
if (leftdata.flag == "yes") {
- out += querySeq->getName() + "_LEFT\t";
+ out += querySeq.getName() + "_LEFT\t";
out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
if (rightdata.flag == "yes") {
if (leftdata.flag == "yes") { out += "\n"; }
- out += querySeq->getName() + "_RIGHT\t";
+ out += querySeq.getName() + "_RIGHT\t";
out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
string outputString = "";
- outputString += querySeq->getName() + "\t";
+ outputString += querySeq.getName() + "\t";
outputString += data.parentA.getName() + "\t" + data.parentB.getName() + "\t";
outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
}
}
//***************************************************************************************************************
-vector<Sequence*> ChimeraSlayer::getRefSeqs(Sequence* q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
+vector<Sequence> ChimeraSlayer::getRefSeqs(Sequence q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
try {
- vector<Sequence*> refSeqs;
+ vector<Sequence> refSeqs;
if (searchMethod == "distance") {
//find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
- Sequence* newSeq = new Sequence(q->getName(), q->getAligned());
+ Sequence* newSeq = new Sequence(q.getName(), q.getAligned());
runFilter(newSeq);
- refSeqs = decalc->findClosest(newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim);
+ refSeqs = decalc.findClosest(*newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim);
delete newSeq;
}else if (searchMethod == "blast") {
refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
}
}
//***************************************************************************************************************/
-vector<Sequence*> ChimeraSlayer::getBlastSeqs(Sequence* q, vector<Sequence*>& db, int num) {
+vector<Sequence> ChimeraSlayer::getBlastSeqs(Sequence q, vector<Sequence*>& db, int num) {
try {
- vector<Sequence*> refResults;
+ vector<Sequence> refResults;
//get parts of query
- string queryUnAligned = q->getUnaligned();
+ string queryUnAligned = q.getUnaligned();
string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
-
- Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
- Sequence* queryRight = new Sequence(q->getName(), rightQuery);
+//cout << "whole length = " << queryUnAligned.length() << '\t' << "left length = " << leftQuery.length() << '\t' << "right length = "<< rightQuery.length() << endl;
+ Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
+ Sequence* queryRight = new Sequence(q.getName(), rightQuery);
vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
- cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
- vector<int> smaller;
- vector<int> larger;
+
- if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; larger = tempIndexesLeft; }
- else { smaller = tempIndexesLeft; larger = tempIndexesRight; }
+ //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
+// vector<int> smaller;
+// vector<int> larger;
+//
+// if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; larger = tempIndexesLeft; }
+// else { smaller = tempIndexesLeft; larger = tempIndexesRight; }
//merge results
map<int, int> seen;
map<int, int>::iterator it;
vector<int> mergedResults;
- for (int i = 0; i < smaller.size(); i++) {
+
+ int index = 0;
+// for (int i = 0; i < smaller.size(); i++) {
+ while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
+
if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
//add left if you havent already
- it = seen.find(smaller[i]);
+ it = seen.find(tempIndexesLeft[index]);
if (it == seen.end()) {
- mergedResults.push_back(smaller[i]);
- seen[smaller[i]] = smaller[i];
+ mergedResults.push_back(tempIndexesLeft[index]);
+ seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
}
//add right if you havent already
- it = seen.find(larger[i]);
+ it = seen.find(tempIndexesRight[index]);
if (it == seen.end()) {
- mergedResults.push_back(larger[i]);
- seen[larger[i]] = larger[i];
+ mergedResults.push_back(tempIndexesRight[index]);
+ seen[tempIndexesRight[index]] = tempIndexesRight[index];
}
+ index++;
}
+
- for (int i = smaller.size(); i < larger.size(); i++) {
+ for (int i = index; i < tempIndexesLeft.size(); i++) {
if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
//add left if you haven't already
- it = seen.find(larger[i]);
+ it = seen.find(tempIndexesLeft[i]);
if (it == seen.end()) {
- mergedResults.push_back(larger[i]);
- seen[larger[i]] = larger[i];
+ mergedResults.push_back(tempIndexesLeft[i]);
+ seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
}
}
+ for (int i = index; i < tempIndexesRight.size(); i++) {
+ if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
+
+ //add right if you havent already
+ it = seen.find(tempIndexesRight[i]);
+ if (it == seen.end()) {
+ mergedResults.push_back(tempIndexesRight[i]);
+ seen[tempIndexesRight[i]] = tempIndexesRight[i];
+ }
+ }
+ //string qname = q->getName().substr(0, q->getName().find_last_of('_'));
+ //cout << qname << endl;
+
+ if (mergedResults.size() == 0) { numNoParents++; }
+
for (int i = 0; i < mergedResults.size(); i++) {
- //cout << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl;
- if (db[mergedResults[i]]->getName() != q->getName()) {
- Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
+ //cout << q->getName() << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl;
+ if (db[mergedResults[i]]->getName() != q.getName()) {
+ Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
refResults.push_back(temp);
-
}
}
-
+ //cout << endl << endl;
+
delete queryRight;
delete queryLeft;
}
}
//***************************************************************************************************************
-vector<Sequence*> ChimeraSlayer::getKmerSeqs(Sequence* q, vector<Sequence*>& db, int num) {
+vector<Sequence> ChimeraSlayer::getKmerSeqs(Sequence q, vector<Sequence*>& db, int num) {
try {
- vector<Sequence*> refResults;
+ vector<Sequence> refResults;
//get parts of query
- string queryUnAligned = q->getUnaligned();
+ string queryUnAligned = q.getUnaligned();
string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
- Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
- Sequence* queryRight = new Sequence(q->getName(), rightQuery);
+ Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
+ Sequence* queryRight = new Sequence(q.getName(), rightQuery);
vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num);
vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num);
//merge results
map<int, int> seen;
map<int, int>::iterator it;
- vector<int> mergedResults;
- for (int i = 0; i < tempIndexesLeft.size(); i++) {
+ vector<int> mergedResults;
+
+ int index = 0;
+ // for (int i = 0; i < smaller.size(); i++) {
+ while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
//add left if you havent already
+ it = seen.find(tempIndexesLeft[index]);
+ if (it == seen.end()) {
+ mergedResults.push_back(tempIndexesLeft[index]);
+ seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
+ }
+
+ //add right if you havent already
+ it = seen.find(tempIndexesRight[index]);
+ if (it == seen.end()) {
+ mergedResults.push_back(tempIndexesRight[index]);
+ seen[tempIndexesRight[index]] = tempIndexesRight[index];
+ }
+ index++;
+ }
+
+
+ for (int i = index; i < tempIndexesLeft.size(); i++) {
+ if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
+
+ //add left if you haven't already
it = seen.find(tempIndexesLeft[i]);
if (it == seen.end()) {
mergedResults.push_back(tempIndexesLeft[i]);
seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
}
+ }
+
+ for (int i = index; i < tempIndexesRight.size(); i++) {
+ if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
//add right if you havent already
it = seen.find(tempIndexesRight[i]);
}
}
- //numWanted = mergedResults.size();
-
- //cout << q->getName() << endl;
-
for (int i = 0; i < mergedResults.size(); i++) {
- //cout << db[mergedResults[i]]->getName() << endl;
- if (db[mergedResults[i]]->getName() != q->getName()) {
- Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
+ //cout << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl;
+ if (db[mergedResults[i]]->getName() != q.getName()) {
+ Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
refResults.push_back(temp);
+
}
}
+
//cout << endl;
delete queryRight;
delete queryLeft;