}
}
//***************************************************************************************************************
-ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string name, string mode, int k, int ms, int mms, int win, float div,
+ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
try {
fastafile = file; templateSeqs = readSeqs(fastafile);
numWanted = numw;
realign = r;
trimChimera = trim;
+ priority = prior;
decalc = new DeCalculator();
createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
- //run filter on template
- for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; } templateSeqs.clear();
-
+ if (searchMethod == "distance") {
+ createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
+
+ //run filter on template copying templateSeqs into filteredTemplateSeqs
+ for (int i = 0; i < templateSeqs.size(); i++) {
+ if (m->control_pressed) { break; }
+
+ Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
+ runFilter(newSeq);
+ filteredTemplateSeqs.push_back(newSeq);
+ }
+ }
}
catch(exception& e) {
m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
//***************************************************************************************************************
int ChimeraSlayer::doPrep() {
try {
+ if (searchMethod == "distance") {
+ //read in all query seqs
+ vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
- //read in all query seqs
- vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
+ vector<Sequence*> temp = templateSeqs;
+ for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
- vector<Sequence*> temp = templateSeqs;
- for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
+ createFilter(temp, 0.0); //just removed columns where all seqs have a gap
- createFilter(temp, 0.0); //just removed columns where all seqs have a gap
+ for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
- for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
-
- if (m->control_pressed) { return 0; }
-
- //run filter on template
- for (int i = 0; i < templateSeqs.size(); i++) { if (m->control_pressed) { return 0; } runFilter(templateSeqs[i]); }
+ if (m->control_pressed) { return 0; }
+ //run filter on template copying templateSeqs into filteredTemplateSeqs
+ for (int i = 0; i < templateSeqs.size(); i++) {
+ if (m->control_pressed) { return 0; }
+
+ Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
+ runFilter(newSeq);
+ filteredTemplateSeqs.push_back(newSeq);
+ }
+ }
string kmerDBNameLeft;
string kmerDBNameRight;
}else if (searchMethod == "blast") {
//generate blastdb
- databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
+ databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3);
for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); }
databaseLeft->generateDB();
}
}
//***************************************************************************************************************
-int ChimeraSlayer::getTemplate(Sequence* q) {
+vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q, vector<Sequence*>& userTemplateFiltered) {
try {
+ //when template=self, the query file is sorted from most abundant to least abundant
+ //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
+ vector<Sequence*> userTemplate;
+
+ int myAbund = priority[q->getName()];
+
+ for (int i = 0; i < templateSeqs.size(); i++) {
+
+ if (m->control_pressed) { return userTemplate; }
+
+ //have I reached a sequence with the same abundance as myself?
+ if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
+
+ //if it is not chimeric, add it
+ if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) {
+ userTemplate.push_back(templateSeqs[i]);
+ if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
+ }
+ }
+
string kmerDBNameLeft;
string kmerDBNameRight;
#ifdef USE_MPI
for (int i = 0; i < userTemplate.size(); i++) {
- if (m->control_pressed) { return 0; }
+ if (m->control_pressed) { return userTemplate; }
string leftFrag = userTemplate[i]->getUnaligned();
leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
databaseLeft->setNumSeqs(userTemplate.size());
for (int i = 0; i < userTemplate.size(); i++) {
- if (m->control_pressed) { return 0; }
+ if (m->control_pressed) { return userTemplate; }
string rightFrag = userTemplate[i]->getUnaligned();
rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
for (int i = 0; i < userTemplate.size(); i++) {
- if (m->control_pressed) { return 0; }
+ if (m->control_pressed) { return userTemplate; }
string leftFrag = userTemplate[i]->getUnaligned();
leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
databaseLeft->setNumSeqs(userTemplate.size());
for (int i = 0; i < userTemplate.size(); i++) {
- if (m->control_pressed) { return 0; }
+ if (m->control_pressed) { return userTemplate; }
string rightFrag = userTemplate[i]->getUnaligned();
rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
}else if (searchMethod == "blast") {
//generate blastdb
- databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
+ databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3);
- for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return 0; } databaseLeft->addSequence(*userTemplate[i]); }
+ for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; } databaseLeft->addSequence(*userTemplate[i]); }
databaseLeft->generateDB();
databaseLeft->setNumSeqs(userTemplate.size());
}
- return 0;
+ return userTemplate;
}
catch(exception& e) {
if (templateFileName != "self") {
if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
else if (searchMethod == "blast") { delete databaseLeft; }
- }else {
- //delete userTemplate
- for (int i = 0; i < userTemplate.size(); i++) {
- delete userTemplate[i];
- }
- userTemplate.clear();
}
}
//***************************************************************************************************************
m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
outAcc << querySeq->getName() << endl;
+ if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
+
if (trimChimera) {
- int lengthLeft = spotMap[chimeraResults[0].winLEnd] - spotMap[chimeraResults[0].winLStart];
- int lengthRight = spotMap[chimeraResults[0].winREnd] - spotMap[chimeraResults[0].winRStart];
+ int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
+ int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
string newAligned = trim->getAligned();
if (lengthLeft > lengthRight) { //trim right
- for (int i = (spotMap[chimeraResults[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
+ for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
}else { //trim left
- for (int i = 0; i < spotMap[chimeraResults[0].winLEnd]; i++) { newAligned[i] = '.'; }
+ for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
}
trim->setAligned(newAligned);
}
out << endl;
}else {
out << querySeq->getName() << "\tno" << endl;
- if (templateFileName == "self") {
- Sequence* temp = new Sequence(trimQuery.getName(), trimQuery.getAligned());
- runFilter(temp);
- userTemplate.push_back(temp);
- }
}
return trim;
m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
outAcc << querySeq->getName() << endl;
+ if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
+
if (trimChimera) {
string newAligned = trim->getAligned();
//right side is fine so keep that
if ((leftChimeric) && (!rightChimeric)) {
- for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
+ for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
}else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
- for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
+ for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
}else { //both sides are chimeric, keep longest piece
- int lengthLeftLeft = leftPiece.spotMap[leftPiece.results[0].winLEnd] - leftPiece.spotMap[leftPiece.results[0].winLStart];
- int lengthLeftRight = leftPiece.spotMap[leftPiece.results[0].winREnd] - leftPiece.spotMap[leftPiece.results[0].winRStart];
+ int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
+ int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
int length = lengthLeftLeft;
if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
- int lengthRightLeft = rightPiece.spotMap[rightPiece.results[0].winLEnd] - rightPiece.spotMap[rightPiece.results[0].winLStart];
- int lengthRightRight = rightPiece.spotMap[rightPiece.results[0].winREnd] - rightPiece.spotMap[rightPiece.results[0].winRStart];
+ int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
+ int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
if (lengthRightRight > length) { longest = 4; }
if (longest == 1) { //leftleft
- for (int i = (leftPiece.spotMap[leftPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
+ for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
}else if (longest == 2) { //leftright
//get rid of leftleft
- for (int i = (leftPiece.spotMap[leftPiece.results[0].winLStart]-1); i < (leftPiece.spotMap[leftPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
+ for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
//get rid of right
- for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
+ for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
}else if (longest == 3) { //rightleft
//get rid of left
- for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
+ for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
//get rid of rightright
- for (int i = (rightPiece.spotMap[rightPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
+ for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
}else { //rightright
//get rid of left
- for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
+ for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
//get rid of rightleft
- for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < (rightPiece.spotMap[rightPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
+ for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
}
}
out << endl;
}else {
out << querySeq->getName() << "\tno" << endl;
- if (templateFileName == "self") {
- Sequence* temp = new Sequence(trimQuery.getName(), trimQuery.getAligned());
- runFilter(temp);
- userTemplate.push_back(temp);
- }
}
return trim;
outAccString += querySeq->getName() + "\n";
results = true;
+ if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
+
//write to accnos file
int length = outAccString.length();
char* buf2 = new char[length];
//right side is fine so keep that
if ((leftChimeric) && (!rightChimeric)) {
- for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
+ for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
}else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
- for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
+ for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
}else { //both sides are chimeric, keep longest piece
- int lengthLeftLeft = leftPiece.spotMap[leftPiece.results[0].winLEnd] - leftPiece.spotMap[leftPiece.results[0].winLStart];
- int lengthLeftRight = leftPiece.spotMap[leftPiece.results[0].winREnd] - leftPiece.spotMap[leftPiece.results[0].winRStart];
+ int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
+ int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
int length = lengthLeftLeft;
if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
- int lengthRightLeft = rightPiece.spotMap[rightPiece.results[0].winLEnd] - rightPiece.spotMap[rightPiece.results[0].winLStart];
- int lengthRightRight = rightPiece.spotMap[rightPiece.results[0].winREnd] - rightPiece.spotMap[rightPiece.results[0].winRStart];
+ int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
+ int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
if (lengthRightRight > length) { longest = 4; }
if (longest == 1) { //leftleft
- for (int i = (leftPiece.spotMap[leftPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
+ for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
}else if (longest == 2) { //leftright
//get rid of leftleft
- for (int i = (leftPiece.spotMap[leftPiece.results[0].winLStart]-1); i < (leftPiece.spotMap[leftPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
+ for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
//get rid of right
- for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
+ for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
}else if (longest == 3) { //rightleft
//get rid of left
- for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
+ for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
//get rid of rightright
- for (int i = (rightPiece.spotMap[rightPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
+ for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
}else { //rightright
//get rid of left
- for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
+ for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
//get rid of rightleft
- for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < (rightPiece.spotMap[rightPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
+ for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
}
}
MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
delete buf;
-
- if (template == "self") {
- Sequence temp = new Sequence(trimQuery.getName(), trimQuery.getAligned());
- runFilter(temp);
- userTemplate.push_back(temp);
- }
}
outAccString += querySeq->getName() + "\n";
results = true;
+ if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
+
//write to accnos file
int length = outAccString.length();
char* buf2 = new char[length];
delete buf2;
if (trimChimera) {
- int lengthLeft = spotMap[chimeraResults[0].winLEnd] - spotMap[chimeraResults[0].winLStart];
- int lengthRight = spotMap[chimeraResults[0].winREnd] - spotMap[chimeraResults[0].winRStart];
+ int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
+ int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
string newAligned = trim->getAligned();
if (lengthLeft > lengthRight) { //trim right
- for (int i = (spotMap[chimeraResults[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
+ for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
}else { //trim left
- for (int i = 0; i < (spotMap[chimeraResults[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
+ for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
}
trim->setAligned(newAligned);
}
MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
delete buf;
-
- if (template == "self") {
- Sequence temp = new Sequence(trimQuery.getName(), trimQuery.getAligned());
- runFilter(temp);
- userTemplate.push_back(temp);
- }
}
return trim;
chimeraFlags = "no";
printResults.flag = "no";
-
- //filter query
- spotMap = runFilter(query);
- printResults.spotMap = spotMap;
querySeq = query;
//you must create a template
vector<Sequence*> thisTemplate;
- if (templateFileName != "self") { thisTemplate = templateSeqs; }
- else { getTemplate(query); thisTemplate = userTemplate; } //fills this template and creates the databases
+ vector<Sequence*> thisFilteredTemplate;
+ if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
+ else { thisTemplate = getTemplate(query, thisFilteredTemplate); } //fills this template and creates the databases
if (m->control_pressed) { return 0; }
if (thisTemplate.size() == 0) { return 0; } //not chimeric
- //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
- Maligner maligner(thisTemplate, numWanted, match, misMatch, divR, minSim, minCov, searchMethod, databaseLeft, databaseRight);
- Slayer slayer(window, increment, minSim, divR, iters, minSNP);
+ //moved this out of maligner - 4/29/11
+ vector<Sequence*> refSeqs = getRefSeqs(query, thisTemplate, thisFilteredTemplate);
+
+ Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov);
+ Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
if (templateFileName == "self") {
if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
if (m->control_pressed) { return 0; }
string chimeraFlag = maligner.getResults(query, decalc);
-
+
if (m->control_pressed) { return 0; }
vector<results> Results = maligner.getOutput();
-
- if (realign) {
- ChimeraReAligner realigner(thisTemplate, match, misMatch);
- realigner.reAlign(query, Results);
- }
-
+
+ for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; }
+
if (chimeraFlag == "yes") {
-
+
+ if (realign) {
+ vector<string> parents;
+ for (int i = 0; i < Results.size(); i++) {
+ parents.push_back(Results[i].parentAligned);
+ }
+
+ ChimeraReAligner realigner;
+ realigner.reAlign(query, parents);
+
+ }
+
//get sequence that were given from maligner results
vector<SeqDist> seqs;
map<string, float> removeDups;
seqs.pop_back();
}
}
-
+
//put seqs into vector to send to slayer
vector<Sequence*> seqsForSlayer;
-
for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); }
- //mask then send to slayer...
- if (seqMask != "") {
- decalc->setMask(seqMask);
-
- //mask querys
- decalc->runMask(query);
-
- //mask parents
- for (int k = 0; k < seqsForSlayer.size(); k++) {
- decalc->runMask(seqsForSlayer[k]);
- }
-
- spotMap = decalc->getMaskMap();
- }
-
if (m->control_pressed) { for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } return 0; }
//send to slayer
if (m->control_pressed) { return 0; }
chimeraResults = slayer.getOutput();
- //free memory
- for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
-
- printResults.spotMap = spotMap;
printResults.flag = chimeraFlags;
printResults.results = chimeraResults;
+
+ //free memory
+ for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
}
-
+ //cout << endl << endl;
return 0;
}
catch(exception& e) {
out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
- out << flag << '\t' << spotMap[data.winLStart] << "-" << spotMap[data.winLEnd] << '\t' << spotMap[data.winRStart] << "-" << spotMap[data.winREnd] << '\t';
+ out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
}
catch(exception& e) {
out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
- out << flag << '\t' << leftdata.spotMap[leftdata.results[0].winLStart] << "-" << leftdata.spotMap[leftdata.results[0].winLEnd] << '\t' << leftdata.spotMap[leftdata.results[0].winRStart] << "-" << leftdata.spotMap[leftdata.results[0].winREnd] << '\t';
+ out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
}else if ((!leftChimeric) && (rightChimeric)) { //print right
out << querySeq->getName() << '\t';
out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
- out << flag << '\t' << rightdata.spotMap[rightdata.results[0].winLStart] << "-" << rightdata.spotMap[rightdata.results[0].winLEnd] << '\t' << rightdata.spotMap[rightdata.results[0].winRStart] << "-" << rightdata.spotMap[rightdata.results[0].winREnd] << '\t';
+ out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
}else { //print both results
if (leftdata.flag == "yes") {
out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
- out << flag << '\t' << leftdata.spotMap[leftdata.results[0].winLStart] << "-" << leftdata.spotMap[leftdata.results[0].winLEnd] << '\t' << leftdata.spotMap[leftdata.results[0].winRStart] << "-" << leftdata.spotMap[leftdata.results[0].winREnd] << '\t';
+ out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
}
if (rightdata.flag == "yes") {
out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
- out << flag << '\t' << rightdata.spotMap[rightdata.results[0].winLStart] << "-" << rightdata.spotMap[rightdata.results[0].winLEnd] << '\t' << rightdata.spotMap[rightdata.results[0].winRStart] << "-" << rightdata.spotMap[rightdata.results[0].winREnd] << '\t';
+ out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
}
}
out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
- out += flag + "\t" + toString(leftdata.spotMap[leftdata.results[0].winLStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winLEnd]) + "\t" + toString(leftdata.spotMap[leftdata.results[0].winRStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winREnd]) + "\t";
+ out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
}else if ((!leftChimeric) && (rightChimeric)) { //print right
out += querySeq->getName() + "\t";
out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
- out += flag + "\t" + toString(rightdata.spotMap[rightdata.results[0].winLStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winLEnd]) + "\t" + toString(rightdata.spotMap[rightdata.results[0].winRStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winREnd]) + "\t";
+ out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
}else { //print both results
out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
- out += flag + "\t" + toString(leftdata.spotMap[leftdata.results[0].winLStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winLEnd]) + "\t" + toString(leftdata.spotMap[leftdata.results[0].winRStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winREnd]) + "\t";
+ out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
}
if (rightdata.flag == "yes") {
out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
- out += flag + "\t" + toString(rightdata.spotMap[rightdata.results[0].winLStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winLEnd]) + "\t" + toString(rightdata.spotMap[rightdata.results[0].winRStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winREnd]) + "\t";
+ out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
}
}
outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
- outputString += flag + "\t" + toString(spotMap[data.winLStart]) + "-" + toString(spotMap[data.winLEnd]) + "\t" + toString(spotMap[data.winRStart]) + "-" + toString(spotMap[data.winREnd]) + "\t";
+ outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
return outputString;
}
exit(1);
}
}
+//***************************************************************************************************************
+// Selects the reference sequences that query q will be compared against,
+// dispatching on searchMethod ("distance", "blast" or "kmer").
+//
+// q                    - query sequence; it is not modified here
+// thisTemplate         - template sequences handed to every search method
+// thisFilteredTemplate - filtered template sequences, only used by the distance search
+//
+// Returns whatever the selected search produced. Any other searchMethod value
+// prints a message and exits.
+vector<Sequence*> ChimeraSlayer::getRefSeqs(Sequence* q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
+	try {
+
+		//blast and kmer searches are only handed thisTemplate
+		if (searchMethod == "blast") { return getBlastSeqs(q, thisTemplate, numWanted); }
+		if (searchMethod == "kmer") { return getKmerSeqs(q, thisTemplate, numWanted); }
+
+		//should never get here
+		if (searchMethod != "distance") { m->mothurOut("not valid search."); exit(1); }
+
+		//distance search: filter a copy of the query so the caller's sequence is left untouched,
+		//then find the closest seqs to it in the template
+		Sequence* filteredQuery = new Sequence(q->getName(), q->getAligned());
+		runFilter(filteredQuery);
+		vector<Sequence*> closest = decalc->findClosest(filteredQuery, thisTemplate, thisFilteredTemplate, numWanted, minSim);
+		delete filteredQuery;
+
+		return closest;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
+		exit(1);
+	}
+}
//***************************************************************************************************************/
+// Finds candidate reference sequences for query q with megablast.
+// The first and last thirds of q's unaligned sequence are searched separately,
+// the two hit lists are merged without duplicates, and a copy of every hit in db
+// whose name differs from q's is returned.
+//
+// q   - query sequence
+// db  - template sequences; the indexes returned by the search are used to index this vector
+// num - number of hits wanted; num+1 is requested from each search
+//
+// Returns Sequence copies allocated with new, so the caller must delete them.
+// If m->control_pressed is set while merging, an empty vector is returned.
+vector<Sequence*> ChimeraSlayer::getBlastSeqs(Sequence* q, vector<Sequence*>& db, int num) {
+	try {
+
+		vector<Sequence*> refResults;
+
+		//get parts of query
+		string queryUnAligned = q->getUnaligned();
+		string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
+		string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
+
+		//scoped objects, so every exit path below releases them without an explicit delete
+		Sequence queryLeft(q->getName(), leftQuery);
+		Sequence queryRight(q->getName(), rightQuery);
+
+		//NOTE(review): both fragments are searched against databaseLeft - presumably the only blast database that is built, confirm
+		vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(&queryLeft, num+1, minSim);
+		vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(&queryRight, num+1, minSim);
+
+		//refer to the hit lists by size instead of copying them; on a tie the left list counts as the smaller one
+		bool rightIsSmaller = (tempIndexesRight.size() < tempIndexesLeft.size());
+		const vector<int>& smaller = rightIsSmaller ? tempIndexesRight : tempIndexesLeft;
+		const vector<int>& larger = rightIsSmaller ? tempIndexesLeft : tempIndexesRight;
+
+		//merge results
+		map<int, int> seen;
+		vector<int> mergedResults;
+
+		//interleave the two lists while both still have hits
+		for (size_t i = 0; i < smaller.size(); i++) {
+			if (m->control_pressed) { return refResults; }
+
+			//add the hit from the smaller list if you havent already
+			if (seen.count(smaller[i]) == 0) {
+				mergedResults.push_back(smaller[i]);
+				seen[smaller[i]] = smaller[i];
+			}
+
+			//add the hit from the larger list if you havent already
+			if (seen.count(larger[i]) == 0) {
+				mergedResults.push_back(larger[i]);
+				seen[larger[i]] = larger[i];
+			}
+		}
+
+		//then take whatever is left of the larger list
+		for (size_t i = smaller.size(); i < larger.size(); i++) {
+			if (m->control_pressed) { return refResults; }
+
+			if (seen.count(larger[i]) == 0) {
+				mergedResults.push_back(larger[i]);
+				seen[larger[i]] = larger[i];
+			}
+		}
+
+		//copy the hits, skipping any template sequence that carries the query's own name
+		for (size_t i = 0; i < mergedResults.size(); i++) {
+			Sequence* hit = db[mergedResults[i]];
+			if (hit->getName() != q->getName()) {
+				refResults.push_back(new Sequence(hit->getName(), hit->getAligned()));
+			}
+		}
+
+		return refResults;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
+		exit(1);
+	}
+}
+//***************************************************************************************************************
+// Finds candidate reference sequences for query q using the kmer databases.
+// The first third of q's unaligned sequence is searched against databaseLeft and
+// the last third against databaseRight. The two hit lists are merged without
+// duplicates, and a copy of every hit in db whose name differs from q's is returned.
+//
+// q   - query sequence
+// db  - template sequences; the indexes returned by the databases are used to index this vector
+// num - number of hits requested from each database
+//
+// Returns Sequence copies allocated with new, so the caller must delete them.
+// If m->control_pressed is set while merging, an empty vector is returned.
+vector<Sequence*> ChimeraSlayer::getKmerSeqs(Sequence* q, vector<Sequence*>& db, int num) {
+	try {
+		vector<Sequence*> refResults;
+
+		//get parts of query
+		string queryUnAligned = q->getUnaligned();
+		string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
+		string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
+
+		//scoped objects, so every exit path below releases them without an explicit delete
+		Sequence queryLeft(q->getName(), leftQuery);
+		Sequence queryRight(q->getName(), rightQuery);
+
+		vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(&queryLeft, num);
+		vector<int> tempIndexesRight = databaseRight->findClosestSequences(&queryRight, num);
+
+		//merge results, alternating left and right hits.
+		//the two lists are not guaranteed to be the same length, so each index is bounds checked
+		//and the tail of the longer list is kept, the same way getBlastSeqs merges its hits.
+		map<int, int> seen;
+		vector<int> mergedResults;
+
+		size_t longest = tempIndexesLeft.size();
+		if (tempIndexesRight.size() > longest) { longest = tempIndexesRight.size(); }
+
+		for (size_t i = 0; i < longest; i++) {
+
+			if (m->control_pressed) { return refResults; }
+
+			//add left if you havent already
+			if ((i < tempIndexesLeft.size()) && (seen.count(tempIndexesLeft[i]) == 0)) {
+				mergedResults.push_back(tempIndexesLeft[i]);
+				seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
+			}
+
+			//add right if you havent already
+			if ((i < tempIndexesRight.size()) && (seen.count(tempIndexesRight[i]) == 0)) {
+				mergedResults.push_back(tempIndexesRight[i]);
+				seen[tempIndexesRight[i]] = tempIndexesRight[i];
+			}
+		}
+
+		//copy the hits, skipping any template sequence that carries the query's own name
+		for (size_t i = 0; i < mergedResults.size(); i++) {
+			Sequence* hit = db[mergedResults[i]];
+			if (hit->getName() != q->getName()) {
+				refResults.push_back(new Sequence(hit->getName(), hit->getAligned()));
+			}
+		}
+
+		return refResults;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
+		exit(1);
+	}
+}
+//***************************************************************************************************************
+