}
}
//***************************************************************************************************************
-ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string name, string mode, string abunds, int k, int ms, int mms, int win, float div,
+ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
try {
fastafile = file; templateSeqs = readSeqs(fastafile);
increment = inc;
numWanted = numw;
realign = r;
- includeAbunds = abunds;
trimChimera = trim;
-
- //read name file and create nameMapRank
- readNameFile(name);
+ priority = prior;
decalc = new DeCalculator();
createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
//run filter on template
- for (int i = 0; i < templateSeqs.size(); i++) { runFilter(templateSeqs[i]); }
+ for (int i = 0; i < templateSeqs.size(); i++) { if (m->control_pressed) { break; } runFilter(templateSeqs[i]); }
+
}
catch(exception& e) {
exit(1);
}
}
-//***************************************************************************************************************
-int ChimeraSlayer::readNameFile(string name) {
- try {
- ifstream in;
- m->openInputFile(name, in);
-
- int maxRank = 0;
- int minRank = 10000000;
-
- while(!in.eof()){
-
- if (m->control_pressed) { in.close(); return 0; }
-
- string thisname, repnames;
-
- in >> thisname; m->gobble(in); //read from first column
- in >> repnames; //read from second column
-
- map<string, vector<string> >::iterator it = nameMapRank.find(thisname);
- if (it == nameMapRank.end()) {
-
- vector<string> splitRepNames;
- m->splitAtComma(repnames, splitRepNames);
-
- nameMapRank[thisname] = splitRepNames;
-
- if (splitRepNames.size() > maxRank) { maxRank = splitRepNames.size(); }
- if (splitRepNames.size() < minRank) { minRank = splitRepNames.size(); }
-
- }else{ m->mothurOut(thisname + " is already in namesfile. I will use first definition."); m->mothurOutEndLine(); }
-
- m->gobble(in);
- }
- in.close();
-
- //sanity check to make sure files match
- for (int i = 0; i < templateSeqs.size(); i++) {
- map<string, vector<string> >::iterator it = nameMapRank.find(templateSeqs[i]->getName());
-
- if (it == nameMapRank.end()) { m->mothurOut("[ERROR]: " + templateSeqs[i]->getName() + " is not in namesfile, but is in fastafile. Every name in fasta file must be in first column of names file."); m->mothurOutEndLine(); m->control_pressed = true; }
- }
-
- if (maxRank == minRank) { m->mothurOut("[ERROR]: all sequences in namesfile have the same abundance, aborting."); m->mothurOutEndLine(); m->control_pressed = true; }
-
- return 0;
-
- }
- catch(exception& e) {
- m->errorOut(e, "ChimeraSlayer", "readNameFile");
- exit(1);
- }
-}
-
//***************************************************************************************************************
int ChimeraSlayer::doPrep() {
try {
}else if (searchMethod == "blast") {
//generate blastdb
- databaseLeft = new BlastDB(-2.0, -1.0, match, misMatch);
+ databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
+
for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); }
databaseLeft->generateDB();
databaseLeft->setNumSeqs(templateSeqs.size());
vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q) {
try {
- vector<Sequence*> thisTemplate;
+ //when template=self, the query file is sorted from most abundance to least abundant
+ //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
+ vector<Sequence*> userTemplate;
- int thisRank;
- string thisName = q->getName();
- map<string, vector<string> >::iterator itRank = nameMapRank.find(thisName); // you will find it because we already sanity checked
- thisRank = (itRank->second).size();
+ int myAbund = priority[q->getName()];
- //create list of names we want to put into the template
- set<string> namesToAdd;
- for (itRank = nameMapRank.begin(); itRank != nameMapRank.end(); itRank++) {
- if (itRank->first != thisName) {
- if (includeAbunds == "greaterequal") {
- if ((itRank->second).size() >= thisRank) {
- //you are more abundant than me or equal to my abundance
- for (int i = 0; i < (itRank->second).size(); i++) {
- namesToAdd.insert((itRank->second)[i]);
- }
- }
- }else if (includeAbunds == "greater") {
- if ((itRank->second).size() > thisRank) {
- //you are more abundant than me
- for (int i = 0; i < (itRank->second).size(); i++) {
- namesToAdd.insert((itRank->second)[i]);
- }
- }
- }else if (includeAbunds == "all") {
- //add everyone
- for (int i = 0; i < (itRank->second).size(); i++) {
- namesToAdd.insert((itRank->second)[i]);
- }
- }
- }
- }
-
- for (int i = 0; i < templateSeqs.size(); i++) {
- if (namesToAdd.count(templateSeqs[i]->getName()) != 0) {
- thisTemplate.push_back(templateSeqs[i]);
- }
+ for (int i = 0; i < templateSeqs.size(); i++) {
+
+ if (m->control_pressed) { return userTemplate; }
+
+ //have I reached a sequence with the same abundance as myself?
+ if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
+
+ //if its am not chimeric add it
+ if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) { userTemplate.push_back(templateSeqs[i]); }
}
string kmerDBNameLeft;
string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
#ifdef USE_MPI
- for (int i = 0; i < thisTemplate.size(); i++) {
+ for (int i = 0; i < userTemplate.size(); i++) {
- if (m->control_pressed) { return thisTemplate; }
+ if (m->control_pressed) { return userTemplate; }
- string leftFrag = thisTemplate[i]->getUnaligned();
+ string leftFrag = userTemplate[i]->getUnaligned();
leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
- Sequence leftTemp(thisTemplate[i]->getName(), leftFrag);
+ Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
databaseLeft->addSequence(leftTemp);
}
databaseLeft->generateDB();
- databaseLeft->setNumSeqs(thisTemplate.size());
+ databaseLeft->setNumSeqs(userTemplate.size());
- for (int i = 0; i < thisTemplate.size(); i++) {
- if (m->control_pressed) { return thisTemplate; }
+ for (int i = 0; i < userTemplate.size(); i++) {
+ if (m->control_pressed) { return userTemplate; }
- string rightFrag = thisTemplate[i]->getUnaligned();
+ string rightFrag = userTemplate[i]->getUnaligned();
rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
- Sequence rightTemp(thisTemplate[i]->getName(), rightFrag);
+ Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
databaseRight->addSequence(rightTemp);
}
databaseRight->generateDB();
- databaseRight->setNumSeqs(thisTemplate.size());
+ databaseRight->setNumSeqs(userTemplate.size());
#else
- for (int i = 0; i < thisTemplate.size(); i++) {
+ for (int i = 0; i < userTemplate.size(); i++) {
- if (m->control_pressed) { return thisTemplate; }
+ if (m->control_pressed) { return userTemplate; }
- string leftFrag = thisTemplate[i]->getUnaligned();
+ string leftFrag = userTemplate[i]->getUnaligned();
leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
- Sequence leftTemp(thisTemplate[i]->getName(), leftFrag);
+ Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
databaseLeft->addSequence(leftTemp);
}
databaseLeft->generateDB();
- databaseLeft->setNumSeqs(thisTemplate.size());
+ databaseLeft->setNumSeqs(userTemplate.size());
- for (int i = 0; i < thisTemplate.size(); i++) {
- if (m->control_pressed) { return thisTemplate; }
+ for (int i = 0; i < userTemplate.size(); i++) {
+ if (m->control_pressed) { return userTemplate; }
- string rightFrag = thisTemplate[i]->getUnaligned();
+ string rightFrag = userTemplate[i]->getUnaligned();
rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
- Sequence rightTemp(thisTemplate[i]->getName(), rightFrag);
+ Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
databaseRight->addSequence(rightTemp);
}
databaseRight->generateDB();
- databaseRight->setNumSeqs(thisTemplate.size());
+ databaseRight->setNumSeqs(userTemplate.size());
#endif
}else if (searchMethod == "blast") {
//generate blastdb
- databaseLeft = new BlastDB(-2.0, -1.0, match, misMatch);
- for (int i = 0; i < thisTemplate.size(); i++) { if (m->control_pressed) { return thisTemplate; } databaseLeft->addSequence(*thisTemplate[i]); }
+ databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
+
+ for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; } databaseLeft->addSequence(*userTemplate[i]); }
databaseLeft->generateDB();
- databaseLeft->setNumSeqs(thisTemplate.size());
+ databaseLeft->setNumSeqs(userTemplate.size());
}
- return thisTemplate;
+ return userTemplate;
}
catch(exception& e) {
Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
try {
Sequence* trim = NULL;
- if (trimChimera) { trim = trimQuery; }
+ if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
if (chimeraFlags == "yes") {
string chimeraFlag = "no";
m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
outAcc << querySeq->getName() << endl;
+ if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
+
if (trimChimera) {
int lengthLeft = spotMap[chimeraResults[0].winLEnd] - spotMap[chimeraResults[0].winLStart];
int lengthRight = spotMap[chimeraResults[0].winREnd] - spotMap[chimeraResults[0].winRStart];
}
trim->setAligned(newAligned);
}
-
}
}
printBlock(chimeraResults[0], chimeraFlag, out);
out << endl;
- }else { out << querySeq->getName() << "\tno" << endl; }
+ }else {
+ out << querySeq->getName() << "\tno" << endl;
+ }
return trim;
m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
outAcc << querySeq->getName() << endl;
+ if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
+
if (trimChimera) {
string newAligned = trim->getAligned();
for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
}else { //both sides are chimeric, keep longest piece
- int lengthLeftLeft = leftPiece.spotMap[leftPiece.results[0].winLStart] - leftPiece.spotMap[leftPiece.results[0].winLEnd];
- int lengthLeftRight = leftPiece.spotMap[leftPiece.results[0].winRStart] - leftPiece.spotMap[leftPiece.results[0].winREnd];
+ int lengthLeftLeft = leftPiece.spotMap[leftPiece.results[0].winLEnd] - leftPiece.spotMap[leftPiece.results[0].winLStart];
+ int lengthLeftRight = leftPiece.spotMap[leftPiece.results[0].winREnd] - leftPiece.spotMap[leftPiece.results[0].winRStart];
int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
int length = lengthLeftLeft;
if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
- int lengthRightLeft = rightPiece.spotMap[rightPiece.results[0].winLStart] - rightPiece.spotMap[rightPiece.results[0].winLEnd];
- int lengthRightRight = rightPiece.spotMap[rightPiece.results[0].winRStart] - rightPiece.spotMap[rightPiece.results[0].winREnd];
+ int lengthRightLeft = rightPiece.spotMap[rightPiece.results[0].winLEnd] - rightPiece.spotMap[rightPiece.results[0].winLStart];
+ int lengthRightRight = rightPiece.spotMap[rightPiece.results[0].winREnd] - rightPiece.spotMap[rightPiece.results[0].winRStart];
if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
if (lengthRightRight > length) { longest = 4; }
printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
out << endl;
- }else { out << querySeq->getName() << "\tno" << endl; }
+ }else {
+ out << querySeq->getName() << "\tno" << endl;
+ }
return trim;
outAccString += querySeq->getName() + "\n";
results = true;
+ if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
+
//write to accnos file
int length = outAccString.length();
char* buf2 = new char[length];
for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
}else { //both sides are chimeric, keep longest piece
- int lengthLeftLeft = leftPiece.spotMap[leftPiece.results[0].winLStart] - leftPiece.spotMap[leftPiece.results[0].winLEnd];
- int lengthLeftRight = leftPiece.spotMap[leftPiece.results[0].winRStart] - leftPiece.spotMap[leftPiece.results[0].winREnd];
+ int lengthLeftLeft = leftPiece.spotMap[leftPiece.results[0].winLEnd] - leftPiece.spotMap[leftPiece.results[0].winLStart];
+ int lengthLeftRight = leftPiece.spotMap[leftPiece.results[0].winREnd] - leftPiece.spotMap[leftPiece.results[0].winRStart];
int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
int length = lengthLeftLeft;
if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
- int lengthRightLeft = rightPiece.spotMap[rightPiece.results[0].winLStart] - rightPiece.spotMap[rightPiece.results[0].winLEnd];
- int lengthRightRight = rightPiece.spotMap[rightPiece.results[0].winRStart] - rightPiece.spotMap[rightPiece.results[0].winREnd];
+ int lengthRightLeft = rightPiece.spotMap[rightPiece.results[0].winLEnd] - rightPiece.spotMap[rightPiece.results[0].winLStart];
+ int lengthRightRight = rightPiece.spotMap[rightPiece.results[0].winREnd] - rightPiece.spotMap[rightPiece.results[0].winRStart];
if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
if (lengthRightRight > length) { longest = 4; }
string outputString = "";
Sequence* trim = NULL;
- if (trimChimera) { trim = trimQuery; }
+ if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
if (chimeraFlags == "yes") {
string chimeraFlag = "no";
outAccString += querySeq->getName() + "\n";
results = true;
+ if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
+
//write to accnos file
int length = outAccString.length();
char* buf2 = new char[length];
//***************************************************************************************************************
int ChimeraSlayer::getChimeras(Sequence* query) {
try {
- if (trimChimera) { trimQuery = new Sequence(query->getName(), query->getAligned()); printResults.trimQuery = *trimQuery; }
+
+ trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
+ printResults.trimQuery = trimQuery;
chimeraFlags = "no";
printResults.flag = "no";
//you must create a template
vector<Sequence*> thisTemplate;
if (templateFileName != "self") { thisTemplate = templateSeqs; }
- else { thisTemplate = getTemplate(query); } //fills thistemplate and creates the databases
+ else { thisTemplate = getTemplate(query); } //fills this template and creates the databases
if (m->control_pressed) { return 0; }
if (m->control_pressed) { return 0; }
string chimeraFlag = maligner.getResults(query, decalc);
+
if (m->control_pressed) { return 0; }
+
vector<results> Results = maligner.getOutput();
- //found in testing realigning only made things worse
if (realign) {
ChimeraReAligner realigner(thisTemplate, match, misMatch);
realigner.reAlign(query, Results);