From: westcott Date: Wed, 4 May 2011 20:07:48 +0000 (+0000) Subject: chimera.slayer X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=55ec7cde88d5512e177fe9488d5ee13793853bad chimera.slayer --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 217151b..2e6f5a8 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -1505,10 +1505,10 @@ A7E9B68112D37EC400DA6239 /* chimeracheckrdp.h */, A7E9B68412D37EC400DA6239 /* chimerarealigner.cpp */, A7E9B68512D37EC400DA6239 /* chimerarealigner.h */, - A7E9B68812D37EC400DA6239 /* chimeraslayer.cpp */, - A7E9B68912D37EC400DA6239 /* chimeraslayer.h */, A7E9B6C212D37EC400DA6239 /* decalc.h */, A7E9B6C112D37EC400DA6239 /* decalc.cpp */, + A7E9B68812D37EC400DA6239 /* chimeraslayer.cpp */, + A7E9B68912D37EC400DA6239 /* chimeraslayer.h */, A7E9B74612D37EC400DA6239 /* maligner.h */, A7E9B74512D37EC400DA6239 /* maligner.cpp */, A7E9B79312D37EC400DA6239 /* pintail.cpp */, diff --git a/blastdb.cpp b/blastdb.cpp index 7f02a8d..8bf79c5 100644 --- a/blastdb.cpp +++ b/blastdb.cpp @@ -147,7 +147,7 @@ vector BlastDB::findClosestMegaBlast(Sequence* seq, int n, int minPerID) { //while (!m8FileHandle.eof()) { char c = m8FileHandle.get(); if (c == 10 || c == 13){ break; }else{ cout << c; } } // //cout << endl; m->gobble(m8FileHandle); - if (score >= minPerID) { + if (score >= minPerID) { //this follows broads CS, but probably should be searchScore... topMatches.push_back(templateAccession); Scores.push_back(searchScore); } diff --git a/chimeraslayer.cpp b/chimeraslayer.cpp index 5b10759..ffba9f5 100644 --- a/chimeraslayer.cpp +++ b/chimeraslayer.cpp @@ -72,10 +72,18 @@ ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, mapcontrol_pressed) { break; } runFilter(templateSeqs[i]); } - - + if (searchMethod == "distance") { + createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap + + //run filter on template copying templateSeqs into filteredTemplateSeqs + for (int i = 0; i < templateSeqs.size(); i++) { + if (m->control_pressed) { break; } + + Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned()); + runFilter(newSeq); + filteredTemplateSeqs.push_back(newSeq); + } + } } catch(exception& e) { m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer"); @@ -835,7 +843,7 @@ int ChimeraSlayer::getChimeras(Sequence* query) { //free memory for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } } - + //cout << endl << endl; return 0; } catch(exception& e) { @@ -1024,7 +1032,7 @@ vector ChimeraSlayer::getBlastSeqs(Sequence* q, vector& db string queryUnAligned = q->getUnaligned(); string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence - +//cout << "whole length = " << queryUnAligned.length() << '\t' << "left length = " << leftQuery.length() << '\t' << "right length = "<< rightQuery.length() << endl; Sequence* queryLeft = new Sequence(q->getName(), leftQuery); Sequence* queryRight = new Sequence(q->getName(), rightQuery); diff --git a/decalc.cpp b/decalc.cpp index 3b3740b..c600a7f 100644 --- a/decalc.cpp +++ b/decalc.cpp @@ -838,9 +838,9 @@ vector DeCalculator::findClosest(Sequence* querySeq, vectorgetName() << '\t' << dists[i].dist << endl; - if ((thisTemplate[dists[i].index]->getName() != querySeq->getName()) && (dists[i].dist >= minSim)) { + if ((thisTemplate[dists[i].index]->getName() != querySeq->getName()) && (((1.0-dists[i].dist)*100) >= minSim)) { Sequence* temp = new Sequence(thisTemplate[dists[i].index]->getName(), thisTemplate[dists[i].index]->getAligned()); //have to make a copy so you can trim and filter without stepping on eachother. - + //cout << querySeq->getName() << '\t' << thisTemplate[dists[i].index]->getName() << '\t' << dists[i].dist << endl; seqsMatches.push_back(temp); } diff --git a/listseqscommand.cpp b/listseqscommand.cpp index ebfd075..21cb4c7 100644 --- a/listseqscommand.cpp +++ b/listseqscommand.cpp @@ -257,18 +257,32 @@ int ListSeqsCommand::readFasta(){ m->openInputFile(fastafile, in); string name; + ofstream out; + string newFastaName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "numsAdded.fasta"; + m->openOutputFile(newFastaName, out); + int count = 1; + string lastName = ""; + while(!in.eof()){ if (m->control_pressed) { in.close(); return 0; } Sequence currSeq(in); name = currSeq.getName(); + if (lastName == "") { lastName = name; } + if (name != lastName) { count = 1; } + lastName = name; + + Sequence newSeq(name+"_"+toString(count), currSeq.getAligned()); + newSeq.printSequence(out); if (name != "") { names.push_back(name); } m->gobble(in); + count++; } in.close(); + out.close(); return 0; diff --git a/maligner.cpp b/maligner.cpp index f834db1..5ffba32 100644 --- a/maligner.cpp +++ b/maligner.cpp @@ -77,7 +77,7 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) { verticalFilter(temp); - //for (int i = 0; i < refSeqs.size(); i++) { cout << refSeqs[i]->getName() << endl << refSeqs[i]->getAligned() << endl; } + //for (int i = 0; i < refSeqs.size(); i++) { cout << refSeqs[i]->getName() << endl ; }//<< refSeqs[i]->getAligned() << endl vector< vector > matrix = buildScoreMatrix(query->getAligned().length(), refSeqs.size()); //builds and initializes @@ -150,7 +150,7 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) { temp.queryToParentLocal = computePercentID(queryInRegion, parentInRegion); -// cout << temp.parent << '\t' << "NAST:" << temp.nastRegionStart << '-' << temp.nastRegionEnd << " G:" << temp.queryToParent << " L:" << temp.queryToParentLocal << endl; + //cout << query->getName() << '\t' << temp.parent << '\t' << "NAST:" << temp.nastRegionStart << '-' << temp.nastRegionEnd << " G:" << temp.queryToParent << " L:" << temp.queryToParentLocal << ", " << temp.divR << endl; outputResults.push_back(temp); }