}
m8FileHandle.close();
+ string root = dbFileName;
+ string temp = dbFileName + ".nsq";
+ remove(temp.c_str());
+ temp = dbFileName + ".nsi";
+ remove(temp.c_str());
+
+ temp = dbFileName + ".nsd";
+ remove(temp.c_str());
+
+ temp = dbFileName + ".nin";
+ remove(temp.c_str());
+
+ temp = dbFileName + ".nhr";
+ remove(temp.c_str());
+
+
+
return topMatches;
}
catch(exception& e) {
Chimera(){};
Chimera(string);
+ Chimera(string, bool);
Chimera(string, string);
virtual ~Chimera(){};
virtual void setFilter(bool f) { filter = f; }
string qAligned = query->getAligned();
string newQuery = "";
-
+
//sort parents by region start
sort(parents.begin(), parents.end(), compareRegionStart);
int longest = 0;
//take query and break apart into pieces using breakpoints given by results of parents
for (int i = 0; i < parents.size(); i++) {
- cout << parents[i].parent << '\t' << parents[i].nastRegionStart << '\t' << parents[i].nastRegionEnd << endl;
- int length = parents[i].nastRegionEnd - parents[i].nastRegionStart;
+ int length = parents[i].nastRegionEnd - parents[i].nastRegionStart+1;
string q = qAligned.substr(parents[i].nastRegionStart, length);
- cout << "query = " << q << endl;
Sequence* queryFrag = new Sequence(query->getName(), q);
queryParts.push_back(queryFrag);
-
+
Sequence* parent = getSequence(parents[i].parent);
string p = parent->getAligned();
p = p.substr(parents[i].nastRegionStart, length);
- cout << "parent = " << p << endl;
parent->setAligned(p);
parentParts.push_back(parent);
//make sure you don't cutoff end of query
if (parents[parents.size()-1].nastRegionEnd < qAligned.length()) { newQuery += qAligned.substr(parents[parents.size()-1].nastRegionEnd); }
-
+
//set query to new aligned string
query->setAligned(newQuery);
else {
//valid paramters for this command
string Array[] = {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask",
- "numwanted", "ksize", "svg", "name", "match","mismatch", "divergence", "minsim","mincov","minbs", "minsnp","parents", "iters","outputdir","inputdir" };
+ "numwanted", "ksize", "svg", "name", "match","mismatch", "divergence", "minsim","mincov","minbs", "minsnp","parents", "iters","outputdir","inputdir", "search","realign" };
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
OptionParser parser(option);
temp = validParameter.validFile(parameters, "parents", false); if (temp == "not found") { temp = "3"; }
convert(temp, parents);
+ temp = validParameter.validFile(parameters, "realign", false); if (temp == "not found") { temp = "f"; }
+ realign = isTrue(temp);
+
+ search = validParameter.validFile(parameters, "search", false); if (search == "not found") { temp = "distance"; }
+
temp = validParameter.validFile(parameters, "iters", false);
if ((temp == "not found") && (method == "chimeraslayer")) { temp = "100"; }
else if (temp == "not found") { temp = "1000"; }
//"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name"
//mothurOut("chimera.seqs ASSUMES that your sequences are ALIGNED and if using a template that the template file sequences are the same length as the fasta file sequences.\n\n");
mothurOut("The chimera.seqs command reads a fastafile and creates list of potentially chimeric sequences.\n");
- mothurOut("The chimera.seqs command parameters are fasta, filter, correction, processors, mask, method, window, increment, template, conservation, quantile, numwanted, ksize, svg, name, iters.\n");
+ mothurOut("The chimera.seqs command parameters are fasta, filter, correction, processors, mask, method, window, increment, template, conservation, quantile, numwanted, ksize, svg, name, iters, search, realign.\n");
mothurOut("The fasta parameter is always required and template is required if using pintail, ccode or chimeracheck.\n");
mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n");
mothurOut("The correction parameter allows you to put more emphasis on the distance between highly similar sequences and less emphasis on the differences between remote homologs.\n");
mothurOut("The mincov parameter allows you to specify minimum coverage by closest matches found in template. Default is 70, meaning 70%. \n");
mothurOut("The minbs parameter allows you to specify minimum bootstrap support for calling a sequence chimeric. Default is 90, meaning 90%. \n");
mothurOut("The minsnp parameter allows you to specify percent of SNPs to sample on each side of breakpoint for computing bootstrap support (default: 10) \n");
+ mothurOut("The search parameter allows you to specify search method for finding the closest parent. Choices are distance and blast, default distance. -used only by chimeraslayer. \n");
+ mothurOut("The realign parameter allows you to realign the query to the potential paretns. Choices are true or false, default false. -used only by chimeraslayer. \n");
mothurOut("NOT ALL PARAMETERS ARE USED BY ALL METHODS. Please look below for method specifics.\n\n");
mothurOut("Details for each method: \n");
mothurOut("\tpintail: \n");
else if (method == "pintail") { chimera = new Pintail(fastafile, outputDir); }
else if (method == "ccode") { chimera = new Ccode(fastafile, outputDir); }
else if (method == "chimeracheck") { chimera = new ChimeraCheckRDP(fastafile, outputDir); }
- else if (method == "chimeraslayer") { chimera = new ChimeraSlayer("blast"); }
+ else if (method == "chimeraslayer") { chimera = new ChimeraSlayer(search, realign); }
else { mothurOut("Not a valid method."); mothurOutEndLine(); return 0; }
//set user options
void appendOutputFiles(string, string);
bool abort;
- string method, fastafile, templatefile, consfile, quanfile, maskfile, namefile, outputDir;
- bool filter, correction, svg, printAll;
+ string method, fastafile, templatefile, consfile, quanfile, maskfile, namefile, outputDir, search;
+ bool filter, correction, svg, printAll, realign;
int processors, midpoint, averageLeft, averageRight, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs;
float divR;
Chimera* chimera;
#include "chimerarealigner.h"
//***************************************************************************************************************
-ChimeraSlayer::ChimeraSlayer(string mode) : searchMethod(mode) { decalc = new DeCalculator(); }
+ChimeraSlayer::ChimeraSlayer(string mode, bool r) : searchMethod(mode), realign(r) { decalc = new DeCalculator(); }
//***************************************************************************************************************
ChimeraSlayer::~ChimeraSlayer() { delete decalc; }
//***************************************************************************************************************
string chimeraFlag = maligner->getResults(query, decalc);
vector<results> Results = maligner->getOutput();
- //realign query to parents to improve slayers detection rate
- ChimeraReAligner realigner(templateSeqs, match, misMatch);
- realigner.reAlign(query, Results);
-cout << query->getName() << '\n' << query->getAligned() << endl;
+ //realign query to parents to improve slayers detection rate???
+ if (realign) {
+ ChimeraReAligner realigner(templateSeqs, match, misMatch);
+ realigner.reAlign(query, Results);
+ }
+
//if (chimeraFlag == "yes") {
//get sequence that were given from maligner results
class ChimeraSlayer : public Chimera {
public:
- ChimeraSlayer(string);
+ ChimeraSlayer(string, bool);
~ChimeraSlayer();
int getChimeras(Sequence*);
vector<data_struct> chimeraResults;
string chimeraFlags, searchMethod;
string fastafile;
+ bool realign;
void printBlock(data_struct, ostream&);
};
Dist* distcalculator = new eachGapDist();
+ string queryUnAligned = querySeq->getUnaligned();
+ int numBases = int(queryUnAligned.length() * 0.33);
+
+ string leftQuery = ""; //first 1/3 of the sequence
+ string rightQuery = ""; //last 1/3 of the sequence
string queryAligned = querySeq->getAligned();
- string leftQuery = queryAligned.substr(0, (queryAligned.length() / 3)); //first 1/3 of the sequence
- string rightQuery = queryAligned.substr(((queryAligned.length() / 3)*2)); //last 1/3 of the sequence
+
+ //left side
+ int baseCount = 0;
+ int leftSpot = 0;
+ for (int i = 0; i < queryAligned.length(); i++) {
+ leftQuery += queryAligned[i];
+ //if you are a base
+ if ((queryAligned[i] != '.') && (queryAligned[i] != '-')) { baseCount++; }
+ //if you have 1/3
+ if (baseCount >= numBases) { leftSpot = i; break; } //first 1/3
+ }
+
+ //right side - count through another 1/3, so you are at last third
+ baseCount = 0;
+ int rightSpot = 0;
+ for (int i = leftSpot; i < queryAligned.length(); i++) {
+ //if you are a base
+ if ((queryAligned[i] != '.') && (queryAligned[i] != '-')) { baseCount++; }
+ //if you have 1/3
+ if (baseCount >= numBases) { rightSpot = i; break; } //last 1/3
+ }
+ rightQuery = queryAligned.substr(rightSpot); //sequence from pos spot to end
Sequence queryLeft(querySeq->getName(), leftQuery);
Sequence queryRight(querySeq->getName(), rightQuery);
-
+//cout << querySeq->getName() << '\t' << leftSpot << '\t' << rightSpot << endl;
+//cout << queryUnAligned.length() << '\t' << queryLeft.getUnaligned().length() << '\t' << queryRight.getUnaligned().length() << endl;
for(int j = 0; j < db.size(); j++){
string dbAligned = db[j]->getAligned();
- string leftDB = dbAligned.substr(0, (dbAligned.length() / 3)); //first 1/3 of the sequence
- string rightDB = dbAligned.substr(((dbAligned.length() / 3)*2)); //last 1/3 of the sequence
+ string leftDB = dbAligned.substr(0, leftSpot+1); //first 1/3 of the sequence
+ string rightDB = dbAligned.substr(rightSpot); //last 1/3 of the sequence
Sequence dbLeft(db[j]->getName(), leftDB);
Sequence dbRight(db[j]->getName(), rightDB);
-
+
distcalculator->calcDist(queryLeft, dbLeft);
float distLeft = distcalculator->getDist();