X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=pintail.cpp;h=6adfdf91ebeaf30847bee05d90f2c9b22d3c5946;hb=c3f0a9c8f932b923f3a6fbbf143e8f4b85fd6f5f;hp=04dfb0a76467ab6b030cfa60b8462dd7d30afd73;hpb=f7fbd74ffedcf62217109c22e828453eaefa1458;p=mothur.git diff --git a/pintail.cpp b/pintail.cpp index 04dfb0a..6adfdf9 100644 --- a/pintail.cpp +++ b/pintail.cpp @@ -18,7 +18,7 @@ inline bool compareQuanMembers(quanMember left, quanMember right){ } //*************************************************************************************************************** -Pintail::Pintail(string filename, string temp) { fastafile = filename; templateFile = temp; } +Pintail::Pintail(string filename, string temp, string o) { fastafile = filename; templateFile = temp; outputDir = o; } //*************************************************************************************************************** Pintail::~Pintail() { @@ -41,13 +41,15 @@ void Pintail::print(ostream& out) { for (int i = 0; i < querySeqs.size(); i++) { int index = ceil(deviation[i]); - - if (index == 0) { index=1; } //is your DE value higher than the 95% string chimera; - if (DE[i] > quantiles[index-1][4]) { chimera = "Yes"; } - else { chimera = "No"; } + if (quantiles[index][4] == 0.0) { + chimera = "Your template does not include sequences that provide quantile values at distance " + toString(index); + }else { + if (DE[i] > quantiles[index][4]) { chimera = "Yes"; } + else { chimera = "No"; } + } out << querySeqs[i]->getName() << '\t' << "div: " << deviation[i] << "\tstDev: " << DE[i] << "\tchimera flag: " << chimera << endl; if (chimera == "Yes") { @@ -72,7 +74,7 @@ void Pintail::print(ostream& out) { } //*************************************************************************************************************** -void Pintail::getChimeras() { +int Pintail::getChimeras() { try { //read in query sequences and subject sequences @@ -83,6 +85,8 @@ void Pintail::getChimeras() { int numSeqs = querySeqs.size(); + if (unaligned) { mothurOut("Your sequences need to be aligned when you use the pintail method."); mothurOutEndLine(); return 1; } + obsDistance.resize(numSeqs); expectedDistance.resize(numSeqs); seqCoef.resize(numSeqs); @@ -144,23 +148,20 @@ void Pintail::getChimeras() { mothurOut("Done."); mothurOutEndLine(); }else { createProcessesPairs(); } -/*string o = "foronlinepintailpairs-eachgap"; -ofstream out7; -openOutputFile(o, out7); +//string o = "closestmatch.eachgap.fasta"; +//ofstream out7; +//openOutputFile(o, out7); -for (int i = 0; i < bestfit.size(); i++) { - out7 << querySeqs[i]->getName() << endl; - out7 << querySeqs[i]->getUnaligned() << endl << endl; - - out7 << bestfit[i]->getName() << endl; - out7 << bestfit[i]->getUnaligned() << endl << endl << endl; -} -out7.close();/*/ +//for (int i = 0; i < bestfit.size(); i++) { + //out7 << ">" << querySeqs[i]->getName() << "-"<< bestfit[i]->getName() << endl; + //out7 << bestfit[i]->getAligned() << endl; +//} +//out7.close(); //find P mothurOut("Getting conservation... "); cout.flush(); if (consfile == "") { mothurOut("Calculating probability of conservation for your template sequences. This can take a while... I will output the frequency of the highest base in each position to a .freq file so that you can input them using the conservation parameter next time you run this command. Providing the .freq file will improve speed. "); cout.flush(); - probabilityProfile = decalc->calcFreq(templateSeqs, templateFile); + probabilityProfile = decalc->calcFreq(templateSeqs, outputDir + getSimpleName(templateFile)); mothurOut("Done."); mothurOutEndLine(); }else { probabilityProfile = readFreq(); } @@ -279,10 +280,19 @@ out7.close();/*/ ofstream out4, out5; string noOutliers, outliers; - noOutliers = getRootName(templateFile) + "pintail.quanNOOUTLIERS"; - outliers = getRootName(templateFile) + "pintail.quanYESOUTLIERS"; + if ((!filter) && (seqMask == "")) { + noOutliers = outputDir + getRootName(getSimpleName(templateFile)) + "pintail.quan"; + }else if ((filter) && (seqMask == "")) { + noOutliers = outputDir + getRootName(getSimpleName(templateFile)) + "pintail.filtered.quan"; + }else if ((!filter) && (seqMask != "")) { + noOutliers = outputDir + getRootName(getSimpleName(templateFile)) + "pintail.masked.quan"; + }else if ((filter) && (seqMask != "")) { + noOutliers = outputDir + getRootName(getSimpleName(templateFile)) + "pintail.filtered.masked.quan"; + } + + //outliers = getRootName(templateFile) + "pintail.quanYESOUTLIERS"; - openOutputFile(outliers, out4); + /*openOutputFile(outliers, out4); //adjust quantiles for (int i = 0; i < quantilesMembers.size(); i++) { @@ -321,7 +331,7 @@ out7.close();/*/ } - out4.close(); + out4.close();*/ decalc->removeObviousOutliers(quantilesMembers, templateSeqs.size()); @@ -374,6 +384,8 @@ out7.close();/*/ delete distcalculator; delete decalc; + + return 0; } catch(exception& e) { errorOut(e, "Pintail", "getChimeras"); @@ -431,28 +443,9 @@ vector Pintail::findPairs(int start, int end) { vector seqsMatches; for(int i = start; i < end; i++){ - - float smallest = 10000.0; - Sequence query = *(querySeqs[i]); - Sequence* match; - - for(int j = 0; j < templateSeqs.size(); j++){ - - Sequence temp = *(templateSeqs[j]); - - distcalculator->calcDist(query, temp); - float dist = distcalculator->getDist(); - - if (dist < smallest) { - match = templateSeqs[j]; - smallest = dist; - } - } - - //make copy so trimSeqs doesn't overtrim - Sequence* copy = new Sequence(match->getName(), match->getAligned()); - seqsMatches.push_back(copy); + vector copy = decalc->findClosest(querySeqs[i], templateSeqs, 1); + seqsMatches.push_back(copy[0]); } return seqsMatches;