]> git.donarmstrong.com Git - mothur.git/blobdiff - searchchime.cpp
removing chime source files from mother project.
[mothur.git] / searchchime.cpp
diff --git a/searchchime.cpp b/searchchime.cpp
deleted file mode 100644 (file)
index c00a9c4..0000000
+++ /dev/null
@@ -1,304 +0,0 @@
-#include "myutils.h"\r
-#include "ultra.h"\r
-#include "chime.h"\r
-#include "uc.h"\r
-#include "dp.h"\r
-#include <set>\r
-#include <algorithm>\r
-\r
-#define TRACE  0\r
-\r
-extern FILE *g_fUChime;\r
-\r
-void GetCandidateParents(Ultra &U, const SeqData &QSD, float AbQ,\r
-  vector<unsigned> &Parents);\r
-\r
-void AlignChime(const SeqData &QSD, const SeqData &ASD, const SeqData &BSD,\r
-  const string &PathQA, const string &PathQB, ChimeHit2 &Hit);\r
-\r
-double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path, bool Nucleo);\r
-\r
-static void GetSmoothedIdVec(const SeqData &QSD, const SeqData &PSD, const string &Path,\r
-  vector<unsigned> &IdVec, unsigned d)\r
-       {\r
-       IdVec.clear();\r
-       const unsigned ColCount = SIZE(Path);\r
-\r
-       const byte *Q = QSD.Seq;\r
-       const byte *P = PSD.Seq;\r
-\r
-       const unsigned QL = QSD.L;\r
-       const unsigned PL = PSD.L;\r
-\r
-       if (QL <= d)\r
-               {\r
-               IdVec.resize(QSD.L, 0);\r
-               return;\r
-               }\r
-\r
-       unsigned QPos = 0;\r
-       unsigned PPos = 0;\r
-\r
-       vector<bool> SameVec;\r
-       SameVec.reserve(QL);\r
-       for (unsigned Col = 0; Col < ColCount; ++Col)\r
-               {\r
-               char c = Path[Col];\r
-\r
-               bool Same = false;\r
-               if (c == 'M')\r
-                       {\r
-                       byte q = Q[QPos];\r
-                       byte p = P[PPos];\r
-                       Same = (toupper(q) == toupper(p));\r
-                       }\r
-\r
-               if (c == 'M' || c == 'D')\r
-                       {\r
-                       ++QPos;\r
-                       SameVec.push_back(Same);\r
-                       }\r
-\r
-               if (c == 'M' || c == 'I')\r
-                       ++PPos;\r
-               }\r
-\r
-       asserta(SIZE(SameVec) == QL);\r
-\r
-       unsigned n = 0;\r
-       for (unsigned QPos = 0; QPos < d; ++QPos)\r
-               {\r
-               if (SameVec[QPos])\r
-                       ++n;\r
-               IdVec.push_back(n);\r
-               }\r
-\r
-       for (unsigned QPos = d; QPos < QL; ++QPos)\r
-               {\r
-               if (SameVec[QPos])\r
-                       ++n;\r
-               IdVec.push_back(n);\r
-               if (SameVec[QPos-d])\r
-                       --n;\r
-               }\r
-       asserta(SIZE(IdVec) == QL);\r
-\r
-#if    TRACE\r
-       {\r
-       Log("\n");\r
-       Log("GetSmoothedIdVec\n");\r
-       unsigned QPos = 0;\r
-       unsigned PPos = 0;\r
-       Log("Q P  Same       Id\n");\r
-       Log("- -  ----  -------\n");\r
-       for (unsigned Col = 0; Col < ColCount; ++Col)\r
-               {\r
-               char c = Path[Col];\r
-\r
-               bool Same = false;\r
-               if (c == 'M')\r
-                       {\r
-                       byte q = Q[QPos];\r
-                       byte p = P[PPos];\r
-                       Same = (toupper(q) == toupper(p));\r
-                       Log("%c %c  %4c  %7d\n", q, p, tof(Same), IdVec[QPos]);\r
-                       }\r
-\r
-               if (c == 'M' || c == 'D')\r
-                       ++QPos;\r
-               if (c == 'M' || c == 'I')\r
-                       ++PPos;\r
-               }\r
-       }\r
-#endif\r
-       }\r
-\r
-bool SearchChime(Ultra &U, const SeqData &QSD, float QAb, \r
-  const AlnParams &AP, const AlnHeuristics &AH, HSPFinder &HF,\r
-  float MinFractId, ChimeHit2 &Hit)\r
-       {\r
-       Hit.Clear();\r
-       Hit.QLabel = QSD.Label;\r
-\r
-       if (opt_verbose)\r
-               {\r
-               Log("\n");\r
-               Log("SearchChime()\n");\r
-               Log("Query>%s\n", QSD.Label);\r
-               }\r
-\r
-       vector<unsigned> Parents;\r
-       GetCandidateParents(U, QSD, QAb, Parents);\r
-\r
-       unsigned ParentCount = SIZE(Parents);\r
-       if (ParentCount <= 1)\r
-               {\r
-               if (opt_verbose)\r
-                       Log("%u candidate parents, done.\n", ParentCount);\r
-               return false;\r
-               }\r
-\r
-       if (opt_fastalign)\r
-               HF.SetA(QSD);\r
-       HSPFinder *ptrHF = (opt_fastalign ? &HF : 0);\r
-\r
-       unsigned ChunkLength;\r
-       vector<unsigned> ChunkLos;\r
-       GetChunkInfo(QSD.L, ChunkLength, ChunkLos);\r
-       const unsigned ChunkCount = SIZE(ChunkLos);\r
-\r
-       vector<unsigned> ChunkIndexToBestId(ChunkCount, 0);\r
-       vector<unsigned> ChunkIndexToBestParentIndex(ChunkCount, UINT_MAX);\r
-\r
-       vector<SeqData> PSDs;\r
-       vector<string> Paths;\r
-       double TopPctId = 0.0;\r
-       unsigned TopParentIndex = UINT_MAX;\r
-       unsigned QL = QSD.L;\r
-       vector<unsigned> MaxIdVec(QL, 0);\r
-       for (unsigned ParentIndex = 0; ParentIndex < ParentCount; ++ParentIndex)\r
-               {\r
-               unsigned ParentSeqIndex = Parents[ParentIndex];\r
-\r
-               SeqData PSD;\r
-               //PSD.Label = U.GetSeedLabel(ParentSeqIndex);\r
-               //PSD.Seq = U.GetSeedSeq(ParentSeqIndex);\r
-               //PSD.L = U.GetSeedLength(ParentSeqIndex);\r
-               //PSD.Index = ParentSeqIndex;\r
-               U.GetSeqData(ParentSeqIndex, PSD);\r
-               PSDs.push_back(PSD);\r
-\r
-               if (opt_fastalign)\r
-                       HF.SetB(PSD);\r
-\r
-               PathData PD;\r
-\r
-               float HSPId;\r
-               bool Found = GlobalAlign(QSD, PSD, AP, AH, *ptrHF, MinFractId, HSPId, PD);\r
-               if (!Found)\r
-                       {\r
-                       Paths.push_back("");                            \r
-                       continue;\r
-                       }\r
-\r
-               double PctId = 100.0*GetFractIdGivenPath(QSD.Seq, PSD.Seq, PD.Start, true);\r
-               if (opt_selfid && PctId == 100.0)\r
-                       {\r
-                       Paths.push_back("");                            \r
-                       continue;\r
-                       }\r
-\r
-               if (PctId > TopPctId)\r
-                       {\r
-                       TopParentIndex = ParentIndex;\r
-                       TopPctId = PctId;\r
-                       if (TopPctId >= 100.0 - opt_mindiv)\r
-                               {\r
-                               if (opt_verbose)\r
-                                       {\r
-                                       Log("  %.1f%%  >%s\n", TopPctId, PSD.Label);\r
-                                       Log("  Top hit exceeds ctl threshold, done.\n");\r
-                                       return false;\r
-                                       }\r
-                               }\r
-                       }\r
-\r
-               string Path = PD.Start;\r
-               Paths.push_back(Path);\r
-\r
-               vector<unsigned> IdVec;\r
-               GetSmoothedIdVec(QSD, PSD, Path, IdVec, opt_idsmoothwindow);\r
-\r
-               for (unsigned QPos = 0; QPos < QL; ++QPos)\r
-                       if (IdVec[QPos] > MaxIdVec[QPos])\r
-                               MaxIdVec[QPos] = IdVec[QPos];\r
-               }\r
-\r
-       vector<unsigned> BestParents;\r
-       for (unsigned k = 0; k < opt_maxp; ++k)\r
-               {\r
-               unsigned BestParent = UINT_MAX;\r
-               unsigned BestCov = 0;\r
-               for (unsigned ParentIndex = 0; ParentIndex < ParentCount; ++ParentIndex)\r
-                       {\r
-                       const SeqData &PSD = PSDs[ParentIndex];\r
-                       const string &Path = Paths[ParentIndex];\r
-                       if (Path == "")\r
-                               continue;\r
-\r
-                       vector<unsigned> IdVec;\r
-                       GetSmoothedIdVec(QSD, PSD, Path, IdVec, opt_idsmoothwindow);\r
-\r
-                       unsigned Cov = 0;\r
-                       for (unsigned QPos = 0; QPos < QL; ++QPos)\r
-                               if (IdVec[QPos] == MaxIdVec[QPos])\r
-                                       ++Cov;\r
-\r
-                       if (Cov > BestCov)\r
-                               {\r
-                               BestParent = ParentIndex;\r
-                               BestCov = Cov;\r
-                               }\r
-                       }\r
-\r
-               if (BestParent == UINT_MAX)\r
-                       break;\r
-\r
-               BestParents.push_back(BestParent);\r
-               vector<unsigned> IdVec;\r
-\r
-               const SeqData &PSD = PSDs[BestParent];\r
-               const string &Path = Paths[BestParent];\r
-               GetSmoothedIdVec(QSD, PSD, Path, IdVec, opt_idsmoothwindow);\r
-               for (unsigned QPos = 0; QPos < QL; ++QPos)\r
-                       if (IdVec[QPos] == MaxIdVec[QPos])\r
-                               MaxIdVec[QPos] = UINT_MAX;\r
-               }\r
-\r
-       unsigned BestParentCount = SIZE(BestParents);\r
-\r
-       if (opt_verbose)\r
-               {\r
-               Log("%u/%u best parents\n", BestParentCount, ParentCount);\r
-               for (unsigned k = 0; k < BestParentCount; ++k)\r
-                       {\r
-                       unsigned i = BestParents[k];\r
-                       Log(" %s\n", PSDs[i].Label);\r
-                       }\r
-               }\r
-\r
-       bool Found = false;\r
-       for (unsigned k1 = 0; k1 < BestParentCount; ++k1)\r
-               {\r
-               unsigned i1 = BestParents[k1];\r
-               asserta(i1 < ParentCount);\r
-\r
-               const SeqData &PSD1 = PSDs[i1];\r
-               const string &Path1 = Paths[i1];\r
-\r
-               for (unsigned k2 = k1 + 1; k2 < BestParentCount; ++k2)\r
-                       {\r
-                       unsigned i2 = BestParents[k2];\r
-                       asserta(i2 < ParentCount);\r
-                       asserta(i2 != i1);\r
-\r
-                       const SeqData &PSD2 = PSDs[i2];\r
-                       const string &Path2 = Paths[i2];\r
-\r
-                       ChimeHit2 Hit2;\r
-                       AlignChime(QSD, PSD1, PSD2, Path1, Path2, Hit2);\r
-                       Hit2.PctIdQT = TopPctId;\r
-\r
-                       if (Hit2.Accept())\r
-                               Found = true;\r
-\r
-                       if (Hit2.Score > Hit.Score)\r
-                               Hit = Hit2;\r
-\r
-                       if (opt_verbose)\r
-                               Hit2.LogMe();\r
-                       }\r
-               }\r
-\r
-       return Found;\r
-       }\r