]> git.donarmstrong.com Git - mothur.git/blobdiff - uchime_src/uchime_main.cpp
added uchime_src folder. added biom parameter to make.shared. added biom as a current...
[mothur.git] / uchime_src / uchime_main.cpp
diff --git a/uchime_src/uchime_main.cpp b/uchime_src/uchime_main.cpp
new file mode 100644 (file)
index 0000000..b9d69ad
--- /dev/null
@@ -0,0 +1,212 @@
+#include "myutils.h"\r
+#include "chime.h"\r
+#include "seqdb.h"\r
+#include "dp.h"\r
+#include "ultra.h"\r
+#include "hspfinder.h"\r
+#include <algorithm>\r
+#include <set>\r
+\r
+bool SearchChime(Ultra &U, const SeqData &QSD, float QAb, \r
+  const AlnParams &AP, const AlnHeuristics &AH, HSPFinder &HF,\r
+  float MinFractId, ChimeHit2 &Hit);\r
+\r
+FILE *g_fUChime;\r
+FILE *g_fUChimeAlns;\r
+const vector<float> *g_SortVecFloat;\r
+bool g_UchimeDeNovo = false;\r
+\r
+void Usage()\r
+       {\r
+       printf("\n");\r
+       printf("UCHIME %s by Robert C. Edgar\n", MY_VERSION);\r
+       printf("http://www.drive5.com/uchime\n");\r
+       printf("\n");\r
+       printf("This software is donated to the public domain\n");\r
+       printf("\n");\r
+\r
+       printf(\r
+#include "help.h"\r
+               );\r
+       }\r
+\r
+void SetBLOSUM62()\r
+       {\r
+       Die("SetBLOSUM62 not implemented");\r
+       }\r
+\r
+void ReadSubstMx(const string &/*FileName*/, Mx<float> &/*Mxf*/)\r
+       {\r
+       Die("ReadSubstMx not implemented");\r
+       }\r
+\r
+void LogAllocs()\r
+       {\r
+       /*empty*/\r
+       }\r
+\r
+static bool CmpDescVecFloat(unsigned i, unsigned j)\r
+       {\r
+       return (*g_SortVecFloat)[i] > (*g_SortVecFloat)[j];\r
+       }\r
+\r
+void Range(vector<unsigned> &v, unsigned N)\r
+       {\r
+       v.clear();\r
+       v.reserve(N);\r
+       for (unsigned i = 0; i < N; ++i)\r
+               v.push_back(i);\r
+       }\r
+\r
+void SortDescending(const vector<float> &Values, vector<unsigned> &Order)\r
+       {\r
+       StartTimer(Sort);\r
+       const unsigned N = SIZE(Values);\r
+       Range(Order, N);\r
+       g_SortVecFloat = &Values;\r
+       sort(Order.begin(), Order.end(), CmpDescVecFloat);\r
+       EndTimer(Sort);\r
+       }\r
+\r
+float GetAbFromLabel(const string &Label)\r
+       {\r
+       vector<string> Fields;\r
+       Split(Label, Fields, '/');\r
+       const unsigned N = SIZE(Fields);\r
+       for (unsigned i = 0; i < N; ++i)\r
+               {\r
+               const string &Field = Fields[i];\r
+               if (Field.substr(0, 3) == "ab=")\r
+                       {\r
+                       string a = Field.substr(3, string::npos);\r
+                       return (float) atof(a.c_str());\r
+                       }\r
+               }\r
+       if (g_UchimeDeNovo)\r
+               Die("Missing abundance /ab=xx/ in label >%s", Label.c_str());\r
+       return 0.0;\r
+       }\r
+\r
+int main(int argc, char *argv[])\r
+       {\r
+               \r
+       MyCmdLine(argc, argv);\r
+\r
+       if (argc < 2)\r
+               {\r
+               Usage();\r
+               return 0;\r
+               }\r
+\r
+       if (opt_version)\r
+               {\r
+               printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION);\r
+               return 0;\r
+               }\r
+\r
+       printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION);\r
+       printf("by Robert C. Edgar\n");\r
+       printf("http://drive5.com/uchime\n");\r
+       printf("This code is donated to the public domain.\n");\r
+       printf("\n");\r
+       if (!optset_w)\r
+               opt_w = 8;\r
+       \r
+       float MinFractId = 0.95f;\r
+       if (optset_id)\r
+               MinFractId = (float) opt_id;\r
+\r
+       Log("%8.2f  minh\n", opt_minh);\r
+       Log("%8.2f  xn\n", opt_xn);\r
+       Log("%8.2f  dn\n", opt_dn);\r
+       Log("%8.2f  xa\n", opt_xa);\r
+       Log("%8.2f  mindiv\n", opt_mindiv);\r
+       Log("%8u  maxp\n", opt_maxp);\r
+\r
+       if (opt_input == "" && opt_uchime != "")\r
+               opt_input = opt_uchime;\r
+\r
+       if (opt_input == "")\r
+               Die("Missing --input");\r
+\r
+       g_UchimeDeNovo = (opt_db == "");\r
+\r
+       if (opt_uchimeout != "")\r
+               g_fUChime = CreateStdioFile(opt_uchimeout);\r
+\r
+       if (opt_uchimealns != "")\r
+               g_fUChimeAlns = CreateStdioFile(opt_uchimealns);\r
+\r
+       SeqDB Input;\r
+       SeqDB DB;\r
+\r
+       Input.FromFasta(opt_input);\r
+       if (!Input.IsNucleo())\r
+               Die("Input contains amino acid sequences");\r
+\r
+       const unsigned QuerySeqCount = Input.GetSeqCount();\r
+       vector<unsigned> Order;\r
+       for (unsigned i = 0; i < QuerySeqCount; ++i)\r
+               Order.push_back(i);\r
+\r
+       if (g_UchimeDeNovo)\r
+               {\r
+               vector<float> Abs;\r
+               for (unsigned i = 0; i < QuerySeqCount; ++i)\r
+                       {\r
+                       const char *Label = Input.GetLabel(i);\r
+                       float Ab = GetAbFromLabel(Label);\r
+                       Abs.push_back(Ab);\r
+                       }\r
+               SortDescending(Abs, Order);\r
+               DB.m_IsNucleoSet = true;\r
+               DB.m_IsNucleo = true;\r
+               }\r
+       else\r
+               {\r
+               DB.FromFasta(opt_db);\r
+               if (!DB.IsNucleo())\r
+                       Die("Database contains amino acid sequences");\r
+               }\r
+\r
+       vector<ChimeHit2> Hits;\r
+       unsigned HitCount = 0;\r
+       for (unsigned i = 0; i < QuerySeqCount; ++i)\r
+               {\r
+               unsigned QuerySeqIndex = Order[i];\r
+\r
+               SeqData QSD;\r
+               Input.GetSeqData(QuerySeqIndex, QSD);\r
+\r
+               float QAb = -1.0;\r
+               if (g_UchimeDeNovo)\r
+                       QAb = GetAbFromLabel(QSD.Label);\r
+\r
+               ChimeHit2 Hit;\r
+               AlnParams &AP = *(AlnParams *) 0;\r
+               AlnHeuristics &AH = *(AlnHeuristics *) 0;\r
+               HSPFinder &HF = *(HSPFinder *) 0;\r
+               bool Found = SearchChime(DB, QSD, QAb, AP, AH, HF, MinFractId, Hit);\r
+               if (Found)\r
+                       ++HitCount;\r
+               else\r
+                       {\r
+                       if (g_UchimeDeNovo)\r
+                               DB.AddSeq(QSD.Label, QSD.Seq, QSD.L);\r
+                       }\r
+\r
+               WriteChimeHit(g_fUChime, Hit);\r
+\r
+               ProgressStep(i, QuerySeqCount, "%u/%u chimeras found (%.1f%%)", HitCount, i, Pct(HitCount, i+1));\r
+               }\r
+\r
+       Log("\n");\r
+       Log("%s: %u/%u chimeras found (%.1f%%)\n",\r
+         opt_input.c_str(), HitCount, QuerySeqCount, Pct(HitCount, QuerySeqCount));\r
+\r
+       CloseStdioFile(g_fUChime);\r
+       CloseStdioFile(g_fUChimeAlns);\r
+\r
+       ProgressExit();\r
+       return 0;\r
+       }\r