--- /dev/null
+#include "myutils.h"\r
+#include "chime.h"\r
+#include "seqdb.h"\r
+#include "dp.h"\r
+#include "ultra.h"\r
+#include "hspfinder.h"\r
+#include <algorithm>\r
+#include <set>\r
+\r
+bool SearchChime(Ultra &U, const SeqData &QSD, float QAb, \r
+ const AlnParams &AP, const AlnHeuristics &AH, HSPFinder &HF,\r
+ float MinFractId, ChimeHit2 &Hit);\r
+\r
+FILE *g_fUChime;\r
+FILE *g_fUChimeAlns;\r
+const vector<float> *g_SortVecFloat;\r
+bool g_UchimeDeNovo = false;\r
+\r
+void Usage()\r
+ {\r
+ printf("\n");\r
+ printf("UCHIME %s by Robert C. Edgar\n", MY_VERSION);\r
+ printf("http://www.drive5.com/uchime\n");\r
+ printf("\n");\r
+ printf("This software is donated to the public domain\n");\r
+ printf("\n");\r
+\r
+ printf(\r
+#include "help.h"\r
+ );\r
+ }\r
+\r
+void SetBLOSUM62()\r
+ {\r
+ Die("SetBLOSUM62 not implemented");\r
+ }\r
+\r
+void ReadSubstMx(const string &/*FileName*/, Mx<float> &/*Mxf*/)\r
+ {\r
+ Die("ReadSubstMx not implemented");\r
+ }\r
+\r
+void LogAllocs()\r
+ {\r
+ /*empty*/\r
+ }\r
+\r
+static bool CmpDescVecFloat(unsigned i, unsigned j)\r
+ {\r
+ return (*g_SortVecFloat)[i] > (*g_SortVecFloat)[j];\r
+ }\r
+\r
+void Range(vector<unsigned> &v, unsigned N)\r
+ {\r
+ v.clear();\r
+ v.reserve(N);\r
+ for (unsigned i = 0; i < N; ++i)\r
+ v.push_back(i);\r
+ }\r
+\r
+void SortDescending(const vector<float> &Values, vector<unsigned> &Order)\r
+ {\r
+ StartTimer(Sort);\r
+ const unsigned N = SIZE(Values);\r
+ Range(Order, N);\r
+ g_SortVecFloat = &Values;\r
+ sort(Order.begin(), Order.end(), CmpDescVecFloat);\r
+ EndTimer(Sort);\r
+ }\r
+\r
+float GetAbFromLabel(const string &Label)\r
+ {\r
+ vector<string> Fields;\r
+ Split(Label, Fields, '/');\r
+ const unsigned N = SIZE(Fields);\r
+ for (unsigned i = 0; i < N; ++i)\r
+ {\r
+ const string &Field = Fields[i];\r
+ if (Field.substr(0, 3) == "ab=")\r
+ {\r
+ string a = Field.substr(3, string::npos);\r
+ return (float) atof(a.c_str());\r
+ }\r
+ }\r
+ if (g_UchimeDeNovo)\r
+ Die("Missing abundance /ab=xx/ in label >%s", Label.c_str());\r
+ return 0.0;\r
+ }\r
+\r
+int main(int argc, char *argv[])\r
+ {\r
+ \r
+ MyCmdLine(argc, argv);\r
+\r
+ if (argc < 2)\r
+ {\r
+ Usage();\r
+ return 0;\r
+ }\r
+\r
+ if (opt_version)\r
+ {\r
+ printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION);\r
+ return 0;\r
+ }\r
+\r
+ printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION);\r
+ printf("by Robert C. Edgar\n");\r
+ printf("http://drive5.com/uchime\n");\r
+ printf("This code is donated to the public domain.\n");\r
+ printf("\n");\r
+ if (!optset_w)\r
+ opt_w = 8;\r
+ \r
+ float MinFractId = 0.95f;\r
+ if (optset_id)\r
+ MinFractId = (float) opt_id;\r
+\r
+ Log("%8.2f minh\n", opt_minh);\r
+ Log("%8.2f xn\n", opt_xn);\r
+ Log("%8.2f dn\n", opt_dn);\r
+ Log("%8.2f xa\n", opt_xa);\r
+ Log("%8.2f mindiv\n", opt_mindiv);\r
+ Log("%8u maxp\n", opt_maxp);\r
+\r
+ if (opt_input == "" && opt_uchime != "")\r
+ opt_input = opt_uchime;\r
+\r
+ if (opt_input == "")\r
+ Die("Missing --input");\r
+\r
+ g_UchimeDeNovo = (opt_db == "");\r
+\r
+ if (opt_uchimeout != "")\r
+ g_fUChime = CreateStdioFile(opt_uchimeout);\r
+\r
+ if (opt_uchimealns != "")\r
+ g_fUChimeAlns = CreateStdioFile(opt_uchimealns);\r
+\r
+ SeqDB Input;\r
+ SeqDB DB;\r
+\r
+ Input.FromFasta(opt_input);\r
+ if (!Input.IsNucleo())\r
+ Die("Input contains amino acid sequences");\r
+\r
+ const unsigned QuerySeqCount = Input.GetSeqCount();\r
+ vector<unsigned> Order;\r
+ for (unsigned i = 0; i < QuerySeqCount; ++i)\r
+ Order.push_back(i);\r
+\r
+ if (g_UchimeDeNovo)\r
+ {\r
+ vector<float> Abs;\r
+ for (unsigned i = 0; i < QuerySeqCount; ++i)\r
+ {\r
+ const char *Label = Input.GetLabel(i);\r
+ float Ab = GetAbFromLabel(Label);\r
+ Abs.push_back(Ab);\r
+ }\r
+ SortDescending(Abs, Order);\r
+ DB.m_IsNucleoSet = true;\r
+ DB.m_IsNucleo = true;\r
+ }\r
+ else\r
+ {\r
+ DB.FromFasta(opt_db);\r
+ if (!DB.IsNucleo())\r
+ Die("Database contains amino acid sequences");\r
+ }\r
+\r
+ vector<ChimeHit2> Hits;\r
+ unsigned HitCount = 0;\r
+ for (unsigned i = 0; i < QuerySeqCount; ++i)\r
+ {\r
+ unsigned QuerySeqIndex = Order[i];\r
+\r
+ SeqData QSD;\r
+ Input.GetSeqData(QuerySeqIndex, QSD);\r
+\r
+ float QAb = -1.0;\r
+ if (g_UchimeDeNovo)\r
+ QAb = GetAbFromLabel(QSD.Label);\r
+\r
+ ChimeHit2 Hit;\r
+ AlnParams &AP = *(AlnParams *) 0;\r
+ AlnHeuristics &AH = *(AlnHeuristics *) 0;\r
+ HSPFinder &HF = *(HSPFinder *) 0;\r
+ bool Found = SearchChime(DB, QSD, QAb, AP, AH, HF, MinFractId, Hit);\r
+ if (Found)\r
+ ++HitCount;\r
+ else\r
+ {\r
+ if (g_UchimeDeNovo)\r
+ DB.AddSeq(QSD.Label, QSD.Seq, QSD.L);\r
+ }\r
+\r
+ WriteChimeHit(g_fUChime, Hit);\r
+\r
+ ProgressStep(i, QuerySeqCount, "%u/%u chimeras found (%.1f%%)", HitCount, i, Pct(HitCount, i+1));\r
+ }\r
+\r
+ Log("\n");\r
+ Log("%s: %u/%u chimeras found (%.1f%%)\n",\r
+ opt_input.c_str(), HitCount, QuerySeqCount, Pct(HitCount, QuerySeqCount));\r
+\r
+ CloseStdioFile(g_fUChime);\r
+ CloseStdioFile(g_fUChimeAlns);\r
+\r
+ ProgressExit();\r
+ return 0;\r
+ }\r