+++ /dev/null
-#include "myutils.h"\r
-#include "chime.h"\r
-#include "seqdb.h"\r
-#include "dp.h"\r
-#include "ultra.h"\r
-#include "hspfinder.h"\r
-#include <algorithm>\r
-#include <set>\r
-\r
-bool SearchChime(Ultra &U, const SeqData &QSD, float QAb, \r
- const AlnParams &AP, const AlnHeuristics &AH, HSPFinder &HF,\r
- float MinFractId, ChimeHit2 &Hit);\r
-\r
-FILE *g_fUChime;\r
-FILE *g_fUChimeAlns;\r
-const vector<float> *g_SortVecFloat;\r
-bool g_UchimeDeNovo = false;\r
-\r
-void Usage()\r
- {\r
- printf("\n");\r
- printf("UCHIME %s by Robert C. Edgar\n", MY_VERSION);\r
- printf("http://www.drive5.com/uchime\n");\r
- printf("\n");\r
- printf("This software is donated to the public domain\n");\r
- printf("\n");\r
-\r
- printf(\r
-#include "help.h"\r
- );\r
- }\r
-\r
-void SetBLOSUM62()\r
- {\r
- Die("SetBLOSUM62 not implemented");\r
- }\r
-\r
-void ReadSubstMx(const string &/*FileName*/, Mx<float> &/*Mxf*/)\r
- {\r
- Die("ReadSubstMx not implemented");\r
- }\r
-\r
-void LogAllocs()\r
- {\r
- /*empty*/\r
- }\r
-\r
-static bool CmpDescVecFloat(unsigned i, unsigned j)\r
- {\r
- return (*g_SortVecFloat)[i] > (*g_SortVecFloat)[j];\r
- }\r
-\r
-void Range(vector<unsigned> &v, unsigned N)\r
- {\r
- v.clear();\r
- v.reserve(N);\r
- for (unsigned i = 0; i < N; ++i)\r
- v.push_back(i);\r
- }\r
-\r
-void SortDescending(const vector<float> &Values, vector<unsigned> &Order)\r
- {\r
- StartTimer(Sort);\r
- const unsigned N = SIZE(Values);\r
- Range(Order, N);\r
- g_SortVecFloat = &Values;\r
- sort(Order.begin(), Order.end(), CmpDescVecFloat);\r
- EndTimer(Sort);\r
- }\r
-\r
-float GetAbFromLabel(const string &Label)\r
- {\r
- vector<string> Fields;\r
- Split(Label, Fields, '/');\r
- const unsigned N = SIZE(Fields);\r
- for (unsigned i = 0; i < N; ++i)\r
- {\r
- const string &Field = Fields[i];\r
- if (Field.substr(0, 3) == "ab=")\r
- {\r
- string a = Field.substr(3, string::npos);\r
- return (float) atof(a.c_str());\r
- }\r
- }\r
- if (g_UchimeDeNovo)\r
- Die("Missing abundance /ab=xx/ in label >%s", Label.c_str());\r
- return 0.0;\r
- }\r
-\r
-int uchime_main(int argc, char *argv[])\r
- {\r
- \r
- MyCmdLine(argc, argv);\r
-\r
- if (argc < 2)\r
- {\r
- Usage();\r
- return 0;\r
- }\r
-\r
- if (opt_version)\r
- {\r
- printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION);\r
- return 0;\r
- }\r
-\r
- printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION);\r
- printf("by Robert C. Edgar\n");\r
- printf("http://drive5.com/uchime\n");\r
- printf("This code is donated to the public domain.\n");\r
- printf("\n");\r
- if (!optset_w)\r
- opt_w = 8;\r
- \r
- float MinFractId = 0.95f;\r
- if (optset_id)\r
- MinFractId = (float) opt_id;\r
-\r
- Log("%8.2f minh\n", opt_minh);\r
- Log("%8.2f xn\n", opt_xn);\r
- Log("%8.2f dn\n", opt_dn);\r
- Log("%8.2f xa\n", opt_xa);\r
- Log("%8.2f mindiv\n", opt_mindiv);\r
- Log("%8u maxp\n", opt_maxp);\r
-\r
- if (opt_input == "" && opt_uchime != "")\r
- opt_input = opt_uchime;\r
-\r
- if (opt_input == "")\r
- Die("Missing --input");\r
-\r
- g_UchimeDeNovo = (opt_db == "");\r
-\r
- if (opt_uchimeout != "")\r
- g_fUChime = CreateStdioFile(opt_uchimeout);\r
-\r
- if (opt_uchimealns != "")\r
- g_fUChimeAlns = CreateStdioFile(opt_uchimealns);\r
-\r
- SeqDB Input;\r
- SeqDB DB;\r
-\r
- Input.FromFasta(opt_input);\r
- if (!Input.IsNucleo())\r
- Die("Input contains amino acid sequences");\r
-\r
- const unsigned QuerySeqCount = Input.GetSeqCount();\r
- vector<unsigned> Order;\r
- for (unsigned i = 0; i < QuerySeqCount; ++i)\r
- Order.push_back(i);\r
-\r
- if (g_UchimeDeNovo)\r
- {\r
- vector<float> Abs;\r
- for (unsigned i = 0; i < QuerySeqCount; ++i)\r
- {\r
- const char *Label = Input.GetLabel(i);\r
- float Ab = GetAbFromLabel(Label);\r
- Abs.push_back(Ab);\r
- }\r
- SortDescending(Abs, Order);\r
- DB.m_IsNucleoSet = true;\r
- DB.m_IsNucleo = true;\r
- }\r
- else\r
- {\r
- DB.FromFasta(opt_db);\r
- if (!DB.IsNucleo())\r
- Die("Database contains amino acid sequences");\r
- }\r
-\r
- vector<ChimeHit2> Hits;\r
- unsigned HitCount = 0;\r
- for (unsigned i = 0; i < QuerySeqCount; ++i)\r
- {\r
- unsigned QuerySeqIndex = Order[i];\r
-\r
- SeqData QSD;\r
- Input.GetSeqData(QuerySeqIndex, QSD);\r
-\r
- float QAb = -1.0;\r
- if (g_UchimeDeNovo)\r
- QAb = GetAbFromLabel(QSD.Label);\r
-\r
- ChimeHit2 Hit;\r
- AlnParams &AP = *(AlnParams *) 0;\r
- AlnHeuristics &AH = *(AlnHeuristics *) 0;\r
- HSPFinder &HF = *(HSPFinder *) 0;\r
- bool Found = SearchChime(DB, QSD, QAb, AP, AH, HF, MinFractId, Hit);\r
- if (Found)\r
- ++HitCount;\r
- else\r
- {\r
- if (g_UchimeDeNovo)\r
- DB.AddSeq(QSD.Label, QSD.Seq, QSD.L);\r
- }\r
-\r
- WriteChimeHit(g_fUChime, Hit);\r
-\r
- ProgressStep(i, QuerySeqCount, "%u/%u chimeras found (%.1f%%)", HitCount, i, Pct(HitCount, i+1));\r
- }\r
-\r
- Log("\n");\r
- Log("%s: %u/%u chimeras found (%.1f%%)\n",\r
- opt_input.c_str(), HitCount, QuerySeqCount, Pct(HitCount, QuerySeqCount));\r
-\r
- CloseStdioFile(g_fUChime);\r
- CloseStdioFile(g_fUChimeAlns);\r
-\r
- ProgressExit();\r
- return 0;\r
- }\r