6 #include "hspfinder.h"
\r
10 bool SearchChime(Ultra &U, const SeqData &QSD, float QAb,
\r
11 const AlnParams &AP, const AlnHeuristics &AH, HSPFinder &HF,
\r
12 float MinFractId, ChimeHit2 &Hit);
\r
15 FILE *g_fUChimeAlns;
\r
16 const vector<float> *g_SortVecFloat;
\r
17 bool g_UchimeDeNovo = false;
\r
22 printf("UCHIME %s by Robert C. Edgar\n", MY_VERSION);
\r
23 printf("http://www.drive5.com/uchime\n");
\r
25 printf("This software is donated to the public domain\n");
\r
35 Die("SetBLOSUM62 not implemented");
\r
38 void ReadSubstMx(const string &/*FileName*/, Mx<float> &/*Mxf*/)
\r
40 Die("ReadSubstMx not implemented");
\r
48 static bool CmpDescVecFloat(unsigned i, unsigned j)
\r
50 return (*g_SortVecFloat)[i] > (*g_SortVecFloat)[j];
\r
53 void Range(vector<unsigned> &v, unsigned N)
\r
57 for (unsigned i = 0; i < N; ++i)
\r
61 void SortDescending(const vector<float> &Values, vector<unsigned> &Order)
\r
64 const unsigned N = SIZE(Values);
\r
66 g_SortVecFloat = &Values;
\r
67 sort(Order.begin(), Order.end(), CmpDescVecFloat);
\r
71 float GetAbFromLabel(const string &Label)
\r
73 vector<string> Fields;
\r
74 Split(Label, Fields, '/');
\r
75 const unsigned N = SIZE(Fields);
\r
76 for (unsigned i = 0; i < N; ++i)
\r
78 const string &Field = Fields[i];
\r
79 if (Field.substr(0, 3) == "ab=")
\r
81 string a = Field.substr(3, string::npos);
\r
82 return (float) atof(a.c_str());
\r
86 Die("Missing abundance /ab=xx/ in label >%s", Label.c_str());
\r
90 int uchime_main(int argc, char *argv[])
\r
93 MyCmdLine(argc, argv);
\r
103 printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION);
\r
107 printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION);
\r
108 printf("by Robert C. Edgar\n");
\r
109 printf("http://drive5.com/uchime\n");
\r
110 printf("This code is donated to the public domain.\n");
\r
115 float MinFractId = 0.95f;
\r
117 MinFractId = (float) opt_id;
\r
119 Log("%8.2f minh\n", opt_minh);
\r
120 Log("%8.2f xn\n", opt_xn);
\r
121 Log("%8.2f dn\n", opt_dn);
\r
122 Log("%8.2f xa\n", opt_xa);
\r
123 Log("%8.2f mindiv\n", opt_mindiv);
\r
124 Log("%8u maxp\n", opt_maxp);
\r
126 if (opt_input == "" && opt_uchime != "")
\r
127 opt_input = opt_uchime;
\r
129 if (opt_input == "")
\r
130 Die("Missing --input");
\r
132 g_UchimeDeNovo = (opt_db == "");
\r
134 if (opt_uchimeout != "")
\r
135 g_fUChime = CreateStdioFile(opt_uchimeout);
\r
137 if (opt_uchimealns != "")
\r
138 g_fUChimeAlns = CreateStdioFile(opt_uchimealns);
\r
143 Input.FromFasta(opt_input);
\r
144 if (!Input.IsNucleo())
\r
145 Die("Input contains amino acid sequences");
\r
147 const unsigned QuerySeqCount = Input.GetSeqCount();
\r
148 vector<unsigned> Order;
\r
149 for (unsigned i = 0; i < QuerySeqCount; ++i)
\r
150 Order.push_back(i);
\r
152 if (g_UchimeDeNovo)
\r
155 for (unsigned i = 0; i < QuerySeqCount; ++i)
\r
157 const char *Label = Input.GetLabel(i);
\r
158 float Ab = GetAbFromLabel(Label);
\r
161 SortDescending(Abs, Order);
\r
162 DB.m_IsNucleoSet = true;
\r
163 DB.m_IsNucleo = true;
\r
167 DB.FromFasta(opt_db);
\r
168 if (!DB.IsNucleo())
\r
169 Die("Database contains amino acid sequences");
\r
172 vector<ChimeHit2> Hits;
\r
173 unsigned HitCount = 0;
\r
174 for (unsigned i = 0; i < QuerySeqCount; ++i)
\r
176 unsigned QuerySeqIndex = Order[i];
\r
179 Input.GetSeqData(QuerySeqIndex, QSD);
\r
182 if (g_UchimeDeNovo)
\r
183 QAb = GetAbFromLabel(QSD.Label);
\r
186 AlnParams &AP = *(AlnParams *) 0;
\r
187 AlnHeuristics &AH = *(AlnHeuristics *) 0;
\r
188 HSPFinder &HF = *(HSPFinder *) 0;
\r
189 bool Found = SearchChime(DB, QSD, QAb, AP, AH, HF, MinFractId, Hit);
\r
194 if (g_UchimeDeNovo)
\r
195 DB.AddSeq(QSD.Label, QSD.Seq, QSD.L);
\r
198 WriteChimeHit(g_fUChime, Hit);
\r
200 ProgressStep(i, QuerySeqCount, "%u/%u chimeras found (%.1f%%)", HitCount, i, Pct(HitCount, i+1));
\r
204 Log("%s: %u/%u chimeras found (%.1f%%)\n",
\r
205 opt_input.c_str(), HitCount, QuerySeqCount, Pct(HitCount, QuerySeqCount));
\r
207 CloseStdioFile(g_fUChime);
\r
208 CloseStdioFile(g_fUChimeAlns);
\r