1 //uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain.
\r
8 #include "hspfinder.h"
\r
11 #include "mothurout.h"
\r
13 bool SearchChime(Ultra &U, const SeqData &QSD, float QAb,
\r
14 const AlnParams &AP, const AlnHeuristics &AH, HSPFinder &HF,
\r
15 float MinFractId, ChimeHit2 &Hit);
\r
18 FILE *g_fUChimeAlns;
\r
19 const vector<float> *g_SortVecFloat;
\r
20 bool g_UchimeDeNovo = false;
\r
25 //printf("UCHIME %s by Robert C. Edgar\n", MY_VERSION);
\r
26 //printf("http://www.drive5.com/uchime\n");
\r
28 //printf("This software is donated to the public domain\n");
\r
38 Die("SetBLOSUM62 not implemented");
\r
41 void ReadSubstMx(const string &/*FileName*/, Mx<float> &/*Mxf*/)
\r
43 Die("ReadSubstMx not implemented");
\r
51 static bool CmpDescVecFloat(unsigned i, unsigned j)
\r
53 return (*g_SortVecFloat)[i] > (*g_SortVecFloat)[j];
\r
56 void Range(vector<unsigned> &v, unsigned N)
\r
60 for (unsigned i = 0; i < N; ++i)
\r
64 void SortDescending(const vector<float> &Values, vector<unsigned> &Order)
\r
67 const unsigned N = SIZE(Values);
\r
69 g_SortVecFloat = &Values;
\r
70 sort(Order.begin(), Order.end(), CmpDescVecFloat);
\r
74 float GetAbFromLabel(const string &Label)
\r
76 vector<string> Fields;
\r
77 Split(Label, Fields, '/');
\r
78 const unsigned N = SIZE(Fields);
\r
79 for (unsigned i = 0; i < N; ++i)
\r
81 const string &Field = Fields[i];
\r
82 if (Field.substr(0, 3) == "ab=")
\r
84 string a = Field.substr(3, string::npos);
\r
85 return (float) atof(a.c_str());
\r
89 Die("Missing abundance /ab=xx/ in label >%s", Label.c_str());
\r
93 int uchime_main(int argc, char *argv[])
\r
96 m = MothurOut::getInstance();
\r
98 MyCmdLine(argc, argv);
\r
108 printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION);
\r
116 float MinFractId = 0.95f;
\r
118 MinFractId = (float) opt_id;
\r
120 Log("%8.2f minh\n", opt_minh);
\r
121 Log("%8.2f xn\n", opt_xn);
\r
122 Log("%8.2f dn\n", opt_dn);
\r
123 Log("%8.2f xa\n", opt_xa);
\r
124 Log("%8.2f mindiv\n", opt_mindiv);
\r
125 Log("%8u maxp\n", opt_maxp);
\r
127 if (opt_input == "" && opt_uchime != "")
\r
128 opt_input = opt_uchime;
\r
130 if (opt_input == "")
\r
131 Die("Missing --input");
\r
133 g_UchimeDeNovo = (opt_db == "");
\r
135 if (opt_uchimeout != "")
\r
136 g_fUChime = CreateStdioFile(opt_uchimeout);
\r
138 if (opt_uchimealns != "")
\r
139 g_fUChimeAlns = CreateStdioFile(opt_uchimealns);
\r
144 Input.FromFasta(opt_input);
\r
145 if (!Input.IsNucleo())
\r
146 Die("Input contains amino acid sequences");
\r
148 const unsigned QuerySeqCount = Input.GetSeqCount();
\r
149 vector<unsigned> Order;
\r
150 for (unsigned i = 0; i < QuerySeqCount; ++i)
\r
151 Order.push_back(i);
\r
153 if (g_UchimeDeNovo)
\r
156 for (unsigned i = 0; i < QuerySeqCount; ++i)
\r
158 const char *Label = Input.GetLabel(i);
\r
159 float Ab = GetAbFromLabel(Label);
\r
162 SortDescending(Abs, Order);
\r
163 DB.m_IsNucleoSet = true;
\r
164 DB.m_IsNucleo = true;
\r
168 DB.FromFasta(opt_db);
\r
169 if (!DB.IsNucleo())
\r
170 Die("Database contains amino acid sequences");
\r
173 vector<ChimeHit2> Hits;
\r
174 unsigned HitCount = 0;
\r
175 for (unsigned i = 0; i < QuerySeqCount; ++i)
\r
178 if (m->control_pressed) { break; }
\r
180 unsigned QuerySeqIndex = Order[i];
\r
183 Input.GetSeqData(QuerySeqIndex, QSD);
\r
186 if (g_UchimeDeNovo)
\r
187 QAb = GetAbFromLabel(QSD.Label);
\r
190 AlnParams &AP = *(AlnParams *) 0;
\r
191 AlnHeuristics &AH = *(AlnHeuristics *) 0;
\r
192 HSPFinder &HF = *(HSPFinder *) 0;
\r
193 bool Found = SearchChime(DB, QSD, QAb, AP, AH, HF, MinFractId, Hit);
\r
198 if (g_UchimeDeNovo)
\r
199 DB.AddSeq(QSD.Label, QSD.Seq, QSD.L);
\r
202 WriteChimeHit(g_fUChime, Hit);
\r
204 //ProgressStep(i, QuerySeqCount, "%u/%u chimeras found (%.1f%%)", HitCount, i, Pct(HitCount, i+1));
\r
206 if((i+1) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(i+1) + ", " + toString(HitCount) + " chimeras found."); m->mothurOutEndLine(); }
\r
208 if (!m->control_pressed) {
\r
210 if((QuerySeqCount) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(QuerySeqCount) + ", " + toString(HitCount) + " chimeras found."); m->mothurOutEndLine(); }
\r
214 Log("%s: %u/%u chimeras found (%.1f%%)\n",
\r
215 opt_input.c_str(), HitCount, QuerySeqCount, Pct(HitCount, QuerySeqCount));
\r
217 CloseStdioFile(g_fUChime);
\r
218 CloseStdioFile(g_fUChimeAlns);
\r