]> git.donarmstrong.com Git - mothur.git/blob - uchime_main.cpp
40e7f44546668bb7d22d1a0b4e8800f29fc89e56
[mothur.git] / uchime_main.cpp
1 #include "myutils.h"\r
2 #include "chime.h"\r
3 #include "seqdb.h"\r
4 #include "dp.h"\r
5 #include "ultra.h"\r
6 #include "hspfinder.h"\r
7 #include <algorithm>\r
8 #include <set>\r
9 #include "mothurout.h"\r
10 \r
11 bool SearchChime(Ultra &U, const SeqData &QSD, float QAb, \r
12   const AlnParams &AP, const AlnHeuristics &AH, HSPFinder &HF,\r
13   float MinFractId, ChimeHit2 &Hit);\r
14 \r
15 FILE *g_fUChime;\r
16 FILE *g_fUChimeAlns;\r
17 const vector<float> *g_SortVecFloat;\r
18 bool g_UchimeDeNovo = false;\r
19 \r
20 void Usage()\r
21         {\r
22         //printf("\n");\r
23         //printf("UCHIME %s by Robert C. Edgar\n", MY_VERSION);\r
24         //printf("http://www.drive5.com/uchime\n");\r
25         //printf("\n");\r
26         //printf("This software is donated to the public domain\n");\r
27         //printf("\n");\r
28 \r
29         //printf(\r
30 //#include "help.h"\r
31                 //);\r
32         }\r
33 \r
34 void SetBLOSUM62()\r
35         {\r
36         Die("SetBLOSUM62 not implemented");\r
37         }\r
38 \r
39 void ReadSubstMx(const string &/*FileName*/, Mx<float> &/*Mxf*/)\r
40         {\r
41         Die("ReadSubstMx not implemented");\r
42         }\r
43 \r
44 void LogAllocs()\r
45         {\r
46         /*empty*/\r
47         }\r
48 \r
49 static bool CmpDescVecFloat(unsigned i, unsigned j)\r
50         {\r
51         return (*g_SortVecFloat)[i] > (*g_SortVecFloat)[j];\r
52         }\r
53 \r
54 void Range(vector<unsigned> &v, unsigned N)\r
55         {\r
56         v.clear();\r
57         v.reserve(N);\r
58         for (unsigned i = 0; i < N; ++i)\r
59                 v.push_back(i);\r
60         }\r
61 \r
62 void SortDescending(const vector<float> &Values, vector<unsigned> &Order)\r
63         {\r
64         StartTimer(Sort);\r
65         const unsigned N = SIZE(Values);\r
66         Range(Order, N);\r
67         g_SortVecFloat = &Values;\r
68         sort(Order.begin(), Order.end(), CmpDescVecFloat);\r
69         EndTimer(Sort);\r
70         }\r
71 \r
72 float GetAbFromLabel(const string &Label)\r
73         {\r
74         vector<string> Fields;\r
75         Split(Label, Fields, '/');\r
76         const unsigned N = SIZE(Fields);\r
77         for (unsigned i = 0; i < N; ++i)\r
78                 {\r
79                 const string &Field = Fields[i];\r
80                 if (Field.substr(0, 3) == "ab=")\r
81                         {\r
82                         string a = Field.substr(3, string::npos);\r
83                         return (float) atof(a.c_str());\r
84                         }\r
85                 }\r
86         if (g_UchimeDeNovo)\r
87                 Die("Missing abundance /ab=xx/ in label >%s", Label.c_str());\r
88         return 0.0;\r
89         }\r
90 \r
91 int uchime_main(int argc, char *argv[])\r
92         {\r
93         MothurOut* m;\r
94         m = MothurOut::getInstance();\r
95                 \r
96         MyCmdLine(argc, argv);\r
97 \r
98         if (argc < 2)\r
99                 {\r
100                 Usage();\r
101                 return 0;\r
102                 }\r
103 \r
104         if (opt_version)\r
105                 {\r
106                 printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION);\r
107                 return 0;\r
108                 }\r
109 \r
110         //printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION);\r
111         //printf("by Robert C. Edgar\n");\r
112         //printf("http://drive5.com/uchime\n");\r
113         //printf("This code is donated to the public domain.\n");\r
114         //printf("\n");\r
115         if (!optset_w)\r
116                 opt_w = 8;\r
117         \r
118         float MinFractId = 0.95f;\r
119         if (optset_id)\r
120                 MinFractId = (float) opt_id;\r
121 \r
122         Log("%8.2f  minh\n", opt_minh);\r
123         Log("%8.2f  xn\n", opt_xn);\r
124         Log("%8.2f  dn\n", opt_dn);\r
125         Log("%8.2f  xa\n", opt_xa);\r
126         Log("%8.2f  mindiv\n", opt_mindiv);\r
127         Log("%8u  maxp\n", opt_maxp);\r
128 \r
129         if (opt_input == "" && opt_uchime != "")\r
130                 opt_input = opt_uchime;\r
131 \r
132         if (opt_input == "")\r
133                 Die("Missing --input");\r
134 \r
135         g_UchimeDeNovo = (opt_db == "");\r
136 \r
137         if (opt_uchimeout != "")\r
138                 g_fUChime = CreateStdioFile(opt_uchimeout);\r
139 \r
140         if (opt_uchimealns != "")\r
141                 g_fUChimeAlns = CreateStdioFile(opt_uchimealns);\r
142 \r
143         SeqDB Input;\r
144         SeqDB DB;\r
145 \r
146         Input.FromFasta(opt_input);\r
147         if (!Input.IsNucleo())\r
148                 Die("Input contains amino acid sequences");\r
149 \r
150         const unsigned QuerySeqCount = Input.GetSeqCount();\r
151         vector<unsigned> Order;\r
152         for (unsigned i = 0; i < QuerySeqCount; ++i)\r
153                 Order.push_back(i);\r
154 \r
155         if (g_UchimeDeNovo)\r
156                 {\r
157                 vector<float> Abs;\r
158                 for (unsigned i = 0; i < QuerySeqCount; ++i)\r
159                         {\r
160                         const char *Label = Input.GetLabel(i);\r
161                         float Ab = GetAbFromLabel(Label);\r
162                         Abs.push_back(Ab);\r
163                         }\r
164                 SortDescending(Abs, Order);\r
165                 DB.m_IsNucleoSet = true;\r
166                 DB.m_IsNucleo = true;\r
167                 }\r
168         else\r
169                 {\r
170                 DB.FromFasta(opt_db);\r
171                 if (!DB.IsNucleo())\r
172                         Die("Database contains amino acid sequences");\r
173                 }\r
174 \r
175         vector<ChimeHit2> Hits;\r
176         unsigned HitCount = 0;\r
177         for (unsigned i = 0; i < QuerySeqCount; ++i)\r
178                 {\r
179                         \r
180                 if (m->control_pressed) { break; }\r
181                         \r
182                 unsigned QuerySeqIndex = Order[i];\r
183 \r
184                 SeqData QSD;\r
185                 Input.GetSeqData(QuerySeqIndex, QSD);\r
186 \r
187                 float QAb = -1.0;\r
188                 if (g_UchimeDeNovo)\r
189                         QAb = GetAbFromLabel(QSD.Label);\r
190 \r
191                 ChimeHit2 Hit;\r
192                 AlnParams &AP = *(AlnParams *) 0;\r
193                 AlnHeuristics &AH = *(AlnHeuristics *) 0;\r
194                 HSPFinder &HF = *(HSPFinder *) 0;\r
195                 bool Found = SearchChime(DB, QSD, QAb, AP, AH, HF, MinFractId, Hit);\r
196                 if (Found)\r
197                         ++HitCount;\r
198                 else\r
199                         {\r
200                         if (g_UchimeDeNovo)\r
201                                 DB.AddSeq(QSD.Label, QSD.Seq, QSD.L);\r
202                         }\r
203 \r
204                 WriteChimeHit(g_fUChime, Hit);\r
205 \r
206                 ProgressStep(i, QuerySeqCount, "%u/%u chimeras found (%.1f%%)", HitCount, i, Pct(HitCount, i+1));\r
207                 \r
208                 }\r
209 \r
210         Log("\n");\r
211         Log("%s: %u/%u chimeras found (%.1f%%)\n",\r
212           opt_input.c_str(), HitCount, QuerySeqCount, Pct(HitCount, QuerySeqCount));\r
213 \r
214         CloseStdioFile(g_fUChime);\r
215         CloseStdioFile(g_fUChimeAlns);\r
216 \r
217         ProgressExit();\r
218         return 0;\r
219         }\r