]> git.donarmstrong.com Git - mothur.git/blob - uchime_src/uchime_main.cpp
changes while testing
[mothur.git] / uchime_src / uchime_main.cpp
1 #include "myutils.h"\r
2 #include "chime.h"\r
3 #include "seqdb.h"\r
4 #include "dp.h"\r
5 #include "ultra.h"\r
6 #include "hspfinder.h"\r
7 #include <algorithm>\r
8 #include <set>\r
9 \r
10 bool SearchChime(Ultra &U, const SeqData &QSD, float QAb, \r
11   const AlnParams &AP, const AlnHeuristics &AH, HSPFinder &HF,\r
12   float MinFractId, ChimeHit2 &Hit);\r
13 \r
14 FILE *g_fUChime;\r
15 FILE *g_fUChimeAlns;\r
16 const vector<float> *g_SortVecFloat;\r
17 bool g_UchimeDeNovo = false;\r
18 \r
19 void Usage()\r
20         {\r
21         printf("\n");\r
22         printf("UCHIME %s by Robert C. Edgar\n", MY_VERSION);\r
23         printf("http://www.drive5.com/uchime\n");\r
24         printf("\n");\r
25         printf("This software is donated to the public domain\n");\r
26         printf("\n");\r
27 \r
28         printf(\r
29 #include "help.h"\r
30                 );\r
31         }\r
32 \r
33 void SetBLOSUM62()\r
34         {\r
35         Die("SetBLOSUM62 not implemented");\r
36         }\r
37 \r
38 void ReadSubstMx(const string &/*FileName*/, Mx<float> &/*Mxf*/)\r
39         {\r
40         Die("ReadSubstMx not implemented");\r
41         }\r
42 \r
43 void LogAllocs()\r
44         {\r
45         /*empty*/\r
46         }\r
47 \r
48 static bool CmpDescVecFloat(unsigned i, unsigned j)\r
49         {\r
50         return (*g_SortVecFloat)[i] > (*g_SortVecFloat)[j];\r
51         }\r
52 \r
53 void Range(vector<unsigned> &v, unsigned N)\r
54         {\r
55         v.clear();\r
56         v.reserve(N);\r
57         for (unsigned i = 0; i < N; ++i)\r
58                 v.push_back(i);\r
59         }\r
60 \r
61 void SortDescending(const vector<float> &Values, vector<unsigned> &Order)\r
62         {\r
63         StartTimer(Sort);\r
64         const unsigned N = SIZE(Values);\r
65         Range(Order, N);\r
66         g_SortVecFloat = &Values;\r
67         sort(Order.begin(), Order.end(), CmpDescVecFloat);\r
68         EndTimer(Sort);\r
69         }\r
70 \r
71 float GetAbFromLabel(const string &Label)\r
72         {\r
73         vector<string> Fields;\r
74         Split(Label, Fields, '/');\r
75         const unsigned N = SIZE(Fields);\r
76         for (unsigned i = 0; i < N; ++i)\r
77                 {\r
78                 const string &Field = Fields[i];\r
79                 if (Field.substr(0, 3) == "ab=")\r
80                         {\r
81                         string a = Field.substr(3, string::npos);\r
82                         return (float) atof(a.c_str());\r
83                         }\r
84                 }\r
85         if (g_UchimeDeNovo)\r
86                 Die("Missing abundance /ab=xx/ in label >%s", Label.c_str());\r
87         return 0.0;\r
88         }\r
89 \r
90 int main(int argc, char *argv[])\r
91         {\r
92                 \r
93         MyCmdLine(argc, argv);\r
94 \r
95         if (argc < 2)\r
96                 {\r
97                 Usage();\r
98                 return 0;\r
99                 }\r
100 \r
101         if (opt_version)\r
102                 {\r
103                 printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION);\r
104                 return 0;\r
105                 }\r
106 \r
107         printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION);\r
108         printf("by Robert C. Edgar\n");\r
109         printf("http://drive5.com/uchime\n");\r
110         printf("This code is donated to the public domain.\n");\r
111         printf("\n");\r
112         if (!optset_w)\r
113                 opt_w = 8;\r
114         \r
115         float MinFractId = 0.95f;\r
116         if (optset_id)\r
117                 MinFractId = (float) opt_id;\r
118 \r
119         Log("%8.2f  minh\n", opt_minh);\r
120         Log("%8.2f  xn\n", opt_xn);\r
121         Log("%8.2f  dn\n", opt_dn);\r
122         Log("%8.2f  xa\n", opt_xa);\r
123         Log("%8.2f  mindiv\n", opt_mindiv);\r
124         Log("%8u  maxp\n", opt_maxp);\r
125 \r
126         if (opt_input == "" && opt_uchime != "")\r
127                 opt_input = opt_uchime;\r
128 \r
129         if (opt_input == "")\r
130                 Die("Missing --input");\r
131 \r
132         g_UchimeDeNovo = (opt_db == "");\r
133 \r
134         if (opt_uchimeout != "")\r
135                 g_fUChime = CreateStdioFile(opt_uchimeout);\r
136 \r
137         if (opt_uchimealns != "")\r
138                 g_fUChimeAlns = CreateStdioFile(opt_uchimealns);\r
139 \r
140         SeqDB Input;\r
141         SeqDB DB;\r
142 \r
143         Input.FromFasta(opt_input);\r
144         if (!Input.IsNucleo())\r
145                 Die("Input contains amino acid sequences");\r
146 \r
147         const unsigned QuerySeqCount = Input.GetSeqCount();\r
148         vector<unsigned> Order;\r
149         for (unsigned i = 0; i < QuerySeqCount; ++i)\r
150                 Order.push_back(i);\r
151 \r
152         if (g_UchimeDeNovo)\r
153                 {\r
154                 vector<float> Abs;\r
155                 for (unsigned i = 0; i < QuerySeqCount; ++i)\r
156                         {\r
157                         const char *Label = Input.GetLabel(i);\r
158                         float Ab = GetAbFromLabel(Label);\r
159                         Abs.push_back(Ab);\r
160                         }\r
161                 SortDescending(Abs, Order);\r
162                 DB.m_IsNucleoSet = true;\r
163                 DB.m_IsNucleo = true;\r
164                 }\r
165         else\r
166                 {\r
167                 DB.FromFasta(opt_db);\r
168                 if (!DB.IsNucleo())\r
169                         Die("Database contains amino acid sequences");\r
170                 }\r
171 \r
172         vector<ChimeHit2> Hits;\r
173         unsigned HitCount = 0;\r
174         for (unsigned i = 0; i < QuerySeqCount; ++i)\r
175                 {\r
176                 unsigned QuerySeqIndex = Order[i];\r
177 \r
178                 SeqData QSD;\r
179                 Input.GetSeqData(QuerySeqIndex, QSD);\r
180 \r
181                 float QAb = -1.0;\r
182                 if (g_UchimeDeNovo)\r
183                         QAb = GetAbFromLabel(QSD.Label);\r
184 \r
185                 ChimeHit2 Hit;\r
186                 AlnParams &AP = *(AlnParams *) 0;\r
187                 AlnHeuristics &AH = *(AlnHeuristics *) 0;\r
188                 HSPFinder &HF = *(HSPFinder *) 0;\r
189                 bool Found = SearchChime(DB, QSD, QAb, AP, AH, HF, MinFractId, Hit);\r
190                 if (Found)\r
191                         ++HitCount;\r
192                 else\r
193                         {\r
194                         if (g_UchimeDeNovo)\r
195                                 DB.AddSeq(QSD.Label, QSD.Seq, QSD.L);\r
196                         }\r
197 \r
198                 WriteChimeHit(g_fUChime, Hit);\r
199 \r
200                 ProgressStep(i, QuerySeqCount, "%u/%u chimeras found (%.1f%%)", HitCount, i, Pct(HitCount, i+1));\r
201                 }\r
202 \r
203         Log("\n");\r
204         Log("%s: %u/%u chimeras found (%.1f%%)\n",\r
205           opt_input.c_str(), HitCount, QuerySeqCount, Pct(HitCount, QuerySeqCount));\r
206 \r
207         CloseStdioFile(g_fUChime);\r
208         CloseStdioFile(g_fUChimeAlns);\r
209 \r
210         ProgressExit();\r
211         return 0;\r
212         }\r