]> git.donarmstrong.com Git - mothur.git/blob - chimerauchimecommand.h
added topdown parameter to pre.cluster. added more debugging output to bayesian...
[mothur.git] / chimerauchimecommand.h
1 #ifndef CHIMERAUCHIMECOMMAND_H
2 #define CHIMERAUCHIMECOMMAND_H
3
4
5 /*
6  *  chimerauchimecommand.h
7  *  Mothur
8  *
9  *  Created by westcott on 5/13/11.
10  *  Copyright 2011 Schloss Lab. All rights reserved.
11  *
12  */
13
14 #include "mothur.h"
15 #include "command.hpp"
16 #include "sequenceparser.h"
17 #include "counttable.h"
18 #include "sequencecountparser.h"
19
20 /***********************************************************/
21
22 class ChimeraUchimeCommand : public Command {
23 public:
24         ChimeraUchimeCommand(string);
25         ChimeraUchimeCommand();
26         ~ChimeraUchimeCommand() {}
27         
28         vector<string> setParameters();
29         string getCommandName()                 { return "chimera.uchime";              }
30         string getCommandCategory()             { return "Sequence Processing"; }
31         
32         string getHelpString(); 
33     string getOutputPattern(string);    
34         string getCitation() { return "uchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code was donated to the public domain.\nEdgar,R.C., Haas,B.J., Clemente,J.C., Quince,C. and Knight,R. (2011), UCHIME improves sensitivity and speed of chimera detection.  Bioinformatics 27:2194.\nhttp://www.mothur.org/wiki/Chimera.uchime\n"; }
35         string getDescription()         { return "detect chimeric sequences"; }
36         
37         int execute(); 
38         void help() { m->mothurOut(getHelpString()); }          
39         
40 private:
41         struct linePair {
42                 int start;
43                 int end;
44                 linePair(int i, int j) : start(i), end(j) {}
45         };
46         
47         vector<int> processIDS;   //processid
48         int driver(string, string, string, string, int&);
49         int createProcesses(string, string, string, string, int&);
50                 
51         bool abort, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount, hasName, dups;
52         string fastafile, groupfile, templatefile, outputDir, namefile, countfile, abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, uchimeLocation, strand;
53         int processors;
54         
55         SequenceParser* sparser;
56     SequenceCountParser* cparser;
57         vector<string> outputNames;
58         vector<string> fastaFileNames;
59         vector<string> nameFileNames;
60         vector<string> groupFileNames;
61         
62         string getNamesFile(string&);
63         int readFasta(string, map<string, string>&);
64         int printFile(vector<seqPriorityNode>&, string);
65         int deconvoluteResults(map<string, string>&, string, string, string);
66         int driverGroups(string, string, string, string, int, int, vector<string>);
67         int createProcessesGroups(string, string, string, string, vector<string>, string, string, string);
68     int prepFile(string filename, string);
69
70
71 };
72
73 /***********************************************************/
74 /**************************************************************************************************/
75 //custom data structure for threads to use.
76 // This is passed by void pointer so it can be any data type
77 // that can be passed using a single void pointer (LPVOID).
78 struct uchimeData {
79         string fastafile; 
80         string namefile; 
81         string groupfile;
82         string outputFName;
83         string accnos, alns, filename, templatefile, uchimeLocation;
84         MothurOut* m;
85         int start;
86         int end;
87         int threadID, count, numChimeras;
88         vector<string> groups;
89         bool useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount;
90         string abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand;
91         
92         uchimeData(){}
93         uchimeData(string o, string uloc, string t, string file, string f, string n, string g, string ac,  string al, vector<string> gr, MothurOut* mout, int st, int en, int tid) {
94                 fastafile = f;
95                 namefile = n;
96                 groupfile = g;
97                 filename = file;
98                 outputFName = o;
99                 templatefile = t;
100                 accnos = ac;
101                 alns = al;
102                 m = mout;
103                 start = st;
104                 end = en;
105                 threadID = tid;
106                 groups = gr;
107                 count = 0;
108                 numChimeras = 0;
109         uchimeLocation = uloc;
110         }
111         void setBooleans(bool Abskew, bool calns, bool MinH, bool Mindiv, bool Xn, bool Dn, bool Xa, bool Chunks, bool Minchunk, bool Idsmoothwindow, bool Minsmoothid, bool Maxp, bool skipgap, bool skipgap2, bool Minlen, bool Maxlen, bool uc, bool Queryfract, bool hc) {
112                 useAbskew = Abskew;
113                 chimealns = calns;
114                 useMinH = MinH;
115                 useMindiv = Mindiv;
116                 useXn = Xn;
117                 useDn = Dn;
118                 useXa = Xa;
119                 useChunks = Chunks;
120                 useMinchunk = Minchunk;
121                 useIdsmoothwindow = Idsmoothwindow;
122                 useMinsmoothid = Minsmoothid;
123                 useMaxp = Maxp;
124                 skipgaps = skipgap;
125                 skipgaps2 = skipgap2;
126                 useMinlen = Minlen;
127                 useMaxlen = Maxlen;
128                 ucl = uc;
129                 useQueryfract = Queryfract;
130         hasCount = hc;
131         }
132         
133         void setVariables(string abske, string min, string mindi, string x, string d, string xa2, string chunk, string minchun, string idsmoothwindo, string minsmoothi, string max, string minle, string maxle, string queryfrac, string stra) {
134                 abskew = abske;
135                 minh = min;
136                 mindiv = mindi;
137         strand = stra;
138                 xn = x;
139                 dn = d;
140                 xa = xa2;
141                 chunks = chunk;
142                 minchunk = minchun;
143                 idsmoothwindow = idsmoothwindo;
144                 minsmoothid = minsmoothi;
145                 maxp = max;
146                 minlen = minle;
147                 maxlen = maxle;
148                 queryfract = queryfrac;
149         }
150 };
151
152 /**************************************************************************************************/
153 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
154 #else
155 static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){ 
156         uchimeData* pDataArray;
157         pDataArray = (uchimeData*)lpParam;
158         
159         try {
160                 
161                 pDataArray->outputFName = pDataArray->m->getFullPathName(pDataArray->outputFName);
162                 pDataArray->filename = pDataArray->m->getFullPathName(pDataArray->filename);
163                 pDataArray->alns = pDataArray->m->getFullPathName(pDataArray->alns);
164                 
165                 //clears files
166                 ofstream out, out1, out2;
167                 pDataArray->m->openOutputFile(pDataArray->outputFName, out); out.close(); 
168                 pDataArray->m->openOutputFile(pDataArray->accnos, out1); out1.close();
169                 if (pDataArray->chimealns) { pDataArray->m->openOutputFile(pDataArray->alns, out2); out2.close(); }
170                 
171                 //parse fasta and name file by group
172                 SequenceParser* parser;
173         SequenceCountParser* cparser;
174                 if (pDataArray->hasCount) {
175             CountTable* ct = new CountTable();
176             ct->readTable(pDataArray->namefile);
177             cparser = new SequenceCountParser(pDataArray->fastafile, *ct);
178             delete ct;
179         }else {
180             if (pDataArray->namefile != "") { parser = new SequenceParser(pDataArray->groupfile, pDataArray->fastafile, pDataArray->namefile);  }
181             else                                                        { parser = new SequenceParser(pDataArray->groupfile, pDataArray->fastafile);                                            }
182         }
183                 
184                 int totalSeqs = 0;
185                 int numChimeras = 0;
186                 
187                 for (int i = pDataArray->start; i < pDataArray->end; i++) {
188                         int start = time(NULL);  if (pDataArray->m->control_pressed) {  if (pDataArray->hasCount) { delete cparser; } { delete parser; } return 0; }
189                         
190             
191                         int error;
192             if (pDataArray->hasCount) { 
193                 error = cparser->getSeqs(pDataArray->groups[i], pDataArray->filename, true); if ((error == 1) || pDataArray->m->control_pressed) {  delete cparser; return 0; }
194             }else {
195                error = parser->getSeqs(pDataArray->groups[i], pDataArray->filename, true); if ((error == 1) || pDataArray->m->control_pressed) {  delete parser; return 0; } 
196             }
197                         
198                         //int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
199                         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
200                         
201                         //to allow for spaces in the path
202                         string outputFName = "\"" + pDataArray->outputFName+pDataArray->groups[i] + "\"";
203                         string filename = "\"" + pDataArray->filename + "\"";
204                         string alns = "\"" + pDataArray->alns+pDataArray->groups[i] + "\"";
205                         string accnos = pDataArray->accnos+pDataArray->groups[i];
206                         
207                         vector<char*> cPara;
208                         
209             string uchimeCommand = pDataArray->uchimeLocation;
210             uchimeCommand = "\"" + uchimeCommand + "\"";
211                         
212                         char* tempUchime;
213                         tempUchime= new char[uchimeCommand.length()+1]; 
214                         *tempUchime = '\0';
215                         strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
216                         cPara.push_back(tempUchime);
217                         
218                         char* tempIn = new char[8]; 
219                         *tempIn = '\0'; strncat(tempIn, "--input", 7);
220                         //strcpy(tempIn, "--input"); 
221                         cPara.push_back(tempIn);
222                         char* temp = new char[filename.length()+1];
223                         *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
224                         //strcpy(temp, filename.c_str());
225                         cPara.push_back(temp);
226                         
227                         char* tempO = new char[12]; 
228                         *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
229                         //strcpy(tempO, "--uchimeout"); 
230                         cPara.push_back(tempO);
231                         char* tempout = new char[outputFName.length()+1];
232                         //strcpy(tempout, outputFName.c_str());
233                         *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
234                         cPara.push_back(tempout);
235                         
236                         if (pDataArray->chimealns) {
237                                 char* tempA = new char[13]; 
238                                 *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
239                                 //strcpy(tempA, "--uchimealns"); 
240                                 cPara.push_back(tempA);
241                                 char* tempa = new char[alns.length()+1];
242                                 //strcpy(tempa, alns.c_str());
243                                 *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
244                                 cPara.push_back(tempa);
245                         }
246                         
247             if (pDataArray->strand != "") {
248                 char* tempA = new char[9]; 
249                 *tempA = '\0'; strncat(tempA, "--strand", 8);
250                 cPara.push_back(tempA);
251                 char* tempa = new char[pDataArray->strand.length()+1];
252                 *tempa = '\0'; strncat(tempa, pDataArray->strand.c_str(), pDataArray->strand.length());
253                 cPara.push_back(tempa);
254             }
255             
256                         if (pDataArray->useAbskew) {
257                                 char* tempskew = new char[9];
258                                 *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
259                                 //strcpy(tempskew, "--abskew"); 
260                                 cPara.push_back(tempskew);
261                                 char* tempSkew = new char[pDataArray->abskew.length()+1];
262                                 //strcpy(tempSkew, abskew.c_str());
263                                 *tempSkew = '\0'; strncat(tempSkew, pDataArray->abskew.c_str(), pDataArray->abskew.length());
264                                 cPara.push_back(tempSkew);
265                         }
266                         
267                         if (pDataArray->useMinH) {
268                                 char* tempminh = new char[7]; 
269                                 *tempminh = '\0'; strncat(tempminh, "--minh", 6);
270                                 //strcpy(tempminh, "--minh"); 
271                                 cPara.push_back(tempminh);
272                                 char* tempMinH = new char[pDataArray->minh.length()+1];
273                                 *tempMinH = '\0'; strncat(tempMinH, pDataArray->minh.c_str(), pDataArray->minh.length());
274                                 //strcpy(tempMinH, minh.c_str());
275                                 cPara.push_back(tempMinH);
276                         }
277                         
278                         if (pDataArray->useMindiv) {
279                                 char* tempmindiv = new char[9]; 
280                                 *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
281                                 //strcpy(tempmindiv, "--mindiv"); 
282                                 cPara.push_back(tempmindiv);
283                                 char* tempMindiv = new char[pDataArray->mindiv.length()+1];
284                                 *tempMindiv = '\0'; strncat(tempMindiv, pDataArray->mindiv.c_str(), pDataArray->mindiv.length());
285                                 //strcpy(tempMindiv, mindiv.c_str());
286                                 cPara.push_back(tempMindiv);
287                         }
288                         
289                         if (pDataArray->useXn) {
290                                 char* tempxn = new char[5]; 
291                                 //strcpy(tempxn, "--xn"); 
292                                 *tempxn = '\0'; strncat(tempxn, "--xn", 4);
293                                 cPara.push_back(tempxn);
294                                 char* tempXn = new char[pDataArray->xn.length()+1];
295                                 //strcpy(tempXn, xn.c_str());
296                                 *tempXn = '\0'; strncat(tempXn, pDataArray->xn.c_str(), pDataArray->xn.length());
297                                 cPara.push_back(tempXn);
298                         }
299                         
300                         if (pDataArray->useDn) {
301                                 char* tempdn = new char[5]; 
302                                 //strcpy(tempdn, "--dn"); 
303                                 *tempdn = '\0'; strncat(tempdn, "--dn", 4);
304                                 cPara.push_back(tempdn);
305                                 char* tempDn = new char[pDataArray->dn.length()+1];
306                                 *tempDn = '\0'; strncat(tempDn, pDataArray->dn.c_str(), pDataArray->dn.length());
307                                 //strcpy(tempDn, dn.c_str());
308                                 cPara.push_back(tempDn);
309                         }
310                         
311                         if (pDataArray->useXa) {
312                                 char* tempxa = new char[5]; 
313                                 //strcpy(tempxa, "--xa"); 
314                                 *tempxa = '\0'; strncat(tempxa, "--xa", 4);
315                                 cPara.push_back(tempxa);
316                                 char* tempXa = new char[pDataArray->xa.length()+1];
317                                 *tempXa = '\0'; strncat(tempXa, pDataArray->xa.c_str(), pDataArray->xa.length());
318                                 //strcpy(tempXa, xa.c_str());
319                                 cPara.push_back(tempXa);
320                         }
321                         
322                         if (pDataArray->useChunks) {
323                                 char* tempchunks = new char[9]; 
324                                 //strcpy(tempchunks, "--chunks"); 
325                                 *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
326                                 cPara.push_back(tempchunks);
327                                 char* tempChunks = new char[pDataArray->chunks.length()+1];
328                                 *tempChunks = '\0'; strncat(tempChunks, pDataArray->chunks.c_str(), pDataArray->chunks.length());
329                                 //strcpy(tempChunks, chunks.c_str());
330                                 cPara.push_back(tempChunks);
331                         }
332                         
333                         if (pDataArray->useMinchunk) {
334                                 char* tempminchunk = new char[11]; 
335                                 //strcpy(tempminchunk, "--minchunk"); 
336                                 *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
337                                 cPara.push_back(tempminchunk);
338                                 char* tempMinchunk = new char[pDataArray->minchunk.length()+1];
339                                 *tempMinchunk = '\0'; strncat(tempMinchunk, pDataArray->minchunk.c_str(), pDataArray->minchunk.length());
340                                 //strcpy(tempMinchunk, minchunk.c_str());
341                                 cPara.push_back(tempMinchunk);
342                         }
343                         
344                         if (pDataArray->useIdsmoothwindow) {
345                                 char* tempidsmoothwindow = new char[17]; 
346                                 *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
347                                 //strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
348                                 cPara.push_back(tempidsmoothwindow);
349                                 char* tempIdsmoothwindow = new char[pDataArray->idsmoothwindow.length()+1];
350                                 *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, pDataArray->idsmoothwindow.c_str(), pDataArray->idsmoothwindow.length());
351                                 //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
352                                 cPara.push_back(tempIdsmoothwindow);
353                         }
354                         
355                         if (pDataArray->useMaxp) {
356                                 char* tempmaxp = new char[7]; 
357                                 //strcpy(tempmaxp, "--maxp"); 
358                                 *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
359                                 cPara.push_back(tempmaxp);
360                                 char* tempMaxp = new char[pDataArray->maxp.length()+1];
361                                 *tempMaxp = '\0'; strncat(tempMaxp, pDataArray->maxp.c_str(), pDataArray->maxp.length());
362                                 //strcpy(tempMaxp, maxp.c_str());
363                                 cPara.push_back(tempMaxp);
364                         }
365                         
366                         if (!pDataArray->skipgaps) {
367                                 char* tempskipgaps = new char[13]; 
368                                 //strcpy(tempskipgaps, "--[no]skipgaps");
369                                 *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
370                                 cPara.push_back(tempskipgaps);
371                         }
372                         
373                         if (!pDataArray->skipgaps2) {
374                                 char* tempskipgaps2 = new char[14]; 
375                                 //strcpy(tempskipgaps2, "--[no]skipgaps2"); 
376                                 *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
377                                 cPara.push_back(tempskipgaps2);
378                         }
379                         
380                         if (pDataArray->useMinlen) {
381                                 char* tempminlen = new char[9]; 
382                                 *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
383                                 //strcpy(tempminlen, "--minlen"); 
384                                 cPara.push_back(tempminlen);
385                                 char* tempMinlen = new char[pDataArray->minlen.length()+1];
386                                 //strcpy(tempMinlen, minlen.c_str());
387                                 *tempMinlen = '\0'; strncat(tempMinlen, pDataArray->minlen.c_str(), pDataArray->minlen.length());
388                                 cPara.push_back(tempMinlen);
389                         }
390                         
391                         if (pDataArray->useMaxlen) {
392                                 char* tempmaxlen = new char[9]; 
393                                 //strcpy(tempmaxlen, "--maxlen"); 
394                                 *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
395                                 cPara.push_back(tempmaxlen);
396                                 char* tempMaxlen = new char[pDataArray->maxlen.length()+1];
397                                 *tempMaxlen = '\0'; strncat(tempMaxlen, pDataArray->maxlen.c_str(), pDataArray->maxlen.length());
398                                 //strcpy(tempMaxlen, maxlen.c_str());
399                                 cPara.push_back(tempMaxlen);
400                         }
401                         
402                         if (pDataArray->ucl) {
403                                 char* tempucl = new char[5]; 
404                                 strcpy(tempucl, "--ucl"); 
405                                 cPara.push_back(tempucl);
406                         }
407                         
408                         if (pDataArray->useQueryfract) {
409                                 char* tempqueryfract = new char[13]; 
410                                 *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
411                                 //strcpy(tempqueryfract, "--queryfract"); 
412                                 cPara.push_back(tempqueryfract);
413                                 char* tempQueryfract = new char[pDataArray->queryfract.length()+1];
414                                 *tempQueryfract = '\0'; strncat(tempQueryfract, pDataArray->queryfract.c_str(), pDataArray->queryfract.length());
415                                 //strcpy(tempQueryfract, queryfract.c_str());
416                                 cPara.push_back(tempQueryfract);
417                         }
418                         
419                         
420                         char** uchimeParameters;
421                         uchimeParameters = new char*[cPara.size()];
422                         string commandString = "";
423                         for (int j = 0; j < cPara.size(); j++) {  uchimeParameters[j] = cPara[j];  commandString += toString(cPara[j]) + " "; } 
424                         //int numArgs = cPara.size();
425                         
426                         //uchime_main(numArgs, uchimeParameters); 
427                         //cout << "commandString = " << commandString << endl;
428                         commandString = "\"" + commandString + "\"";
429             
430             if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
431             
432                         system(commandString.c_str());
433                         
434                         //free memory
435                         for(int j = 0; j < cPara.size(); j++)  {  delete cPara[j];  }
436                         delete[] uchimeParameters; 
437                         
438                         //remove "" from filenames
439                         outputFName = outputFName.substr(1, outputFName.length()-2);
440                         filename = filename.substr(1, filename.length()-2);
441                         alns = alns.substr(1, alns.length()-2);
442                         
443                         if (pDataArray->m->control_pressed) { if (pDataArray->hasCount) { delete cparser; } { delete parser; } return 0; }
444                         
445                         //create accnos file from uchime results
446                         ifstream in; 
447                         pDataArray->m->openInputFile(outputFName, in);
448                         
449                         ofstream out;
450                         pDataArray->m->openOutputFile(accnos, out);
451                         
452                         int num = 0;
453                         numChimeras = 0;
454                         while(!in.eof()) {
455                                 
456                                 if (pDataArray->m->control_pressed) { break; }
457                                 
458                                 string name = "";
459                                 string chimeraFlag = "";
460                                 in >> chimeraFlag >> name;
461                                 
462                                 //fix name 
463                                 name = name.substr(0, name.length()-1); //rip off last /
464                                 name = name.substr(0, name.find_last_of('/'));
465                                 
466                                 for (int j = 0; j < 15; j++) {  in >> chimeraFlag; }
467                                 pDataArray->m->gobble(in);
468                                 
469                                 if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
470                                 num++;
471                         }
472                         in.close();
473                         out.close();
474                         
475                         
476                         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
477                         totalSeqs += num;
478                         pDataArray->numChimeras += numChimeras;
479                         
480                         if (pDataArray->m->control_pressed) { if (pDataArray->hasCount) { delete cparser; } { delete parser; } return 0; }
481                         
482                         //remove file made for uchime
483                         pDataArray->m->mothurRemove(filename);
484                         
485                         //append files
486                         pDataArray->m->appendFiles(outputFName, pDataArray->outputFName); pDataArray->m->mothurRemove(outputFName);
487                         pDataArray->m->appendFiles(accnos, pDataArray->accnos); pDataArray->m->mothurRemove(accnos);
488                         if (pDataArray->chimealns) { pDataArray->m->appendFiles(alns, pDataArray->alns); pDataArray->m->mothurRemove(alns); }
489                         
490                         pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(num) + " sequences from group " + pDataArray->groups[i] + ".");    pDataArray->m->mothurOutEndLine();                                      
491                         
492                 }       
493                 
494                 pDataArray->count = totalSeqs;
495                 if (pDataArray->hasCount) { delete cparser; } { delete parser; }
496                 return totalSeqs;
497                 
498         }
499         catch(exception& e) {
500                 pDataArray->m->errorOut(e, "ChimeraUchimeCommand", "MyUchimeThreadFunction");
501                 exit(1);
502         }
503
504 /**************************************************************************************************/
505
506 static DWORD WINAPI MyUchimeSeqsThreadFunction(LPVOID lpParam){ 
507         uchimeData* pDataArray;
508         pDataArray = (uchimeData*)lpParam;
509         
510         try {
511                 
512                 pDataArray->outputFName = pDataArray->m->getFullPathName(pDataArray->outputFName);
513                 pDataArray->filename = pDataArray->m->getFullPathName(pDataArray->filename);
514                 pDataArray->alns = pDataArray->m->getFullPathName(pDataArray->alns);
515                 
516                 int totalSeqs = 0;
517                 int numChimeras = 0;
518         
519                 int start = time(NULL);  if (pDataArray->m->control_pressed) { return 0; }
520                         
521                 //to allow for spaces in the path
522                 string outputFName = "\"" + pDataArray->outputFName + "\"";
523                 string filename = "\"" + pDataArray->filename + "\"";
524                 string alns = "\"" + pDataArray->alns+ "\"";
525                 string templatefile = "\"" + pDataArray->templatefile + "\"";
526                 string accnos = pDataArray->accnos;
527                 
528                 vector<char*> cPara;
529                 
530                 string uchimeCommand = pDataArray->uchimeLocation;
531         uchimeCommand = "\"" + uchimeCommand + "\"";
532         
533         char* tempUchime;
534         tempUchime= new char[uchimeCommand.length()+1]; 
535         *tempUchime = '\0';
536         strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
537         cPara.push_back(tempUchime);
538                 
539         string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted";
540         //prepFile(filename.substr(1, filename.length()-2), outputFileName);
541         //prepFile(filename, outputFileName);
542         /******************************************/
543         ifstream in23;
544         pDataArray->m->openInputFile((filename.substr(1, filename.length()-2)), in23);
545         
546         ofstream out23;
547         pDataArray->m->openOutputFile(outputFileName, out23);
548         
549         while (!in23.eof()) {
550             if (pDataArray->m->control_pressed) { break;  }
551             
552             Sequence seq(in23); pDataArray->m->gobble(in23);
553             
554             if (seq.getName() != "") { seq.printSequence(out23); }
555         }
556         in23.close();
557         out23.close();
558         /******************************************/
559         
560         filename = outputFileName;
561         filename = "\"" + filename + "\"";
562         
563         //add reference file
564                 char* tempRef = new char[5]; 
565                 //strcpy(tempRef, "--db"); 
566                 *tempRef = '\0'; strncat(tempRef, "--db", 4);
567                 cPara.push_back(tempRef);  
568                 char* tempR = new char[templatefile.length()+1];
569                 //strcpy(tempR, templatefile.c_str());
570                 *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
571                 cPara.push_back(tempR);
572         
573                 char* tempIn = new char[8]; 
574                 *tempIn = '\0'; strncat(tempIn, "--input", 7);
575                 //strcpy(tempIn, "--input"); 
576                 cPara.push_back(tempIn);
577                 char* temp = new char[filename.length()+1];
578                 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
579                 //strcpy(temp, filename.c_str());
580                 cPara.push_back(temp);
581                 
582                 char* tempO = new char[12]; 
583                 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
584                 //strcpy(tempO, "--uchimeout"); 
585                 cPara.push_back(tempO);
586                 char* tempout = new char[outputFName.length()+1];
587                 //strcpy(tempout, outputFName.c_str());
588                 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
589                 cPara.push_back(tempout);
590                 
591                 if (pDataArray->chimealns) {
592                         char* tempA = new char[13]; 
593                         *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
594                         //strcpy(tempA, "--uchimealns"); 
595                         cPara.push_back(tempA);
596                         char* tempa = new char[alns.length()+1];
597                         //strcpy(tempa, alns.c_str());
598                         *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
599                         cPara.push_back(tempa);
600                 }
601                 
602         if (pDataArray->strand != "") {
603             char* tempA = new char[9]; 
604             *tempA = '\0'; strncat(tempA, "--strand", 8);
605             cPara.push_back(tempA);
606             char* tempa = new char[pDataArray->strand.length()+1];
607             *tempa = '\0'; strncat(tempa, pDataArray->strand.c_str(), pDataArray->strand.length());
608             cPara.push_back(tempa);
609         }
610         
611                 if (pDataArray->useAbskew) {
612                         char* tempskew = new char[9];
613                         *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
614                         //strcpy(tempskew, "--abskew"); 
615                         cPara.push_back(tempskew);
616                         char* tempSkew = new char[pDataArray->abskew.length()+1];
617                         //strcpy(tempSkew, abskew.c_str());
618                         *tempSkew = '\0'; strncat(tempSkew, pDataArray->abskew.c_str(), pDataArray->abskew.length());
619                         cPara.push_back(tempSkew);
620                 }
621                 
622                 if (pDataArray->useMinH) {
623                         char* tempminh = new char[7]; 
624                         *tempminh = '\0'; strncat(tempminh, "--minh", 6);
625                         //strcpy(tempminh, "--minh"); 
626                         cPara.push_back(tempminh);
627                         char* tempMinH = new char[pDataArray->minh.length()+1];
628                         *tempMinH = '\0'; strncat(tempMinH, pDataArray->minh.c_str(), pDataArray->minh.length());
629                         //strcpy(tempMinH, minh.c_str());
630                         cPara.push_back(tempMinH);
631                 }
632                 
633                 if (pDataArray->useMindiv) {
634                         char* tempmindiv = new char[9]; 
635                         *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
636                         //strcpy(tempmindiv, "--mindiv"); 
637                         cPara.push_back(tempmindiv);
638                         char* tempMindiv = new char[pDataArray->mindiv.length()+1];
639                         *tempMindiv = '\0'; strncat(tempMindiv, pDataArray->mindiv.c_str(), pDataArray->mindiv.length());
640                         //strcpy(tempMindiv, mindiv.c_str());
641                         cPara.push_back(tempMindiv);
642                 }
643                 
644                 if (pDataArray->useXn) {
645                         char* tempxn = new char[5]; 
646                         //strcpy(tempxn, "--xn"); 
647                         *tempxn = '\0'; strncat(tempxn, "--xn", 4);
648                         cPara.push_back(tempxn);
649                         char* tempXn = new char[pDataArray->xn.length()+1];
650                         //strcpy(tempXn, xn.c_str());
651                         *tempXn = '\0'; strncat(tempXn, pDataArray->xn.c_str(), pDataArray->xn.length());
652                         cPara.push_back(tempXn);
653                 }
654                 
655                 if (pDataArray->useDn) {
656                         char* tempdn = new char[5]; 
657                         //strcpy(tempdn, "--dn"); 
658                         *tempdn = '\0'; strncat(tempdn, "--dn", 4);
659                         cPara.push_back(tempdn);
660                         char* tempDn = new char[pDataArray->dn.length()+1];
661                         *tempDn = '\0'; strncat(tempDn, pDataArray->dn.c_str(), pDataArray->dn.length());
662                         //strcpy(tempDn, dn.c_str());
663                         cPara.push_back(tempDn);
664                 }
665                 
666                 if (pDataArray->useXa) {
667                         char* tempxa = new char[5]; 
668                         //strcpy(tempxa, "--xa"); 
669                         *tempxa = '\0'; strncat(tempxa, "--xa", 4);
670                         cPara.push_back(tempxa);
671                         char* tempXa = new char[pDataArray->xa.length()+1];
672                         *tempXa = '\0'; strncat(tempXa, pDataArray->xa.c_str(), pDataArray->xa.length());
673                         //strcpy(tempXa, xa.c_str());
674                         cPara.push_back(tempXa);
675                 }
676                 
677                 if (pDataArray->useChunks) {
678                         char* tempchunks = new char[9]; 
679                         //strcpy(tempchunks, "--chunks"); 
680                         *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
681                         cPara.push_back(tempchunks);
682                         char* tempChunks = new char[pDataArray->chunks.length()+1];
683                         *tempChunks = '\0'; strncat(tempChunks, pDataArray->chunks.c_str(), pDataArray->chunks.length());
684                         //strcpy(tempChunks, chunks.c_str());
685                         cPara.push_back(tempChunks);
686                 }
687                 
688                 if (pDataArray->useMinchunk) {
689                         char* tempminchunk = new char[11]; 
690                         //strcpy(tempminchunk, "--minchunk"); 
691                         *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
692                         cPara.push_back(tempminchunk);
693                         char* tempMinchunk = new char[pDataArray->minchunk.length()+1];
694                         *tempMinchunk = '\0'; strncat(tempMinchunk, pDataArray->minchunk.c_str(), pDataArray->minchunk.length());
695                         //strcpy(tempMinchunk, minchunk.c_str());
696                         cPara.push_back(tempMinchunk);
697                 }
698                 
699                 if (pDataArray->useIdsmoothwindow) {
700                         char* tempidsmoothwindow = new char[17]; 
701                         *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
702                         //strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
703                         cPara.push_back(tempidsmoothwindow);
704                         char* tempIdsmoothwindow = new char[pDataArray->idsmoothwindow.length()+1];
705                         *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, pDataArray->idsmoothwindow.c_str(), pDataArray->idsmoothwindow.length());
706                         //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
707                         cPara.push_back(tempIdsmoothwindow);
708                 }
709                 
710                 if (pDataArray->useMaxp) {
711                         char* tempmaxp = new char[7]; 
712                         //strcpy(tempmaxp, "--maxp"); 
713                         *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
714                         cPara.push_back(tempmaxp);
715                         char* tempMaxp = new char[pDataArray->maxp.length()+1];
716                         *tempMaxp = '\0'; strncat(tempMaxp, pDataArray->maxp.c_str(), pDataArray->maxp.length());
717                         //strcpy(tempMaxp, maxp.c_str());
718                         cPara.push_back(tempMaxp);
719                 }
720                 
721                 if (!pDataArray->skipgaps) {
722                         char* tempskipgaps = new char[13]; 
723                         //strcpy(tempskipgaps, "--[no]skipgaps");
724                         *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
725                         cPara.push_back(tempskipgaps);
726                 }
727                 
728                 if (!pDataArray->skipgaps2) {
729                         char* tempskipgaps2 = new char[14]; 
730                         //strcpy(tempskipgaps2, "--[no]skipgaps2"); 
731                         *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
732                         cPara.push_back(tempskipgaps2);
733                 }
734                 
735                 if (pDataArray->useMinlen) {
736                         char* tempminlen = new char[9]; 
737                         *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
738                         //strcpy(tempminlen, "--minlen"); 
739                         cPara.push_back(tempminlen);
740                         char* tempMinlen = new char[pDataArray->minlen.length()+1];
741                         //strcpy(tempMinlen, minlen.c_str());
742                         *tempMinlen = '\0'; strncat(tempMinlen, pDataArray->minlen.c_str(), pDataArray->minlen.length());
743                         cPara.push_back(tempMinlen);
744                 }
745                 
746                 if (pDataArray->useMaxlen) {
747                         char* tempmaxlen = new char[9]; 
748                         //strcpy(tempmaxlen, "--maxlen"); 
749                         *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
750                         cPara.push_back(tempmaxlen);
751                         char* tempMaxlen = new char[pDataArray->maxlen.length()+1];
752                         *tempMaxlen = '\0'; strncat(tempMaxlen, pDataArray->maxlen.c_str(), pDataArray->maxlen.length());
753                         //strcpy(tempMaxlen, maxlen.c_str());
754                         cPara.push_back(tempMaxlen);
755                 }
756                 
757                 if (pDataArray->ucl) {
758                         char* tempucl = new char[5]; 
759                         strcpy(tempucl, "--ucl"); 
760                         cPara.push_back(tempucl);
761                 }
762                 
763                 if (pDataArray->useQueryfract) {
764                         char* tempqueryfract = new char[13]; 
765                         *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
766                         //strcpy(tempqueryfract, "--queryfract"); 
767                         cPara.push_back(tempqueryfract);
768                         char* tempQueryfract = new char[pDataArray->queryfract.length()+1];
769                         *tempQueryfract = '\0'; strncat(tempQueryfract, pDataArray->queryfract.c_str(), pDataArray->queryfract.length());
770                         //strcpy(tempQueryfract, queryfract.c_str());
771                         cPara.push_back(tempQueryfract);
772                 }
773                 
774                 
775                 char** uchimeParameters;
776                 uchimeParameters = new char*[cPara.size()];
777                 string commandString = "";
778                 for (int j = 0; j < cPara.size(); j++) {  uchimeParameters[j] = cPara[j];  commandString += toString(cPara[j]) + " "; } 
779                 //int numArgs = cPara.size();
780                 
781         commandString = "\"" + commandString + "\"";
782         
783                 //uchime_main(numArgs, uchimeParameters); 
784                 //cout << "commandString = " << commandString << endl;
785         if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
786                 system(commandString.c_str());
787                 
788                 //free memory
789                 for(int j = 0; j < cPara.size(); j++)  {  delete cPara[j];  }
790                 delete[] uchimeParameters; 
791                 
792                 //remove "" from filenames
793                 outputFName = outputFName.substr(1, outputFName.length()-2);
794                 filename = filename.substr(1, filename.length()-2);
795                 alns = alns.substr(1, alns.length()-2);
796                 
797                 if (pDataArray->m->control_pressed) { return 0; }
798                 
799                 //create accnos file from uchime results
800                 ifstream in; 
801                 pDataArray->m->openInputFile(outputFName, in);
802                 
803                 ofstream out;
804                 pDataArray->m->openOutputFile(accnos, out);
805                 
806                 numChimeras = 0;
807                 while(!in.eof()) {
808                         
809                         if (pDataArray->m->control_pressed) { break; }
810                         
811                         string name = "";
812                         string chimeraFlag = "";
813                         in >> chimeraFlag >> name;
814                         
815                         for (int j = 0; j < 15; j++) {  in >> chimeraFlag; }
816                         pDataArray->m->gobble(in);
817                         
818                         if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
819                         totalSeqs++;
820                 }
821                 in.close();
822                 out.close();
823                 
824                 if (pDataArray->m->control_pressed) { return 0; }
825                 
826                 pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences.");       pDataArray->m->mothurOutEndLine();                                      
827         
828                 pDataArray->count = totalSeqs;
829                 pDataArray->numChimeras = numChimeras;
830                 return totalSeqs;
831                 
832         }
833         catch(exception& e) {
834                 pDataArray->m->errorOut(e, "ChimeraUchimeCommand", "MyUchimeSeqsThreadFunction");
835                 exit(1);
836         }
837
838
839 #endif
840
841 /**************************************************************************************************/
842
843
844 #endif
845
846