]> git.donarmstrong.com Git - mothur.git/blob - chimerauchimecommand.h
added checks to make sure windows processes completed their tasks.
[mothur.git] / chimerauchimecommand.h
1 #ifndef CHIMERAUCHIMECOMMAND_H
2 #define CHIMERAUCHIMECOMMAND_H
3
4
5 /*
6  *  chimerauchimecommand.h
7  *  Mothur
8  *
9  *  Created by westcott on 5/13/11.
10  *  Copyright 2011 Schloss Lab. All rights reserved.
11  *
12  */
13
14 #include "mothur.h"
15 #include "command.hpp"
16 #include "sequenceparser.h"
17 #include "counttable.h"
18 #include "sequencecountparser.h"
19
20 /***********************************************************/
21
22 class ChimeraUchimeCommand : public Command {
23 public:
24         ChimeraUchimeCommand(string);
25         ChimeraUchimeCommand();
26         ~ChimeraUchimeCommand() {}
27         
28         vector<string> setParameters();
29         string getCommandName()                 { return "chimera.uchime";              }
30         string getCommandCategory()             { return "Sequence Processing"; }
31         
32         string getHelpString(); 
33     string getOutputPattern(string);    
34         string getCitation() { return "uchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code was donated to the public domain.\nEdgar,R.C., Haas,B.J., Clemente,J.C., Quince,C. and Knight,R. (2011), UCHIME improves sensitivity and speed of chimera detection.  Bioinformatics 27:2194.\nhttp://www.mothur.org/wiki/Chimera.uchime\n"; }
35         string getDescription()         { return "detect chimeric sequences"; }
36         
37         int execute(); 
38         void help() { m->mothurOut(getHelpString()); }          
39         
40 private:
41         struct linePair {
42                 int start;
43                 int end;
44                 linePair(int i, int j) : start(i), end(j) {}
45         };
46         
47         vector<int> processIDS;   //processid
48         int driver(string, string, string, string, int&);
49         int createProcesses(string, string, string, string, int&);
50                 
51         bool abort, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount, hasName, dups;
52         string fastafile, groupfile, templatefile, outputDir, namefile, countfile, abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, uchimeLocation, strand;
53         int processors;
54         
55         SequenceParser* sparser;
56     SequenceCountParser* cparser;
57         vector<string> outputNames;
58         vector<string> fastaFileNames;
59         vector<string> nameFileNames;
60         vector<string> groupFileNames;
61         
62         string getNamesFile(string&);
63         int readFasta(string, map<string, string>&);
64         int printFile(vector<seqPriorityNode>&, string);
65         int deconvoluteResults(map<string, string>&, string, string, string);
66         int driverGroups(string, string, string, string, int, int, vector<string>);
67         int createProcessesGroups(string, string, string, string, vector<string>, string, string, string);
68     int prepFile(string filename, string);
69
70
71 };
72
73 /***********************************************************/
74 /**************************************************************************************************/
75 //custom data structure for threads to use.
76 // This is passed by void pointer so it can be any data type
77 // that can be passed using a single void pointer (LPVOID).
/**
 * Work description handed to a uchime worker thread through a single pointer.
 *
 * Both constructors leave every member in a defined state: pointers are NULL,
 * counters are 0 and all option switches are false until setBooleans() is
 * called. Option values stay empty strings until setVariables() is called.
 */
struct uchimeData {
    string fastafile;
    string namefile;        // read as the count table by MyUchimeThreadFunction when hasCount is set
    string groupfile;
    string outputFName;
    string accnos, alns, filename, templatefile, uchimeLocation;
    MothurOut* m;
    int start;              // first index into `groups` handled by this worker
    int end;                // one past the last index into `groups`
    int threadID, count, numChimeras;   // count / numChimeras are results written by the worker
    vector<string> groups;
    bool useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount;
    string abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand;

    /** Creates an empty record: no MothurOut, zeroed counters, all switches off. */
    uchimeData()
        : m(NULL), start(0), end(0), threadID(0), count(0), numChimeras(0)
    {
        resetFlags();
    }

    /**
     * @param o     output file name            @param uloc  path of the uchime executable
     * @param t     template file               @param file  working file name
     * @param f     fasta file                  @param n     name (or count) file
     * @param g     group file                  @param ac    accnos file
     * @param al    alns file                   @param gr    group names
     * @param mout  MothurOut instance          @param st,en range of indices into gr
     * @param tid   thread id
     *
     * count and numChimeras start at 0; all switches start as false.
     */
    uchimeData(string o, string uloc, string t, string file, string f, string n, string g, string ac,  string al, vector<string> gr, MothurOut* mout, int st, int en, int tid)
        : fastafile(f), namefile(n), groupfile(g), outputFName(o),
          accnos(ac), alns(al), filename(file), templatefile(t), uchimeLocation(uloc),
          m(mout), start(st), end(en), threadID(tid), count(0), numChimeras(0),
          groups(gr)
    {
        resetFlags();
    }

    /** Stores the option switches; arguments follow the member declaration order. */
    void setBooleans(bool Abskew, bool calns, bool MinH, bool Mindiv, bool Xn, bool Dn, bool Xa, bool Chunks, bool Minchunk, bool Idsmoothwindow, bool Minsmoothid, bool Maxp, bool skipgap, bool skipgap2, bool Minlen, bool Maxlen, bool uc, bool Queryfract, bool hc) {
        useAbskew = Abskew;
        chimealns = calns;
        useMinH = MinH;
        useMindiv = Mindiv;
        useXn = Xn;
        useDn = Dn;
        useXa = Xa;
        useChunks = Chunks;
        useMinchunk = Minchunk;
        useIdsmoothwindow = Idsmoothwindow;
        useMinsmoothid = Minsmoothid;
        useMaxp = Maxp;
        skipgaps = skipgap;
        skipgaps2 = skipgap2;
        useMinlen = Minlen;
        useMaxlen = Maxlen;
        ucl = uc;
        useQueryfract = Queryfract;
        hasCount = hc;
    }

    /** Stores the option values (kept as strings); strand is the last argument. */
    void setVariables(string abske, string min, string mindi, string x, string d, string xa2, string chunk, string minchun, string idsmoothwindo, string minsmoothi, string max, string minle, string maxle, string queryfrac, string stra) {
        abskew = abske;
        minh = min;
        mindiv = mindi;
        strand = stra;
        xn = x;
        dn = d;
        xa = xa2;
        chunks = chunk;
        minchunk = minchun;
        idsmoothwindow = idsmoothwindo;
        minsmoothid = minsmoothi;
        maxp = max;
        minlen = minle;
        maxlen = maxle;
        queryfract = queryfrac;
    }

private:
    // Gives every switch a defined value so a record is safe to read before setBooleans().
    void resetFlags() {
        useAbskew = chimealns = useMinH = useMindiv = useXn = useDn = useXa = false;
        useChunks = useMinchunk = useIdsmoothwindow = useMinsmoothid = useMaxp = false;
        skipgaps = skipgaps2 = useMinlen = useMaxlen = ucl = useQueryfract = hasCount = false;
    }
};
151
152 /**************************************************************************************************/
153 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
154 #else
155 static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){ 
156         uchimeData* pDataArray;
157         pDataArray = (uchimeData*)lpParam;
158         
159         try {
160                 
161                 pDataArray->outputFName = pDataArray->m->getFullPathName(pDataArray->outputFName);
162                 pDataArray->filename = pDataArray->m->getFullPathName(pDataArray->filename);
163                 pDataArray->alns = pDataArray->m->getFullPathName(pDataArray->alns);
164                 
165                 //clears files
166                 ofstream out, out1, out2;
167                 pDataArray->m->openOutputFile(pDataArray->outputFName, out); out.close(); 
168                 pDataArray->m->openOutputFile(pDataArray->accnos, out1); out1.close();
169                 if (pDataArray->chimealns) { pDataArray->m->openOutputFile(pDataArray->alns, out2); out2.close(); }
170                 
171                 //parse fasta and name file by group
172                 SequenceParser* parser;
173         SequenceCountParser* cparser;
174                 if (pDataArray->hasCount) {
175             CountTable* ct = new CountTable();
176             ct->readTable(pDataArray->namefile);
177             cparser = new SequenceCountParser(pDataArray->fastafile, *ct);
178             delete ct;
179         }else {
180             if (pDataArray->namefile != "") { parser = new SequenceParser(pDataArray->groupfile, pDataArray->fastafile, pDataArray->namefile);  }
181             else                                                        { parser = new SequenceParser(pDataArray->groupfile, pDataArray->fastafile);                                            }
182         }
183                 
184                 int totalSeqs = 0;
185                 int numChimeras = 0;
186                 
187                 for (int i = pDataArray->start; i < pDataArray->end; i++) {
188                         int start = time(NULL);  if (pDataArray->m->control_pressed) {  if (pDataArray->hasCount) { delete cparser; } { delete parser; } return 0; }
189                         
190             
191                         int error;
192             if (pDataArray->hasCount) { 
193                 error = cparser->getSeqs(pDataArray->groups[i], pDataArray->filename, true); if ((error == 1) || pDataArray->m->control_pressed) {  delete cparser; return 0; }
194             }else {
195                error = parser->getSeqs(pDataArray->groups[i], pDataArray->filename, true); if ((error == 1) || pDataArray->m->control_pressed) {  delete parser; return 0; } 
196             }
197                         
198                         //int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
199                         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
200                         
201                         //to allow for spaces in the path
202                         string outputFName = "\"" + pDataArray->outputFName+pDataArray->groups[i] + "\"";
203                         string filename = "\"" + pDataArray->filename + "\"";
204                         string alns = "\"" + pDataArray->alns+pDataArray->groups[i] + "\"";
205                         string accnos = pDataArray->accnos+pDataArray->groups[i];
206                         
207                         vector<char*> cPara;
208                         
209             string uchimeCommand = pDataArray->uchimeLocation;
210             uchimeCommand = "\"" + uchimeCommand + "\"";
211                         
212                         char* tempUchime;
213                         tempUchime= new char[uchimeCommand.length()+1]; 
214                         *tempUchime = '\0';
215                         strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
216                         cPara.push_back(tempUchime);
217                         
218                         char* tempIn = new char[8]; 
219                         *tempIn = '\0'; strncat(tempIn, "--input", 7);
220                         //strcpy(tempIn, "--input"); 
221                         cPara.push_back(tempIn);
222                         char* temp = new char[filename.length()+1];
223                         *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
224                         //strcpy(temp, filename.c_str());
225                         cPara.push_back(temp);
226                         
227                         char* tempO = new char[12]; 
228                         *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
229                         //strcpy(tempO, "--uchimeout"); 
230                         cPara.push_back(tempO);
231                         char* tempout = new char[outputFName.length()+1];
232                         //strcpy(tempout, outputFName.c_str());
233                         *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
234                         cPara.push_back(tempout);
235                         
236                         if (pDataArray->chimealns) {
237                                 char* tempA = new char[13]; 
238                                 *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
239                                 //strcpy(tempA, "--uchimealns"); 
240                                 cPara.push_back(tempA);
241                                 char* tempa = new char[alns.length()+1];
242                                 //strcpy(tempa, alns.c_str());
243                                 *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
244                                 cPara.push_back(tempa);
245                         }
246                         
247             if (pDataArray->strand != "") {
248                 char* tempA = new char[9]; 
249                 *tempA = '\0'; strncat(tempA, "--strand", 8);
250                 cPara.push_back(tempA);
251                 char* tempa = new char[pDataArray->strand.length()+1];
252                 *tempa = '\0'; strncat(tempa, pDataArray->strand.c_str(), pDataArray->strand.length());
253                 cPara.push_back(tempa);
254             }
255             
256                         if (pDataArray->useAbskew) {
257                                 char* tempskew = new char[9];
258                                 *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
259                                 //strcpy(tempskew, "--abskew"); 
260                                 cPara.push_back(tempskew);
261                                 char* tempSkew = new char[pDataArray->abskew.length()+1];
262                                 //strcpy(tempSkew, abskew.c_str());
263                                 *tempSkew = '\0'; strncat(tempSkew, pDataArray->abskew.c_str(), pDataArray->abskew.length());
264                                 cPara.push_back(tempSkew);
265                         }
266                         
267                         if (pDataArray->useMinH) {
268                                 char* tempminh = new char[7]; 
269                                 *tempminh = '\0'; strncat(tempminh, "--minh", 6);
270                                 //strcpy(tempminh, "--minh"); 
271                                 cPara.push_back(tempminh);
272                                 char* tempMinH = new char[pDataArray->minh.length()+1];
273                                 *tempMinH = '\0'; strncat(tempMinH, pDataArray->minh.c_str(), pDataArray->minh.length());
274                                 //strcpy(tempMinH, minh.c_str());
275                                 cPara.push_back(tempMinH);
276                         }
277                         
278                         if (pDataArray->useMindiv) {
279                                 char* tempmindiv = new char[9]; 
280                                 *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
281                                 //strcpy(tempmindiv, "--mindiv"); 
282                                 cPara.push_back(tempmindiv);
283                                 char* tempMindiv = new char[pDataArray->mindiv.length()+1];
284                                 *tempMindiv = '\0'; strncat(tempMindiv, pDataArray->mindiv.c_str(), pDataArray->mindiv.length());
285                                 //strcpy(tempMindiv, mindiv.c_str());
286                                 cPara.push_back(tempMindiv);
287                         }
288                         
289                         if (pDataArray->useXn) {
290                                 char* tempxn = new char[5]; 
291                                 //strcpy(tempxn, "--xn"); 
292                                 *tempxn = '\0'; strncat(tempxn, "--xn", 4);
293                                 cPara.push_back(tempxn);
294                                 char* tempXn = new char[pDataArray->xn.length()+1];
295                                 //strcpy(tempXn, xn.c_str());
296                                 *tempXn = '\0'; strncat(tempXn, pDataArray->xn.c_str(), pDataArray->xn.length());
297                                 cPara.push_back(tempXn);
298                         }
299                         
300                         if (pDataArray->useDn) {
301                                 char* tempdn = new char[5]; 
302                                 //strcpy(tempdn, "--dn"); 
303                                 *tempdn = '\0'; strncat(tempdn, "--dn", 4);
304                                 cPara.push_back(tempdn);
305                                 char* tempDn = new char[pDataArray->dn.length()+1];
306                                 *tempDn = '\0'; strncat(tempDn, pDataArray->dn.c_str(), pDataArray->dn.length());
307                                 //strcpy(tempDn, dn.c_str());
308                                 cPara.push_back(tempDn);
309                         }
310                         
311                         if (pDataArray->useXa) {
312                                 char* tempxa = new char[5]; 
313                                 //strcpy(tempxa, "--xa"); 
314                                 *tempxa = '\0'; strncat(tempxa, "--xa", 4);
315                                 cPara.push_back(tempxa);
316                                 char* tempXa = new char[pDataArray->xa.length()+1];
317                                 *tempXa = '\0'; strncat(tempXa, pDataArray->xa.c_str(), pDataArray->xa.length());
318                                 //strcpy(tempXa, xa.c_str());
319                                 cPara.push_back(tempXa);
320                         }
321                         
322                         if (pDataArray->useChunks) {
323                                 char* tempchunks = new char[9]; 
324                                 //strcpy(tempchunks, "--chunks"); 
325                                 *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
326                                 cPara.push_back(tempchunks);
327                                 char* tempChunks = new char[pDataArray->chunks.length()+1];
328                                 *tempChunks = '\0'; strncat(tempChunks, pDataArray->chunks.c_str(), pDataArray->chunks.length());
329                                 //strcpy(tempChunks, chunks.c_str());
330                                 cPara.push_back(tempChunks);
331                         }
332                         
333                         if (pDataArray->useMinchunk) {
334                                 char* tempminchunk = new char[11]; 
335                                 //strcpy(tempminchunk, "--minchunk"); 
336                                 *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
337                                 cPara.push_back(tempminchunk);
338                                 char* tempMinchunk = new char[pDataArray->minchunk.length()+1];
339                                 *tempMinchunk = '\0'; strncat(tempMinchunk, pDataArray->minchunk.c_str(), pDataArray->minchunk.length());
340                                 //strcpy(tempMinchunk, minchunk.c_str());
341                                 cPara.push_back(tempMinchunk);
342                         }
343                         
344                         if (pDataArray->useIdsmoothwindow) {
345                                 char* tempidsmoothwindow = new char[17]; 
346                                 *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
347                                 //strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
348                                 cPara.push_back(tempidsmoothwindow);
349                                 char* tempIdsmoothwindow = new char[pDataArray->idsmoothwindow.length()+1];
350                                 *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, pDataArray->idsmoothwindow.c_str(), pDataArray->idsmoothwindow.length());
351                                 //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
352                                 cPara.push_back(tempIdsmoothwindow);
353                         }
354                         
355                         if (pDataArray->useMaxp) {
356                                 char* tempmaxp = new char[7]; 
357                                 //strcpy(tempmaxp, "--maxp"); 
358                                 *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
359                                 cPara.push_back(tempmaxp);
360                                 char* tempMaxp = new char[pDataArray->maxp.length()+1];
361                                 *tempMaxp = '\0'; strncat(tempMaxp, pDataArray->maxp.c_str(), pDataArray->maxp.length());
362                                 //strcpy(tempMaxp, maxp.c_str());
363                                 cPara.push_back(tempMaxp);
364                         }
365                         
366                         if (!pDataArray->skipgaps) {
367                                 char* tempskipgaps = new char[13]; 
368                                 //strcpy(tempskipgaps, "--[no]skipgaps");
369                                 *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
370                                 cPara.push_back(tempskipgaps);
371                         }
372                         
373                         if (!pDataArray->skipgaps2) {
374                                 char* tempskipgaps2 = new char[14]; 
375                                 //strcpy(tempskipgaps2, "--[no]skipgaps2"); 
376                                 *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
377                                 cPara.push_back(tempskipgaps2);
378                         }
379                         
380                         if (pDataArray->useMinlen) {
381                                 char* tempminlen = new char[9]; 
382                                 *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
383                                 //strcpy(tempminlen, "--minlen"); 
384                                 cPara.push_back(tempminlen);
385                                 char* tempMinlen = new char[pDataArray->minlen.length()+1];
386                                 //strcpy(tempMinlen, minlen.c_str());
387                                 *tempMinlen = '\0'; strncat(tempMinlen, pDataArray->minlen.c_str(), pDataArray->minlen.length());
388                                 cPara.push_back(tempMinlen);
389                         }
390                         
391                         if (pDataArray->useMaxlen) {
392                                 char* tempmaxlen = new char[9]; 
393                                 //strcpy(tempmaxlen, "--maxlen"); 
394                                 *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
395                                 cPara.push_back(tempmaxlen);
396                                 char* tempMaxlen = new char[pDataArray->maxlen.length()+1];
397                                 *tempMaxlen = '\0'; strncat(tempMaxlen, pDataArray->maxlen.c_str(), pDataArray->maxlen.length());
398                                 //strcpy(tempMaxlen, maxlen.c_str());
399                                 cPara.push_back(tempMaxlen);
400                         }
401                         
402                         if (pDataArray->ucl) {
403                                 char* tempucl = new char[5]; 
404                                 strcpy(tempucl, "--ucl"); 
405                                 cPara.push_back(tempucl);
406                         }
407                         
408                         if (pDataArray->useQueryfract) {
409                                 char* tempqueryfract = new char[13]; 
410                                 *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
411                                 //strcpy(tempqueryfract, "--queryfract"); 
412                                 cPara.push_back(tempqueryfract);
413                                 char* tempQueryfract = new char[pDataArray->queryfract.length()+1];
414                                 *tempQueryfract = '\0'; strncat(tempQueryfract, pDataArray->queryfract.c_str(), pDataArray->queryfract.length());
415                                 //strcpy(tempQueryfract, queryfract.c_str());
416                                 cPara.push_back(tempQueryfract);
417                         }
418                         
419                         
420                         char** uchimeParameters;
421                         uchimeParameters = new char*[cPara.size()];
422                         string commandString = "";
423                         for (int j = 0; j < cPara.size(); j++) {  uchimeParameters[j] = cPara[j];  commandString += toString(cPara[j]) + " "; } 
424                         //int numArgs = cPara.size();
425                         
426                         //uchime_main(numArgs, uchimeParameters); 
427                         //cout << "commandString = " << commandString << endl;
428                         commandString = "\"" + commandString + "\"";
429             
430             if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
431             
432                         system(commandString.c_str());
433                         
434                         //free memory
435                         for(int j = 0; j < cPara.size(); j++)  {  delete cPara[j];  }
436                         delete[] uchimeParameters; 
437                         
438                         //remove "" from filenames
439                         outputFName = outputFName.substr(1, outputFName.length()-2);
440                         filename = filename.substr(1, filename.length()-2);
441                         alns = alns.substr(1, alns.length()-2);
442                         
443                         if (pDataArray->m->control_pressed) { if (pDataArray->hasCount) { delete cparser; } { delete parser; } return 0; }
444                         
445                         //create accnos file from uchime results
446                         ifstream in; 
447                         pDataArray->m->openInputFile(outputFName, in);
448                         
449                         ofstream out;
450                         pDataArray->m->openOutputFile(accnos, out);
451                         
452                         int num = 0;
453                         numChimeras = 0;
454                         while(!in.eof()) {
455                                 
456                                 if (pDataArray->m->control_pressed) { break; }
457                                 
458                                 string name = "";
459                                 string chimeraFlag = "";
460                                 in >> chimeraFlag >> name;
461                                 
462                                 //fix name 
463                                 name = name.substr(0, name.length()-1); //rip off last /
464                                 name = name.substr(0, name.find_last_of('/'));
465                                 
466                                 for (int j = 0; j < 15; j++) {  in >> chimeraFlag; }
467                                 pDataArray->m->gobble(in);
468                                 
469                                 if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
470                                 num++;
471                         }
472                         in.close();
473                         out.close();
474                         
475                         
476                         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
477                         totalSeqs += num;
478                         pDataArray->numChimeras += numChimeras;
479                         
480                         if (pDataArray->m->control_pressed) { if (pDataArray->hasCount) { delete cparser; } { delete parser; } return 0; }
481                         
482                         //remove file made for uchime
483                         pDataArray->m->mothurRemove(filename);
484                         
485                         //append files
486                         pDataArray->m->appendFiles(outputFName, pDataArray->outputFName); pDataArray->m->mothurRemove(outputFName);
487                         pDataArray->m->appendFiles(accnos, pDataArray->accnos); pDataArray->m->mothurRemove(accnos);
488                         if (pDataArray->chimealns) { pDataArray->m->appendFiles(alns, pDataArray->alns); pDataArray->m->mothurRemove(alns); }
489                         
490                         pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(num) + " sequences from group " + pDataArray->groups[i] + ".");    pDataArray->m->mothurOutEndLine();                                      
491                         
492                 }       
493                 
494                 pDataArray->count = totalSeqs;
495                 if (pDataArray->hasCount) { delete cparser; } { delete parser; }
496                 return totalSeqs;
497                 
498         }
499         catch(exception& e) {
500                 pDataArray->m->errorOut(e, "ChimeraUchimeCommand", "MyUchimeThreadFunction");
501                 exit(1);
502         }
503
504 /**************************************************************************************************/
505
506 static DWORD WINAPI MyUchimeSeqsThreadFunction(LPVOID lpParam){ 
507         uchimeData* pDataArray;
508         pDataArray = (uchimeData*)lpParam;
509         
510         try {
511                 
512                 pDataArray->outputFName = pDataArray->m->getFullPathName(pDataArray->outputFName);
513                 pDataArray->filename = pDataArray->m->getFullPathName(pDataArray->filename);
514                 pDataArray->alns = pDataArray->m->getFullPathName(pDataArray->alns);
515                 
516                 int totalSeqs = 0;
517                 int numChimeras = 0;
518         
519                 int start = time(NULL);  if (pDataArray->m->control_pressed) { return 0; }
520                         
521                 //to allow for spaces in the path
522                 string outputFName = "\"" + pDataArray->outputFName + "\"";
523                 string filename = "\"" + pDataArray->filename + "\"";
524                 string alns = "\"" + pDataArray->alns+ "\"";
525                 string templatefile = "\"" + pDataArray->templatefile + "\"";
526                 string accnos = pDataArray->accnos;
527                 
528                 vector<char*> cPara;
529                 
530                 string uchimeCommand = pDataArray->uchimeLocation;
531         uchimeCommand = "\"" + uchimeCommand + "\"";
532         
533         char* tempUchime;
534         tempUchime= new char[uchimeCommand.length()+1]; 
535         *tempUchime = '\0';
536         strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
537         cPara.push_back(tempUchime);
538                 
539         string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted";
540         //prepFile(filename.substr(1, filename.length()-2), outputFileName);
541         //prepFile(filename, outputFileName);
542         /******************************************/
543         ifstream in23;
544         pDataArray->m->openInputFile((filename.substr(1, filename.length()-2)), in23);
545         
546         ofstream out23;
547         pDataArray->m->openOutputFile(outputFileName, out23);
548         
549         int fcount = 0;
550         while (!in23.eof()) {
551             if (pDataArray->m->control_pressed) { break;  }
552             
553             Sequence seq(in23); pDataArray->m->gobble(in23);
554             
555             if (seq.getName() != "") { seq.printSequence(out23); fcount++; }
556         }
557         in23.close();
558         out23.close();
559         /******************************************/
560         
561         filename = outputFileName;
562         filename = "\"" + filename + "\"";
563         
564         //add reference file
565                 char* tempRef = new char[5]; 
566                 //strcpy(tempRef, "--db"); 
567                 *tempRef = '\0'; strncat(tempRef, "--db", 4);
568                 cPara.push_back(tempRef);  
569                 char* tempR = new char[templatefile.length()+1];
570                 //strcpy(tempR, templatefile.c_str());
571                 *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
572                 cPara.push_back(tempR);
573         
574                 char* tempIn = new char[8]; 
575                 *tempIn = '\0'; strncat(tempIn, "--input", 7);
576                 //strcpy(tempIn, "--input"); 
577                 cPara.push_back(tempIn);
578                 char* temp = new char[filename.length()+1];
579                 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
580                 //strcpy(temp, filename.c_str());
581                 cPara.push_back(temp);
582                 
583                 char* tempO = new char[12]; 
584                 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
585                 //strcpy(tempO, "--uchimeout"); 
586                 cPara.push_back(tempO);
587                 char* tempout = new char[outputFName.length()+1];
588                 //strcpy(tempout, outputFName.c_str());
589                 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
590                 cPara.push_back(tempout);
591                 
592                 if (pDataArray->chimealns) {
593                         char* tempA = new char[13]; 
594                         *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
595                         //strcpy(tempA, "--uchimealns"); 
596                         cPara.push_back(tempA);
597                         char* tempa = new char[alns.length()+1];
598                         //strcpy(tempa, alns.c_str());
599                         *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
600                         cPara.push_back(tempa);
601                 }
602                 
603         if (pDataArray->strand != "") {
604             char* tempA = new char[9]; 
605             *tempA = '\0'; strncat(tempA, "--strand", 8);
606             cPara.push_back(tempA);
607             char* tempa = new char[pDataArray->strand.length()+1];
608             *tempa = '\0'; strncat(tempa, pDataArray->strand.c_str(), pDataArray->strand.length());
609             cPara.push_back(tempa);
610         }
611         
612                 if (pDataArray->useAbskew) {
613                         char* tempskew = new char[9];
614                         *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
615                         //strcpy(tempskew, "--abskew"); 
616                         cPara.push_back(tempskew);
617                         char* tempSkew = new char[pDataArray->abskew.length()+1];
618                         //strcpy(tempSkew, abskew.c_str());
619                         *tempSkew = '\0'; strncat(tempSkew, pDataArray->abskew.c_str(), pDataArray->abskew.length());
620                         cPara.push_back(tempSkew);
621                 }
622                 
623                 if (pDataArray->useMinH) {
624                         char* tempminh = new char[7]; 
625                         *tempminh = '\0'; strncat(tempminh, "--minh", 6);
626                         //strcpy(tempminh, "--minh"); 
627                         cPara.push_back(tempminh);
628                         char* tempMinH = new char[pDataArray->minh.length()+1];
629                         *tempMinH = '\0'; strncat(tempMinH, pDataArray->minh.c_str(), pDataArray->minh.length());
630                         //strcpy(tempMinH, minh.c_str());
631                         cPara.push_back(tempMinH);
632                 }
633                 
634                 if (pDataArray->useMindiv) {
635                         char* tempmindiv = new char[9]; 
636                         *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
637                         //strcpy(tempmindiv, "--mindiv"); 
638                         cPara.push_back(tempmindiv);
639                         char* tempMindiv = new char[pDataArray->mindiv.length()+1];
640                         *tempMindiv = '\0'; strncat(tempMindiv, pDataArray->mindiv.c_str(), pDataArray->mindiv.length());
641                         //strcpy(tempMindiv, mindiv.c_str());
642                         cPara.push_back(tempMindiv);
643                 }
644                 
645                 if (pDataArray->useXn) {
646                         char* tempxn = new char[5]; 
647                         //strcpy(tempxn, "--xn"); 
648                         *tempxn = '\0'; strncat(tempxn, "--xn", 4);
649                         cPara.push_back(tempxn);
650                         char* tempXn = new char[pDataArray->xn.length()+1];
651                         //strcpy(tempXn, xn.c_str());
652                         *tempXn = '\0'; strncat(tempXn, pDataArray->xn.c_str(), pDataArray->xn.length());
653                         cPara.push_back(tempXn);
654                 }
655                 
656                 if (pDataArray->useDn) {
657                         char* tempdn = new char[5]; 
658                         //strcpy(tempdn, "--dn"); 
659                         *tempdn = '\0'; strncat(tempdn, "--dn", 4);
660                         cPara.push_back(tempdn);
661                         char* tempDn = new char[pDataArray->dn.length()+1];
662                         *tempDn = '\0'; strncat(tempDn, pDataArray->dn.c_str(), pDataArray->dn.length());
663                         //strcpy(tempDn, dn.c_str());
664                         cPara.push_back(tempDn);
665                 }
666                 
667                 if (pDataArray->useXa) {
668                         char* tempxa = new char[5]; 
669                         //strcpy(tempxa, "--xa"); 
670                         *tempxa = '\0'; strncat(tempxa, "--xa", 4);
671                         cPara.push_back(tempxa);
672                         char* tempXa = new char[pDataArray->xa.length()+1];
673                         *tempXa = '\0'; strncat(tempXa, pDataArray->xa.c_str(), pDataArray->xa.length());
674                         //strcpy(tempXa, xa.c_str());
675                         cPara.push_back(tempXa);
676                 }
677                 
678                 if (pDataArray->useChunks) {
679                         char* tempchunks = new char[9]; 
680                         //strcpy(tempchunks, "--chunks"); 
681                         *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
682                         cPara.push_back(tempchunks);
683                         char* tempChunks = new char[pDataArray->chunks.length()+1];
684                         *tempChunks = '\0'; strncat(tempChunks, pDataArray->chunks.c_str(), pDataArray->chunks.length());
685                         //strcpy(tempChunks, chunks.c_str());
686                         cPara.push_back(tempChunks);
687                 }
688                 
689                 if (pDataArray->useMinchunk) {
690                         char* tempminchunk = new char[11]; 
691                         //strcpy(tempminchunk, "--minchunk"); 
692                         *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
693                         cPara.push_back(tempminchunk);
694                         char* tempMinchunk = new char[pDataArray->minchunk.length()+1];
695                         *tempMinchunk = '\0'; strncat(tempMinchunk, pDataArray->minchunk.c_str(), pDataArray->minchunk.length());
696                         //strcpy(tempMinchunk, minchunk.c_str());
697                         cPara.push_back(tempMinchunk);
698                 }
699                 
700                 if (pDataArray->useIdsmoothwindow) {
701                         char* tempidsmoothwindow = new char[17]; 
702                         *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
703                         //strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
704                         cPara.push_back(tempidsmoothwindow);
705                         char* tempIdsmoothwindow = new char[pDataArray->idsmoothwindow.length()+1];
706                         *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, pDataArray->idsmoothwindow.c_str(), pDataArray->idsmoothwindow.length());
707                         //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
708                         cPara.push_back(tempIdsmoothwindow);
709                 }
710                 
711                 if (pDataArray->useMaxp) {
712                         char* tempmaxp = new char[7]; 
713                         //strcpy(tempmaxp, "--maxp"); 
714                         *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
715                         cPara.push_back(tempmaxp);
716                         char* tempMaxp = new char[pDataArray->maxp.length()+1];
717                         *tempMaxp = '\0'; strncat(tempMaxp, pDataArray->maxp.c_str(), pDataArray->maxp.length());
718                         //strcpy(tempMaxp, maxp.c_str());
719                         cPara.push_back(tempMaxp);
720                 }
721                 
722                 if (!pDataArray->skipgaps) {
723                         char* tempskipgaps = new char[13]; 
724                         //strcpy(tempskipgaps, "--[no]skipgaps");
725                         *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
726                         cPara.push_back(tempskipgaps);
727                 }
728                 
729                 if (!pDataArray->skipgaps2) {
730                         char* tempskipgaps2 = new char[14]; 
731                         //strcpy(tempskipgaps2, "--[no]skipgaps2"); 
732                         *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
733                         cPara.push_back(tempskipgaps2);
734                 }
735                 
736                 if (pDataArray->useMinlen) {
737                         char* tempminlen = new char[9]; 
738                         *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
739                         //strcpy(tempminlen, "--minlen"); 
740                         cPara.push_back(tempminlen);
741                         char* tempMinlen = new char[pDataArray->minlen.length()+1];
742                         //strcpy(tempMinlen, minlen.c_str());
743                         *tempMinlen = '\0'; strncat(tempMinlen, pDataArray->minlen.c_str(), pDataArray->minlen.length());
744                         cPara.push_back(tempMinlen);
745                 }
746                 
747                 if (pDataArray->useMaxlen) {
748                         char* tempmaxlen = new char[9]; 
749                         //strcpy(tempmaxlen, "--maxlen"); 
750                         *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
751                         cPara.push_back(tempmaxlen);
752                         char* tempMaxlen = new char[pDataArray->maxlen.length()+1];
753                         *tempMaxlen = '\0'; strncat(tempMaxlen, pDataArray->maxlen.c_str(), pDataArray->maxlen.length());
754                         //strcpy(tempMaxlen, maxlen.c_str());
755                         cPara.push_back(tempMaxlen);
756                 }
757                 
758                 if (pDataArray->ucl) {
759                         char* tempucl = new char[5]; 
760                         strcpy(tempucl, "--ucl"); 
761                         cPara.push_back(tempucl);
762                 }
763                 
764                 if (pDataArray->useQueryfract) {
765                         char* tempqueryfract = new char[13]; 
766                         *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
767                         //strcpy(tempqueryfract, "--queryfract"); 
768                         cPara.push_back(tempqueryfract);
769                         char* tempQueryfract = new char[pDataArray->queryfract.length()+1];
770                         *tempQueryfract = '\0'; strncat(tempQueryfract, pDataArray->queryfract.c_str(), pDataArray->queryfract.length());
771                         //strcpy(tempQueryfract, queryfract.c_str());
772                         cPara.push_back(tempQueryfract);
773                 }
774                 
775                 
776                 char** uchimeParameters;
777                 uchimeParameters = new char*[cPara.size()];
778                 string commandString = "";
779                 for (int j = 0; j < cPara.size(); j++) {  uchimeParameters[j] = cPara[j];  commandString += toString(cPara[j]) + " "; } 
780                 //int numArgs = cPara.size();
781                 
782         commandString = "\"" + commandString + "\"";
783         
784                 //uchime_main(numArgs, uchimeParameters); 
785                 //cout << "commandString = " << commandString << endl;
786         if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
787                 system(commandString.c_str());
788                 
789                 //free memory
790                 for(int j = 0; j < cPara.size(); j++)  {  delete cPara[j];  }
791                 delete[] uchimeParameters; 
792                 
793                 //remove "" from filenames
794                 outputFName = outputFName.substr(1, outputFName.length()-2);
795                 filename = filename.substr(1, filename.length()-2);
796                 alns = alns.substr(1, alns.length()-2);
797                 
798                 if (pDataArray->m->control_pressed) { return 0; }
799                 
800                 //create accnos file from uchime results
801                 ifstream in; 
802                 pDataArray->m->openInputFile(outputFName, in);
803                 
804                 ofstream out;
805                 pDataArray->m->openOutputFile(accnos, out);
806                 
807                 numChimeras = 0;
808                 while(!in.eof()) {
809                         
810                         if (pDataArray->m->control_pressed) { break; }
811                         
812                         string name = "";
813                         string chimeraFlag = "";
814                         in >> chimeraFlag >> name;
815                         
816                         for (int j = 0; j < 15; j++) {  in >> chimeraFlag; }
817                         pDataArray->m->gobble(in);
818                         
819                         if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
820                         totalSeqs++;
821                 }
822                 in.close();
823                 out.close();
824                 
825         if (fcount != totalSeqs) { pDataArray->m->mothurOut("[ERROR]: process " + toString(pDataArray->threadID) + " only processed " + toString(pDataArray->count) + " of " + toString(pDataArray->end) + " sequences assigned to it, quitting. \n"); pDataArray->m->control_pressed = true; }
826         
827                 if (pDataArray->m->control_pressed) { return 0; }
828                 
829                 pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences.");       pDataArray->m->mothurOutEndLine();                                      
830         
831                 pDataArray->count = totalSeqs;
832                 pDataArray->numChimeras = numChimeras;
833         
834                 return totalSeqs;
835                 
836         }
837         catch(exception& e) {
838                 pDataArray->m->errorOut(e, "ChimeraUchimeCommand", "MyUchimeSeqsThreadFunction");
839                 exit(1);
840         }
841
842
843 #endif
844
845 /**************************************************************************************************/
846
847
848 #endif
849
850