]> git.donarmstrong.com Git - mothur.git/blob - chimerauchimecommand.h
changed random forest output filename
[mothur.git] / chimerauchimecommand.h
1 #ifndef CHIMERAUCHIMECOMMAND_H
2 #define CHIMERAUCHIMECOMMAND_H
3
4
5 /*
6  *  chimerauchimecommand.h
7  *  Mothur
8  *
9  *  Created by westcott on 5/13/11.
10  *  Copyright 2011 Schloss Lab. All rights reserved.
11  *
12  */
13
14 #include "mothur.h"
15 #include "command.hpp"
16 #include "sequenceparser.h"
17 #include "counttable.h"
18 #include "sequencecountparser.h"
19
20 /***********************************************************/
21
22 class ChimeraUchimeCommand : public Command {
23 public:
24         ChimeraUchimeCommand(string);
25         ChimeraUchimeCommand();
26         ~ChimeraUchimeCommand() {}
27         
28         vector<string> setParameters();
29         string getCommandName()                 { return "chimera.uchime";              }
30         string getCommandCategory()             { return "Sequence Processing"; }
31         
32         string getHelpString(); 
33     string getOutputPattern(string);    
34         string getCitation() { return "uchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code was donated to the public domain.\nEdgar,R.C., Haas,B.J., Clemente,J.C., Quince,C. and Knight,R. (2011), UCHIME improves sensitivity and speed of chimera detection.  Bioinformatics 27:2194.\nhttp://www.mothur.org/wiki/Chimera.uchime\n"; }
35         string getDescription()         { return "detect chimeric sequences"; }
36         
37         int execute(); 
38         void help() { m->mothurOut(getHelpString()); }          
39         
40 private:
41         struct linePair {
42                 int start;
43                 int end;
44                 linePair(int i, int j) : start(i), end(j) {}
45         };
46         
47         vector<int> processIDS;   //processid
48         int driver(string, string, string, string, int&);
49         int createProcesses(string, string, string, string, int&);
50                 
51         bool abort, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount, hasName, dups;
52         string fastafile, groupfile, templatefile, outputDir, namefile, countfile, abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, uchimeLocation, strand;
53         int processors;
54         
55         SequenceParser* sparser;
56     SequenceCountParser* cparser;
57         vector<string> outputNames;
58         vector<string> fastaFileNames;
59         vector<string> nameFileNames;
60         vector<string> groupFileNames;
61         
62         string getNamesFile(string&);
63         int readFasta(string, map<string, string>&);
64         int printFile(vector<seqPriorityNode>&, string);
65         int deconvoluteResults(map<string, string>&, string, string, string);
66         int driverGroups(string, string, string, string, string, int, int, vector<string>);
67         int createProcessesGroups(string, string, string, string, string, vector<string>, string, string, string);
68     int prepFile(string filename, string);
69
70
71 };
72
73 /***********************************************************/
74 /**************************************************************************************************/
75 //custom data structure for threads to use.
76 // This is passed by void pointer so it can be any data type
77 // that can be passed using a single void pointer (LPVOID).
/*
 * uchimeData
 *
 * Plain bundle of everything one uchime worker needs: file names, the
 * [start, end) range of `groups` it should process, the uchime option
 * flags/values, and two result fields (`count`, `numChimeras`) that the
 * worker writes back.
 *
 * Both constructors leave the object fully initialised: every flag is
 * false, every counter is 0 and `m` is either NULL or the pointer passed in.
 * Call setBooleans() / setVariables() afterwards to fill in the options.
 */
struct uchimeData {
    string fastafile;
    string namefile;
    string groupfile;
    string outputFName;
    string accnos, alns, filename, templatefile, uchimeLocation, countlist;
    MothurOut* m;               // not owned
    int start;                  // first index into `groups` to process
    int end;                    // one past the last index to process
    int threadID, count, numChimeras;
    vector<string> groups;
    bool dups, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount;
    string abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand;

    // Empty record: no files, no groups, NULL MothurOut, all flags false.
    uchimeData()
        : m(NULL), start(0), end(0), threadID(0), count(0), numChimeras(0) {
        clearBooleans();
    }

    // Parameters, in order:
    //   o    -> outputFName      uloc -> uchimeLocation   t   -> templatefile
    //   file -> filename         f    -> fastafile        n   -> namefile
    //   g    -> groupfile        ac   -> accnos           al  -> alns
    //   nc   -> countlist        gr   -> groups           mout-> m
    //   st   -> start            en   -> end              tid -> threadID
    // `count` and `numChimeras` start at 0; all flags start false.
    uchimeData(string o, string uloc, string t, string file, string f, string n, string g, string ac,  string al, string nc, vector<string> gr, MothurOut* mout, int st, int en, int tid)
        : fastafile(f), namefile(n), groupfile(g), outputFName(o),
          accnos(ac), alns(al), filename(file), templatefile(t),
          uchimeLocation(uloc), countlist(nc),
          m(mout), start(st), end(en), threadID(tid), count(0), numChimeras(0),
          groups(gr) {
        clearBooleans();
    }

    // Resets every option flag to false so that no flag is ever read while
    // indeterminate.
    void clearBooleans() {
        dups = false;
        useAbskew = false;
        chimealns = false;
        useMinH = false;
        useMindiv = false;
        useXn = false;
        useDn = false;
        useXa = false;
        useChunks = false;
        useMinchunk = false;
        useIdsmoothwindow = false;
        useMinsmoothid = false;
        useMaxp = false;
        skipgaps = false;
        skipgaps2 = false;
        useMinlen = false;
        useMaxlen = false;
        ucl = false;
        useQueryfract = false;
        hasCount = false;
    }

    // Stores all option flags at once.  Note the argument order: `dps`
    // (-> dups) comes first and `hc` (-> hasCount) last; the rest map to
    // the member of the matching name.
    void setBooleans(bool dps, bool Abskew, bool calns, bool MinH, bool Mindiv, bool Xn, bool Dn, bool Xa, bool Chunks, bool Minchunk, bool Idsmoothwindow, bool Minsmoothid, bool Maxp, bool skipgap, bool skipgap2, bool Minlen, bool Maxlen, bool uc, bool Queryfract, bool hc) {
        useAbskew = Abskew;
        chimealns = calns;
        useMinH = MinH;
        useMindiv = Mindiv;
        useXn = Xn;
        useDn = Dn;
        useXa = Xa;
        useChunks = Chunks;
        useMinchunk = Minchunk;
        useIdsmoothwindow = Idsmoothwindow;
        useMinsmoothid = Minsmoothid;
        useMaxp = Maxp;
        skipgaps = skipgap;
        skipgaps2 = skipgap2;
        useMinlen = Minlen;
        useMaxlen = Maxlen;
        ucl = uc;
        useQueryfract = Queryfract;
        hasCount = hc;
        dups = dps;
    }

    // Stores the textual option values.  Each argument maps to the member
    // with the matching name (`min` -> minh, `max` -> maxp, `x` -> xn,
    // `d` -> dn, `xa2` -> xa, `stra` -> strand).
    void setVariables(string abske, string min, string mindi, string x, string d, string xa2, string chunk, string minchun, string idsmoothwindo, string minsmoothi, string max, string minle, string maxle, string queryfrac, string stra) {
        abskew = abske;
        minh = min;
        mindiv = mindi;
        strand = stra;
        xn = x;
        dn = d;
        xa = xa2;
        chunks = chunk;
        minchunk = minchun;
        idsmoothwindow = idsmoothwindo;
        minsmoothid = minsmoothi;
        maxp = max;
        minlen = minle;
        maxlen = maxle;
        queryfract = queryfrac;
    }
};
153
154 /**************************************************************************************************/
155 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
156 #else
157 static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){ 
158         uchimeData* pDataArray;
159         pDataArray = (uchimeData*)lpParam;
160         
161         try {
162                 
163                 pDataArray->outputFName = pDataArray->m->getFullPathName(pDataArray->outputFName);
164                 pDataArray->filename = pDataArray->m->getFullPathName(pDataArray->filename);
165                 pDataArray->alns = pDataArray->m->getFullPathName(pDataArray->alns);
166                 
167                 //clears files
168                 ofstream out, out1, out2;
169                 pDataArray->m->openOutputFile(pDataArray->outputFName, out); out.close(); 
170                 pDataArray->m->openOutputFile(pDataArray->accnos, out1); out1.close();
171                 if (pDataArray->chimealns) { pDataArray->m->openOutputFile(pDataArray->alns, out2); out2.close(); }
172                 
173                 //parse fasta and name file by group
174                 SequenceParser* parser;
175         SequenceCountParser* cparser;
176                 if (pDataArray->hasCount) {
177             CountTable* ct = new CountTable();
178             ct->readTable(pDataArray->namefile, true);
179             cparser = new SequenceCountParser(pDataArray->fastafile, *ct);
180             delete ct;
181         }else {
182             if (pDataArray->namefile != "") { parser = new SequenceParser(pDataArray->groupfile, pDataArray->fastafile, pDataArray->namefile);  }
183             else                                                        { parser = new SequenceParser(pDataArray->groupfile, pDataArray->fastafile);                                            }
184         }
185                 
186                 int totalSeqs = 0;
187                 int numChimeras = 0;
188         
189         ofstream outCountList;
190         if (pDataArray->hasCount && pDataArray->dups) { pDataArray->m->openOutputFile(pDataArray->countlist, outCountList); }
191
192                 
193                 for (int i = pDataArray->start; i < pDataArray->end; i++) {
194                         int start = time(NULL);  if (pDataArray->m->control_pressed) {  if (pDataArray->hasCount) { delete cparser; } { delete parser; } return 0; }
195                         
196             
197                         int error;
198             if (pDataArray->hasCount) { 
199                 error = cparser->getSeqs(pDataArray->groups[i], pDataArray->filename, true); if ((error == 1) || pDataArray->m->control_pressed) {  delete cparser; return 0; }
200             }else {
201                error = parser->getSeqs(pDataArray->groups[i], pDataArray->filename, true); if ((error == 1) || pDataArray->m->control_pressed) {  delete parser; return 0; } 
202             }
203                         
204                         //int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
205                         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
206                         
207                         //to allow for spaces in the path
208                         string outputFName = "\"" + pDataArray->outputFName+pDataArray->groups[i] + "\"";
209                         string filename = "\"" + pDataArray->filename + "\"";
210                         string alns = "\"" + pDataArray->alns+pDataArray->groups[i] + "\"";
211                         string accnos = pDataArray->accnos+pDataArray->groups[i];
212                         
213                         vector<char*> cPara;
214                         
215             string uchimeCommand = pDataArray->uchimeLocation;
216             uchimeCommand = "\"" + uchimeCommand + "\"";
217                         
218                         char* tempUchime;
219                         tempUchime= new char[uchimeCommand.length()+1]; 
220                         *tempUchime = '\0';
221                         strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
222                         cPara.push_back(tempUchime);
223                         
224                         char* tempIn = new char[8]; 
225                         *tempIn = '\0'; strncat(tempIn, "--input", 7);
226                         //strcpy(tempIn, "--input"); 
227                         cPara.push_back(tempIn);
228                         char* temp = new char[filename.length()+1];
229                         *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
230                         //strcpy(temp, filename.c_str());
231                         cPara.push_back(temp);
232                         
233                         char* tempO = new char[12]; 
234                         *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
235                         //strcpy(tempO, "--uchimeout"); 
236                         cPara.push_back(tempO);
237                         char* tempout = new char[outputFName.length()+1];
238                         //strcpy(tempout, outputFName.c_str());
239                         *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
240                         cPara.push_back(tempout);
241                         
242                         if (pDataArray->chimealns) {
243                                 char* tempA = new char[13]; 
244                                 *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
245                                 //strcpy(tempA, "--uchimealns"); 
246                                 cPara.push_back(tempA);
247                                 char* tempa = new char[alns.length()+1];
248                                 //strcpy(tempa, alns.c_str());
249                                 *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
250                                 cPara.push_back(tempa);
251                         }
252                         
253             if (pDataArray->strand != "") {
254                 char* tempA = new char[9]; 
255                 *tempA = '\0'; strncat(tempA, "--strand", 8);
256                 cPara.push_back(tempA);
257                 char* tempa = new char[pDataArray->strand.length()+1];
258                 *tempa = '\0'; strncat(tempa, pDataArray->strand.c_str(), pDataArray->strand.length());
259                 cPara.push_back(tempa);
260             }
261             
262                         if (pDataArray->useAbskew) {
263                                 char* tempskew = new char[9];
264                                 *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
265                                 //strcpy(tempskew, "--abskew"); 
266                                 cPara.push_back(tempskew);
267                                 char* tempSkew = new char[pDataArray->abskew.length()+1];
268                                 //strcpy(tempSkew, abskew.c_str());
269                                 *tempSkew = '\0'; strncat(tempSkew, pDataArray->abskew.c_str(), pDataArray->abskew.length());
270                                 cPara.push_back(tempSkew);
271                         }
272                         
273                         if (pDataArray->useMinH) {
274                                 char* tempminh = new char[7]; 
275                                 *tempminh = '\0'; strncat(tempminh, "--minh", 6);
276                                 //strcpy(tempminh, "--minh"); 
277                                 cPara.push_back(tempminh);
278                                 char* tempMinH = new char[pDataArray->minh.length()+1];
279                                 *tempMinH = '\0'; strncat(tempMinH, pDataArray->minh.c_str(), pDataArray->minh.length());
280                                 //strcpy(tempMinH, minh.c_str());
281                                 cPara.push_back(tempMinH);
282                         }
283                         
284                         if (pDataArray->useMindiv) {
285                                 char* tempmindiv = new char[9]; 
286                                 *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
287                                 //strcpy(tempmindiv, "--mindiv"); 
288                                 cPara.push_back(tempmindiv);
289                                 char* tempMindiv = new char[pDataArray->mindiv.length()+1];
290                                 *tempMindiv = '\0'; strncat(tempMindiv, pDataArray->mindiv.c_str(), pDataArray->mindiv.length());
291                                 //strcpy(tempMindiv, mindiv.c_str());
292                                 cPara.push_back(tempMindiv);
293                         }
294                         
295                         if (pDataArray->useXn) {
296                                 char* tempxn = new char[5]; 
297                                 //strcpy(tempxn, "--xn"); 
298                                 *tempxn = '\0'; strncat(tempxn, "--xn", 4);
299                                 cPara.push_back(tempxn);
300                                 char* tempXn = new char[pDataArray->xn.length()+1];
301                                 //strcpy(tempXn, xn.c_str());
302                                 *tempXn = '\0'; strncat(tempXn, pDataArray->xn.c_str(), pDataArray->xn.length());
303                                 cPara.push_back(tempXn);
304                         }
305                         
306                         if (pDataArray->useDn) {
307                                 char* tempdn = new char[5]; 
308                                 //strcpy(tempdn, "--dn"); 
309                                 *tempdn = '\0'; strncat(tempdn, "--dn", 4);
310                                 cPara.push_back(tempdn);
311                                 char* tempDn = new char[pDataArray->dn.length()+1];
312                                 *tempDn = '\0'; strncat(tempDn, pDataArray->dn.c_str(), pDataArray->dn.length());
313                                 //strcpy(tempDn, dn.c_str());
314                                 cPara.push_back(tempDn);
315                         }
316                         
317                         if (pDataArray->useXa) {
318                                 char* tempxa = new char[5]; 
319                                 //strcpy(tempxa, "--xa"); 
320                                 *tempxa = '\0'; strncat(tempxa, "--xa", 4);
321                                 cPara.push_back(tempxa);
322                                 char* tempXa = new char[pDataArray->xa.length()+1];
323                                 *tempXa = '\0'; strncat(tempXa, pDataArray->xa.c_str(), pDataArray->xa.length());
324                                 //strcpy(tempXa, xa.c_str());
325                                 cPara.push_back(tempXa);
326                         }
327                         
328                         if (pDataArray->useChunks) {
329                                 char* tempchunks = new char[9]; 
330                                 //strcpy(tempchunks, "--chunks"); 
331                                 *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
332                                 cPara.push_back(tempchunks);
333                                 char* tempChunks = new char[pDataArray->chunks.length()+1];
334                                 *tempChunks = '\0'; strncat(tempChunks, pDataArray->chunks.c_str(), pDataArray->chunks.length());
335                                 //strcpy(tempChunks, chunks.c_str());
336                                 cPara.push_back(tempChunks);
337                         }
338                         
339                         if (pDataArray->useMinchunk) {
340                                 char* tempminchunk = new char[11]; 
341                                 //strcpy(tempminchunk, "--minchunk"); 
342                                 *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
343                                 cPara.push_back(tempminchunk);
344                                 char* tempMinchunk = new char[pDataArray->minchunk.length()+1];
345                                 *tempMinchunk = '\0'; strncat(tempMinchunk, pDataArray->minchunk.c_str(), pDataArray->minchunk.length());
346                                 //strcpy(tempMinchunk, minchunk.c_str());
347                                 cPara.push_back(tempMinchunk);
348                         }
349                         
350                         if (pDataArray->useIdsmoothwindow) {
351                                 char* tempidsmoothwindow = new char[17]; 
352                                 *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
353                                 //strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
354                                 cPara.push_back(tempidsmoothwindow);
355                                 char* tempIdsmoothwindow = new char[pDataArray->idsmoothwindow.length()+1];
356                                 *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, pDataArray->idsmoothwindow.c_str(), pDataArray->idsmoothwindow.length());
357                                 //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
358                                 cPara.push_back(tempIdsmoothwindow);
359                         }
360                         
361                         if (pDataArray->useMaxp) {
362                                 char* tempmaxp = new char[7]; 
363                                 //strcpy(tempmaxp, "--maxp"); 
364                                 *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
365                                 cPara.push_back(tempmaxp);
366                                 char* tempMaxp = new char[pDataArray->maxp.length()+1];
367                                 *tempMaxp = '\0'; strncat(tempMaxp, pDataArray->maxp.c_str(), pDataArray->maxp.length());
368                                 //strcpy(tempMaxp, maxp.c_str());
369                                 cPara.push_back(tempMaxp);
370                         }
371                         
372                         if (!pDataArray->skipgaps) {
373                                 char* tempskipgaps = new char[13]; 
374                                 //strcpy(tempskipgaps, "--[no]skipgaps");
375                                 *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
376                                 cPara.push_back(tempskipgaps);
377                         }
378                         
379                         if (!pDataArray->skipgaps2) {
380                                 char* tempskipgaps2 = new char[14]; 
381                                 //strcpy(tempskipgaps2, "--[no]skipgaps2"); 
382                                 *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
383                                 cPara.push_back(tempskipgaps2);
384                         }
385                         
386                         if (pDataArray->useMinlen) {
387                                 char* tempminlen = new char[9]; 
388                                 *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
389                                 //strcpy(tempminlen, "--minlen"); 
390                                 cPara.push_back(tempminlen);
391                                 char* tempMinlen = new char[pDataArray->minlen.length()+1];
392                                 //strcpy(tempMinlen, minlen.c_str());
393                                 *tempMinlen = '\0'; strncat(tempMinlen, pDataArray->minlen.c_str(), pDataArray->minlen.length());
394                                 cPara.push_back(tempMinlen);
395                         }
396                         
397                         if (pDataArray->useMaxlen) {
398                                 char* tempmaxlen = new char[9]; 
399                                 //strcpy(tempmaxlen, "--maxlen"); 
400                                 *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
401                                 cPara.push_back(tempmaxlen);
402                                 char* tempMaxlen = new char[pDataArray->maxlen.length()+1];
403                                 *tempMaxlen = '\0'; strncat(tempMaxlen, pDataArray->maxlen.c_str(), pDataArray->maxlen.length());
404                                 //strcpy(tempMaxlen, maxlen.c_str());
405                                 cPara.push_back(tempMaxlen);
406                         }
407                         
408                         if (pDataArray->ucl) {
409                                 char* tempucl = new char[5]; 
410                                 strcpy(tempucl, "--ucl"); 
411                                 cPara.push_back(tempucl);
412                         }
413                         
414                         if (pDataArray->useQueryfract) {
415                                 char* tempqueryfract = new char[13]; 
416                                 *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
417                                 //strcpy(tempqueryfract, "--queryfract"); 
418                                 cPara.push_back(tempqueryfract);
419                                 char* tempQueryfract = new char[pDataArray->queryfract.length()+1];
420                                 *tempQueryfract = '\0'; strncat(tempQueryfract, pDataArray->queryfract.c_str(), pDataArray->queryfract.length());
421                                 //strcpy(tempQueryfract, queryfract.c_str());
422                                 cPara.push_back(tempQueryfract);
423                         }
424                         
425                         
426                         char** uchimeParameters;
427                         uchimeParameters = new char*[cPara.size()];
428                         string commandString = "";
429                         for (int j = 0; j < cPara.size(); j++) {  uchimeParameters[j] = cPara[j];  commandString += toString(cPara[j]) + " "; } 
430                         //int numArgs = cPara.size();
431                         
432                         //uchime_main(numArgs, uchimeParameters); 
433                         //cout << "commandString = " << commandString << endl;
434                         commandString = "\"" + commandString + "\"";
435             
436             if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
437             
438                         system(commandString.c_str());
439                         
440                         //free memory
441                         for(int j = 0; j < cPara.size(); j++)  {  delete cPara[j];  }
442                         delete[] uchimeParameters; 
443                         
444                         //remove "" from filenames
445                         outputFName = outputFName.substr(1, outputFName.length()-2);
446                         filename = filename.substr(1, filename.length()-2);
447                         alns = alns.substr(1, alns.length()-2);
448                         
449                         if (pDataArray->m->control_pressed) { if (pDataArray->hasCount) { delete cparser; } { delete parser; } return 0; }
450                         
451                         //create accnos file from uchime results
452                         ifstream in; 
453                         pDataArray->m->openInputFile(outputFName, in);
454                         
455                         ofstream out;
456                         pDataArray->m->openOutputFile(accnos, out);
457             
458                         
459                         int num = 0;
460                         numChimeras = 0;
461             map<string, string> thisnamemap;
462             map<string, string>::iterator itN;
463             if (pDataArray->dups && !pDataArray->hasCount) { thisnamemap = parser->getNameMap(pDataArray->groups[i]); }
464                 
465                         while(!in.eof()) {
466                                 
467                                 if (pDataArray->m->control_pressed) { break; }
468                                 
469                                 string name = "";
470                                 string chimeraFlag = "";
471                                 in >> chimeraFlag >> name;
472                                 
473                                 //fix name 
474                                 name = name.substr(0, name.length()-1); //rip off last /
475                                 name = name.substr(0, name.find_last_of('/'));
476                                 
477                                 for (int j = 0; j < 15; j++) {  in >> chimeraFlag; }
478                                 pDataArray->m->gobble(in);
479                                 
480                                 if (chimeraFlag == "Y") {
481                     if (pDataArray->dups) {
482                         if (!pDataArray->hasCount) { //output redundant names for each group
483                             itN = thisnamemap.find(name);
484                             if (itN != thisnamemap.end()) {
485                                 vector<string> tempNames; pDataArray->m->splitAtComma(itN->second, tempNames);
486                                 for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
487                             }else { pDataArray->m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); pDataArray->m->control_pressed = true; }
488
489                         }else {
490                             out << name << endl;
491                             outCountList << name << '\t' << pDataArray->groups[i] << endl;
492                         }
493                     }else{  out << name << endl;  }
494                     numChimeras++;
495                 }
496                                 num++;
497                         }
498                         in.close();
499                         out.close();
500                         
501                         
502                         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
503                         totalSeqs += num;
504                         pDataArray->numChimeras += numChimeras;
505                         
506                         if (pDataArray->m->control_pressed) { if (pDataArray->hasCount) { delete cparser; } { delete parser; } return 0; }
507                         
508                         //remove file made for uchime
509                         pDataArray->m->mothurRemove(filename);
510                         
511                         //append files
512                         pDataArray->m->appendFiles(outputFName, pDataArray->outputFName); pDataArray->m->mothurRemove(outputFName);
513                         pDataArray->m->appendFiles(accnos, pDataArray->accnos); pDataArray->m->mothurRemove(accnos);
514                         if (pDataArray->chimealns) { pDataArray->m->appendFiles(alns, pDataArray->alns); pDataArray->m->mothurRemove(alns); }
515                         
516                         pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(num) + " sequences from group " + pDataArray->groups[i] + ".");    pDataArray->m->mothurOutEndLine();                                      
517                         
518                 }       
519                 
520         if (pDataArray->hasCount && pDataArray->dups) { outCountList.close(); }
521                 pDataArray->count = totalSeqs;
522                 if (pDataArray->hasCount) { delete cparser; } { delete parser; }
523                 return totalSeqs;
524                 
525         }
526         catch(exception& e) {
527                 pDataArray->m->errorOut(e, "ChimeraUchimeCommand", "MyUchimeThreadFunction");
528                 exit(1);
529         }
530
531 /**************************************************************************************************/
532
533 static DWORD WINAPI MyUchimeSeqsThreadFunction(LPVOID lpParam){ 
534         uchimeData* pDataArray;
535         pDataArray = (uchimeData*)lpParam;
536         
537         try {
538                 
539                 pDataArray->outputFName = pDataArray->m->getFullPathName(pDataArray->outputFName);
540                 pDataArray->filename = pDataArray->m->getFullPathName(pDataArray->filename);
541                 pDataArray->alns = pDataArray->m->getFullPathName(pDataArray->alns);
542                 
543                 int totalSeqs = 0;
544                 int numChimeras = 0;
545         
546                 int start = time(NULL);  if (pDataArray->m->control_pressed) { return 0; }
547                         
548                 //to allow for spaces in the path
549                 string outputFName = "\"" + pDataArray->outputFName + "\"";
550                 string filename = "\"" + pDataArray->filename + "\"";
551                 string alns = "\"" + pDataArray->alns+ "\"";
552                 string templatefile = "\"" + pDataArray->templatefile + "\"";
553                 string accnos = pDataArray->accnos;
554                 
555                 vector<char*> cPara;
556                 
557                 string uchimeCommand = pDataArray->uchimeLocation;
558         uchimeCommand = "\"" + uchimeCommand + "\"";
559         
560         char* tempUchime;
561         tempUchime= new char[uchimeCommand.length()+1]; 
562         *tempUchime = '\0';
563         strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
564         cPara.push_back(tempUchime);
565                 
566         string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted";
567         //prepFile(filename.substr(1, filename.length()-2), outputFileName);
568         //prepFile(filename, outputFileName);
569         /******************************************/
570         ifstream in23;
571         pDataArray->m->openInputFile((filename.substr(1, filename.length()-2)), in23);
572         
573         ofstream out23;
574         pDataArray->m->openOutputFile(outputFileName, out23);
575         
576         int fcount = 0;
577         while (!in23.eof()) {
578             if (pDataArray->m->control_pressed) { break;  }
579             
580             Sequence seq(in23); pDataArray->m->gobble(in23);
581             
582             if (seq.getName() != "") { seq.printSequence(out23); fcount++; }
583         }
584         in23.close();
585         out23.close();
586         /******************************************/
587         
588         filename = outputFileName;
589         filename = "\"" + filename + "\"";
590         
591         //add reference file
592                 char* tempRef = new char[5]; 
593                 //strcpy(tempRef, "--db"); 
594                 *tempRef = '\0'; strncat(tempRef, "--db", 4);
595                 cPara.push_back(tempRef);  
596                 char* tempR = new char[templatefile.length()+1];
597                 //strcpy(tempR, templatefile.c_str());
598                 *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
599                 cPara.push_back(tempR);
600         
601                 char* tempIn = new char[8]; 
602                 *tempIn = '\0'; strncat(tempIn, "--input", 7);
603                 //strcpy(tempIn, "--input"); 
604                 cPara.push_back(tempIn);
605                 char* temp = new char[filename.length()+1];
606                 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
607                 //strcpy(temp, filename.c_str());
608                 cPara.push_back(temp);
609                 
610                 char* tempO = new char[12]; 
611                 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
612                 //strcpy(tempO, "--uchimeout"); 
613                 cPara.push_back(tempO);
614                 char* tempout = new char[outputFName.length()+1];
615                 //strcpy(tempout, outputFName.c_str());
616                 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
617                 cPara.push_back(tempout);
618                 
619                 if (pDataArray->chimealns) {
620                         char* tempA = new char[13]; 
621                         *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
622                         //strcpy(tempA, "--uchimealns"); 
623                         cPara.push_back(tempA);
624                         char* tempa = new char[alns.length()+1];
625                         //strcpy(tempa, alns.c_str());
626                         *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
627                         cPara.push_back(tempa);
628                 }
629                 
630         if (pDataArray->strand != "") {
631             char* tempA = new char[9]; 
632             *tempA = '\0'; strncat(tempA, "--strand", 8);
633             cPara.push_back(tempA);
634             char* tempa = new char[pDataArray->strand.length()+1];
635             *tempa = '\0'; strncat(tempa, pDataArray->strand.c_str(), pDataArray->strand.length());
636             cPara.push_back(tempa);
637         }
638         
639                 if (pDataArray->useAbskew) {
640                         char* tempskew = new char[9];
641                         *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
642                         //strcpy(tempskew, "--abskew"); 
643                         cPara.push_back(tempskew);
644                         char* tempSkew = new char[pDataArray->abskew.length()+1];
645                         //strcpy(tempSkew, abskew.c_str());
646                         *tempSkew = '\0'; strncat(tempSkew, pDataArray->abskew.c_str(), pDataArray->abskew.length());
647                         cPara.push_back(tempSkew);
648                 }
649                 
650                 if (pDataArray->useMinH) {
651                         char* tempminh = new char[7]; 
652                         *tempminh = '\0'; strncat(tempminh, "--minh", 6);
653                         //strcpy(tempminh, "--minh"); 
654                         cPara.push_back(tempminh);
655                         char* tempMinH = new char[pDataArray->minh.length()+1];
656                         *tempMinH = '\0'; strncat(tempMinH, pDataArray->minh.c_str(), pDataArray->minh.length());
657                         //strcpy(tempMinH, minh.c_str());
658                         cPara.push_back(tempMinH);
659                 }
660                 
661                 if (pDataArray->useMindiv) {
662                         char* tempmindiv = new char[9]; 
663                         *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
664                         //strcpy(tempmindiv, "--mindiv"); 
665                         cPara.push_back(tempmindiv);
666                         char* tempMindiv = new char[pDataArray->mindiv.length()+1];
667                         *tempMindiv = '\0'; strncat(tempMindiv, pDataArray->mindiv.c_str(), pDataArray->mindiv.length());
668                         //strcpy(tempMindiv, mindiv.c_str());
669                         cPara.push_back(tempMindiv);
670                 }
671                 
672                 if (pDataArray->useXn) {
673                         char* tempxn = new char[5]; 
674                         //strcpy(tempxn, "--xn"); 
675                         *tempxn = '\0'; strncat(tempxn, "--xn", 4);
676                         cPara.push_back(tempxn);
677                         char* tempXn = new char[pDataArray->xn.length()+1];
678                         //strcpy(tempXn, xn.c_str());
679                         *tempXn = '\0'; strncat(tempXn, pDataArray->xn.c_str(), pDataArray->xn.length());
680                         cPara.push_back(tempXn);
681                 }
682                 
683                 if (pDataArray->useDn) {
684                         char* tempdn = new char[5]; 
685                         //strcpy(tempdn, "--dn"); 
686                         *tempdn = '\0'; strncat(tempdn, "--dn", 4);
687                         cPara.push_back(tempdn);
688                         char* tempDn = new char[pDataArray->dn.length()+1];
689                         *tempDn = '\0'; strncat(tempDn, pDataArray->dn.c_str(), pDataArray->dn.length());
690                         //strcpy(tempDn, dn.c_str());
691                         cPara.push_back(tempDn);
692                 }
693                 
694                 if (pDataArray->useXa) {
695                         char* tempxa = new char[5]; 
696                         //strcpy(tempxa, "--xa"); 
697                         *tempxa = '\0'; strncat(tempxa, "--xa", 4);
698                         cPara.push_back(tempxa);
699                         char* tempXa = new char[pDataArray->xa.length()+1];
700                         *tempXa = '\0'; strncat(tempXa, pDataArray->xa.c_str(), pDataArray->xa.length());
701                         //strcpy(tempXa, xa.c_str());
702                         cPara.push_back(tempXa);
703                 }
704                 
705                 if (pDataArray->useChunks) {
706                         char* tempchunks = new char[9]; 
707                         //strcpy(tempchunks, "--chunks"); 
708                         *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
709                         cPara.push_back(tempchunks);
710                         char* tempChunks = new char[pDataArray->chunks.length()+1];
711                         *tempChunks = '\0'; strncat(tempChunks, pDataArray->chunks.c_str(), pDataArray->chunks.length());
712                         //strcpy(tempChunks, chunks.c_str());
713                         cPara.push_back(tempChunks);
714                 }
715                 
716                 if (pDataArray->useMinchunk) {
717                         char* tempminchunk = new char[11]; 
718                         //strcpy(tempminchunk, "--minchunk"); 
719                         *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
720                         cPara.push_back(tempminchunk);
721                         char* tempMinchunk = new char[pDataArray->minchunk.length()+1];
722                         *tempMinchunk = '\0'; strncat(tempMinchunk, pDataArray->minchunk.c_str(), pDataArray->minchunk.length());
723                         //strcpy(tempMinchunk, minchunk.c_str());
724                         cPara.push_back(tempMinchunk);
725                 }
726                 
727                 if (pDataArray->useIdsmoothwindow) {
728                         char* tempidsmoothwindow = new char[17]; 
729                         *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
730                         //strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
731                         cPara.push_back(tempidsmoothwindow);
732                         char* tempIdsmoothwindow = new char[pDataArray->idsmoothwindow.length()+1];
733                         *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, pDataArray->idsmoothwindow.c_str(), pDataArray->idsmoothwindow.length());
734                         //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
735                         cPara.push_back(tempIdsmoothwindow);
736                 }
737                 
738                 if (pDataArray->useMaxp) {
739                         char* tempmaxp = new char[7]; 
740                         //strcpy(tempmaxp, "--maxp"); 
741                         *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
742                         cPara.push_back(tempmaxp);
743                         char* tempMaxp = new char[pDataArray->maxp.length()+1];
744                         *tempMaxp = '\0'; strncat(tempMaxp, pDataArray->maxp.c_str(), pDataArray->maxp.length());
745                         //strcpy(tempMaxp, maxp.c_str());
746                         cPara.push_back(tempMaxp);
747                 }
748                 
749                 if (!pDataArray->skipgaps) {
750                         char* tempskipgaps = new char[13]; 
751                         //strcpy(tempskipgaps, "--[no]skipgaps");
752                         *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
753                         cPara.push_back(tempskipgaps);
754                 }
755                 
756                 if (!pDataArray->skipgaps2) {
757                         char* tempskipgaps2 = new char[14]; 
758                         //strcpy(tempskipgaps2, "--[no]skipgaps2"); 
759                         *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
760                         cPara.push_back(tempskipgaps2);
761                 }
762                 
763                 if (pDataArray->useMinlen) {
764                         char* tempminlen = new char[9]; 
765                         *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
766                         //strcpy(tempminlen, "--minlen"); 
767                         cPara.push_back(tempminlen);
768                         char* tempMinlen = new char[pDataArray->minlen.length()+1];
769                         //strcpy(tempMinlen, minlen.c_str());
770                         *tempMinlen = '\0'; strncat(tempMinlen, pDataArray->minlen.c_str(), pDataArray->minlen.length());
771                         cPara.push_back(tempMinlen);
772                 }
773                 
774                 if (pDataArray->useMaxlen) {
775                         char* tempmaxlen = new char[9]; 
776                         //strcpy(tempmaxlen, "--maxlen"); 
777                         *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
778                         cPara.push_back(tempmaxlen);
779                         char* tempMaxlen = new char[pDataArray->maxlen.length()+1];
780                         *tempMaxlen = '\0'; strncat(tempMaxlen, pDataArray->maxlen.c_str(), pDataArray->maxlen.length());
781                         //strcpy(tempMaxlen, maxlen.c_str());
782                         cPara.push_back(tempMaxlen);
783                 }
784                 
785                 if (pDataArray->ucl) {
786                         char* tempucl = new char[5]; 
787                         strcpy(tempucl, "--ucl"); 
788                         cPara.push_back(tempucl);
789                 }
790                 
791                 if (pDataArray->useQueryfract) {
792                         char* tempqueryfract = new char[13]; 
793                         *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
794                         //strcpy(tempqueryfract, "--queryfract"); 
795                         cPara.push_back(tempqueryfract);
796                         char* tempQueryfract = new char[pDataArray->queryfract.length()+1];
797                         *tempQueryfract = '\0'; strncat(tempQueryfract, pDataArray->queryfract.c_str(), pDataArray->queryfract.length());
798                         //strcpy(tempQueryfract, queryfract.c_str());
799                         cPara.push_back(tempQueryfract);
800                 }
801                 
802                 
803                 char** uchimeParameters;
804                 uchimeParameters = new char*[cPara.size()];
805                 string commandString = "";
806                 for (int j = 0; j < cPara.size(); j++) {  uchimeParameters[j] = cPara[j];  commandString += toString(cPara[j]) + " "; } 
807                 //int numArgs = cPara.size();
808                 
809         commandString = "\"" + commandString + "\"";
810         
811                 //uchime_main(numArgs, uchimeParameters); 
812                 //cout << "commandString = " << commandString << endl;
813         if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
814                 system(commandString.c_str());
815                 
816                 //free memory
817                 for(int j = 0; j < cPara.size(); j++)  {  delete cPara[j];  }
818                 delete[] uchimeParameters; 
819                 
820                 //remove "" from filenames
821                 outputFName = outputFName.substr(1, outputFName.length()-2);
822                 filename = filename.substr(1, filename.length()-2);
823                 alns = alns.substr(1, alns.length()-2);
824                 
825                 if (pDataArray->m->control_pressed) { return 0; }
826                 
827                 //create accnos file from uchime results
828                 ifstream in; 
829                 pDataArray->m->openInputFile(outputFName, in);
830                 
831                 ofstream out;
832                 pDataArray->m->openOutputFile(accnos, out);
833                 
834                 numChimeras = 0;
835                 while(!in.eof()) {
836                         
837                         if (pDataArray->m->control_pressed) { break; }
838                         
839                         string name = "";
840                         string chimeraFlag = "";
841                         in >> chimeraFlag >> name;
842                         
843                         for (int j = 0; j < 15; j++) {  in >> chimeraFlag; }
844                         pDataArray->m->gobble(in);
845                         
846                         if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
847                         totalSeqs++;
848                 }
849                 in.close();
850                 out.close();
851                 
852         if (fcount != totalSeqs) { pDataArray->m->mothurOut("[ERROR]: process " + toString(pDataArray->threadID) + " only processed " + toString(pDataArray->count) + " of " + toString(pDataArray->end) + " sequences assigned to it, quitting. \n"); pDataArray->m->control_pressed = true; }
853         
854                 if (pDataArray->m->control_pressed) { return 0; }
855                 
856                 pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences.");       pDataArray->m->mothurOutEndLine();                                      
857         
858                 pDataArray->count = totalSeqs;
859                 pDataArray->numChimeras = numChimeras;
860         
861                 return totalSeqs;
862                 
863         }
864         catch(exception& e) {
865                 pDataArray->m->errorOut(e, "ChimeraUchimeCommand", "MyUchimeSeqsThreadFunction");
866                 exit(1);
867         }
868
869
870 #endif
871
872 /**************************************************************************************************/
873
874
875 #endif
876
877