]> git.donarmstrong.com Git - mothur.git/blob - chimerauchimecommand.h
67c77f24f4597838f959224ceaf6bab4467ce5df
[mothur.git] / chimerauchimecommand.h
1 #ifndef CHIMERAUCHIMECOMMAND_H
2 #define CHIMERAUCHIMECOMMAND_H
3
4
5 /*
6  *  chimerauchimecommand.h
7  *  Mothur
8  *
9  *  Created by westcott on 5/13/11.
10  *  Copyright 2011 Schloss Lab. All rights reserved.
11  *
12  */
13
14 #include "mothur.h"
15 #include "command.hpp"
16 #include "sequenceparser.h"
17 #include "counttable.h"
18 #include "sequencecountparser.h"
19
20 /***********************************************************/
21
22 class ChimeraUchimeCommand : public Command {
23 public:
24         ChimeraUchimeCommand(string);
25         ChimeraUchimeCommand();
26         ~ChimeraUchimeCommand() {}
27         
28         vector<string> setParameters();
29         string getCommandName()                 { return "chimera.uchime";              }
30         string getCommandCategory()             { return "Sequence Processing"; }
31         string getOutputFileNameTag(string, string);
32         string getHelpString(); 
33         string getCitation() { return "uchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\nhttp://www.mothur.org/wiki/Chimera.uchime\nEdgar,R.C., Haas,B.J., Clemente,J.C., Quince,C. and Knight,R. (2011), UCHIME improves sensitivity and speed of chimera detection, Bioinformatics, in press.\n"; }
34         string getDescription()         { return "detect chimeric sequences"; }
35         
36         int execute(); 
37         void help() { m->mothurOut(getHelpString()); }          
38         
39 private:
40         struct linePair {
41                 int start;
42                 int end;
43                 linePair(int i, int j) : start(i), end(j) {}
44         };
45         
46         vector<int> processIDS;   //processid
47         int driver(string, string, string, string, int&);
48         int createProcesses(string, string, string, string, int&);
49                 
50         bool abort, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount, hasName;
51         string fastafile, groupfile, templatefile, outputDir, namefile, countfile, abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, uchimeLocation;
52         int processors;
53         
54         SequenceParser* sparser;
55     SequenceCountParser* cparser;
56         vector<string> outputNames;
57         vector<string> fastaFileNames;
58         vector<string> nameFileNames;
59         vector<string> groupFileNames;
60         
61         string getNamesFile(string&);
62         int readFasta(string, map<string, string>&);
63         int printFile(vector<seqPriorityNode>&, string);
64         int deconvoluteResults(map<string, string>&, string, string, string);
65         int driverGroups(string, string, string, string, int, int, vector<string>);
66         int createProcessesGroups(string, string, string, string, vector<string>, string, string, string);
67     int prepFile(string filename, string);
68
69
70 };
71
72 /***********************************************************/
73 /**************************************************************************************************/
74 //custom data structure for threads to use.
75 // This is passed by void pointer so it can be any data type
76 // that can be passed using a single void pointer (LPVOID).
77 struct uchimeData {
78         string fastafile; 
79         string namefile; 
80         string groupfile;
81         string outputFName;
82         string accnos, alns, filename, templatefile, uchimeLocation;
83         MothurOut* m;
84         int start;
85         int end;
86         int threadID, count, numChimeras;
87         vector<string> groups;
88         bool useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount;
89         string abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract;
90         
91         uchimeData(){}
92         uchimeData(string o, string uloc, string t, string file, string f, string n, string g, string ac,  string al, vector<string> gr, MothurOut* mout, int st, int en, int tid) {
93                 fastafile = f;
94                 namefile = n;
95                 groupfile = g;
96                 filename = file;
97                 outputFName = o;
98                 templatefile = t;
99                 accnos = ac;
100                 alns = al;
101                 m = mout;
102                 start = st;
103                 end = en;
104                 threadID = tid;
105                 groups = gr;
106                 count = 0;
107                 numChimeras = 0;
108         uchimeLocation = uloc;
109         }
110         void setBooleans(bool Abskew, bool calns, bool MinH, bool Mindiv, bool Xn, bool Dn, bool Xa, bool Chunks, bool Minchunk, bool Idsmoothwindow, bool Minsmoothid, bool Maxp, bool skipgap, bool skipgap2, bool Minlen, bool Maxlen, bool uc, bool Queryfract, bool hc) {
111                 useAbskew = Abskew;
112                 chimealns = calns;
113                 useMinH = MinH;
114                 useMindiv = Mindiv;
115                 useXn = Xn;
116                 useDn = Dn;
117                 useXa = Xa;
118                 useChunks = Chunks;
119                 useMinchunk = Minchunk;
120                 useIdsmoothwindow = Idsmoothwindow;
121                 useMinsmoothid = Minsmoothid;
122                 useMaxp = Maxp;
123                 skipgaps = skipgap;
124                 skipgaps2 = skipgap2;
125                 useMinlen = Minlen;
126                 useMaxlen = Maxlen;
127                 ucl = uc;
128                 useQueryfract = Queryfract;
129         hasCount = hc;
130         }
131         
132         void setVariables(string abske, string min, string mindi, string x, string d, string xa2, string chunk, string minchun, string idsmoothwindo, string minsmoothi, string max, string minle, string maxle, string queryfrac) {
133                 abskew = abske;
134                 minh = min;
135                 mindiv = mindi;
136                 xn = x;
137                 dn = d;
138                 xa = xa2;
139                 chunks = chunk;
140                 minchunk = minchun;
141                 idsmoothwindow = idsmoothwindo;
142                 minsmoothid = minsmoothi;
143                 maxp = max;
144                 minlen = minle;
145                 maxlen = maxle;
146                 queryfract = queryfrac;
147         }
148 };
149
150 /**************************************************************************************************/
151 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
152 #else
153 static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){ 
154         uchimeData* pDataArray;
155         pDataArray = (uchimeData*)lpParam;
156         
157         try {
158                 
159                 pDataArray->outputFName = pDataArray->m->getFullPathName(pDataArray->outputFName);
160                 pDataArray->filename = pDataArray->m->getFullPathName(pDataArray->filename);
161                 pDataArray->alns = pDataArray->m->getFullPathName(pDataArray->alns);
162                 
163                 //clears files
164                 ofstream out, out1, out2;
165                 pDataArray->m->openOutputFile(pDataArray->outputFName, out); out.close(); 
166                 pDataArray->m->openOutputFile(pDataArray->accnos, out1); out1.close();
167                 if (pDataArray->chimealns) { pDataArray->m->openOutputFile(pDataArray->alns, out2); out2.close(); }
168                 
169                 //parse fasta and name file by group
170                 SequenceParser* parser;
171         SequenceCountParser* cparser;
172                 if (pDataArray->hasCount) {
173             CountTable* ct = new CountTable();
174             ct->readTable(pDataArray->namefile);
175             cparser = new SequenceCountParser(pDataArray->fastafile, *ct);
176             delete ct;
177         }else {
178             if (pDataArray->namefile != "") { parser = new SequenceParser(pDataArray->groupfile, pDataArray->fastafile, pDataArray->namefile);  }
179             else                                                        { parser = new SequenceParser(pDataArray->groupfile, pDataArray->fastafile);                                            }
180         }
181                 
182                 int totalSeqs = 0;
183                 int numChimeras = 0;
184                 
185                 for (int i = pDataArray->start; i < pDataArray->end; i++) {
186                         int start = time(NULL);  if (pDataArray->m->control_pressed) {  if (pDataArray->hasCount) { delete cparser; } { delete parser; } return 0; }
187                         
188             
189                         int error;
190             if (pDataArray->hasCount) { 
191                 error = cparser->getSeqs(pDataArray->groups[i], pDataArray->filename, true); if ((error == 1) || pDataArray->m->control_pressed) {  delete cparser; return 0; }
192             }else {
193                error = cparser->getSeqs(pDataArray->groups[i], pDataArray->filename, true); if ((error == 1) || pDataArray->m->control_pressed) {  delete parser; return 0; } 
194             }
195                         
196                         //int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
197                         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
198                         
199                         //to allow for spaces in the path
200                         string outputFName = "\"" + pDataArray->outputFName+pDataArray->groups[i] + "\"";
201                         string filename = "\"" + pDataArray->filename + "\"";
202                         string alns = "\"" + pDataArray->alns+pDataArray->groups[i] + "\"";
203                         string accnos = pDataArray->accnos+pDataArray->groups[i];
204                         
205                         vector<char*> cPara;
206                         
207             string uchimeCommand = pDataArray->uchimeLocation;
208             uchimeCommand = "\"" + uchimeCommand + "\"";
209                         
210                         char* tempUchime;
211                         tempUchime= new char[uchimeCommand.length()+1]; 
212                         *tempUchime = '\0';
213                         strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
214                         cPara.push_back(tempUchime);
215                         
216                         char* tempIn = new char[8]; 
217                         *tempIn = '\0'; strncat(tempIn, "--input", 7);
218                         //strcpy(tempIn, "--input"); 
219                         cPara.push_back(tempIn);
220                         char* temp = new char[filename.length()+1];
221                         *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
222                         //strcpy(temp, filename.c_str());
223                         cPara.push_back(temp);
224                         
225                         char* tempO = new char[12]; 
226                         *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
227                         //strcpy(tempO, "--uchimeout"); 
228                         cPara.push_back(tempO);
229                         char* tempout = new char[outputFName.length()+1];
230                         //strcpy(tempout, outputFName.c_str());
231                         *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
232                         cPara.push_back(tempout);
233                         
234                         if (pDataArray->chimealns) {
235                                 char* tempA = new char[13]; 
236                                 *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
237                                 //strcpy(tempA, "--uchimealns"); 
238                                 cPara.push_back(tempA);
239                                 char* tempa = new char[alns.length()+1];
240                                 //strcpy(tempa, alns.c_str());
241                                 *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
242                                 cPara.push_back(tempa);
243                         }
244                         
245                         if (pDataArray->useAbskew) {
246                                 char* tempskew = new char[9];
247                                 *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
248                                 //strcpy(tempskew, "--abskew"); 
249                                 cPara.push_back(tempskew);
250                                 char* tempSkew = new char[pDataArray->abskew.length()+1];
251                                 //strcpy(tempSkew, abskew.c_str());
252                                 *tempSkew = '\0'; strncat(tempSkew, pDataArray->abskew.c_str(), pDataArray->abskew.length());
253                                 cPara.push_back(tempSkew);
254                         }
255                         
256                         if (pDataArray->useMinH) {
257                                 char* tempminh = new char[7]; 
258                                 *tempminh = '\0'; strncat(tempminh, "--minh", 6);
259                                 //strcpy(tempminh, "--minh"); 
260                                 cPara.push_back(tempminh);
261                                 char* tempMinH = new char[pDataArray->minh.length()+1];
262                                 *tempMinH = '\0'; strncat(tempMinH, pDataArray->minh.c_str(), pDataArray->minh.length());
263                                 //strcpy(tempMinH, minh.c_str());
264                                 cPara.push_back(tempMinH);
265                         }
266                         
267                         if (pDataArray->useMindiv) {
268                                 char* tempmindiv = new char[9]; 
269                                 *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
270                                 //strcpy(tempmindiv, "--mindiv"); 
271                                 cPara.push_back(tempmindiv);
272                                 char* tempMindiv = new char[pDataArray->mindiv.length()+1];
273                                 *tempMindiv = '\0'; strncat(tempMindiv, pDataArray->mindiv.c_str(), pDataArray->mindiv.length());
274                                 //strcpy(tempMindiv, mindiv.c_str());
275                                 cPara.push_back(tempMindiv);
276                         }
277                         
278                         if (pDataArray->useXn) {
279                                 char* tempxn = new char[5]; 
280                                 //strcpy(tempxn, "--xn"); 
281                                 *tempxn = '\0'; strncat(tempxn, "--xn", 4);
282                                 cPara.push_back(tempxn);
283                                 char* tempXn = new char[pDataArray->xn.length()+1];
284                                 //strcpy(tempXn, xn.c_str());
285                                 *tempXn = '\0'; strncat(tempXn, pDataArray->xn.c_str(), pDataArray->xn.length());
286                                 cPara.push_back(tempXn);
287                         }
288                         
289                         if (pDataArray->useDn) {
290                                 char* tempdn = new char[5]; 
291                                 //strcpy(tempdn, "--dn"); 
292                                 *tempdn = '\0'; strncat(tempdn, "--dn", 4);
293                                 cPara.push_back(tempdn);
294                                 char* tempDn = new char[pDataArray->dn.length()+1];
295                                 *tempDn = '\0'; strncat(tempDn, pDataArray->dn.c_str(), pDataArray->dn.length());
296                                 //strcpy(tempDn, dn.c_str());
297                                 cPara.push_back(tempDn);
298                         }
299                         
300                         if (pDataArray->useXa) {
301                                 char* tempxa = new char[5]; 
302                                 //strcpy(tempxa, "--xa"); 
303                                 *tempxa = '\0'; strncat(tempxa, "--xa", 4);
304                                 cPara.push_back(tempxa);
305                                 char* tempXa = new char[pDataArray->xa.length()+1];
306                                 *tempXa = '\0'; strncat(tempXa, pDataArray->xa.c_str(), pDataArray->xa.length());
307                                 //strcpy(tempXa, xa.c_str());
308                                 cPara.push_back(tempXa);
309                         }
310                         
311                         if (pDataArray->useChunks) {
312                                 char* tempchunks = new char[9]; 
313                                 //strcpy(tempchunks, "--chunks"); 
314                                 *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
315                                 cPara.push_back(tempchunks);
316                                 char* tempChunks = new char[pDataArray->chunks.length()+1];
317                                 *tempChunks = '\0'; strncat(tempChunks, pDataArray->chunks.c_str(), pDataArray->chunks.length());
318                                 //strcpy(tempChunks, chunks.c_str());
319                                 cPara.push_back(tempChunks);
320                         }
321                         
322                         if (pDataArray->useMinchunk) {
323                                 char* tempminchunk = new char[11]; 
324                                 //strcpy(tempminchunk, "--minchunk"); 
325                                 *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
326                                 cPara.push_back(tempminchunk);
327                                 char* tempMinchunk = new char[pDataArray->minchunk.length()+1];
328                                 *tempMinchunk = '\0'; strncat(tempMinchunk, pDataArray->minchunk.c_str(), pDataArray->minchunk.length());
329                                 //strcpy(tempMinchunk, minchunk.c_str());
330                                 cPara.push_back(tempMinchunk);
331                         }
332                         
333                         if (pDataArray->useIdsmoothwindow) {
334                                 char* tempidsmoothwindow = new char[17]; 
335                                 *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
336                                 //strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
337                                 cPara.push_back(tempidsmoothwindow);
338                                 char* tempIdsmoothwindow = new char[pDataArray->idsmoothwindow.length()+1];
339                                 *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, pDataArray->idsmoothwindow.c_str(), pDataArray->idsmoothwindow.length());
340                                 //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
341                                 cPara.push_back(tempIdsmoothwindow);
342                         }
343                         
344                         if (pDataArray->useMaxp) {
345                                 char* tempmaxp = new char[7]; 
346                                 //strcpy(tempmaxp, "--maxp"); 
347                                 *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
348                                 cPara.push_back(tempmaxp);
349                                 char* tempMaxp = new char[pDataArray->maxp.length()+1];
350                                 *tempMaxp = '\0'; strncat(tempMaxp, pDataArray->maxp.c_str(), pDataArray->maxp.length());
351                                 //strcpy(tempMaxp, maxp.c_str());
352                                 cPara.push_back(tempMaxp);
353                         }
354                         
355                         if (!pDataArray->skipgaps) {
356                                 char* tempskipgaps = new char[13]; 
357                                 //strcpy(tempskipgaps, "--[no]skipgaps");
358                                 *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
359                                 cPara.push_back(tempskipgaps);
360                         }
361                         
362                         if (!pDataArray->skipgaps2) {
363                                 char* tempskipgaps2 = new char[14]; 
364                                 //strcpy(tempskipgaps2, "--[no]skipgaps2"); 
365                                 *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
366                                 cPara.push_back(tempskipgaps2);
367                         }
368                         
369                         if (pDataArray->useMinlen) {
370                                 char* tempminlen = new char[9]; 
371                                 *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
372                                 //strcpy(tempminlen, "--minlen"); 
373                                 cPara.push_back(tempminlen);
374                                 char* tempMinlen = new char[pDataArray->minlen.length()+1];
375                                 //strcpy(tempMinlen, minlen.c_str());
376                                 *tempMinlen = '\0'; strncat(tempMinlen, pDataArray->minlen.c_str(), pDataArray->minlen.length());
377                                 cPara.push_back(tempMinlen);
378                         }
379                         
380                         if (pDataArray->useMaxlen) {
381                                 char* tempmaxlen = new char[9]; 
382                                 //strcpy(tempmaxlen, "--maxlen"); 
383                                 *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
384                                 cPara.push_back(tempmaxlen);
385                                 char* tempMaxlen = new char[pDataArray->maxlen.length()+1];
386                                 *tempMaxlen = '\0'; strncat(tempMaxlen, pDataArray->maxlen.c_str(), pDataArray->maxlen.length());
387                                 //strcpy(tempMaxlen, maxlen.c_str());
388                                 cPara.push_back(tempMaxlen);
389                         }
390                         
391                         if (pDataArray->ucl) {
392                                 char* tempucl = new char[5]; 
393                                 strcpy(tempucl, "--ucl"); 
394                                 cPara.push_back(tempucl);
395                         }
396                         
397                         if (pDataArray->useQueryfract) {
398                                 char* tempqueryfract = new char[13]; 
399                                 *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
400                                 //strcpy(tempqueryfract, "--queryfract"); 
401                                 cPara.push_back(tempqueryfract);
402                                 char* tempQueryfract = new char[pDataArray->queryfract.length()+1];
403                                 *tempQueryfract = '\0'; strncat(tempQueryfract, pDataArray->queryfract.c_str(), pDataArray->queryfract.length());
404                                 //strcpy(tempQueryfract, queryfract.c_str());
405                                 cPara.push_back(tempQueryfract);
406                         }
407                         
408                         
409                         char** uchimeParameters;
410                         uchimeParameters = new char*[cPara.size()];
411                         string commandString = "";
412                         for (int j = 0; j < cPara.size(); j++) {  uchimeParameters[j] = cPara[j];  commandString += toString(cPara[j]) + " "; } 
413                         //int numArgs = cPara.size();
414                         
415                         //uchime_main(numArgs, uchimeParameters); 
416                         //cout << "commandString = " << commandString << endl;
417                         commandString = "\"" + commandString + "\"";
418             
419             if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
420             
421                         system(commandString.c_str());
422                         
423                         //free memory
424                         for(int j = 0; j < cPara.size(); j++)  {  delete cPara[j];  }
425                         delete[] uchimeParameters; 
426                         
427                         //remove "" from filenames
428                         outputFName = outputFName.substr(1, outputFName.length()-2);
429                         filename = filename.substr(1, filename.length()-2);
430                         alns = alns.substr(1, alns.length()-2);
431                         
432                         if (pDataArray->m->control_pressed) { if (pDataArray->hasCount) { delete cparser; } { delete parser; } return 0; }
433                         
434                         //create accnos file from uchime results
435                         ifstream in; 
436                         pDataArray->m->openInputFile(outputFName, in);
437                         
438                         ofstream out;
439                         pDataArray->m->openOutputFile(accnos, out);
440                         
441                         int num = 0;
442                         numChimeras = 0;
443                         while(!in.eof()) {
444                                 
445                                 if (pDataArray->m->control_pressed) { break; }
446                                 
447                                 string name = "";
448                                 string chimeraFlag = "";
449                                 in >> chimeraFlag >> name;
450                                 
451                                 //fix name 
452                                 name = name.substr(0, name.length()-1); //rip off last /
453                                 name = name.substr(0, name.find_last_of('/'));
454                                 
455                                 for (int j = 0; j < 15; j++) {  in >> chimeraFlag; }
456                                 pDataArray->m->gobble(in);
457                                 
458                                 if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
459                                 num++;
460                         }
461                         in.close();
462                         out.close();
463                         
464                         
465                         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
466                         totalSeqs += num;
467                         pDataArray->numChimeras += numChimeras;
468                         
469                         if (pDataArray->m->control_pressed) { if (pDataArray->hasCount) { delete cparser; } { delete parser; } return 0; }
470                         
471                         //remove file made for uchime
472                         pDataArray->m->mothurRemove(filename);
473                         
474                         //append files
475                         pDataArray->m->appendFiles(outputFName, pDataArray->outputFName); pDataArray->m->mothurRemove(outputFName);
476                         pDataArray->m->appendFiles(accnos, pDataArray->accnos); pDataArray->m->mothurRemove(accnos);
477                         if (pDataArray->chimealns) { pDataArray->m->appendFiles(alns, pDataArray->alns); pDataArray->m->mothurRemove(alns); }
478                         
479                         pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(num) + " sequences from group " + pDataArray->groups[i] + ".");    pDataArray->m->mothurOutEndLine();                                      
480                         
481                 }       
482                 
483                 pDataArray->count = totalSeqs;
484                 if (pDataArray->hasCount) { delete cparser; } { delete parser; }
485                 return totalSeqs;
486                 
487         }
488         catch(exception& e) {
489                 pDataArray->m->errorOut(e, "ChimeraUchimeCommand", "MyUchimeThreadFunction");
490                 exit(1);
491         }
492
493 /**************************************************************************************************/
494
495 static DWORD WINAPI MyUchimeSeqsThreadFunction(LPVOID lpParam){ 
496         uchimeData* pDataArray;
497         pDataArray = (uchimeData*)lpParam;
498         
499         try {
500                 
501                 pDataArray->outputFName = pDataArray->m->getFullPathName(pDataArray->outputFName);
502                 pDataArray->filename = pDataArray->m->getFullPathName(pDataArray->filename);
503                 pDataArray->alns = pDataArray->m->getFullPathName(pDataArray->alns);
504                 
505                 int totalSeqs = 0;
506                 int numChimeras = 0;
507         
508                 int start = time(NULL);  if (pDataArray->m->control_pressed) { return 0; }
509                         
510                 //to allow for spaces in the path
511                 string outputFName = "\"" + pDataArray->outputFName + "\"";
512                 string filename = "\"" + pDataArray->filename + "\"";
513                 string alns = "\"" + pDataArray->alns+ "\"";
514                 string templatefile = "\"" + pDataArray->templatefile + "\"";
515                 string accnos = pDataArray->accnos;
516                 
517                 vector<char*> cPara;
518                 
519                 string uchimeCommand = pDataArray->uchimeLocation;
520         uchimeCommand = "\"" + uchimeCommand + "\"";
521         
522         char* tempUchime;
523         tempUchime= new char[uchimeCommand.length()+1]; 
524         *tempUchime = '\0';
525         strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
526         cPara.push_back(tempUchime);
527                 
528         string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted";
529         //prepFile(filename.substr(1, filename.length()-2), outputFileName);
530         //prepFile(filename, outputFileName);
531         /******************************************/
532         ifstream in23;
533         m->openInputFile((filename.substr(1, filename.length()-2)), in23);
534         
535         ofstream out23;
536         m->openOutputFile(outputFileName, out23);
537         
538         while (!in23.eof()) {
539             if (m->control_pressed) { break;  }
540             
541             Sequence seq(in23); m->gobble(in23);
542             
543             if (seq.getName() != "") { seq.printSequence(out23); }
544         }
545         in23.close();
546         out23.close();
547         /******************************************/
548         
549         filename = outputFileName;
550         filename = "\"" + filename + "\"";
551         
552         //add reference file
553                 char* tempRef = new char[5]; 
554                 //strcpy(tempRef, "--db"); 
555                 *tempRef = '\0'; strncat(tempRef, "--db", 4);
556                 cPara.push_back(tempRef);  
557                 char* tempR = new char[templatefile.length()+1];
558                 //strcpy(tempR, templatefile.c_str());
559                 *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
560                 cPara.push_back(tempR);
561         
562                 char* tempIn = new char[8]; 
563                 *tempIn = '\0'; strncat(tempIn, "--input", 7);
564                 //strcpy(tempIn, "--input"); 
565                 cPara.push_back(tempIn);
566                 char* temp = new char[filename.length()+1];
567                 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
568                 //strcpy(temp, filename.c_str());
569                 cPara.push_back(temp);
570                 
571                 char* tempO = new char[12]; 
572                 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
573                 //strcpy(tempO, "--uchimeout"); 
574                 cPara.push_back(tempO);
575                 char* tempout = new char[outputFName.length()+1];
576                 //strcpy(tempout, outputFName.c_str());
577                 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
578                 cPara.push_back(tempout);
579                 
580                 if (pDataArray->chimealns) {
581                         char* tempA = new char[13]; 
582                         *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
583                         //strcpy(tempA, "--uchimealns"); 
584                         cPara.push_back(tempA);
585                         char* tempa = new char[alns.length()+1];
586                         //strcpy(tempa, alns.c_str());
587                         *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
588                         cPara.push_back(tempa);
589                 }
590                 
591                 if (pDataArray->useAbskew) {
592                         char* tempskew = new char[9];
593                         *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
594                         //strcpy(tempskew, "--abskew"); 
595                         cPara.push_back(tempskew);
596                         char* tempSkew = new char[pDataArray->abskew.length()+1];
597                         //strcpy(tempSkew, abskew.c_str());
598                         *tempSkew = '\0'; strncat(tempSkew, pDataArray->abskew.c_str(), pDataArray->abskew.length());
599                         cPara.push_back(tempSkew);
600                 }
601                 
602                 if (pDataArray->useMinH) {
603                         char* tempminh = new char[7]; 
604                         *tempminh = '\0'; strncat(tempminh, "--minh", 6);
605                         //strcpy(tempminh, "--minh"); 
606                         cPara.push_back(tempminh);
607                         char* tempMinH = new char[pDataArray->minh.length()+1];
608                         *tempMinH = '\0'; strncat(tempMinH, pDataArray->minh.c_str(), pDataArray->minh.length());
609                         //strcpy(tempMinH, minh.c_str());
610                         cPara.push_back(tempMinH);
611                 }
612                 
613                 if (pDataArray->useMindiv) {
614                         char* tempmindiv = new char[9]; 
615                         *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
616                         //strcpy(tempmindiv, "--mindiv"); 
617                         cPara.push_back(tempmindiv);
618                         char* tempMindiv = new char[pDataArray->mindiv.length()+1];
619                         *tempMindiv = '\0'; strncat(tempMindiv, pDataArray->mindiv.c_str(), pDataArray->mindiv.length());
620                         //strcpy(tempMindiv, mindiv.c_str());
621                         cPara.push_back(tempMindiv);
622                 }
623                 
624                 if (pDataArray->useXn) {
625                         char* tempxn = new char[5]; 
626                         //strcpy(tempxn, "--xn"); 
627                         *tempxn = '\0'; strncat(tempxn, "--xn", 4);
628                         cPara.push_back(tempxn);
629                         char* tempXn = new char[pDataArray->xn.length()+1];
630                         //strcpy(tempXn, xn.c_str());
631                         *tempXn = '\0'; strncat(tempXn, pDataArray->xn.c_str(), pDataArray->xn.length());
632                         cPara.push_back(tempXn);
633                 }
634                 
635                 if (pDataArray->useDn) {
636                         char* tempdn = new char[5]; 
637                         //strcpy(tempdn, "--dn"); 
638                         *tempdn = '\0'; strncat(tempdn, "--dn", 4);
639                         cPara.push_back(tempdn);
640                         char* tempDn = new char[pDataArray->dn.length()+1];
641                         *tempDn = '\0'; strncat(tempDn, pDataArray->dn.c_str(), pDataArray->dn.length());
642                         //strcpy(tempDn, dn.c_str());
643                         cPara.push_back(tempDn);
644                 }
645                 
646                 if (pDataArray->useXa) {
647                         char* tempxa = new char[5]; 
648                         //strcpy(tempxa, "--xa"); 
649                         *tempxa = '\0'; strncat(tempxa, "--xa", 4);
650                         cPara.push_back(tempxa);
651                         char* tempXa = new char[pDataArray->xa.length()+1];
652                         *tempXa = '\0'; strncat(tempXa, pDataArray->xa.c_str(), pDataArray->xa.length());
653                         //strcpy(tempXa, xa.c_str());
654                         cPara.push_back(tempXa);
655                 }
656                 
657                 if (pDataArray->useChunks) {
658                         char* tempchunks = new char[9]; 
659                         //strcpy(tempchunks, "--chunks"); 
660                         *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
661                         cPara.push_back(tempchunks);
662                         char* tempChunks = new char[pDataArray->chunks.length()+1];
663                         *tempChunks = '\0'; strncat(tempChunks, pDataArray->chunks.c_str(), pDataArray->chunks.length());
664                         //strcpy(tempChunks, chunks.c_str());
665                         cPara.push_back(tempChunks);
666                 }
667                 
668                 if (pDataArray->useMinchunk) {
669                         char* tempminchunk = new char[11]; 
670                         //strcpy(tempminchunk, "--minchunk"); 
671                         *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
672                         cPara.push_back(tempminchunk);
673                         char* tempMinchunk = new char[pDataArray->minchunk.length()+1];
674                         *tempMinchunk = '\0'; strncat(tempMinchunk, pDataArray->minchunk.c_str(), pDataArray->minchunk.length());
675                         //strcpy(tempMinchunk, minchunk.c_str());
676                         cPara.push_back(tempMinchunk);
677                 }
678                 
679                 if (pDataArray->useIdsmoothwindow) {
680                         char* tempidsmoothwindow = new char[17]; 
681                         *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
682                         //strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
683                         cPara.push_back(tempidsmoothwindow);
684                         char* tempIdsmoothwindow = new char[pDataArray->idsmoothwindow.length()+1];
685                         *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, pDataArray->idsmoothwindow.c_str(), pDataArray->idsmoothwindow.length());
686                         //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
687                         cPara.push_back(tempIdsmoothwindow);
688                 }
689                 
690                 if (pDataArray->useMaxp) {
691                         char* tempmaxp = new char[7]; 
692                         //strcpy(tempmaxp, "--maxp"); 
693                         *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
694                         cPara.push_back(tempmaxp);
695                         char* tempMaxp = new char[pDataArray->maxp.length()+1];
696                         *tempMaxp = '\0'; strncat(tempMaxp, pDataArray->maxp.c_str(), pDataArray->maxp.length());
697                         //strcpy(tempMaxp, maxp.c_str());
698                         cPara.push_back(tempMaxp);
699                 }
700                 
701                 if (!pDataArray->skipgaps) {
702                         char* tempskipgaps = new char[13]; 
703                         //strcpy(tempskipgaps, "--[no]skipgaps");
704                         *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
705                         cPara.push_back(tempskipgaps);
706                 }
707                 
708                 if (!pDataArray->skipgaps2) {
709                         char* tempskipgaps2 = new char[14]; 
710                         //strcpy(tempskipgaps2, "--[no]skipgaps2"); 
711                         *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
712                         cPara.push_back(tempskipgaps2);
713                 }
714                 
715                 if (pDataArray->useMinlen) {
716                         char* tempminlen = new char[9]; 
717                         *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
718                         //strcpy(tempminlen, "--minlen"); 
719                         cPara.push_back(tempminlen);
720                         char* tempMinlen = new char[pDataArray->minlen.length()+1];
721                         //strcpy(tempMinlen, minlen.c_str());
722                         *tempMinlen = '\0'; strncat(tempMinlen, pDataArray->minlen.c_str(), pDataArray->minlen.length());
723                         cPara.push_back(tempMinlen);
724                 }
725                 
726                 if (pDataArray->useMaxlen) {
727                         char* tempmaxlen = new char[9]; 
728                         //strcpy(tempmaxlen, "--maxlen"); 
729                         *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
730                         cPara.push_back(tempmaxlen);
731                         char* tempMaxlen = new char[pDataArray->maxlen.length()+1];
732                         *tempMaxlen = '\0'; strncat(tempMaxlen, pDataArray->maxlen.c_str(), pDataArray->maxlen.length());
733                         //strcpy(tempMaxlen, maxlen.c_str());
734                         cPara.push_back(tempMaxlen);
735                 }
736                 
737                 if (pDataArray->ucl) {
738                         char* tempucl = new char[5]; 
739                         strcpy(tempucl, "--ucl"); 
740                         cPara.push_back(tempucl);
741                 }
742                 
743                 if (pDataArray->useQueryfract) {
744                         char* tempqueryfract = new char[13]; 
745                         *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
746                         //strcpy(tempqueryfract, "--queryfract"); 
747                         cPara.push_back(tempqueryfract);
748                         char* tempQueryfract = new char[pDataArray->queryfract.length()+1];
749                         *tempQueryfract = '\0'; strncat(tempQueryfract, pDataArray->queryfract.c_str(), pDataArray->queryfract.length());
750                         //strcpy(tempQueryfract, queryfract.c_str());
751                         cPara.push_back(tempQueryfract);
752                 }
753                 
754                 
755                 char** uchimeParameters;
756                 uchimeParameters = new char*[cPara.size()];
757                 string commandString = "";
758                 for (int j = 0; j < cPara.size(); j++) {  uchimeParameters[j] = cPara[j];  commandString += toString(cPara[j]) + " "; } 
759                 //int numArgs = cPara.size();
760                 
761                 //uchime_main(numArgs, uchimeParameters); 
762                 //cout << "commandString = " << commandString << endl;
763         if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
764                 system(commandString.c_str());
765                 
766                 //free memory
767                 for(int j = 0; j < cPara.size(); j++)  {  delete cPara[j];  }
768                 delete[] uchimeParameters; 
769                 
770                 //remove "" from filenames
771                 outputFName = outputFName.substr(1, outputFName.length()-2);
772                 filename = filename.substr(1, filename.length()-2);
773                 alns = alns.substr(1, alns.length()-2);
774                 
775                 if (pDataArray->m->control_pressed) { return 0; }
776                 
777                 //create accnos file from uchime results
778                 ifstream in; 
779                 pDataArray->m->openInputFile(outputFName, in);
780                 
781                 ofstream out;
782                 pDataArray->m->openOutputFile(accnos, out);
783                 
784                 numChimeras = 0;
785                 while(!in.eof()) {
786                         
787                         if (pDataArray->m->control_pressed) { break; }
788                         
789                         string name = "";
790                         string chimeraFlag = "";
791                         in >> chimeraFlag >> name;
792                         
793                         for (int j = 0; j < 15; j++) {  in >> chimeraFlag; }
794                         pDataArray->m->gobble(in);
795                         
796                         if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
797                         totalSeqs++;
798                 }
799                 in.close();
800                 out.close();
801                 
802                 if (pDataArray->m->control_pressed) { return 0; }
803                 
804                 pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences.");       pDataArray->m->mothurOutEndLine();                                      
805         
806                 pDataArray->count = totalSeqs;
807                 pDataArray->numChimeras = numChimeras;
808                 return totalSeqs;
809                 
810         }
811         catch(exception& e) {
812                 pDataArray->m->errorOut(e, "ChimeraUchimeCommand", "MyUchimeSeqsThreadFunction");
813                 exit(1);
814         }
815
816
817 #endif
818
819 /**************************************************************************************************/
820
821
822 #endif
823
824