]> git.donarmstrong.com Git - mothur.git/blob - chimerauchimecommand.h
735c97d1269848b94ac5a01926f93e08d1ccd5ef
[mothur.git] / chimerauchimecommand.h
1 #ifndef CHIMERAUCHIMECOMMAND_H
2 #define CHIMERAUCHIMECOMMAND_H
3
4
5 /*
6  *  chimerauchimecommand.h
7  *  Mothur
8  *
9  *  Created by westcott on 5/13/11.
10  *  Copyright 2011 Schloss Lab. All rights reserved.
11  *
12  */
13
14 #include "mothur.h"
15 #include "command.hpp"
16 #include "sequenceparser.h"
17 #include "counttable.h"
18 #include "sequencecountparser.h"
19
20 /***********************************************************/
21
22 class ChimeraUchimeCommand : public Command {
23 public:
24         ChimeraUchimeCommand(string);
25         ChimeraUchimeCommand();
26         ~ChimeraUchimeCommand() {}
27         
28         vector<string> setParameters();
29         string getCommandName()                 { return "chimera.uchime";              }
30         string getCommandCategory()             { return "Sequence Processing"; }
31         
32         string getHelpString(); 
33     string getOutputPattern(string);    
34         string getCitation() { return "uchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code was donated to the public domain.\nEdgar,R.C., Haas,B.J., Clemente,J.C., Quince,C. and Knight,R. (2011), UCHIME improves sensitivity and speed of chimera detection.  Bioinformatics 27:2194.\nhttp://www.mothur.org/wiki/Chimera.uchime\n"; }
35         string getDescription()         { return "detect chimeric sequences"; }
36         
37         int execute(); 
38         void help() { m->mothurOut(getHelpString()); }          
39         
40 private:
41         struct linePair {
42                 int start;
43                 int end;
44                 linePair(int i, int j) : start(i), end(j) {}
45         };
46         
47         vector<int> processIDS;   //processid
48         int driver(string, string, string, string, int&);
49         int createProcesses(string, string, string, string, int&);
50                 
51         bool abort, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount, hasName, dups;
52         string fastafile, groupfile, templatefile, outputDir, namefile, countfile, abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, uchimeLocation;
53         int processors;
54         
55         SequenceParser* sparser;
56     SequenceCountParser* cparser;
57         vector<string> outputNames;
58         vector<string> fastaFileNames;
59         vector<string> nameFileNames;
60         vector<string> groupFileNames;
61         
62         string getNamesFile(string&);
63         int readFasta(string, map<string, string>&);
64         int printFile(vector<seqPriorityNode>&, string);
65         int deconvoluteResults(map<string, string>&, string, string, string);
66         int driverGroups(string, string, string, string, int, int, vector<string>);
67         int createProcessesGroups(string, string, string, string, vector<string>, string, string, string);
68     int prepFile(string filename, string);
69
70
71 };
72
73 /***********************************************************/
74 /**************************************************************************************************/
75 //custom data structure for threads to use.
76 // This is passed by void pointer so it can be any data type
77 // that can be passed using a single void pointer (LPVOID).
// Parameter/result bundle handed to a worker thread through a single void
// pointer (LPVOID). Inputs are filled in by the constructor, setBooleans()
// and setVariables(); the thread functions write their results back into
// `count` and `numChimeras`.
struct uchimeData {
	string fastafile; 
	string namefile; 
	string groupfile;
	string outputFName;
	string accnos, alns, filename, templatefile, uchimeLocation;
	MothurOut* m;
	int start;              // first index into `groups` to process
	int end;                // one past the last index into `groups`
	int threadID, count, numChimeras;
	vector<string> groups;
	bool useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount;
	string abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract;
	
	// Every scalar member gets a defined value so that an object which was
	// never configured cannot be read in an indeterminate state.
	uchimeData()
		: m(NULL), start(0), end(0), threadID(0), count(0), numChimeras(0) {
		resetFlags();
	}
	
	// o = output file, uloc = uchime executable, t = template file,
	// file = working file name, f = fasta, n = name (or count) file, g = group file,
	// ac = accnos file, al = alignment output, gr = groups, mout = output handle,
	// st/en = range of group indices, tid = thread id.
	uchimeData(string o, string uloc, string t, string file, string f, string n, string g, string ac,  string al, vector<string> gr, MothurOut* mout, int st, int en, int tid)
		: fastafile(f), namefile(n), groupfile(g), outputFName(o),
		  accnos(ac), alns(al), filename(file), templatefile(t), uchimeLocation(uloc),
		  m(mout), start(st), end(en), threadID(tid), count(0), numChimeras(0),
		  groups(gr) {
		// The flags are normally supplied afterwards via setBooleans(); give
		// them defined values in case that call is skipped.
		resetFlags();
	}
	
	// Stores the option switches. Argument order matches the declaration
	// order of the corresponding members.
	void setBooleans(bool Abskew, bool calns, bool MinH, bool Mindiv, bool Xn, bool Dn, bool Xa, bool Chunks, bool Minchunk, bool Idsmoothwindow, bool Minsmoothid, bool Maxp, bool skipgap, bool skipgap2, bool Minlen, bool Maxlen, bool uc, bool Queryfract, bool hc) {
		useAbskew = Abskew;
		chimealns = calns;
		useMinH = MinH;
		useMindiv = Mindiv;
		useXn = Xn;
		useDn = Dn;
		useXa = Xa;
		useChunks = Chunks;
		useMinchunk = Minchunk;
		useIdsmoothwindow = Idsmoothwindow;
		useMinsmoothid = Minsmoothid;
		useMaxp = Maxp;
		skipgaps = skipgap;
		skipgaps2 = skipgap2;
		useMinlen = Minlen;
		useMaxlen = Maxlen;
		ucl = uc;
		useQueryfract = Queryfract;
		hasCount = hc;
	}
	
	// Stores the option values (kept as text) that accompany the use* flags.
	void setVariables(string abske, string min, string mindi, string x, string d, string xa2, string chunk, string minchun, string idsmoothwindo, string minsmoothi, string max, string minle, string maxle, string queryfrac) {
		abskew = abske;
		minh = min;
		mindiv = mindi;
		xn = x;
		dn = d;
		xa = xa2;
		chunks = chunk;
		minchunk = minchun;
		idsmoothwindow = idsmoothwindo;
		minsmoothid = minsmoothi;
		maxp = max;
		minlen = minle;
		maxlen = maxle;
		queryfract = queryfrac;
	}
	
private:
	// Defaults chosen so that no optional argument is added to the uchime
	// command line: the thread function emits "--noskipgaps" /
	// "--noskipgaps2" only when skipgaps / skipgaps2 are false, and every
	// other option only when its flag is true.
	void resetFlags() {
		useAbskew = chimealns = useMinH = useMindiv = useXn = useDn = useXa = false;
		useChunks = useMinchunk = useIdsmoothwindow = useMinsmoothid = useMaxp = false;
		skipgaps = skipgaps2 = true;
		useMinlen = useMaxlen = ucl = useQueryfract = hasCount = false;
	}
};
150
151 /**************************************************************************************************/
152 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
153 #else
154 static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){ 
155         uchimeData* pDataArray;
156         pDataArray = (uchimeData*)lpParam;
157         
158         try {
159                 
160                 pDataArray->outputFName = pDataArray->m->getFullPathName(pDataArray->outputFName);
161                 pDataArray->filename = pDataArray->m->getFullPathName(pDataArray->filename);
162                 pDataArray->alns = pDataArray->m->getFullPathName(pDataArray->alns);
163                 
164                 //clears files
165                 ofstream out, out1, out2;
166                 pDataArray->m->openOutputFile(pDataArray->outputFName, out); out.close(); 
167                 pDataArray->m->openOutputFile(pDataArray->accnos, out1); out1.close();
168                 if (pDataArray->chimealns) { pDataArray->m->openOutputFile(pDataArray->alns, out2); out2.close(); }
169                 
170                 //parse fasta and name file by group
171                 SequenceParser* parser;
172         SequenceCountParser* cparser;
173                 if (pDataArray->hasCount) {
174             CountTable* ct = new CountTable();
175             ct->readTable(pDataArray->namefile);
176             cparser = new SequenceCountParser(pDataArray->fastafile, *ct);
177             delete ct;
178         }else {
179             if (pDataArray->namefile != "") { parser = new SequenceParser(pDataArray->groupfile, pDataArray->fastafile, pDataArray->namefile);  }
180             else                                                        { parser = new SequenceParser(pDataArray->groupfile, pDataArray->fastafile);                                            }
181         }
182                 
183                 int totalSeqs = 0;
184                 int numChimeras = 0;
185                 
186                 for (int i = pDataArray->start; i < pDataArray->end; i++) {
187                         int start = time(NULL);  if (pDataArray->m->control_pressed) {  if (pDataArray->hasCount) { delete cparser; } { delete parser; } return 0; }
188                         
189             
190                         int error;
191             if (pDataArray->hasCount) { 
192                 error = cparser->getSeqs(pDataArray->groups[i], pDataArray->filename, true); if ((error == 1) || pDataArray->m->control_pressed) {  delete cparser; return 0; }
193             }else {
194                error = parser->getSeqs(pDataArray->groups[i], pDataArray->filename, true); if ((error == 1) || pDataArray->m->control_pressed) {  delete parser; return 0; } 
195             }
196                         
197                         //int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
198                         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
199                         
200                         //to allow for spaces in the path
201                         string outputFName = "\"" + pDataArray->outputFName+pDataArray->groups[i] + "\"";
202                         string filename = "\"" + pDataArray->filename + "\"";
203                         string alns = "\"" + pDataArray->alns+pDataArray->groups[i] + "\"";
204                         string accnos = pDataArray->accnos+pDataArray->groups[i];
205                         
206                         vector<char*> cPara;
207                         
208             string uchimeCommand = pDataArray->uchimeLocation;
209             uchimeCommand = "\"" + uchimeCommand + "\"";
210                         
211                         char* tempUchime;
212                         tempUchime= new char[uchimeCommand.length()+1]; 
213                         *tempUchime = '\0';
214                         strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
215                         cPara.push_back(tempUchime);
216                         
217                         char* tempIn = new char[8]; 
218                         *tempIn = '\0'; strncat(tempIn, "--input", 7);
219                         //strcpy(tempIn, "--input"); 
220                         cPara.push_back(tempIn);
221                         char* temp = new char[filename.length()+1];
222                         *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
223                         //strcpy(temp, filename.c_str());
224                         cPara.push_back(temp);
225                         
226                         char* tempO = new char[12]; 
227                         *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
228                         //strcpy(tempO, "--uchimeout"); 
229                         cPara.push_back(tempO);
230                         char* tempout = new char[outputFName.length()+1];
231                         //strcpy(tempout, outputFName.c_str());
232                         *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
233                         cPara.push_back(tempout);
234                         
235                         if (pDataArray->chimealns) {
236                                 char* tempA = new char[13]; 
237                                 *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
238                                 //strcpy(tempA, "--uchimealns"); 
239                                 cPara.push_back(tempA);
240                                 char* tempa = new char[alns.length()+1];
241                                 //strcpy(tempa, alns.c_str());
242                                 *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
243                                 cPara.push_back(tempa);
244                         }
245                         
246                         if (pDataArray->useAbskew) {
247                                 char* tempskew = new char[9];
248                                 *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
249                                 //strcpy(tempskew, "--abskew"); 
250                                 cPara.push_back(tempskew);
251                                 char* tempSkew = new char[pDataArray->abskew.length()+1];
252                                 //strcpy(tempSkew, abskew.c_str());
253                                 *tempSkew = '\0'; strncat(tempSkew, pDataArray->abskew.c_str(), pDataArray->abskew.length());
254                                 cPara.push_back(tempSkew);
255                         }
256                         
257                         if (pDataArray->useMinH) {
258                                 char* tempminh = new char[7]; 
259                                 *tempminh = '\0'; strncat(tempminh, "--minh", 6);
260                                 //strcpy(tempminh, "--minh"); 
261                                 cPara.push_back(tempminh);
262                                 char* tempMinH = new char[pDataArray->minh.length()+1];
263                                 *tempMinH = '\0'; strncat(tempMinH, pDataArray->minh.c_str(), pDataArray->minh.length());
264                                 //strcpy(tempMinH, minh.c_str());
265                                 cPara.push_back(tempMinH);
266                         }
267                         
268                         if (pDataArray->useMindiv) {
269                                 char* tempmindiv = new char[9]; 
270                                 *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
271                                 //strcpy(tempmindiv, "--mindiv"); 
272                                 cPara.push_back(tempmindiv);
273                                 char* tempMindiv = new char[pDataArray->mindiv.length()+1];
274                                 *tempMindiv = '\0'; strncat(tempMindiv, pDataArray->mindiv.c_str(), pDataArray->mindiv.length());
275                                 //strcpy(tempMindiv, mindiv.c_str());
276                                 cPara.push_back(tempMindiv);
277                         }
278                         
279                         if (pDataArray->useXn) {
280                                 char* tempxn = new char[5]; 
281                                 //strcpy(tempxn, "--xn"); 
282                                 *tempxn = '\0'; strncat(tempxn, "--xn", 4);
283                                 cPara.push_back(tempxn);
284                                 char* tempXn = new char[pDataArray->xn.length()+1];
285                                 //strcpy(tempXn, xn.c_str());
286                                 *tempXn = '\0'; strncat(tempXn, pDataArray->xn.c_str(), pDataArray->xn.length());
287                                 cPara.push_back(tempXn);
288                         }
289                         
290                         if (pDataArray->useDn) {
291                                 char* tempdn = new char[5]; 
292                                 //strcpy(tempdn, "--dn"); 
293                                 *tempdn = '\0'; strncat(tempdn, "--dn", 4);
294                                 cPara.push_back(tempdn);
295                                 char* tempDn = new char[pDataArray->dn.length()+1];
296                                 *tempDn = '\0'; strncat(tempDn, pDataArray->dn.c_str(), pDataArray->dn.length());
297                                 //strcpy(tempDn, dn.c_str());
298                                 cPara.push_back(tempDn);
299                         }
300                         
301                         if (pDataArray->useXa) {
302                                 char* tempxa = new char[5]; 
303                                 //strcpy(tempxa, "--xa"); 
304                                 *tempxa = '\0'; strncat(tempxa, "--xa", 4);
305                                 cPara.push_back(tempxa);
306                                 char* tempXa = new char[pDataArray->xa.length()+1];
307                                 *tempXa = '\0'; strncat(tempXa, pDataArray->xa.c_str(), pDataArray->xa.length());
308                                 //strcpy(tempXa, xa.c_str());
309                                 cPara.push_back(tempXa);
310                         }
311                         
312                         if (pDataArray->useChunks) {
313                                 char* tempchunks = new char[9]; 
314                                 //strcpy(tempchunks, "--chunks"); 
315                                 *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
316                                 cPara.push_back(tempchunks);
317                                 char* tempChunks = new char[pDataArray->chunks.length()+1];
318                                 *tempChunks = '\0'; strncat(tempChunks, pDataArray->chunks.c_str(), pDataArray->chunks.length());
319                                 //strcpy(tempChunks, chunks.c_str());
320                                 cPara.push_back(tempChunks);
321                         }
322                         
323                         if (pDataArray->useMinchunk) {
324                                 char* tempminchunk = new char[11]; 
325                                 //strcpy(tempminchunk, "--minchunk"); 
326                                 *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
327                                 cPara.push_back(tempminchunk);
328                                 char* tempMinchunk = new char[pDataArray->minchunk.length()+1];
329                                 *tempMinchunk = '\0'; strncat(tempMinchunk, pDataArray->minchunk.c_str(), pDataArray->minchunk.length());
330                                 //strcpy(tempMinchunk, minchunk.c_str());
331                                 cPara.push_back(tempMinchunk);
332                         }
333                         
334                         if (pDataArray->useIdsmoothwindow) {
335                                 char* tempidsmoothwindow = new char[17]; 
336                                 *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
337                                 //strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
338                                 cPara.push_back(tempidsmoothwindow);
339                                 char* tempIdsmoothwindow = new char[pDataArray->idsmoothwindow.length()+1];
340                                 *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, pDataArray->idsmoothwindow.c_str(), pDataArray->idsmoothwindow.length());
341                                 //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
342                                 cPara.push_back(tempIdsmoothwindow);
343                         }
344                         
345                         if (pDataArray->useMaxp) {
346                                 char* tempmaxp = new char[7]; 
347                                 //strcpy(tempmaxp, "--maxp"); 
348                                 *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
349                                 cPara.push_back(tempmaxp);
350                                 char* tempMaxp = new char[pDataArray->maxp.length()+1];
351                                 *tempMaxp = '\0'; strncat(tempMaxp, pDataArray->maxp.c_str(), pDataArray->maxp.length());
352                                 //strcpy(tempMaxp, maxp.c_str());
353                                 cPara.push_back(tempMaxp);
354                         }
355                         
356                         if (!pDataArray->skipgaps) {
357                                 char* tempskipgaps = new char[13]; 
358                                 //strcpy(tempskipgaps, "--[no]skipgaps");
359                                 *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
360                                 cPara.push_back(tempskipgaps);
361                         }
362                         
363                         if (!pDataArray->skipgaps2) {
364                                 char* tempskipgaps2 = new char[14]; 
365                                 //strcpy(tempskipgaps2, "--[no]skipgaps2"); 
366                                 *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
367                                 cPara.push_back(tempskipgaps2);
368                         }
369                         
370                         if (pDataArray->useMinlen) {
371                                 char* tempminlen = new char[9]; 
372                                 *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
373                                 //strcpy(tempminlen, "--minlen"); 
374                                 cPara.push_back(tempminlen);
375                                 char* tempMinlen = new char[pDataArray->minlen.length()+1];
376                                 //strcpy(tempMinlen, minlen.c_str());
377                                 *tempMinlen = '\0'; strncat(tempMinlen, pDataArray->minlen.c_str(), pDataArray->minlen.length());
378                                 cPara.push_back(tempMinlen);
379                         }
380                         
381                         if (pDataArray->useMaxlen) {
382                                 char* tempmaxlen = new char[9]; 
383                                 //strcpy(tempmaxlen, "--maxlen"); 
384                                 *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
385                                 cPara.push_back(tempmaxlen);
386                                 char* tempMaxlen = new char[pDataArray->maxlen.length()+1];
387                                 *tempMaxlen = '\0'; strncat(tempMaxlen, pDataArray->maxlen.c_str(), pDataArray->maxlen.length());
388                                 //strcpy(tempMaxlen, maxlen.c_str());
389                                 cPara.push_back(tempMaxlen);
390                         }
391                         
392                         if (pDataArray->ucl) {
393                                 char* tempucl = new char[5]; 
394                                 strcpy(tempucl, "--ucl"); 
395                                 cPara.push_back(tempucl);
396                         }
397                         
398                         if (pDataArray->useQueryfract) {
399                                 char* tempqueryfract = new char[13]; 
400                                 *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
401                                 //strcpy(tempqueryfract, "--queryfract"); 
402                                 cPara.push_back(tempqueryfract);
403                                 char* tempQueryfract = new char[pDataArray->queryfract.length()+1];
404                                 *tempQueryfract = '\0'; strncat(tempQueryfract, pDataArray->queryfract.c_str(), pDataArray->queryfract.length());
405                                 //strcpy(tempQueryfract, queryfract.c_str());
406                                 cPara.push_back(tempQueryfract);
407                         }
408                         
409                         
410                         char** uchimeParameters;
411                         uchimeParameters = new char*[cPara.size()];
412                         string commandString = "";
413                         for (int j = 0; j < cPara.size(); j++) {  uchimeParameters[j] = cPara[j];  commandString += toString(cPara[j]) + " "; } 
414                         //int numArgs = cPara.size();
415                         
416                         //uchime_main(numArgs, uchimeParameters); 
417                         //cout << "commandString = " << commandString << endl;
418                         commandString = "\"" + commandString + "\"";
419             
420             if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
421             
422                         system(commandString.c_str());
423                         
424                         //free memory
425                         for(int j = 0; j < cPara.size(); j++)  {  delete cPara[j];  }
426                         delete[] uchimeParameters; 
427                         
428                         //remove "" from filenames
429                         outputFName = outputFName.substr(1, outputFName.length()-2);
430                         filename = filename.substr(1, filename.length()-2);
431                         alns = alns.substr(1, alns.length()-2);
432                         
433                         if (pDataArray->m->control_pressed) { if (pDataArray->hasCount) { delete cparser; } { delete parser; } return 0; }
434                         
435                         //create accnos file from uchime results
436                         ifstream in; 
437                         pDataArray->m->openInputFile(outputFName, in);
438                         
439                         ofstream out;
440                         pDataArray->m->openOutputFile(accnos, out);
441                         
442                         int num = 0;
443                         numChimeras = 0;
444                         while(!in.eof()) {
445                                 
446                                 if (pDataArray->m->control_pressed) { break; }
447                                 
448                                 string name = "";
449                                 string chimeraFlag = "";
450                                 in >> chimeraFlag >> name;
451                                 
452                                 //fix name 
453                                 name = name.substr(0, name.length()-1); //rip off last /
454                                 name = name.substr(0, name.find_last_of('/'));
455                                 
456                                 for (int j = 0; j < 15; j++) {  in >> chimeraFlag; }
457                                 pDataArray->m->gobble(in);
458                                 
459                                 if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
460                                 num++;
461                         }
462                         in.close();
463                         out.close();
464                         
465                         
466                         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
467                         totalSeqs += num;
468                         pDataArray->numChimeras += numChimeras;
469                         
470                         if (pDataArray->m->control_pressed) { if (pDataArray->hasCount) { delete cparser; } { delete parser; } return 0; }
471                         
472                         //remove file made for uchime
473                         pDataArray->m->mothurRemove(filename);
474                         
475                         //append files
476                         pDataArray->m->appendFiles(outputFName, pDataArray->outputFName); pDataArray->m->mothurRemove(outputFName);
477                         pDataArray->m->appendFiles(accnos, pDataArray->accnos); pDataArray->m->mothurRemove(accnos);
478                         if (pDataArray->chimealns) { pDataArray->m->appendFiles(alns, pDataArray->alns); pDataArray->m->mothurRemove(alns); }
479                         
480                         pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(num) + " sequences from group " + pDataArray->groups[i] + ".");    pDataArray->m->mothurOutEndLine();                                      
481                         
482                 }       
483                 
484                 pDataArray->count = totalSeqs;
485                 if (pDataArray->hasCount) { delete cparser; } { delete parser; }
486                 return totalSeqs;
487                 
488         }
489         catch(exception& e) {
490                 pDataArray->m->errorOut(e, "ChimeraUchimeCommand", "MyUchimeThreadFunction");
491                 exit(1);
492         }
493
494 /**************************************************************************************************/
495
496 static DWORD WINAPI MyUchimeSeqsThreadFunction(LPVOID lpParam){ 
497         uchimeData* pDataArray;
498         pDataArray = (uchimeData*)lpParam;
499         
500         try {
501                 
502                 pDataArray->outputFName = pDataArray->m->getFullPathName(pDataArray->outputFName);
503                 pDataArray->filename = pDataArray->m->getFullPathName(pDataArray->filename);
504                 pDataArray->alns = pDataArray->m->getFullPathName(pDataArray->alns);
505                 
506                 int totalSeqs = 0;
507                 int numChimeras = 0;
508         
509                 int start = time(NULL);  if (pDataArray->m->control_pressed) { return 0; }
510                         
511                 //to allow for spaces in the path
512                 string outputFName = "\"" + pDataArray->outputFName + "\"";
513                 string filename = "\"" + pDataArray->filename + "\"";
514                 string alns = "\"" + pDataArray->alns+ "\"";
515                 string templatefile = "\"" + pDataArray->templatefile + "\"";
516                 string accnos = pDataArray->accnos;
517                 
518                 vector<char*> cPara;
519                 
520                 string uchimeCommand = pDataArray->uchimeLocation;
521         uchimeCommand = "\"" + uchimeCommand + "\"";
522         
523         char* tempUchime;
524         tempUchime= new char[uchimeCommand.length()+1]; 
525         *tempUchime = '\0';
526         strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
527         cPara.push_back(tempUchime);
528                 
529         string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted";
530         //prepFile(filename.substr(1, filename.length()-2), outputFileName);
531         //prepFile(filename, outputFileName);
532         /******************************************/
533         ifstream in23;
534         pDataArray->m->openInputFile((filename.substr(1, filename.length()-2)), in23);
535         
536         ofstream out23;
537         pDataArray->m->openOutputFile(outputFileName, out23);
538         
539         while (!in23.eof()) {
540             if (pDataArray->m->control_pressed) { break;  }
541             
542             Sequence seq(in23); pDataArray->m->gobble(in23);
543             
544             if (seq.getName() != "") { seq.printSequence(out23); }
545         }
546         in23.close();
547         out23.close();
548         /******************************************/
549         
550         filename = outputFileName;
551         filename = "\"" + filename + "\"";
552         
553         //add reference file
554                 char* tempRef = new char[5]; 
555                 //strcpy(tempRef, "--db"); 
556                 *tempRef = '\0'; strncat(tempRef, "--db", 4);
557                 cPara.push_back(tempRef);  
558                 char* tempR = new char[templatefile.length()+1];
559                 //strcpy(tempR, templatefile.c_str());
560                 *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length());
561                 cPara.push_back(tempR);
562         
563                 char* tempIn = new char[8]; 
564                 *tempIn = '\0'; strncat(tempIn, "--input", 7);
565                 //strcpy(tempIn, "--input"); 
566                 cPara.push_back(tempIn);
567                 char* temp = new char[filename.length()+1];
568                 *temp = '\0'; strncat(temp, filename.c_str(), filename.length());
569                 //strcpy(temp, filename.c_str());
570                 cPara.push_back(temp);
571                 
572                 char* tempO = new char[12]; 
573                 *tempO = '\0'; strncat(tempO, "--uchimeout", 11);
574                 //strcpy(tempO, "--uchimeout"); 
575                 cPara.push_back(tempO);
576                 char* tempout = new char[outputFName.length()+1];
577                 //strcpy(tempout, outputFName.c_str());
578                 *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length());
579                 cPara.push_back(tempout);
580                 
581                 if (pDataArray->chimealns) {
582                         char* tempA = new char[13]; 
583                         *tempA = '\0'; strncat(tempA, "--uchimealns", 12);
584                         //strcpy(tempA, "--uchimealns"); 
585                         cPara.push_back(tempA);
586                         char* tempa = new char[alns.length()+1];
587                         //strcpy(tempa, alns.c_str());
588                         *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
589                         cPara.push_back(tempa);
590                 }
591                 
592                 if (pDataArray->useAbskew) {
593                         char* tempskew = new char[9];
594                         *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
595                         //strcpy(tempskew, "--abskew"); 
596                         cPara.push_back(tempskew);
597                         char* tempSkew = new char[pDataArray->abskew.length()+1];
598                         //strcpy(tempSkew, abskew.c_str());
599                         *tempSkew = '\0'; strncat(tempSkew, pDataArray->abskew.c_str(), pDataArray->abskew.length());
600                         cPara.push_back(tempSkew);
601                 }
602                 
603                 if (pDataArray->useMinH) {
604                         char* tempminh = new char[7]; 
605                         *tempminh = '\0'; strncat(tempminh, "--minh", 6);
606                         //strcpy(tempminh, "--minh"); 
607                         cPara.push_back(tempminh);
608                         char* tempMinH = new char[pDataArray->minh.length()+1];
609                         *tempMinH = '\0'; strncat(tempMinH, pDataArray->minh.c_str(), pDataArray->minh.length());
610                         //strcpy(tempMinH, minh.c_str());
611                         cPara.push_back(tempMinH);
612                 }
613                 
614                 if (pDataArray->useMindiv) {
615                         char* tempmindiv = new char[9]; 
616                         *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8);
617                         //strcpy(tempmindiv, "--mindiv"); 
618                         cPara.push_back(tempmindiv);
619                         char* tempMindiv = new char[pDataArray->mindiv.length()+1];
620                         *tempMindiv = '\0'; strncat(tempMindiv, pDataArray->mindiv.c_str(), pDataArray->mindiv.length());
621                         //strcpy(tempMindiv, mindiv.c_str());
622                         cPara.push_back(tempMindiv);
623                 }
624                 
625                 if (pDataArray->useXn) {
626                         char* tempxn = new char[5]; 
627                         //strcpy(tempxn, "--xn"); 
628                         *tempxn = '\0'; strncat(tempxn, "--xn", 4);
629                         cPara.push_back(tempxn);
630                         char* tempXn = new char[pDataArray->xn.length()+1];
631                         //strcpy(tempXn, xn.c_str());
632                         *tempXn = '\0'; strncat(tempXn, pDataArray->xn.c_str(), pDataArray->xn.length());
633                         cPara.push_back(tempXn);
634                 }
635                 
636                 if (pDataArray->useDn) {
637                         char* tempdn = new char[5]; 
638                         //strcpy(tempdn, "--dn"); 
639                         *tempdn = '\0'; strncat(tempdn, "--dn", 4);
640                         cPara.push_back(tempdn);
641                         char* tempDn = new char[pDataArray->dn.length()+1];
642                         *tempDn = '\0'; strncat(tempDn, pDataArray->dn.c_str(), pDataArray->dn.length());
643                         //strcpy(tempDn, dn.c_str());
644                         cPara.push_back(tempDn);
645                 }
646                 
647                 if (pDataArray->useXa) {
648                         char* tempxa = new char[5]; 
649                         //strcpy(tempxa, "--xa"); 
650                         *tempxa = '\0'; strncat(tempxa, "--xa", 4);
651                         cPara.push_back(tempxa);
652                         char* tempXa = new char[pDataArray->xa.length()+1];
653                         *tempXa = '\0'; strncat(tempXa, pDataArray->xa.c_str(), pDataArray->xa.length());
654                         //strcpy(tempXa, xa.c_str());
655                         cPara.push_back(tempXa);
656                 }
657                 
658                 if (pDataArray->useChunks) {
659                         char* tempchunks = new char[9]; 
660                         //strcpy(tempchunks, "--chunks"); 
661                         *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8);
662                         cPara.push_back(tempchunks);
663                         char* tempChunks = new char[pDataArray->chunks.length()+1];
664                         *tempChunks = '\0'; strncat(tempChunks, pDataArray->chunks.c_str(), pDataArray->chunks.length());
665                         //strcpy(tempChunks, chunks.c_str());
666                         cPara.push_back(tempChunks);
667                 }
668                 
669                 if (pDataArray->useMinchunk) {
670                         char* tempminchunk = new char[11]; 
671                         //strcpy(tempminchunk, "--minchunk"); 
672                         *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10);
673                         cPara.push_back(tempminchunk);
674                         char* tempMinchunk = new char[pDataArray->minchunk.length()+1];
675                         *tempMinchunk = '\0'; strncat(tempMinchunk, pDataArray->minchunk.c_str(), pDataArray->minchunk.length());
676                         //strcpy(tempMinchunk, minchunk.c_str());
677                         cPara.push_back(tempMinchunk);
678                 }
679                 
680                 if (pDataArray->useIdsmoothwindow) {
681                         char* tempidsmoothwindow = new char[17]; 
682                         *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16);
683                         //strcpy(tempidsmoothwindow, "--idsmoothwindow"); 
684                         cPara.push_back(tempidsmoothwindow);
685                         char* tempIdsmoothwindow = new char[pDataArray->idsmoothwindow.length()+1];
686                         *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, pDataArray->idsmoothwindow.c_str(), pDataArray->idsmoothwindow.length());
687                         //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str());
688                         cPara.push_back(tempIdsmoothwindow);
689                 }
690                 
691                 if (pDataArray->useMaxp) {
692                         char* tempmaxp = new char[7]; 
693                         //strcpy(tempmaxp, "--maxp"); 
694                         *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6);
695                         cPara.push_back(tempmaxp);
696                         char* tempMaxp = new char[pDataArray->maxp.length()+1];
697                         *tempMaxp = '\0'; strncat(tempMaxp, pDataArray->maxp.c_str(), pDataArray->maxp.length());
698                         //strcpy(tempMaxp, maxp.c_str());
699                         cPara.push_back(tempMaxp);
700                 }
701                 
702                 if (!pDataArray->skipgaps) {
703                         char* tempskipgaps = new char[13]; 
704                         //strcpy(tempskipgaps, "--[no]skipgaps");
705                         *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12);
706                         cPara.push_back(tempskipgaps);
707                 }
708                 
709                 if (!pDataArray->skipgaps2) {
710                         char* tempskipgaps2 = new char[14]; 
711                         //strcpy(tempskipgaps2, "--[no]skipgaps2"); 
712                         *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13);
713                         cPara.push_back(tempskipgaps2);
714                 }
715                 
716                 if (pDataArray->useMinlen) {
717                         char* tempminlen = new char[9]; 
718                         *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8);
719                         //strcpy(tempminlen, "--minlen"); 
720                         cPara.push_back(tempminlen);
721                         char* tempMinlen = new char[pDataArray->minlen.length()+1];
722                         //strcpy(tempMinlen, minlen.c_str());
723                         *tempMinlen = '\0'; strncat(tempMinlen, pDataArray->minlen.c_str(), pDataArray->minlen.length());
724                         cPara.push_back(tempMinlen);
725                 }
726                 
727                 if (pDataArray->useMaxlen) {
728                         char* tempmaxlen = new char[9]; 
729                         //strcpy(tempmaxlen, "--maxlen"); 
730                         *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8);
731                         cPara.push_back(tempmaxlen);
732                         char* tempMaxlen = new char[pDataArray->maxlen.length()+1];
733                         *tempMaxlen = '\0'; strncat(tempMaxlen, pDataArray->maxlen.c_str(), pDataArray->maxlen.length());
734                         //strcpy(tempMaxlen, maxlen.c_str());
735                         cPara.push_back(tempMaxlen);
736                 }
737                 
738                 if (pDataArray->ucl) {
739                         char* tempucl = new char[5]; 
740                         strcpy(tempucl, "--ucl"); 
741                         cPara.push_back(tempucl);
742                 }
743                 
744                 if (pDataArray->useQueryfract) {
745                         char* tempqueryfract = new char[13]; 
746                         *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12);
747                         //strcpy(tempqueryfract, "--queryfract"); 
748                         cPara.push_back(tempqueryfract);
749                         char* tempQueryfract = new char[pDataArray->queryfract.length()+1];
750                         *tempQueryfract = '\0'; strncat(tempQueryfract, pDataArray->queryfract.c_str(), pDataArray->queryfract.length());
751                         //strcpy(tempQueryfract, queryfract.c_str());
752                         cPara.push_back(tempQueryfract);
753                 }
754                 
755                 
756                 char** uchimeParameters;
757                 uchimeParameters = new char*[cPara.size()];
758                 string commandString = "";
759                 for (int j = 0; j < cPara.size(); j++) {  uchimeParameters[j] = cPara[j];  commandString += toString(cPara[j]) + " "; } 
760                 //int numArgs = cPara.size();
761                 
762         commandString = "\"" + commandString + "\"";
763         
764                 //uchime_main(numArgs, uchimeParameters); 
765                 //cout << "commandString = " << commandString << endl;
766         if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); }
767                 system(commandString.c_str());
768                 
769                 //free memory
770                 for(int j = 0; j < cPara.size(); j++)  {  delete cPara[j];  }
771                 delete[] uchimeParameters; 
772                 
773                 //remove "" from filenames
774                 outputFName = outputFName.substr(1, outputFName.length()-2);
775                 filename = filename.substr(1, filename.length()-2);
776                 alns = alns.substr(1, alns.length()-2);
777                 
778                 if (pDataArray->m->control_pressed) { return 0; }
779                 
780                 //create accnos file from uchime results
781                 ifstream in; 
782                 pDataArray->m->openInputFile(outputFName, in);
783                 
784                 ofstream out;
785                 pDataArray->m->openOutputFile(accnos, out);
786                 
787                 numChimeras = 0;
788                 while(!in.eof()) {
789                         
790                         if (pDataArray->m->control_pressed) { break; }
791                         
792                         string name = "";
793                         string chimeraFlag = "";
794                         in >> chimeraFlag >> name;
795                         
796                         for (int j = 0; j < 15; j++) {  in >> chimeraFlag; }
797                         pDataArray->m->gobble(in);
798                         
799                         if (chimeraFlag == "Y") {  out << name << endl; numChimeras++; }
800                         totalSeqs++;
801                 }
802                 in.close();
803                 out.close();
804                 
805                 if (pDataArray->m->control_pressed) { return 0; }
806                 
807                 pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences.");       pDataArray->m->mothurOutEndLine();                                      
808         
809                 pDataArray->count = totalSeqs;
810                 pDataArray->numChimeras = numChimeras;
811                 return totalSeqs;
812                 
813         }
814         catch(exception& e) {
815                 pDataArray->m->errorOut(e, "ChimeraUchimeCommand", "MyUchimeSeqsThreadFunction");
816                 exit(1);
817         }
818
819
820 #endif
821
822 /**************************************************************************************************/
823
824
825 #endif
826
827