]> git.donarmstrong.com Git - mothur.git/blob - chopseqscommand.cpp
working on pam
[mothur.git] / chopseqscommand.cpp
1 /*
2  *  chopseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/10/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chopseqscommand.h"
11 #include "sequence.hpp"
12 #include "removeseqscommand.h"
13
14 //**********************************************************************************************************************
15 vector<string> ChopSeqsCommand::setParameters(){        
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
18         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname);
19         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","count",false,false,true); parameters.push_back(pcount);
20                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false,true); parameters.push_back(pgroup);
21                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
22         CommandParameter pnumbases("numbases", "Number", "", "0", "", "", "","",false,true,true); parameters.push_back(pnumbases);
23                 CommandParameter pcountgaps("countgaps", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pcountgaps);
24                 CommandParameter pshort("short", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pshort);
25                 CommandParameter pkeep("keep", "Multiple", "front-back", "front", "", "", "","",false,false); parameters.push_back(pkeep);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "ChopSeqsCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string ChopSeqsCommand::getHelpString(){        
40         try {
41                 string helpString = "";
42                 helpString += "The chop.seqs command reads a fasta file and outputs a .chop.fasta containing the trimmed sequences. Note: If a sequence is completely 'chopped', an accnos file will be created with the names of the sequences removed. \n";
43                 helpString += "The chop.seqs command parameters are fasta, name, group, count, numbases, countgaps and keep. fasta is required unless you have a valid current fasta file. numbases is required.\n";
44                 helpString += "The chop.seqs command should be in the following format: chop.seqs(fasta=yourFasta, numbases=yourNum, keep=yourKeep).\n";
45         helpString += "If you provide a name, group or count file any sequences removed from the fasta file will also be removed from those files.\n";
46                 helpString += "The numbases parameter allows you to specify the number of bases you want to keep.\n";
47                 helpString += "The keep parameter allows you to specify whether you want to keep the front or the back of your sequence, default=front.\n";
48                 helpString += "The countgaps parameter allows you to specify whether you want to count gaps as bases, default=false.\n";
49                 helpString += "The short parameter allows you to specify you want to keep sequences that are too short to chop, default=false.\n";
50                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
51         helpString += "For example, if you ran chop.seqs with numbases=200 and short=t, if a sequence had 100 bases mothur would keep the sequence rather than eliminate it.\n";
52                 helpString += "Example chop.seqs(fasta=amazon.fasta, numbases=200, keep=front).\n";
53                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
54                 return helpString;
55         }
56         catch(exception& e) {
57                 m->errorOut(e, "ChopSeqsCommand", "getHelpString");
58                 exit(1);
59         }
60 }
61 //**********************************************************************************************************************
62 string ChopSeqsCommand::getOutputPattern(string type) {
63     try {
64         string pattern = "";
65         
66         if (type == "fasta") {  pattern = "[filename],chop.fasta"; }
67         else if (type == "name") {  pattern = "[filename],chop.names"; }
68         else if (type == "group") {  pattern = "[filename],chop.groups"; }
69         else if (type == "count") {  pattern = "[filename],chop.count_table"; } 
70         else if (type == "accnos") {  pattern = "[filename],chop.accnos"; } 
71         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
72         
73         return pattern;
74     }
75     catch(exception& e) {
76         m->errorOut(e, "ChopSeqsCommand", "getOutputPattern");
77         exit(1);
78     }
79 }
80 //**********************************************************************************************************************
81 ChopSeqsCommand::ChopSeqsCommand(){     
82         try {
83                 abort = true; calledHelp = true; 
84                 setParameters();
85                 vector<string> tempOutNames;
86                 outputTypes["fasta"] = tempOutNames;
87                 outputTypes["accnos"] = tempOutNames;
88         outputTypes["name"] = tempOutNames;
89         outputTypes["group"] = tempOutNames;
90         outputTypes["count"] = tempOutNames;
91         }
92         catch(exception& e) {
93                 m->errorOut(e, "ChopSeqsCommand", "ChopSeqsCommand");
94                 exit(1);
95         }
96 }
97 //**********************************************************************************************************************
98 ChopSeqsCommand::ChopSeqsCommand(string option)  {
99         try {
100                 abort = false; calledHelp = false;   
101                 
102                 //allow user to run help
103                 if(option == "help") { help(); abort = true; calledHelp = true; }
104                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
105                 
106                 else {
107                         vector<string> myArray = setParameters();
108                         
109                         OptionParser parser(option);
110                         map<string,string> parameters = parser.getParameters();
111                         
112                         ValidParameters validParameter;
113                         map<string,string>::iterator it;
114                         
115                         //check to make sure all parameters are valid for command
116                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
117                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
118                         }
119                         
120                         //initialize outputTypes
121                         vector<string> tempOutNames;
122                         outputTypes["fasta"] = tempOutNames;
123                         outputTypes["accnos"] = tempOutNames;
124             outputTypes["name"] = tempOutNames;
125             outputTypes["group"] = tempOutNames;
126             outputTypes["count"] = tempOutNames;
127                 
128                         //if the user changes the input directory command factory will send this info to us in the output parameter 
129                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
130                         if (inputDir == "not found"){   inputDir = "";          }
131                         else {
132                                 string path;
133                                 it = parameters.find("fasta");
134                                 //user has given a template file
135                                 if(it != parameters.end()){ 
136                                         path = m->hasPath(it->second);
137                                         //if the user has not given a path then, add inputdir. else leave path alone.
138                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
139                                 }
140                 
141                 it = parameters.find("name");
142                                 //user has given a template file
143                                 if(it != parameters.end()){
144                                         path = m->hasPath(it->second);
145                                         //if the user has not given a path then, add inputdir. else leave path alone.
146                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
147                                 }
148                 
149                 it = parameters.find("group");
150                                 //user has given a template file
151                                 if(it != parameters.end()){
152                                         path = m->hasPath(it->second);
153                                         //if the user has not given a path then, add inputdir. else leave path alone.
154                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
155                                 }
156                 
157                 it = parameters.find("count");
158                                 //user has given a template file
159                                 if(it != parameters.end()){
160                                         path = m->hasPath(it->second);
161                                         //if the user has not given a path then, add inputdir. else leave path alone.
162                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
163                                 }
164                         }
165
166                         //check for required parameters
167                         fastafile = validParameter.validFile(parameters, "fasta", true);
168                         if (fastafile == "not open") { abort = true; }
169                         else if (fastafile == "not found") {                            //if there is a current fasta file, use it
170                                 fastafile = m->getFastaFile(); 
171                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
172                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
173                         }else { m->setFastaFile(fastafile); }   
174                         
175                         namefile = validParameter.validFile(parameters, "name", true);
176                         if (namefile == "not open") { namefile = ""; abort = true; }
177                         else if (namefile == "not found") { namefile = ""; }
178                         else { m->setNameFile(namefile); }
179                         
180                         groupfile = validParameter.validFile(parameters, "group", true);
181                         if (groupfile == "not open") { groupfile = ""; abort = true; }
182                         else if (groupfile == "not found") { groupfile = ""; }
183                         else { m->setGroupFile(groupfile); }
184                                 
185             countfile = validParameter.validFile(parameters, "count", true);
186                         if (countfile == "not open") { countfile = ""; abort = true; }
187                         else if (countfile == "not found") { countfile = "";  }
188                         else { m->setCountTableFile(countfile); }
189             
190             if ((namefile != "") && (countfile != "")) {
191                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
192             }
193                         
194             if ((groupfile != "") && (countfile != "")) {
195                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
196             }
197
198                         //if the user changes the output directory command factory will send this info to us in the output parameter 
199                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
200                         
201                         string temp = validParameter.validFile(parameters, "numbases", false);  if (temp == "not found") { temp = "0"; } 
202                         m->mothurConvert(temp, numbases);   
203                         
204             temp = validParameter.validFile(parameters, "processors", false);   if (temp == "not found"){       temp = m->getProcessors();      }
205                         m->setProcessors(temp);
206                         m->mothurConvert(temp, processors);
207             
208                         temp = validParameter.validFile(parameters, "countgaps", false);        if (temp == "not found") { temp = "f"; } 
209                         countGaps = m->isTrue(temp);  
210                         
211                         temp = validParameter.validFile(parameters, "short", false);    if (temp == "not found") { temp = "f"; } 
212                         Short = m->isTrue(temp);   
213                 
214                         keep = validParameter.validFile(parameters, "keep", false);             if (keep == "not found") { keep = "front"; } 
215                                 
216                         if (numbases == 0)  { m->mothurOut("You must provide the number of bases you want to keep for the chops.seqs command."); m->mothurOutEndLine(); abort = true;  }
217                 }
218
219         }
220         catch(exception& e) {
221                 m->errorOut(e, "ChopSeqsCommand", "ChopSeqsCommand");
222                 exit(1);
223         }
224 }
225 //**********************************************************************************************************************
226
227 int ChopSeqsCommand::execute(){
228         try {
229                 
230                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
231                 
232         map<string, string> variables;
233         string thisOutputDir = outputDir;
234                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
235         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
236         string outputFileName = getOutputFileName("fasta", variables);
237         outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
238         string outputFileNameAccnos = getOutputFileName("accnos", variables);        
239         
240         vector<unsigned long long> positions; 
241         vector<linePair> lines;
242 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
243         positions = m->divideFile(fastafile, processors);
244         for (int i = 0; i < (positions.size()-1); i++) {        lines.push_back(linePair(positions[i], positions[(i+1)]));      }
245 #else
246         int numSeqs = 0;
247         positions = m->setFilePosFasta(fastafile, numSeqs); 
248         if (positions.size() < processors) { processors = positions.size(); }
249                 
250         //figure out how many sequences you have to process
251         int numSeqsPerProcessor = numSeqs / processors;
252         for (int i = 0; i < processors; i++) {
253             int startIndex =  i * numSeqsPerProcessor;
254             if(i == (processors - 1)){  numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;        }
255             lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
256         }
257 #endif
258         
259         bool wroteAccnos = false;
260         if(processors == 1) {   wroteAccnos = driver(lines[0], fastafile, outputFileName, outputFileNameAccnos);        }
261         else                {   wroteAccnos = createProcesses(lines, fastafile, outputFileName, outputFileNameAccnos);  }
262         
263         if (m->control_pressed) {  return 0; }
264                 
265         if (wroteAccnos) {
266             outputNames.push_back(outputFileNameAccnos); outputTypes["accnos"].push_back(outputFileNameAccnos);
267             
268              //use remove.seqs to create new name, group and count file
269             if ((countfile != "") || (namefile != "") || (groupfile != "")) {
270                 string inputString = "accnos=" + outputFileNameAccnos;
271                 
272                 if (countfile != "") {  inputString += ", count=" + countfile;  }
273                 else{
274                     if (namefile != "") {  inputString += ", name=" + namefile;  }
275                     if (groupfile != "") {  inputString += ", group=" + groupfile;  }
276                 }
277                 
278                 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
279                 m->mothurOut("Running command: remove.seqs(" + inputString + ")"); m->mothurOutEndLine();
280                 m->mothurCalling = true;
281                 
282                 Command* removeCommand = new RemoveSeqsCommand(inputString);
283                 removeCommand->execute();
284                 
285                 map<string, vector<string> > filenames = removeCommand->getOutputFiles();
286                 
287                 delete removeCommand;
288                 m->mothurCalling = false;
289                 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
290                 
291                 if (groupfile != "") {
292                     thisOutputDir = outputDir;
293                     if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
294                     variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
295                     string outGroup = getOutputFileName("group", variables);
296                     m->renameFile(filenames["group"][0], outGroup);
297                     outputNames.push_back(outGroup); outputTypes["group"].push_back(outGroup);
298                 }
299                 
300                 if (namefile != "") {
301                     thisOutputDir = outputDir;
302                     if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
303                     variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile));
304                     string outName = getOutputFileName("name", variables);
305                     m->renameFile(filenames["name"][0], outName);
306                     outputNames.push_back(outName); outputTypes["name"].push_back(outName);
307                 }
308                 
309                 if (countfile != "") {
310                     thisOutputDir = outputDir;
311                     if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
312                     variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
313                     string outCount = getOutputFileName("count", variables);
314                     m->renameFile(filenames["count"][0], outCount);
315                     outputNames.push_back(outCount); outputTypes["count"].push_back(outCount);
316                 }
317             }
318         }
319                 else {  m->mothurRemove(outputFileNameAccnos);  }
320                 
321                 //set fasta file as new current fastafile
322                 string current = "";
323                 itTypes = outputTypes.find("fasta");
324                 if (itTypes != outputTypes.end()) {
325                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
326                 }
327         
328                 if (wroteAccnos) { //set accnos file as new current accnosfile
329                         itTypes = outputTypes.find("accnos");
330                         if (itTypes != outputTypes.end()) {
331                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
332                         }
333             
334             itTypes = outputTypes.find("name");
335             if (itTypes != outputTypes.end()) {
336                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
337             }
338             
339             itTypes = outputTypes.find("group");
340             if (itTypes != outputTypes.end()) {
341                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
342             }
343             
344             itTypes = outputTypes.find("count");
345             if (itTypes != outputTypes.end()) {
346                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
347             }
348                 }
349                 
350         m->mothurOutEndLine();
351                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
352                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
353                 m->mothurOutEndLine();
354                 
355                 return 0;               
356         }
357
358         catch(exception& e) {
359                 m->errorOut(e, "ChopSeqsCommand", "execute");
360                 exit(1);
361         }
362 }
363 /**************************************************************************************************/
364 bool ChopSeqsCommand::createProcesses(vector<linePair> lines, string filename, string outFasta, string outAccnos) {
365         try {
366                 int process = 1;
367                 bool wroteAccnos = false;
368                 vector<int> processIDS;
369         vector<string> nonBlankAccnosFiles;
370                 
371 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
372                 
373                 //loop through and create all the processes you want
374                 while (process != processors) {
375                         int pid = fork();
376                         
377                         if (pid > 0) {
378                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
379                                 process++;
380                         }else if (pid == 0){
381                                 wroteAccnos = driver(lines[process], filename, outFasta + toString(getpid()) + ".temp", outAccnos + toString(getpid()) + ".temp");
382                                 
383                                 //pass numSeqs to parent
384                                 ofstream out;
385                                 string tempFile = fastafile + toString(getpid()) + ".bool.temp";
386                                 m->openOutputFile(tempFile, out);
387                                 out << wroteAccnos << endl;                             
388                                 out.close();
389                                 
390                                 exit(0);
391                         }else { 
392                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
393                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
394                                 exit(0);
395                         }
396                 }
397                 
398                 //do your part
399                 wroteAccnos = driver(lines[0], filename, outFasta, outAccnos);
400         
401                 //force parent to wait until all the processes are done
402                 for (int i=0;i<processIDS.size();i++) { 
403                         int temp = processIDS[i];
404                         wait(&temp);
405                 }
406                 
407         
408                 if (wroteAccnos) { nonBlankAccnosFiles.push_back(outAccnos); }
409                 else { m->mothurRemove(outAccnos); } //remove so other files can be renamed to it
410         
411                 //parent reads in and combine Filter info
412                 for (int i = 0; i < processIDS.size(); i++) {
413                         string tempFilename = fastafile + toString(processIDS[i]) + ".bool.temp";
414                         ifstream in;
415                         m->openInputFile(tempFilename, in);
416                         
417                         bool temp;
418                         in >> temp; m->gobble(in); 
419             if (temp) { wroteAccnos = temp; nonBlankAccnosFiles.push_back(outAccnos + toString(processIDS[i]) + ".temp");  }
420                         else { m->mothurRemove((outAccnos + toString(processIDS[i]) + ".temp"));  }
421             
422                         in.close();
423                         m->mothurRemove(tempFilename);
424                 }
425 #else
426                 //////////////////////////////////////////////////////////////////////////////////////////////////////
427                 //Windows version shared memory, so be careful when passing variables through the seqSumData struct. 
428                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
429                 //Taking advantage of shared memory to allow both threads to add info to vectors.
430                 //////////////////////////////////////////////////////////////////////////////////////////////////////
431                 
432                 vector<chopData*> pDataArray; 
433                 DWORD   dwThreadIdArray[processors-1];
434                 HANDLE  hThreadArray[processors-1]; 
435                 
436                 //Create processor worker threads.
437                 for( int i=0; i<processors-1; i++ ){
438             
439             string extension = "";
440             if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
441                         // Allocate memory for thread data.
442                         chopData* tempChop = new chopData(filename, (outFasta+extension), (outAccnos+extension), m, lines[i].start, lines[i].end, keep, countGaps, numbases, Short);
443                         pDataArray.push_back(tempChop);
444                         
445                         //MyChopThreadFunction is in header. It must be global or static to work with the threads.
446                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
447                         hThreadArray[i] = CreateThread(NULL, 0, MyChopThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
448                 }
449                 
450         //do your part
451                 wroteAccnos = driver(lines[processors-1], filename, (outFasta + toString(processors-1) + ".temp"), (outAccnos + toString(processors-1) + ".temp"));
452         processIDS.push_back(processors-1);
453         
454                 //Wait until all threads have terminated.
455                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
456                 
457         if (wroteAccnos) { nonBlankAccnosFiles.push_back(outAccnos); }
458                 else { m->mothurRemove(outAccnos); } //remove so other files can be renamed to it
459
460                 //Close all thread handles and free memory allocations.
461                 for(int i=0; i < pDataArray.size(); i++){
462             if (pDataArray[i]->wroteAccnos) { wroteAccnos = pDataArray[i]->wroteAccnos; nonBlankAccnosFiles.push_back(outAccnos + toString(processIDS[i]) + ".temp");  }
463                         else { m->mothurRemove((outAccnos + toString(processIDS[i]) + ".temp"));  }
464             //check to make sure the process finished
465             if (pDataArray[i]->count != pDataArray[i]->end) {
466                 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
467             }
468                         CloseHandle(hThreadArray[i]);
469                         delete pDataArray[i];
470                 }
471 #endif          
472                 
473                 for (int i = 0; i < processIDS.size(); i++) {
474                         m->appendFiles((outFasta + toString(processIDS[i]) + ".temp"), outFasta);
475                         m->mothurRemove((outFasta + toString(processIDS[i]) + ".temp"));
476                 }
477                 
478         if (nonBlankAccnosFiles.size() != 0) { 
479                         m->renameFile(nonBlankAccnosFiles[0], outAccnos);
480                         
481                         for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
482                                 m->appendFiles(nonBlankAccnosFiles[h], outAccnos);
483                                 m->mothurRemove(nonBlankAccnosFiles[h]);
484                         }
485                 }else { //recreate the accnosfile if needed
486                         ofstream out;
487                         m->openOutputFile(outAccnos, out);
488                         out.close();
489                 }
490
491                 return wroteAccnos;
492         }
493         catch(exception& e) {
494                 m->errorOut(e, "ChopSeqsCommand", "createProcesses");
495                 exit(1);
496         }
497 }
498 /**************************************************************************************/
499 bool ChopSeqsCommand::driver(linePair filePos, string filename, string outFasta, string outAccnos) {    
500         try {
501                 
502                 ofstream out;
503                 m->openOutputFile(outFasta, out);
504         
505         ofstream outAcc;
506                 m->openOutputFile(outAccnos, outAcc);
507         
508                 ifstream in;
509                 m->openInputFile(filename, in);
510         
511                 in.seekg(filePos.start);
512         
513                 bool done = false;
514         bool wroteAccnos = false;
515                 int count = 0;
516         
517                 while (!done) {
518             
519                         if (m->control_pressed) { in.close(); out.close(); return 1; }
520             
521                         Sequence seq(in); m->gobble(in);
522                         
523                         if (m->control_pressed) {  in.close(); out.close(); outAcc.close(); m->mothurRemove(outFasta); m->mothurRemove(outAccnos); return 0;  }
524                         
525                         if (seq.getName() != "") {
526                                 string newSeqString = getChopped(seq);
527                                 
528                                 //output trimmed sequence
529                                 if (newSeqString != "") {
530                                         out << ">" << seq.getName() << endl << newSeqString << endl;
531                                 }else{
532                                         outAcc << seq.getName() << endl;
533                                         wroteAccnos = true;
534                                 }
535                 count++;
536                         }
537                         
538 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
539             unsigned long long pos = in.tellg();
540             if ((pos == -1) || (pos >= filePos.end)) { break; }
541 #else
542             if (in.eof()) { break; }
543 #endif
544             //report progress
545                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
546                         
547                 }
548                 //report progress
549                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
550
551                 
552                 in.close();
553         out.close();
554         outAcc.close();
555                 
556                 return wroteAccnos;
557         }
558         catch(exception& e) {
559                 m->errorOut(e, "ChopSeqsCommand", "driver");
560                 exit(1);
561         }
562 }
563 //**********************************************************************************************************************
564 string ChopSeqsCommand::getChopped(Sequence seq) {
565         try {
566                 string temp = seq.getAligned();
567                 string tempUnaligned = seq.getUnaligned();
568                 
569                 if (countGaps) {
570                         //if needed trim sequence
571                         if (keep == "front") {//you want to keep the beginning
572                                 int tempLength = temp.length();
573
574                                 if (tempLength > numbases) { //you have enough bases to remove some
575                                 
576                                         int stopSpot = 0;
577                                         int numBasesCounted = 0;
578                                         
579                                         for (int i = 0; i < temp.length(); i++) {
580                                                 //eliminate N's
581                                                 if (toupper(temp[i]) == 'N') { temp[i] = '.'; }
582                                                 
583                                                 numBasesCounted++; 
584                                                 
585                                                 if (numBasesCounted >= numbases) { stopSpot = i; break; }
586                                         }
587                                         
588                                         if (stopSpot == 0) { temp = ""; }
589                                         else {  temp = temp.substr(0, stopSpot+1);  }
590                                                         
591                                 }else { 
592                                         if (!Short) { temp = ""; } //sequence too short
593                                 }
594                         }else { //you are keeping the back
595                                 int tempLength = temp.length();
596                                 if (tempLength > numbases) { //you have enough bases to remove some
597                                         
598                                         int stopSpot = 0;
599                                         int numBasesCounted = 0;
600                                         
601                                         for (int i = (temp.length()-1); i >= 0; i--) {
602                                                 //eliminate N's
603                                                 if (toupper(temp[i]) == 'N') { temp[i] = '.'; }
604                                                 
605                                                 numBasesCounted++; 
606
607                                                 if (numBasesCounted >= numbases) { stopSpot = i; break; }
608                                         }
609                                 
610                                         if (stopSpot == 0) { temp = ""; }
611                                         else {  temp = temp.substr(stopSpot+1);  }
612                                 }else { 
613                                         if (!Short) { temp = ""; } //sequence too short
614                                 }
615                         }
616
617                 }else{
618                                 
619                         //if needed trim sequence
620                         if (keep == "front") {//you want to keep the beginning
621                                 int tempLength = tempUnaligned.length();
622
623                                 if (tempLength > numbases) { //you have enough bases to remove some
624                                         
625                                         int stopSpot = 0;
626                                         int numBasesCounted = 0;
627                                         
628                                         for (int i = 0; i < temp.length(); i++) {
629                                                 //eliminate N's
630                                                 if (toupper(temp[i]) == 'N') { 
631                                                         temp[i] = '.'; 
632                                                         tempLength--;
633                                                         if (tempLength < numbases) { stopSpot = 0; break; }
634                                                 }
635                                                 
636                                                 if(isalpha(temp[i])) { numBasesCounted++; }
637                                                 
638                                                 if (numBasesCounted >= numbases) { stopSpot = i; break; }
639                                         }
640                                         
641                                         if (stopSpot == 0) { temp = ""; }
642                                         else {  temp = temp.substr(0, stopSpot+1);  }
643                                                         
644                                 }else { 
645                                         if (!Short) { temp = ""; } //sequence too short
646                                 }                               
647                         }else { //you are keeping the back
648                                 int tempLength = tempUnaligned.length();
649                                 if (tempLength > numbases) { //you have enough bases to remove some
650                                         
651                                         int stopSpot = 0;
652                                         int numBasesCounted = 0;
653                                         
654                                         for (int i = (temp.length()-1); i >= 0; i--) {
655                                                 //eliminate N's
656                                                 if (toupper(temp[i]) == 'N') { 
657                                                         temp[i] = '.'; 
658                                                         tempLength--;
659                                                         if (tempLength < numbases) { stopSpot = 0; break; }
660                                                 }
661                                                 
662                                                 if(isalpha(temp[i])) { numBasesCounted++; }
663
664                                                 if (numBasesCounted >= numbases) { stopSpot = i; break; }
665                                         }
666                                 
667                                         if (stopSpot == 0) { temp = ""; }
668                                         else {  temp = temp.substr(stopSpot);  }
669                                 }else { 
670                                         if (!Short) { temp = ""; } //sequence too short
671                                 }
672                         }
673                 }
674                 
675                 return temp;
676         }
677         catch(exception& e) {
678                 m->errorOut(e, "ChopSeqsCommand", "getChopped");
679                 exit(1);
680         }
681 }
682 //**********************************************************************************************************************
683
684