- m->mothurOut("The chop.seqs command reads a fasta file and outputs a .chop.fasta containing the trimmed sequences.\n");
- m->mothurOut("The chop.seqs command parameters are fasta, numbases, countgaps and keep. fasta and numbases are required required.\n");
- m->mothurOut("The chop.seqs command should be in the following format: chop.seqs(fasta=yourFasta, numbases=yourNum, keep=yourKeep).\n");
- m->mothurOut("The numbases parameter allows you to specify the number of bases you want to keep.\n");
- m->mothurOut("The keep parameter allows you to specify whether you want to keep the front or the back of your sequence, default=front.\n");
- m->mothurOut("The countgaps parameter allows you to specify whether you want to count gaps as bases, default=false.\n");
- m->mothurOut("The short parameter allows you to specify you want to keep sequences that are too short to chop, default=false.\n");
- m->mothurOut("For example, if you ran chop.seqs with numbases=200 and short=t, if a sequence had 100 bases mothur would keep the sequence rather than eliminate it.\n");
- m->mothurOut("Example chop.seqs(fasta=amazon.fasta, numbases=200, keep=front).\n");
- m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
+
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
+ map<string, string> variables;
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
+ variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
+ string outputFileName = getOutputFileName("fasta", variables);
+ outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
+ string outputFileNameAccnos = getOutputFileName("accnos", variables);
+
+ vector<unsigned long long> positions;
+ vector<linePair> lines;
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+ positions = m->divideFile(fastafile, processors);
+ for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); }
+#else
+ int numSeqs = 0;
+ positions = m->setFilePosFasta(fastafile, numSeqs);
+ if (positions.size() < processors) { processors = positions.size(); }
+
+ //figure out how many sequences you have to process
+ int numSeqsPerProcessor = numSeqs / processors;
+ for (int i = 0; i < processors; i++) {
+ int startIndex = i * numSeqsPerProcessor;
+ if(i == (processors - 1)){ numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; }
+ lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
+ }
+#endif
+
+ bool wroteAccnos = false;
+ if(processors == 1) { wroteAccnos = driver(lines[0], fastafile, outputFileName, outputFileNameAccnos); }
+ else { wroteAccnos = createProcesses(lines, fastafile, outputFileName, outputFileNameAccnos); }
+
+ if (m->control_pressed) { return 0; }
+
+ if (wroteAccnos) {
+ outputNames.push_back(outputFileNameAccnos); outputTypes["accnos"].push_back(outputFileNameAccnos);
+
+ //use remove.seqs to create new name, group and count file
+ if ((countfile != "") || (namefile != "") || (groupfile != "")) {
+ string inputString = "accnos=" + outputFileNameAccnos;
+
+ if (countfile != "") { inputString += ", count=" + countfile; }
+ else{
+ if (namefile != "") { inputString += ", name=" + namefile; }
+ if (groupfile != "") { inputString += ", group=" + groupfile; }
+ }
+
+ m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+ m->mothurOut("Running command: remove.seqs(" + inputString + ")"); m->mothurOutEndLine();
+ m->mothurCalling = true;
+
+ Command* removeCommand = new RemoveSeqsCommand(inputString);
+ removeCommand->execute();
+
+ map<string, vector<string> > filenames = removeCommand->getOutputFiles();
+
+ delete removeCommand;
+ m->mothurCalling = false;
+ m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+
+ if (groupfile != "") {
+ thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
+ variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
+ string outGroup = getOutputFileName("group", variables);
+ m->renameFile(filenames["group"][0], outGroup);
+ outputNames.push_back(outGroup); outputTypes["group"].push_back(outGroup);
+ }
+
+ if (namefile != "") {
+ thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(namefile); }
+ variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile));
+ string outName = getOutputFileName("name", variables);
+ m->renameFile(filenames["name"][0], outName);
+ outputNames.push_back(outName); outputTypes["name"].push_back(outName);
+ }
+
+ if (countfile != "") {
+ thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(countfile); }
+ variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
+ string outCount = getOutputFileName("count", variables);
+ m->renameFile(filenames["count"][0], outCount);
+ outputNames.push_back(outCount); outputTypes["count"].push_back(outCount);
+ }
+ }
+ }
+ else { m->mothurRemove(outputFileNameAccnos); }
+
+ //set fasta file as new current fastafile
+ string current = "";
+ itTypes = outputTypes.find("fasta");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
+ }
+
+ if (wroteAccnos) { //set accnos file as new current accnosfile
+ itTypes = outputTypes.find("accnos");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
+ }
+
+ itTypes = outputTypes.find("name");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
+ }
+
+ itTypes = outputTypes.find("group");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
+ }
+
+ itTypes = outputTypes.find("count");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+ }
+ }
+
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+
+ return 0;