5 * Created by westcott on 5/10/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "chopseqscommand.h"
11 #include "sequence.hpp"
13 //**********************************************************************************************************************
14 vector<string> ChopSeqsCommand::getValidParameters(){
16 string AlignArray[] = {"fasta","short","numbases","countgaps","keep","outputdir","inputdir"};
17 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
21 m->errorOut(e, "ChopSeqsCommand", "getValidParameters");
25 //**********************************************************************************************************************
26 ChopSeqsCommand::ChopSeqsCommand(){
29 //initialize outputTypes
30 vector<string> tempOutNames;
31 outputTypes["fasta"] = tempOutNames;
32 outputTypes["accnos"] = tempOutNames;
35 m->errorOut(e, "ChopSeqsCommand", "ChopSeqsCommand");
39 //**********************************************************************************************************************
40 vector<string> ChopSeqsCommand::getRequiredParameters(){
42 string Array[] = {"fasta","numbases"};
43 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
47 m->errorOut(e, "ChopSeqsCommand", "getRequiredParameters");
51 //**********************************************************************************************************************
52 vector<string> ChopSeqsCommand::getRequiredFiles(){
54 vector<string> myArray;
58 m->errorOut(e, "ChopSeqsCommand", "getRequiredFiles");
62 //**********************************************************************************************************************
63 ChopSeqsCommand::ChopSeqsCommand(string option) {
67 //allow user to run help
68 if(option == "help") { help(); abort = true; }
71 //valid paramters for this command
72 string Array[] = {"fasta","numbases","countgaps","keep","short","outputdir","inputdir"};
73 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
75 OptionParser parser(option);
76 map<string,string> parameters = parser.getParameters();
78 ValidParameters validParameter;
79 map<string,string>::iterator it;
81 //check to make sure all parameters are valid for command
82 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
83 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
86 //initialize outputTypes
87 vector<string> tempOutNames;
88 outputTypes["fasta"] = tempOutNames;
89 outputTypes["accnos"] = tempOutNames;
91 //if the user changes the input directory command factory will send this info to us in the output parameter
92 string inputDir = validParameter.validFile(parameters, "inputdir", false);
93 if (inputDir == "not found"){ inputDir = ""; }
96 it = parameters.find("fasta");
97 //user has given a template file
98 if(it != parameters.end()){
99 path = m->hasPath(it->second);
100 //if the user has not given a path then, add inputdir. else leave path alone.
101 if (path == "") { parameters["fasta"] = inputDir + it->second; }
105 //check for required parameters
106 fastafile = validParameter.validFile(parameters, "fasta", true);
107 if (fastafile == "not open") { abort = true; }
108 else if (fastafile == "not found") { m->mothurOut("You must provide a fasta file."); m->mothurOutEndLine(); abort = true; }
110 //if the user changes the output directory command factory will send this info to us in the output parameter
111 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(fastafile); }
113 string temp = validParameter.validFile(parameters, "numbases", false); if (temp == "not found") { temp = "0"; }
114 convert(temp, numbases);
116 temp = validParameter.validFile(parameters, "countgaps", false); if (temp == "not found") { temp = "f"; }
117 countGaps = m->isTrue(temp);
119 temp = validParameter.validFile(parameters, "short", false); if (temp == "not found") { temp = "f"; }
120 Short = m->isTrue(temp);
122 keep = validParameter.validFile(parameters, "keep", false); if (keep == "not found") { keep = "front"; }
124 if (numbases == 0) { m->mothurOut("You must provide the number of bases you want to keep for the chops.seqs command."); m->mothurOutEndLine(); abort = true; }
128 catch(exception& e) {
129 m->errorOut(e, "ChopSeqsCommand", "ChopSeqsCommand");
133 //**********************************************************************************************************************
135 void ChopSeqsCommand::help(){
137 m->mothurOut("The chop.seqs command reads a fasta file and outputs a .chop.fasta containing the trimmed sequences.\n");
138 m->mothurOut("The chop.seqs command parameters are fasta, numbases, countgaps and keep. fasta and numbases are required required.\n");
139 m->mothurOut("The chop.seqs command should be in the following format: chop.seqs(fasta=yourFasta, numbases=yourNum, keep=yourKeep).\n");
140 m->mothurOut("The numbases parameter allows you to specify the number of bases you want to keep.\n");
141 m->mothurOut("The keep parameter allows you to specify whether you want to keep the front or the back of your sequence, default=front.\n");
142 m->mothurOut("The countgaps parameter allows you to specify whether you want to count gaps as bases, default=false.\n");
143 m->mothurOut("The short parameter allows you to specify you want to keep sequences that are too short to chop, default=false.\n");
144 m->mothurOut("For example, if you ran chop.seqs with numbases=200 and short=t, if a sequence had 100 bases mothur would keep the sequence rather than eliminate it.\n");
145 m->mothurOut("Example chop.seqs(fasta=amazon.fasta, numbases=200, keep=front).\n");
146 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
148 catch(exception& e) {
149 m->errorOut(e, "ChopSeqsCommand", "help");
154 //**********************************************************************************************************************
156 int ChopSeqsCommand::execute(){
159 if (abort == true) { return 0; }
161 string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "chop.fasta";
162 string outputFileNameAccnos = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "chop.accnos";
165 m->openOutputFile(outputFileName, out);
168 m->openOutputFile(outputFileNameAccnos, outAcc);
171 m->openInputFile(fastafile, in);
173 bool wroteAccnos = false;
179 if (m->control_pressed) { outputTypes.clear(); in.close(); out.close(); outAcc.close(); remove(outputFileName.c_str()); remove(outputFileNameAccnos.c_str()); return 0; }
181 if (seq.getName() != "") {
182 string newSeqString = getChopped(seq);
184 //output trimmed sequence
185 if (newSeqString != "") {
186 out << ">" << seq.getName() << endl << newSeqString << endl;
188 outAcc << seq.getName() << endl;
197 m->mothurOutEndLine();
198 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
199 m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
201 if (wroteAccnos) { m->mothurOut(outputFileNameAccnos); m->mothurOutEndLine(); outputNames.push_back(outputFileNameAccnos); outputTypes["accnos"].push_back(outputFileNameAccnos); }
202 else { remove(outputFileNameAccnos.c_str()); }
204 m->mothurOutEndLine();
209 catch(exception& e) {
210 m->errorOut(e, "ChopSeqsCommand", "execute");
214 //**********************************************************************************************************************
215 string ChopSeqsCommand::getChopped(Sequence seq) {
217 string temp = seq.getAligned();
218 string tempUnaligned = seq.getUnaligned();
221 //if needed trim sequence
222 if (keep == "front") {//you want to keep the beginning
223 int tempLength = temp.length();
225 if (tempLength > numbases) { //you have enough bases to remove some
228 int numBasesCounted = 0;
230 for (int i = 0; i < temp.length(); i++) {
232 if (toupper(temp[i]) == 'N') { temp[i] == '.'; }
236 if (numBasesCounted >= numbases) { stopSpot = i; break; }
239 if (stopSpot == 0) { temp = ""; }
240 else { temp = temp.substr(0, stopSpot); }
243 if (!Short) { temp = ""; } //sequence too short
245 }else { //you are keeping the back
246 int tempLength = temp.length();
247 if (tempLength > numbases) { //you have enough bases to remove some
250 int numBasesCounted = 0;
252 for (int i = (temp.length()-1); i >= 0; i--) {
254 if (toupper(temp[i]) == 'N') { temp[i] == '.'; }
258 if (numBasesCounted >= numbases) { stopSpot = i; break; }
261 if (stopSpot == 0) { temp = ""; }
262 else { temp = temp.substr(stopSpot+1); }
264 if (!Short) { temp = ""; } //sequence too short
270 //if needed trim sequence
271 if (keep == "front") {//you want to keep the beginning
272 int tempLength = tempUnaligned.length();
274 if (tempLength > numbases) { //you have enough bases to remove some
277 int numBasesCounted = 0;
279 for (int i = 0; i < temp.length(); i++) {
281 if (toupper(temp[i]) == 'N') {
284 if (tempLength < numbases) { stopSpot = 0; break; }
287 if(isalpha(temp[i])) { numBasesCounted++; }
289 if (numBasesCounted >= numbases) { stopSpot = i; break; }
292 if (stopSpot == 0) { temp = ""; }
293 else { temp = temp.substr(0, stopSpot); }
296 if (!Short) { temp = ""; } //sequence too short
298 }else { //you are keeping the back
299 int tempLength = tempUnaligned.length();
300 if (tempLength > numbases) { //you have enough bases to remove some
303 int numBasesCounted = 0;
305 for (int i = (temp.length()-1); i >= 0; i--) {
307 if (toupper(temp[i]) == 'N') {
310 if (tempLength < numbases) { stopSpot = 0; break; }
313 if(isalpha(temp[i])) { numBasesCounted++; }
315 if (numBasesCounted >= numbases) { stopSpot = i; break; }
318 if (stopSpot == 0) { temp = ""; }
319 else { temp = temp.substr(stopSpot+1); }
321 if (!Short) { temp = ""; } //sequence too short
328 catch(exception& e) {
329 m->errorOut(e, "ChopSeqsCommand", "getChopped");
333 //**********************************************************************************************************************