]> git.donarmstrong.com Git - mothur.git/blob - chopseqscommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / chopseqscommand.cpp
1 /*
2  *  chopseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/10/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chopseqscommand.h"
11 #include "sequence.hpp"
12
13 //**********************************************************************************************************************
14 vector<string> ChopSeqsCommand::setParameters(){        
15         try {
16                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
17                 CommandParameter pnumbases("numbases", "Number", "", "0", "", "", "",false,true); parameters.push_back(pnumbases);
18                 CommandParameter pcountgaps("countgaps", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcountgaps);
19                 CommandParameter pshort("short", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pshort);
20                 CommandParameter pkeep("keep", "Multiple", "front-back", "front", "", "", "",false,false); parameters.push_back(pkeep);
21                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
22                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
23                 
24                 vector<string> myArray;
25                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
26                 return myArray;
27         }
28         catch(exception& e) {
29                 m->errorOut(e, "ChopSeqsCommand", "setParameters");
30                 exit(1);
31         }
32 }
33 //**********************************************************************************************************************
34 string ChopSeqsCommand::getHelpString(){        
35         try {
36                 string helpString = "";
37                 helpString += "The chop.seqs command reads a fasta file and outputs a .chop.fasta containing the trimmed sequences. Note: If a sequence is completely 'chopped', an accnos file will be created with the names of the sequences removed. \n";
38                 helpString += "The chop.seqs command parameters are fasta, numbases, countgaps and keep. fasta is required unless you have a valid current fasta file. numbases is required.\n";
39                 helpString += "The chop.seqs command should be in the following format: chop.seqs(fasta=yourFasta, numbases=yourNum, keep=yourKeep).\n";
40                 helpString += "The numbases parameter allows you to specify the number of bases you want to keep.\n";
41                 helpString += "The keep parameter allows you to specify whether you want to keep the front or the back of your sequence, default=front.\n";
42                 helpString += "The countgaps parameter allows you to specify whether you want to count gaps as bases, default=false.\n";
43                 helpString += "The short parameter allows you to specify you want to keep sequences that are too short to chop, default=false.\n";
44                 helpString += "For example, if you ran chop.seqs with numbases=200 and short=t, if a sequence had 100 bases mothur would keep the sequence rather than eliminate it.\n";
45                 helpString += "Example chop.seqs(fasta=amazon.fasta, numbases=200, keep=front).\n";
46                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
47                 return helpString;
48         }
49         catch(exception& e) {
50                 m->errorOut(e, "ChopSeqsCommand", "getHelpString");
51                 exit(1);
52         }
53 }
54 //**********************************************************************************************************************
55 string ChopSeqsCommand::getOutputFileNameTag(string type, string inputName=""){ 
56         try {
57         string outputFileName = "";
58                 map<string, vector<string> >::iterator it;
59         
60         //is this a type this command creates
61         it = outputTypes.find(type);
62         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
63         else {
64             if (type == "fasta") {  outputFileName =  "chop.fasta"; }
65             else if (type == "accnos") {  outputFileName =  "chop.accnos"; }
66             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
67         }
68         return outputFileName;
69         }
70         catch(exception& e) {
71                 m->errorOut(e, "ChopSeqsCommand", "getOutputFileNameTag");
72                 exit(1);
73         }
74 }
75 //**********************************************************************************************************************
76 ChopSeqsCommand::ChopSeqsCommand(){     
77         try {
78                 abort = true; calledHelp = true; 
79                 setParameters();
80                 vector<string> tempOutNames;
81                 outputTypes["fasta"] = tempOutNames;
82                 outputTypes["accnos"] = tempOutNames;
83         }
84         catch(exception& e) {
85                 m->errorOut(e, "ChopSeqsCommand", "ChopSeqsCommand");
86                 exit(1);
87         }
88 }
89 //**********************************************************************************************************************
90 ChopSeqsCommand::ChopSeqsCommand(string option)  {
91         try {
92                 abort = false; calledHelp = false;   
93                 
94                 //allow user to run help
95                 if(option == "help") { help(); abort = true; calledHelp = true; }
96                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
97                 
98                 else {
99                         vector<string> myArray = setParameters();
100                         
101                         OptionParser parser(option);
102                         map<string,string> parameters = parser.getParameters();
103                         
104                         ValidParameters validParameter;
105                         map<string,string>::iterator it;
106                         
107                         //check to make sure all parameters are valid for command
108                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
109                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
110                         }
111                         
112                         //initialize outputTypes
113                         vector<string> tempOutNames;
114                         outputTypes["fasta"] = tempOutNames;
115                         outputTypes["accnos"] = tempOutNames;
116                 
117                         //if the user changes the input directory command factory will send this info to us in the output parameter 
118                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
119                         if (inputDir == "not found"){   inputDir = "";          }
120                         else {
121                                 string path;
122                                 it = parameters.find("fasta");
123                                 //user has given a template file
124                                 if(it != parameters.end()){ 
125                                         path = m->hasPath(it->second);
126                                         //if the user has not given a path then, add inputdir. else leave path alone.
127                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
128                                 }
129                         }
130
131                         //check for required parameters
132                         fastafile = validParameter.validFile(parameters, "fasta", true);
133                         if (fastafile == "not open") { abort = true; }
134                         else if (fastafile == "not found") {                            //if there is a current fasta file, use it
135                                 fastafile = m->getFastaFile(); 
136                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
137                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
138                         }else { m->setFastaFile(fastafile); }   
139                         
140                         //if the user changes the output directory command factory will send this info to us in the output parameter 
141                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(fastafile);      }
142                         
143                         string temp = validParameter.validFile(parameters, "numbases", false);  if (temp == "not found") { temp = "0"; } 
144                         m->mothurConvert(temp, numbases);   
145                         
146                         temp = validParameter.validFile(parameters, "countgaps", false);        if (temp == "not found") { temp = "f"; } 
147                         countGaps = m->isTrue(temp);  
148                         
149                         temp = validParameter.validFile(parameters, "short", false);    if (temp == "not found") { temp = "f"; } 
150                         Short = m->isTrue(temp);   
151                 
152                         keep = validParameter.validFile(parameters, "keep", false);             if (keep == "not found") { keep = "front"; } 
153                                 
154                         if (numbases == 0)  { m->mothurOut("You must provide the number of bases you want to keep for the chops.seqs command."); m->mothurOutEndLine(); abort = true;  }
155                 }
156
157         }
158         catch(exception& e) {
159                 m->errorOut(e, "ChopSeqsCommand", "ChopSeqsCommand");
160                 exit(1);
161         }
162 }
163 //**********************************************************************************************************************
164
165 int ChopSeqsCommand::execute(){
166         try {
167                 
168                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
169                 
170                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta");
171                 string outputFileNameAccnos = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("accnos");
172                 
173                 ofstream out;
174                 m->openOutputFile(outputFileName, out);
175                 
176                 ofstream outAcc;
177                 m->openOutputFile(outputFileNameAccnos, outAcc);
178                 
179                 ifstream in;
180                 m->openInputFile(fastafile, in);
181                 
182                 bool wroteAccnos = false;
183                 
184                 while (!in.eof()) {
185                         
186                         Sequence seq(in);
187                         
188                         if (m->control_pressed) { outputTypes.clear(); in.close(); out.close(); outAcc.close(); m->mothurRemove(outputFileName); m->mothurRemove(outputFileNameAccnos); return 0;  }
189                         
190                         if (seq.getName() != "") {
191                                 string newSeqString = getChopped(seq);
192                                 
193                                 //output trimmed sequence
194                                 if (newSeqString != "") {
195                                         out << ">" << seq.getName() << endl << newSeqString << endl;
196                                 }else{
197                                         outAcc << seq.getName() << endl;
198                                         wroteAccnos = true;
199                                 }
200                         }
201                 }
202                 in.close();
203                 out.close();
204                 outAcc.close();
205                 
206                 m->mothurOutEndLine();
207                 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
208                 m->mothurOut(outputFileName); m->mothurOutEndLine();    outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
209                 
210                 if (wroteAccnos) { m->mothurOut(outputFileNameAccnos); m->mothurOutEndLine(); outputNames.push_back(outputFileNameAccnos); outputTypes["accnos"].push_back(outputFileNameAccnos); }
211                 else {  m->mothurRemove(outputFileNameAccnos);  }
212                 
213                 m->mothurOutEndLine();
214                 
215                 //set fasta file as new current fastafile
216                 string current = "";
217                 itTypes = outputTypes.find("fasta");
218                 if (itTypes != outputTypes.end()) {
219                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
220                 }
221                 
222                 if (wroteAccnos) { //set accnos file as new current accnosfile
223                         itTypes = outputTypes.find("accnos");
224                         if (itTypes != outputTypes.end()) {
225                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
226                         }
227                 }
228                 
229                 
230                 return 0;               
231         }
232
233         catch(exception& e) {
234                 m->errorOut(e, "ChopSeqsCommand", "execute");
235                 exit(1);
236         }
237 }
238 //**********************************************************************************************************************
239 string ChopSeqsCommand::getChopped(Sequence seq) {
240         try {
241                 string temp = seq.getAligned();
242                 string tempUnaligned = seq.getUnaligned();
243                 
244                 if (countGaps) {
245                         //if needed trim sequence
246                         if (keep == "front") {//you want to keep the beginning
247                                 int tempLength = temp.length();
248
249                                 if (tempLength > numbases) { //you have enough bases to remove some
250                                 
251                                         int stopSpot = 0;
252                                         int numBasesCounted = 0;
253                                         
254                                         for (int i = 0; i < temp.length(); i++) {
255                                                 //eliminate N's
256                                                 if (toupper(temp[i]) == 'N') { temp[i] = '.'; }
257                                                 
258                                                 numBasesCounted++; 
259                                                 
260                                                 if (numBasesCounted >= numbases) { stopSpot = i; break; }
261                                         }
262                                         
263                                         if (stopSpot == 0) { temp = ""; }
264                                         else {  temp = temp.substr(0, stopSpot+1);  }
265                                                         
266                                 }else { 
267                                         if (!Short) { temp = ""; } //sequence too short
268                                 }
269                         }else { //you are keeping the back
270                                 int tempLength = temp.length();
271                                 if (tempLength > numbases) { //you have enough bases to remove some
272                                         
273                                         int stopSpot = 0;
274                                         int numBasesCounted = 0;
275                                         
276                                         for (int i = (temp.length()-1); i >= 0; i--) {
277                                                 //eliminate N's
278                                                 if (toupper(temp[i]) == 'N') { temp[i] = '.'; }
279                                                 
280                                                 numBasesCounted++; 
281
282                                                 if (numBasesCounted >= numbases) { stopSpot = i; break; }
283                                         }
284                                 
285                                         if (stopSpot == 0) { temp = ""; }
286                                         else {  temp = temp.substr(stopSpot+1);  }
287                                 }else { 
288                                         if (!Short) { temp = ""; } //sequence too short
289                                 }
290                         }
291
292                 }else{
293                                 
294                         //if needed trim sequence
295                         if (keep == "front") {//you want to keep the beginning
296                                 int tempLength = tempUnaligned.length();
297
298                                 if (tempLength > numbases) { //you have enough bases to remove some
299                                         
300                                         int stopSpot = 0;
301                                         int numBasesCounted = 0;
302                                         
303                                         for (int i = 0; i < temp.length(); i++) {
304                                                 //eliminate N's
305                                                 if (toupper(temp[i]) == 'N') { 
306                                                         temp[i] = '.'; 
307                                                         tempLength--;
308                                                         if (tempLength < numbases) { stopSpot = 0; break; }
309                                                 }
310                                                 
311                                                 if(isalpha(temp[i])) { numBasesCounted++; }
312                                                 
313                                                 if (numBasesCounted >= numbases) { stopSpot = i; break; }
314                                         }
315                                         
316                                         if (stopSpot == 0) { temp = ""; }
317                                         else {  temp = temp.substr(0, stopSpot+1);  }
318                                                         
319                                 }else { 
320                                         if (!Short) { temp = ""; } //sequence too short
321                                 }                               
322                         }else { //you are keeping the back
323                                 int tempLength = tempUnaligned.length();
324                                 if (tempLength > numbases) { //you have enough bases to remove some
325                                         
326                                         int stopSpot = 0;
327                                         int numBasesCounted = 0;
328                                         
329                                         for (int i = (temp.length()-1); i >= 0; i--) {
330                                                 //eliminate N's
331                                                 if (toupper(temp[i]) == 'N') { 
332                                                         temp[i] = '.'; 
333                                                         tempLength--;
334                                                         if (tempLength < numbases) { stopSpot = 0; break; }
335                                                 }
336                                                 
337                                                 if(isalpha(temp[i])) { numBasesCounted++; }
338
339                                                 if (numBasesCounted >= numbases) { stopSpot = i; break; }
340                                         }
341                                 
342                                         if (stopSpot == 0) { temp = ""; }
343                                         else {  temp = temp.substr(stopSpot);  }
344                                 }else { 
345                                         if (!Short) { temp = ""; } //sequence too short
346                                 }
347                         }
348                 }
349                 
350                 return temp;
351         }
352         catch(exception& e) {
353                 m->errorOut(e, "ChopSeqsCommand", "getChopped");
354                 exit(1);
355         }
356 }
357 //**********************************************************************************************************************
358
359