]> git.donarmstrong.com Git - mothur.git/blob - chopseqscommand.cpp
added pipeline commands which involved change to command factory and command class...
[mothur.git] / chopseqscommand.cpp
1 /*
2  *  chopseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/10/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chopseqscommand.h"
11 #include "sequence.hpp"
12
13 //**********************************************************************************************************************
14 vector<string> ChopSeqsCommand::getValidParameters(){   
15         try {
16                 string AlignArray[] =  {"fasta","numbases","countgaps","keep","outputdir","inputdir"};
17                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
18                 return myArray;
19         }
20         catch(exception& e) {
21                 m->errorOut(e, "ChopSeqsCommand", "getValidParameters");
22                 exit(1);
23         }
24 }
25 //**********************************************************************************************************************
26 ChopSeqsCommand::ChopSeqsCommand(){     
27         try {
28                 //initialize outputTypes
29                 vector<string> tempOutNames;
30                 outputTypes["fasta"] = tempOutNames;
31                 outputTypes["accnos"] = tempOutNames;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "ChopSeqsCommand", "ChopSeqsCommand");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 vector<string> ChopSeqsCommand::getRequiredParameters(){        
40         try {
41                 string Array[] =  {"fasta","numbases"};
42                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
43                 return myArray;
44         }
45         catch(exception& e) {
46                 m->errorOut(e, "ChopSeqsCommand", "getRequiredParameters");
47                 exit(1);
48         }
49 }
50 //**********************************************************************************************************************
51 vector<string> ChopSeqsCommand::getRequiredFiles(){     
52         try {
53                 vector<string> myArray;
54                 return myArray;
55         }
56         catch(exception& e) {
57                 m->errorOut(e, "ChopSeqsCommand", "getRequiredFiles");
58                 exit(1);
59         }
60 }
61 //**********************************************************************************************************************
62 ChopSeqsCommand::ChopSeqsCommand(string option)  {
63         try {
64                 abort = false;
65                 
66                 //allow user to run help
67                 if(option == "help") { help(); abort = true; }
68                 
69                 else {
70                         //valid paramters for this command
71                         string Array[] =  {"fasta","numbases","countgaps","keep","outputdir","inputdir"};
72                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
73                         
74                         OptionParser parser(option);
75                         map<string,string> parameters = parser.getParameters();
76                         
77                         ValidParameters validParameter;
78                         map<string,string>::iterator it;
79                         
80                         //check to make sure all parameters are valid for command
81                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
82                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
83                         }
84                         
85                         //initialize outputTypes
86                         vector<string> tempOutNames;
87                         outputTypes["fasta"] = tempOutNames;
88                         outputTypes["accnos"] = tempOutNames;
89                 
90                         //if the user changes the input directory command factory will send this info to us in the output parameter 
91                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
92                         if (inputDir == "not found"){   inputDir = "";          }
93                         else {
94                                 string path;
95                                 it = parameters.find("fasta");
96                                 //user has given a template file
97                                 if(it != parameters.end()){ 
98                                         path = m->hasPath(it->second);
99                                         //if the user has not given a path then, add inputdir. else leave path alone.
100                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
101                                 }
102                         }
103
104                         //check for required parameters
105                         fastafile = validParameter.validFile(parameters, "fasta", true);
106                         if (fastafile == "not open") { abort = true; }
107                         else if (fastafile == "not found") {  m->mothurOut("You must provide a fasta file."); m->mothurOutEndLine(); abort = true; }    
108                         
109                         //if the user changes the output directory command factory will send this info to us in the output parameter 
110                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(fastafile);      }
111                         
112                         string temp = validParameter.validFile(parameters, "numbases", false);  if (temp == "not found") { temp = "0"; } 
113                         convert(temp, numbases);   
114                         
115                         temp = validParameter.validFile(parameters, "countgaps", false);        if (temp == "not found") { temp = "f"; } 
116                         countGaps = m->isTrue(temp);   
117                 
118                         keep = validParameter.validFile(parameters, "keep", false);             if (keep == "not found") { keep = "front"; } 
119                                 
120                         if (numbases == 0)  { m->mothurOut("You must provide the number of bases you want to keep for the chops.seqs command."); m->mothurOutEndLine(); abort = true;  }
121                 }
122
123         }
124         catch(exception& e) {
125                 m->errorOut(e, "ChopSeqsCommand", "ChopSeqsCommand");
126                 exit(1);
127         }
128 }
129 //**********************************************************************************************************************
130
131 void ChopSeqsCommand::help(){
132         try {
133                 m->mothurOut("The chop.seqs command reads a fasta file and outputs a .chop.fasta containing the trimmed sequences.\n");
134                 m->mothurOut("The chop.seqs command parameters are fasta, numbases, countgaps and keep. fasta and numbases are required required.\n");
135                 m->mothurOut("The chop.seqs command should be in the following format: chop.seqs(fasta=yourFasta, numbases=yourNum, keep=yourKeep).\n");
136                 m->mothurOut("The numbases parameter allows you to specify the number of bases you want to keep.\n");
137                 m->mothurOut("The keep parameter allows you to specify whether you want to keep the front or the back of your sequence, default=front.\n");
138                 m->mothurOut("The countgaps parameter allows you to specify whether you want to count gaps as bases, default=false.\n");
139                 m->mothurOut("Example chop.seqs(fasta=amazon.fasta, numbases=200, keep=front).\n");
140                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
141         }
142         catch(exception& e) {
143                 m->errorOut(e, "ChopSeqsCommand", "help");
144                 exit(1);
145         }
146 }
147
148 //**********************************************************************************************************************
149
150 int ChopSeqsCommand::execute(){
151         try {
152                 
153                 if (abort == true) { return 0; }
154                 
155                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "chop.fasta";
156                 string outputFileNameAccnos = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "chop.accnos";
157                 
158                 ofstream out;
159                 m->openOutputFile(outputFileName, out);
160                 
161                 ofstream outAcc;
162                 m->openOutputFile(outputFileNameAccnos, outAcc);
163                 
164                 ifstream in;
165                 m->openInputFile(fastafile, in);
166                 
167                 bool wroteAccnos = false;
168                 
169                 while (!in.eof()) {
170                         
171                         Sequence seq(in);
172                         
173                         if (m->control_pressed) { outputTypes.clear(); in.close(); out.close(); outAcc.close(); remove(outputFileName.c_str()); remove(outputFileNameAccnos.c_str()); return 0;  }
174                         
175                         if (seq.getName() != "") {
176                                 string newSeqString = getChopped(seq);
177                                 
178                                 //output trimmed sequence
179                                 if (newSeqString != "") {
180                                         out << ">" << seq.getName() << endl << newSeqString << endl;
181                                 }else{
182                                         outAcc << seq.getName() << endl;
183                                         wroteAccnos = true;
184                                 }
185                         }
186                 }
187                 in.close();
188                 out.close();
189                 outAcc.close();
190                 
191                 m->mothurOutEndLine();
192                 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
193                 m->mothurOut(outputFileName); m->mothurOutEndLine();    outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
194                 
195                 if (wroteAccnos) { m->mothurOut(outputFileNameAccnos); m->mothurOutEndLine(); outputNames.push_back(outputFileNameAccnos); outputTypes["accnos"].push_back(outputFileNameAccnos); }
196                 else {  remove(outputFileNameAccnos.c_str());  }
197                 
198                 m->mothurOutEndLine();
199                 
200                 return 0;               
201         }
202
203         catch(exception& e) {
204                 m->errorOut(e, "ChopSeqsCommand", "execute");
205                 exit(1);
206         }
207 }
208 //**********************************************************************************************************************
209 string ChopSeqsCommand::getChopped(Sequence seq) {
210         try {
211                 string temp = seq.getAligned();
212                 string tempUnaligned = seq.getUnaligned();
213                 
214                 if (countGaps) {
215                         //if needed trim sequence
216                         if (keep == "front") {//you want to keep the beginning
217                                 int tempLength = temp.length();
218
219                                 if (tempLength > numbases) { //you have enough bases to remove some
220                                 
221                                         int stopSpot = 0;
222                                         int numBasesCounted = 0;
223                                         
224                                         for (int i = 0; i < temp.length(); i++) {
225                                                 //eliminate N's
226                                                 if (toupper(temp[i]) == 'N') { temp[i] == '.'; }
227                                                 
228                                                 numBasesCounted++; 
229                                                 
230                                                 if (numBasesCounted >= numbases) { stopSpot = i; break; }
231                                         }
232                                         
233                                         if (stopSpot == 0) { temp = ""; }
234                                         else {  temp = temp.substr(0, stopSpot);  }
235                                                         
236                                 }else { temp = ""; } //sequence too short
237                                 
238                         }else { //you are keeping the back
239                                 int tempLength = temp.length();
240                                 if (tempLength > numbases) { //you have enough bases to remove some
241                                         
242                                         int stopSpot = 0;
243                                         int numBasesCounted = 0;
244                                         
245                                         for (int i = (temp.length()-1); i >= 0; i--) {
246                                                 //eliminate N's
247                                                 if (toupper(temp[i]) == 'N') { temp[i] == '.'; }
248                                                 
249                                                 numBasesCounted++; 
250
251                                                 if (numBasesCounted >= numbases) { stopSpot = i; break; }
252                                         }
253                                 
254                                         if (stopSpot == 0) { temp = ""; }
255                                         else {  temp = temp.substr(stopSpot+1);  }
256                                 }
257                                 else {  temp = ""; } //sequence too short
258                         }
259
260                 }else{
261                                 
262                         //if needed trim sequence
263                         if (keep == "front") {//you want to keep the beginning
264                                 int tempLength = tempUnaligned.length();
265
266                                 if (tempLength > numbases) { //you have enough bases to remove some
267                                         
268                                         int stopSpot = 0;
269                                         int numBasesCounted = 0;
270                                         
271                                         for (int i = 0; i < temp.length(); i++) {
272                                                 //eliminate N's
273                                                 if (toupper(temp[i]) == 'N') { 
274                                                         temp[i] == '.'; 
275                                                         tempLength--;
276                                                         if (tempLength < numbases) { stopSpot = 0; break; }
277                                                 }
278                                                 
279                                                 if(isalpha(temp[i])) { numBasesCounted++; }
280                                                 
281                                                 if (numBasesCounted >= numbases) { stopSpot = i; break; }
282                                         }
283                                         
284                                         if (stopSpot == 0) { temp = ""; }
285                                         else {  temp = temp.substr(0, stopSpot);  }
286                                                         
287                                 }else { temp = ""; } //sequence too short
288                                 
289                         }else { //you are keeping the back
290                                 int tempLength = tempUnaligned.length();
291                                 if (tempLength > numbases) { //you have enough bases to remove some
292                                         
293                                         int stopSpot = 0;
294                                         int numBasesCounted = 0;
295                                         
296                                         for (int i = (temp.length()-1); i >= 0; i--) {
297                                                 //eliminate N's
298                                                 if (toupper(temp[i]) == 'N') { 
299                                                         temp[i] == '.'; 
300                                                         tempLength--;
301                                                         if (tempLength < numbases) { stopSpot = 0; break; }
302                                                 }
303                                                 
304                                                 if(isalpha(temp[i])) { numBasesCounted++; }
305
306                                                 if (numBasesCounted >= numbases) { stopSpot = i; break; }
307                                         }
308                                 
309                                         if (stopSpot == 0) { temp = ""; }
310                                         else {  temp = temp.substr(stopSpot+1);  }
311                                 }
312                                 else {  temp = ""; } //sequence too short
313                         }
314                 }
315                 
316                 return temp;
317         }
318         catch(exception& e) {
319                 m->errorOut(e, "ChopSeqsCommand", "getChopped");
320                 exit(1);
321         }
322 }
323 //**********************************************************************************************************************
324
325