]> git.donarmstrong.com Git - mothur.git/blob - chopseqscommand.cpp
fixes while testing
[mothur.git] / chopseqscommand.cpp
1 /*
2  *  chopseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/10/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chopseqscommand.h"
11 #include "sequence.hpp"
12
13 //**********************************************************************************************************************
14 vector<string> ChopSeqsCommand::getValidParameters(){   
15         try {
16                 string AlignArray[] =  {"fasta","short","numbases","countgaps","keep","outputdir","inputdir"};
17                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
18                 return myArray;
19         }
20         catch(exception& e) {
21                 m->errorOut(e, "ChopSeqsCommand", "getValidParameters");
22                 exit(1);
23         }
24 }
25 //**********************************************************************************************************************
26 ChopSeqsCommand::ChopSeqsCommand(){     
27         try {
28                 abort = true;
29                 //initialize outputTypes
30                 vector<string> tempOutNames;
31                 outputTypes["fasta"] = tempOutNames;
32                 outputTypes["accnos"] = tempOutNames;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "ChopSeqsCommand", "ChopSeqsCommand");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 vector<string> ChopSeqsCommand::getRequiredParameters(){        
41         try {
42                 string Array[] =  {"fasta","numbases"};
43                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
44                 return myArray;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "ChopSeqsCommand", "getRequiredParameters");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 vector<string> ChopSeqsCommand::getRequiredFiles(){     
53         try {
54                 vector<string> myArray;
55                 return myArray;
56         }
57         catch(exception& e) {
58                 m->errorOut(e, "ChopSeqsCommand", "getRequiredFiles");
59                 exit(1);
60         }
61 }
62 //**********************************************************************************************************************
63 ChopSeqsCommand::ChopSeqsCommand(string option)  {
64         try {
65                 abort = false;
66                 
67                 //allow user to run help
68                 if(option == "help") { help(); abort = true; }
69                 
70                 else {
71                         //valid paramters for this command
72                         string Array[] =  {"fasta","numbases","countgaps","keep","short","outputdir","inputdir"};
73                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
74                         
75                         OptionParser parser(option);
76                         map<string,string> parameters = parser.getParameters();
77                         
78                         ValidParameters validParameter;
79                         map<string,string>::iterator it;
80                         
81                         //check to make sure all parameters are valid for command
82                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
83                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
84                         }
85                         
86                         //initialize outputTypes
87                         vector<string> tempOutNames;
88                         outputTypes["fasta"] = tempOutNames;
89                         outputTypes["accnos"] = tempOutNames;
90                 
91                         //if the user changes the input directory command factory will send this info to us in the output parameter 
92                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
93                         if (inputDir == "not found"){   inputDir = "";          }
94                         else {
95                                 string path;
96                                 it = parameters.find("fasta");
97                                 //user has given a template file
98                                 if(it != parameters.end()){ 
99                                         path = m->hasPath(it->second);
100                                         //if the user has not given a path then, add inputdir. else leave path alone.
101                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
102                                 }
103                         }
104
105                         //check for required parameters
106                         fastafile = validParameter.validFile(parameters, "fasta", true);
107                         if (fastafile == "not open") { abort = true; }
108                         else if (fastafile == "not found") {  m->mothurOut("You must provide a fasta file."); m->mothurOutEndLine(); abort = true; }    
109                         
110                         //if the user changes the output directory command factory will send this info to us in the output parameter 
111                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(fastafile);      }
112                         
113                         string temp = validParameter.validFile(parameters, "numbases", false);  if (temp == "not found") { temp = "0"; } 
114                         convert(temp, numbases);   
115                         
116                         temp = validParameter.validFile(parameters, "countgaps", false);        if (temp == "not found") { temp = "f"; } 
117                         countGaps = m->isTrue(temp);  
118                         
119                         temp = validParameter.validFile(parameters, "short", false);    if (temp == "not found") { temp = "f"; } 
120                         Short = m->isTrue(temp);   
121                 
122                         keep = validParameter.validFile(parameters, "keep", false);             if (keep == "not found") { keep = "front"; } 
123                                 
124                         if (numbases == 0)  { m->mothurOut("You must provide the number of bases you want to keep for the chops.seqs command."); m->mothurOutEndLine(); abort = true;  }
125                 }
126
127         }
128         catch(exception& e) {
129                 m->errorOut(e, "ChopSeqsCommand", "ChopSeqsCommand");
130                 exit(1);
131         }
132 }
133 //**********************************************************************************************************************
134
135 void ChopSeqsCommand::help(){
136         try {
137                 m->mothurOut("The chop.seqs command reads a fasta file and outputs a .chop.fasta containing the trimmed sequences.\n");
138                 m->mothurOut("The chop.seqs command parameters are fasta, numbases, countgaps and keep. fasta and numbases are required required.\n");
139                 m->mothurOut("The chop.seqs command should be in the following format: chop.seqs(fasta=yourFasta, numbases=yourNum, keep=yourKeep).\n");
140                 m->mothurOut("The numbases parameter allows you to specify the number of bases you want to keep.\n");
141                 m->mothurOut("The keep parameter allows you to specify whether you want to keep the front or the back of your sequence, default=front.\n");
142                 m->mothurOut("The countgaps parameter allows you to specify whether you want to count gaps as bases, default=false.\n");
143                 m->mothurOut("The short parameter allows you to specify you want to keep sequences that are too short to chop, default=false.\n");
144                 m->mothurOut("For example, if you ran chop.seqs with numbases=200 and short=t, if a sequence had 100 bases mothur would keep the sequence rather than eliminate it.\n");
145                 m->mothurOut("Example chop.seqs(fasta=amazon.fasta, numbases=200, keep=front).\n");
146                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
147         }
148         catch(exception& e) {
149                 m->errorOut(e, "ChopSeqsCommand", "help");
150                 exit(1);
151         }
152 }
153
154 //**********************************************************************************************************************
155
156 int ChopSeqsCommand::execute(){
157         try {
158                 
159                 if (abort == true) { return 0; }
160                 
161                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "chop.fasta";
162                 string outputFileNameAccnos = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "chop.accnos";
163                 
164                 ofstream out;
165                 m->openOutputFile(outputFileName, out);
166                 
167                 ofstream outAcc;
168                 m->openOutputFile(outputFileNameAccnos, outAcc);
169                 
170                 ifstream in;
171                 m->openInputFile(fastafile, in);
172                 
173                 bool wroteAccnos = false;
174                 
175                 while (!in.eof()) {
176                         
177                         Sequence seq(in);
178                         
179                         if (m->control_pressed) { outputTypes.clear(); in.close(); out.close(); outAcc.close(); remove(outputFileName.c_str()); remove(outputFileNameAccnos.c_str()); return 0;  }
180                         
181                         if (seq.getName() != "") {
182                                 string newSeqString = getChopped(seq);
183                                 
184                                 //output trimmed sequence
185                                 if (newSeqString != "") {
186                                         out << ">" << seq.getName() << endl << newSeqString << endl;
187                                 }else{
188                                         outAcc << seq.getName() << endl;
189                                         wroteAccnos = true;
190                                 }
191                         }
192                 }
193                 in.close();
194                 out.close();
195                 outAcc.close();
196                 
197                 m->mothurOutEndLine();
198                 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
199                 m->mothurOut(outputFileName); m->mothurOutEndLine();    outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
200                 
201                 if (wroteAccnos) { m->mothurOut(outputFileNameAccnos); m->mothurOutEndLine(); outputNames.push_back(outputFileNameAccnos); outputTypes["accnos"].push_back(outputFileNameAccnos); }
202                 else {  remove(outputFileNameAccnos.c_str());  }
203                 
204                 m->mothurOutEndLine();
205                 
206                 return 0;               
207         }
208
209         catch(exception& e) {
210                 m->errorOut(e, "ChopSeqsCommand", "execute");
211                 exit(1);
212         }
213 }
214 //**********************************************************************************************************************
215 string ChopSeqsCommand::getChopped(Sequence seq) {
216         try {
217                 string temp = seq.getAligned();
218                 string tempUnaligned = seq.getUnaligned();
219                 
220                 if (countGaps) {
221                         //if needed trim sequence
222                         if (keep == "front") {//you want to keep the beginning
223                                 int tempLength = temp.length();
224
225                                 if (tempLength > numbases) { //you have enough bases to remove some
226                                 
227                                         int stopSpot = 0;
228                                         int numBasesCounted = 0;
229                                         
230                                         for (int i = 0; i < temp.length(); i++) {
231                                                 //eliminate N's
232                                                 if (toupper(temp[i]) == 'N') { temp[i] == '.'; }
233                                                 
234                                                 numBasesCounted++; 
235                                                 
236                                                 if (numBasesCounted >= numbases) { stopSpot = i; break; }
237                                         }
238                                         
239                                         if (stopSpot == 0) { temp = ""; }
240                                         else {  temp = temp.substr(0, stopSpot);  }
241                                                         
242                                 }else { 
243                                         if (!Short) { temp = ""; } //sequence too short
244                                 }
245                         }else { //you are keeping the back
246                                 int tempLength = temp.length();
247                                 if (tempLength > numbases) { //you have enough bases to remove some
248                                         
249                                         int stopSpot = 0;
250                                         int numBasesCounted = 0;
251                                         
252                                         for (int i = (temp.length()-1); i >= 0; i--) {
253                                                 //eliminate N's
254                                                 if (toupper(temp[i]) == 'N') { temp[i] == '.'; }
255                                                 
256                                                 numBasesCounted++; 
257
258                                                 if (numBasesCounted >= numbases) { stopSpot = i; break; }
259                                         }
260                                 
261                                         if (stopSpot == 0) { temp = ""; }
262                                         else {  temp = temp.substr(stopSpot+1);  }
263                                 }else { 
264                                         if (!Short) { temp = ""; } //sequence too short
265                                 }
266                         }
267
268                 }else{
269                                 
270                         //if needed trim sequence
271                         if (keep == "front") {//you want to keep the beginning
272                                 int tempLength = tempUnaligned.length();
273
274                                 if (tempLength > numbases) { //you have enough bases to remove some
275                                         
276                                         int stopSpot = 0;
277                                         int numBasesCounted = 0;
278                                         
279                                         for (int i = 0; i < temp.length(); i++) {
280                                                 //eliminate N's
281                                                 if (toupper(temp[i]) == 'N') { 
282                                                         temp[i] == '.'; 
283                                                         tempLength--;
284                                                         if (tempLength < numbases) { stopSpot = 0; break; }
285                                                 }
286                                                 
287                                                 if(isalpha(temp[i])) { numBasesCounted++; }
288                                                 
289                                                 if (numBasesCounted >= numbases) { stopSpot = i; break; }
290                                         }
291                                         
292                                         if (stopSpot == 0) { temp = ""; }
293                                         else {  temp = temp.substr(0, stopSpot);  }
294                                                         
295                                 }else { 
296                                         if (!Short) { temp = ""; } //sequence too short
297                                 }                               
298                         }else { //you are keeping the back
299                                 int tempLength = tempUnaligned.length();
300                                 if (tempLength > numbases) { //you have enough bases to remove some
301                                         
302                                         int stopSpot = 0;
303                                         int numBasesCounted = 0;
304                                         
305                                         for (int i = (temp.length()-1); i >= 0; i--) {
306                                                 //eliminate N's
307                                                 if (toupper(temp[i]) == 'N') { 
308                                                         temp[i] == '.'; 
309                                                         tempLength--;
310                                                         if (tempLength < numbases) { stopSpot = 0; break; }
311                                                 }
312                                                 
313                                                 if(isalpha(temp[i])) { numBasesCounted++; }
314
315                                                 if (numBasesCounted >= numbases) { stopSpot = i; break; }
316                                         }
317                                 
318                                         if (stopSpot == 0) { temp = ""; }
319                                         else {  temp = temp.substr(stopSpot+1);  }
320                                 }else { 
321                                         if (!Short) { temp = ""; } //sequence too short
322                                 }
323                         }
324                 }
325                 
326                 return temp;
327         }
328         catch(exception& e) {
329                 m->errorOut(e, "ChopSeqsCommand", "getChopped");
330                 exit(1);
331         }
332 }
333 //**********************************************************************************************************************
334
335