]> git.donarmstrong.com Git - mothur.git/blob - screenseqscommand.cpp
broke up globaldata and moved error checking and help into commands
[mothur.git] / screenseqscommand.cpp
1 /*
2  *  screenseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/3/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "screenseqscommand.h"
11 #include "sequence.hpp"
12
13 //***************************************************************************************************************
14
15 ScreenSeqsCommand::ScreenSeqsCommand(string option){
16         try {
17                 globaldata = GlobalData::getInstance();
18                 abort = false;
19                 
20                 //allow user to run help
21                 if(option == "help") { help(); abort = true; }
22                 
23                 else {
24                         //valid paramters for this command
25                         string AlignArray[] =  {"fasta", "start", "end", "maxambig", "maxhomop", "minlength", "maxlength", "name", "group"};
26                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
27                         
28                         parser = new OptionParser();
29                         parser->parse(option, parameters);      delete parser; 
30                         
31                         ValidParameters* validParameter = new ValidParameters();
32                 
33                         //check to make sure all parameters are valid for command
34                         for (it = parameters.begin(); it != parameters.end(); it++) { 
35                                 if (validParameter->isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
36                         }
37                         
38                         //check for required parameters
39                         fastafile = validParameter->validFile(parameters, "fasta", true);
40                         if (fastafile == "not found") { cout << "fasta is a required parameter for the screen.seqs command." << endl; abort = true; }
41                         else if (fastafile == "not open") { abort = true; }     
42                         else { globaldata->setFastaFile(fastafile); }
43                 
44                         groupfile = validParameter->validFile(parameters, "group", true);
45                         if (groupfile == "not open") { abort = true; }  
46                         else if (groupfile == "not found") { groupfile = ""; }
47                         else { 
48                                 globaldata->setGroupFile(groupfile);
49                         }
50                         
51                         namefile = validParameter->validFile(parameters, "name", true);
52                         if (namefile == "not open") { abort = true; }
53                         else if (namefile == "not found") { namefile = ""; }    
54                         else { 
55                                 globaldata->setNameFile(namefile);
56                         }
57
58                 
59                         //check for optional parameter and set defaults
60                         // ...at some point should added some additional type checking...
61                         string temp;
62                         temp = validParameter->validFile(parameters, "start", false);                   if (temp == "not found") { temp = "-1"; }
63                         convert(temp, startPos); 
64                 
65                         temp = validParameter->validFile(parameters, "end", false);                             if (temp == "not found") { temp = "-1"; }
66                         convert(temp, endPos);  
67
68                         temp = validParameter->validFile(parameters, "maxambig", false);                if (temp == "not found") { temp = "-1"; }
69                         convert(temp, maxAmbig);  
70
71                         temp = validParameter->validFile(parameters, "maxhomop", false);                if (temp == "not found") { temp = "-1"; }
72                         convert(temp, maxHomoP);  
73
74                         temp = validParameter->validFile(parameters, "minlength", false);               if (temp == "not found") { temp = "-1"; }
75                         convert(temp, minLength); 
76                         
77                         temp = validParameter->validFile(parameters, "maxlength", false);               if (temp == "not found") { temp = "-1"; }
78                         convert(temp, maxLength); 
79                 
80                         delete validParameter;
81                 }
82
83         }
84         catch(exception& e) {
85                 cout << "Standard Error: " << e.what() << " has occurred in the ScreenSeqsCommand class Function ScreenSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
86                 exit(1);
87         }
88         catch(...) {
89                 cout << "An unknown error has occurred in the ScreenSeqsCommand class function ScreenSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
90                 exit(1);
91         }       
92 }
93 //**********************************************************************************************************************
94
95 void ScreenSeqsCommand::help(){
96         try {
97                 cout << "The screen.seqs command reads a fastafile and creates ....." << "\n";
98                 cout << "The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, and group." << "\n";
99                 cout << "The fasta parameter is required." << "\n";
100                 cout << "The start parameter .... The default is -1." << "\n";
101                 cout << "The end parameter .... The default is -1." << "\n";
102                 cout << "The maxambig parameter .... The default is -1." << "\n";
103                 cout << "The maxhomop parameter .... The default is -1." << "\n";
104                 cout << "The minlength parameter .... The default is -1." << "\n";
105                 cout << "The maxlength parameter .... The default is -1." << "\n";
106                 cout << "The name parameter allows you to provide a namesfile, and the group parameter allows you to provide a groupfile." << "\n";
107                 cout << "The screen.seqs command should be in the following format: " << "\n";
108                 cout << "screen.seqs(fasta=yourFastaFile, name=youNameFile, group=yourGroupFIle, start=yourStart, end=yourEnd, maxambig=yourMaxambig,  " << "\n";
109                 cout << "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  " << "\n";    
110                 cout << "Example screen.seqs(fasta=abrecovery.fasta, name=abrecovery.names, group=abrecovery.groups, start=..., end=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...)." << "\n";
111                 cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta)." << "\n" << "\n";
112
113         }
114         catch(exception& e) {
115                 cout << "Standard Error: " << e.what() << " has occurred in the ScreenSeqsCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
116                 exit(1);
117         }
118         catch(...) {
119                 cout << "An unknown error has occurred in the ScreenSeqsCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
120                 exit(1);
121         }       
122 }
123
124 //***************************************************************************************************************
125
126 ScreenSeqsCommand::~ScreenSeqsCommand(){        /*      do nothing      */      }
127
128 //***************************************************************************************************************
129
130 int ScreenSeqsCommand::execute(){
131         try{
132                 
133                 if (abort == true) { return 0; }
134                                 
135                 ifstream inFASTA;
136                 openInputFile(fastafile, inFASTA);
137                 
138                 set<string> badSeqNames;
139                 
140                 string goodSeqFile = getRootName(fastafile) + "good" + getExtension(fastafile);
141                 string badSeqFile = getRootName(fastafile) + "bad" + getExtension(fastafile);
142                 
143                 ofstream goodSeqOut;    openOutputFile(goodSeqFile, goodSeqOut);
144                 ofstream badSeqOut;             openOutputFile(badSeqFile, badSeqOut);          
145                 
146                 while(!inFASTA.eof()){
147                         Sequence currSeq(inFASTA);
148                         bool goodSeq = 1;               //      innocent until proven guilty
149                         if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos())                  {       goodSeq = 0;    }
150                         if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos())                                {       goodSeq = 0;    }
151                         if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases())                {       goodSeq = 0;    }
152                         if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer())   {       goodSeq = 0;    }
153                         if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases())                {       goodSeq = 0;    }
154                         if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases())                {       goodSeq = 0;    }
155                         
156                         if(goodSeq == 1){
157                                 currSeq.printSequence(goodSeqOut);      
158                         }
159                         else{
160                                 currSeq.printSequence(badSeqOut);       
161                                 badSeqNames.insert(currSeq.getName());
162                         }
163                         gobble(inFASTA);
164                 }       
165                 if(namefile != ""){
166                         screenNameGroupFile(badSeqNames);
167                 }
168                 else if(groupfile != ""){
169                         screenGroupFile(badSeqNames);
170                 }
171                 
172                 return 0;
173         }
174         catch(exception& e) {
175                 cout << "Standard Error: " << e.what() << " has occurred in the ScreenSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
176                 exit(1);
177         }
178         catch(...) {
179                 cout << "An unknown error has occurred in the ScreenSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
180                 exit(1);
181         }
182         
183 }
184
185 //***************************************************************************************************************
186
187 void ScreenSeqsCommand::screenNameGroupFile(set<string> badSeqNames){
188
189         ifstream inputNames;
190         openInputFile(globaldata->getNameFile(), inputNames);
191         set<string> badSeqGroups;
192         string seqName, seqList, group;
193         set<string>::iterator it;
194
195         string goodNameFile = getRootName(globaldata->getNameFile()) + "good" + getExtension(globaldata->getNameFile());
196         string badNameFile = getRootName(globaldata->getNameFile()) + "bad" + getExtension(globaldata->getNameFile());
197         
198         ofstream goodNameOut;   openOutputFile(goodNameFile, goodNameOut);
199         ofstream badNameOut;    openOutputFile(badNameFile, badNameOut);                
200         
201         while(!inputNames.eof()){
202                 inputNames >> seqName >> seqList;
203                 it = badSeqNames.find(seqName);
204                 
205                 if(it != badSeqNames.end()){
206                         badSeqNames.erase(it);
207                         badNameOut << seqName << '\t' << seqList << endl;
208                         if(globaldata->getNameFile() != ""){
209                                 int start = 0;
210                                 for(int i=0;i<seqList.length();i++){
211                                         if(seqList[i] == ','){
212                                                 badSeqGroups.insert(seqList.substr(start,i-start));
213                                                 start = i+1;
214                                         }                                       
215                                 }
216                                 badSeqGroups.insert(seqList.substr(start,seqList.length()-start));
217                         }
218                 }
219                 else{
220                         goodNameOut << seqName << '\t' << seqList << endl;
221                 }
222                 gobble(inputNames);
223         }
224         inputNames.close();
225         goodNameOut.close();
226         badNameOut.close();
227         
228         if(globaldata->getGroupFile() != ""){
229                 
230                 ifstream inputGroups;
231                 openInputFile(globaldata->getGroupFile(), inputGroups);
232
233                 string goodGroupFile = getRootName(globaldata->getGroupFile()) + "good" + getExtension(globaldata->getGroupFile());
234                 string badGroupFile = getRootName(globaldata->getGroupFile()) + "bad" + getExtension(globaldata->getGroupFile());
235                 
236                 ofstream goodGroupOut;  openOutputFile(goodGroupFile, goodGroupOut);
237                 ofstream badGroupOut;   openOutputFile(badGroupFile, badGroupOut);              
238                 
239                 while(!inputGroups.eof()){
240                         inputGroups >> seqName >> group;
241
242                         it = badSeqGroups.find(seqName);
243                         
244                         if(it != badSeqGroups.end()){
245                                 badSeqGroups.erase(it);
246                                 badGroupOut << seqName << '\t' << group << endl;
247                         }
248                         else{
249                                 goodGroupOut << seqName << '\t' << group << endl;
250                         }
251                         gobble(inputGroups);
252                 }
253                 inputGroups.close();
254                 goodGroupOut.close();
255                 badGroupOut.close();
256         }
257 }
258
259 //***************************************************************************************************************
260
261 void ScreenSeqsCommand::screenGroupFile(set<string> badSeqNames){
262
263         ifstream inputGroups;
264         openInputFile(globaldata->getGroupFile(), inputGroups);
265         string seqName, group;
266         set<string>::iterator it;
267         
268         string goodGroupFile = getRootName(globaldata->getGroupFile()) + "good" + getExtension(globaldata->getGroupFile());
269         string badGroupFile = getRootName(globaldata->getGroupFile()) + "bad" + getExtension(globaldata->getGroupFile());
270         
271         ofstream goodGroupOut;  openOutputFile(goodGroupFile, goodGroupOut);
272         ofstream badGroupOut;   openOutputFile(badGroupFile, badGroupOut);              
273         
274         while(!inputGroups.eof()){
275                 inputGroups >> seqName >> group;
276                 it = badSeqNames.find(seqName);
277                 
278                 if(it != badSeqNames.end()){
279                         badSeqNames.erase(it);
280                         badGroupOut << seqName << '\t' << group << endl;
281                 }
282                 else{
283                         goodGroupOut << seqName << '\t' << group << endl;
284                 }
285                 gobble(inputGroups);
286         }
287         inputGroups.close();
288         goodGroupOut.close();
289         badGroupOut.close();
290         
291 }
292
293 //***************************************************************************************************************
294
295