]> git.donarmstrong.com Git - mothur.git/blob - deuniqueseqscommand.cpp
working on pam
[mothur.git] / deuniqueseqscommand.cpp
1 /*
2  *  deuniqueseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/19/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "deuniqueseqscommand.h"
11 #include "sequence.hpp"
12 #include "counttable.h"
13
14 //**********************************************************************************************************************
15 vector<string> DeUniqueSeqsCommand::setParameters(){    
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
18                 CommandParameter pname("name", "InputTypes", "", "", "namecount", "namecount", "none","name",false,false,true); parameters.push_back(pname);
19         CommandParameter pcount("count", "InputTypes", "", "", "namecount", "namecount", "none","group",false,false,true); parameters.push_back(pcount);
20                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
21                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
22                 
23                 vector<string> myArray;
24                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
25                 return myArray;
26         }
27         catch(exception& e) {
28                 m->errorOut(e, "DeUniqueSeqsCommand", "setParameters");
29                 exit(1);
30         }
31 }
32 //**********************************************************************************************************************
33 string DeUniqueSeqsCommand::getHelpString(){    
34         try {
35                 string helpString = "";
36                 helpString += "The deunique.seqs command reads a fastafile and namefile or countfile, and creates a fastafile containing all the sequences. It you provide a count file with group information a group file is also created.\n";
37                 helpString += "The deunique.seqs command parameters are fasta, name and count. Fasta is required and you must provide either a name or count file.\n";
38                 helpString += "The deunique.seqs command should be in the following format: \n";
39                 helpString += "deunique.seqs(fasta=yourFastaFile, name=yourNameFile) \n";       
40                 helpString += "Example deunique.seqs(fasta=abrecovery.unique.fasta, name=abrecovery.names).\n";
41                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
42                 return helpString;
43         }
44         catch(exception& e) {
45                 m->errorOut(e, "DeUniqueSeqsCommand", "getHelpString");
46                 exit(1);
47         }
48 }
49 //**********************************************************************************************************************
50 string DeUniqueSeqsCommand::getOutputPattern(string type) {
51     try {
52         string pattern = "";
53         
54         if (type == "fasta") {  pattern = "[filename],redundant.fasta"; }
55         else if (type == "group") {  pattern = "[filename],redundant.groups"; }
56         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
57         
58         return pattern;
59     }
60     catch(exception& e) {
61         m->errorOut(e, "DeUniqueSeqsCommand", "getOutputPattern");
62         exit(1);
63     }
64 }
65 //**********************************************************************************************************************
66 DeUniqueSeqsCommand::DeUniqueSeqsCommand(){     
67         try {
68                 abort = true; calledHelp = true; 
69                 setParameters();
70                 vector<string> tempOutNames;
71                 outputTypes["fasta"] = tempOutNames;
72         outputTypes["group"] = tempOutNames;
73         }
74         catch(exception& e) {
75                 m->errorOut(e, "DeUniqueSeqsCommand", "DeconvoluteCommand");
76                 exit(1);
77         }
78 }
79 /**************************************************************************************/
80 DeUniqueSeqsCommand::DeUniqueSeqsCommand(string option)  {      
81         try {
82                 abort = false; calledHelp = false;   
83                 
84                 //allow user to run help
85                 if(option == "help") { help(); abort = true; calledHelp = true; }
86                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
87                 
88                 else {
89                         vector<string> myArray = setParameters();
90                         
91                         OptionParser parser(option);
92                         map<string,string> parameters = parser.getParameters();
93                         
94                         ValidParameters validParameter;
95                         map<string, string>::iterator it;
96                 
97                         //check to make sure all parameters are valid for command
98                         for (it = parameters.begin(); it != parameters.end(); it++) { 
99                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
100                         }
101                         
102                         //initialize outputTypes
103                         vector<string> tempOutNames;
104                         outputTypes["fasta"] = tempOutNames;
105             outputTypes["group"] = tempOutNames;
106                 
107                         //if the user changes the input directory command factory will send this info to us in the output parameter 
108                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
109                         if (inputDir == "not found"){   inputDir = "";          }
110                         else {
111                                 string path;
112                                 it = parameters.find("fasta");
113                                 //user has given a template file
114                                 if(it != parameters.end()){ 
115                                         path = m->hasPath(it->second);
116                                         //if the user has not given a path then, add inputdir. else leave path alone.
117                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
118                                 }
119                                 
120                                 it = parameters.find("name");
121                                 //user has given a template file
122                                 if(it != parameters.end()){ 
123                                         path = m->hasPath(it->second);
124                                         //if the user has not given a path then, add inputdir. else leave path alone.
125                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
126                                 }
127                 
128                 it = parameters.find("count");
129                                 //user has given a template file
130                                 if(it != parameters.end()){
131                                         path = m->hasPath(it->second);
132                                         //if the user has not given a path then, add inputdir. else leave path alone.
133                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
134                                 }
135                         }
136
137                         
138                         //check for required parameters
139                         fastaFile = validParameter.validFile(parameters, "fasta", true);
140                         if (fastaFile == "not open") { abort = true; }
141                         else if (fastaFile == "not found") {                            
142                                 fastaFile = m->getFastaFile(); 
143                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
144                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
145                         }else { m->setFastaFile(fastaFile); }   
146                         
147                         //if the user changes the output directory command factory will send this info to us in the output parameter 
148                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
149                                 outputDir = ""; 
150                         }
151                         
152                         nameFile = validParameter.validFile(parameters, "name", true);
153                         if (nameFile == "not open") { abort = true; }
154                         else if (nameFile == "not found"){      nameFile = ""; }
155                         else { m->setNameFile(nameFile); }
156             
157             countfile = validParameter.validFile(parameters, "count", true);
158                         if (countfile == "not open") { abort = true;  }
159                         else if (countfile == "not found") { countfile = ""; }
160                         else { m->setCountTableFile(countfile); }
161                         
162             if ((countfile != "") && (nameFile != "")) { m->mothurOut("When executing a deunique.seqs command you must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
163                         
164             
165                         if ((countfile == "") && (nameFile == "")) { //look for currents
166                 nameFile = m->getNameFile();
167                                 if (nameFile != "") { m->mothurOut("Using " + nameFile + " as input file for the name parameter."); m->mothurOutEndLine(); }
168                                 else {
169                     countfile = m->getCountTableFile();
170                     if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
171                     else {  m->mothurOut("[ERROR]: You have no current name or count files one is required."); m->mothurOutEndLine(); abort = true; }
172                 }
173             }
174
175                 }
176
177         }
178         catch(exception& e) {
179                 m->errorOut(e, "DeUniqueSeqsCommand", "DeUniqueSeqsCommand");
180                 exit(1);
181         }
182 }
183 /**************************************************************************************/
184 int DeUniqueSeqsCommand::execute() {    
185         try {
186                 
187                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
188
189                 //prepare filenames and open files
190                 ofstream out;
191         string thisOutputDir = outputDir;
192                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastaFile);  }
193                 string outFastaFile = thisOutputDir + m->getRootName(m->getSimpleName(fastaFile));
194                 int pos = outFastaFile.find("unique");
195                 if (pos != string::npos) { outFastaFile = outputDir + outFastaFile.substr(0, pos); }
196         else { outFastaFile = outputDir + outFastaFile; }
197         map<string, string> variables; 
198         variables["[filename]"] = outFastaFile;
199         outFastaFile = getOutputFileName("fasta", variables);
200                 m->openOutputFile(outFastaFile, out);
201                 
202                 map<string, string> nameMap;
203         CountTable ct;
204         ofstream outGroup;
205         string outGroupFile;
206         vector<string> groups;
207         if (nameFile != "") { m->readNames(nameFile, nameMap); }
208         else {
209             ct.readTable(countfile, true, false);
210             
211             if (ct.hasGroupInfo()) {
212                 thisOutputDir = outputDir;
213                 if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
214                 outGroupFile = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
215                 variables["[filename]"] = outGroupFile;
216                 outGroupFile = getOutputFileName("group", variables);
217                 m->openOutputFile(outGroupFile, outGroup);
218                 groups = ct.getNamesOfGroups();
219             }
220         }
221         
222                 if (m->control_pressed) {  out.close(); outputTypes.clear(); m->mothurRemove(outFastaFile); if (countfile != "") { if (ct.hasGroupInfo()) { outGroup.close(); m->mothurRemove(outGroupFile); } } return 0; }
223                 
224                 ifstream in;
225                 m->openInputFile(fastaFile, in);
226                 
227                 while (!in.eof()) {
228                 
229                         if (m->control_pressed) { in.close(); out.close(); outputTypes.clear(); m->mothurRemove(outFastaFile); if (countfile != "") { if (ct.hasGroupInfo()) { outGroup.close(); m->mothurRemove(outGroupFile); } } return 0; }
230                         
231                         Sequence seq(in); m->gobble(in);
232                         
233                         if (seq.getName() != "") {
234                                 
235                 if (nameFile != "") {
236                     //look for sequence name in nameMap
237                     map<string, string>::iterator it = nameMap.find(seq.getName());
238                     
239                     if (it == nameMap.end()) {  m->mothurOut("[ERROR]: Your namefile does not contain " + seq.getName() + ", aborting."); m->mothurOutEndLine(); m->control_pressed = true; }
240                     else {
241                         vector<string> names;
242                         m->splitAtComma(it->second, names);
243                         
244                         //output sequences
245                         for (int i = 0; i < names.size(); i++) {
246                             out << ">" << names[i] << endl;
247                             out << seq.getAligned() << endl;
248                         }
249                         
250                         //remove seq from name map so we can check for seqs in namefile not in fastafile later
251                         nameMap.erase(it);
252                     }
253                 }else {
254                     if (ct.hasGroupInfo()) {
255                         vector<int> groupCounts = ct.getGroupCounts(seq.getName());
256                         int count = 1;
257                         for (int i = 0; i < groups.size(); i++) {
258                             for (int j = 0; j < groupCounts[i]; j++) {
259                                 outGroup << seq.getName()+"_"+toString(count) << '\t' << groups[i] << endl; count++;
260                             }
261                         }
262                         
263                     }
264                     
265                     int numReps = ct.getNumSeqs(seq.getName()); //will report error and set m->control_pressed if not found
266                     for (int i = 0; i < numReps; i++) {
267                         out << ">" << seq.getName()+"_"+toString(i+1) << endl;
268                         out << seq.getAligned() << endl;
269                     }
270                 }
271                         }
272                 }
273                 in.close();
274                 out.close();
275         if (countfile != "") { if (ct.hasGroupInfo()) { outGroup.close(); } }
276                 
277                                                 
278                 if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outFastaFile); if (countfile != "") { if (ct.hasGroupInfo()) {  m->mothurRemove(outGroupFile); } }return 0; }
279                 
280         outputNames.push_back(outFastaFile);  outputTypes["fasta"].push_back(outFastaFile);
281         if (countfile != "") { if (ct.hasGroupInfo()) { outputNames.push_back(outGroupFile);  outputTypes["group"].push_back(outGroupFile); } }
282         
283                 m->mothurOutEndLine();
284                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
285                 for(int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();      }
286         m->mothurOutEndLine();
287                 
288                 
289                 //set fasta file as new current fastafile
290                 string current = "";
291                 itTypes = outputTypes.find("fasta");
292                 if (itTypes != outputTypes.end()) {
293                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
294                 }
295         
296         itTypes = outputTypes.find("group");
297                 if (itTypes != outputTypes.end()) {
298                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
299                 }
300
301                 
302                 return 0;
303         }
304         catch(exception& e) {
305                 m->errorOut(e, "DeUniqueSeqsCommand", "execute");
306                 exit(1);
307         }
308 }
309 /**************************************************************************************/