]> git.donarmstrong.com Git - mothur.git/blob - splitgroupscommand.cpp
added modify names parameter to set.dir
[mothur.git] / splitgroupscommand.cpp
1 /*
2  *  splitgroupscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/20/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "splitgroupscommand.h"
11 #include "sharedutilities.h"
12 #include "sequenceparser.h"
13 #include "counttable.h"
14
15 //**********************************************************************************************************************
16 vector<string> SplitGroupCommand::setParameters(){      
17         try {           
18                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
19         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname);
20         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "CountGroup", "none","count",false,false,true); parameters.push_back(pcount);
21                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "CountGroup", "none","group",false,false,true); parameters.push_back(pgroup);
22                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
23                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
24                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
25                 
26                 vector<string> myArray;
27                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
28                 return myArray;
29         }
30         catch(exception& e) {
31                 m->errorOut(e, "SplitGroupCommand", "setParameters");
32                 exit(1);
33         }
34 }
35 //**********************************************************************************************************************
36 string SplitGroupCommand::getHelpString(){      
37         try {
38                 string helpString = "";
39                 helpString += "The split.groups command reads a group or count file, and parses your fasta and names or count files by groups. \n";
40                 helpString += "The split.groups command parameters are fasta, name, group, count and groups.\n";
41                 helpString += "The fasta and group or count parameters are required.\n";
42                 helpString += "The groups parameter allows you to select groups to create files for.  \n";
43                 helpString += "For example if you set groups=A-B-C, you will get a .A.fasta, .A.names, .B.fasta, .B.names, .C.fasta, .C.names files.  \n";
44                 helpString += "If you want .fasta and .names files for all groups, set groups=all.  \n";
45                 helpString += "The split.groups command should be used in the following format: split.group(fasta=yourFasta, group=yourGroupFile).\n";
46                 helpString += "Example: split.groups(fasta=abrecovery.fasta, group=abrecovery.groups).\n";
47                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
48                 return helpString;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "SplitGroupCommand", "getHelpString");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string SplitGroupCommand::getOutputPattern(string type) {
57     try {
58         string pattern = "";
59         
60         if (type == "fasta") {  pattern = "[filename],[group],fasta"; } 
61         else if (type == "name") {  pattern = "[filename],[group],names"; } 
62         else if (type == "count") {  pattern = "[filename],[group],count_table"; }
63         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
64         
65         return pattern;
66     }
67     catch(exception& e) {
68         m->errorOut(e, "SplitGroupCommand", "getOutputPattern");
69         exit(1);
70     }
71 }
72
73 //**********************************************************************************************************************
74 SplitGroupCommand::SplitGroupCommand(){ 
75         try {
76                 abort = true; calledHelp = true; 
77                 setParameters();
78                 vector<string> tempOutNames;
79                 outputTypes["fasta"] = tempOutNames;
80                 outputTypes["name"] = tempOutNames;
81         outputTypes["count"] = tempOutNames;
82         }
83         catch(exception& e) {
84                 m->errorOut(e, "SplitGroupCommand", "SplitGroupCommand");
85                 exit(1);
86         }
87 }
88 //**********************************************************************************************************************
89 SplitGroupCommand::SplitGroupCommand(string option)  {
90         try {
91                 abort = false; calledHelp = false;   
92                         
93                 //allow user to run help
94                 if(option == "help") { help(); abort = true; calledHelp = true; }
95                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
96                 
97                 else {
98                         vector<string> myArray = setParameters();
99                         
100                         OptionParser parser(option);
101                         map<string, string> parameters = parser.getParameters();
102                         
103                         ValidParameters validParameter;
104                         map<string, string>::iterator it;
105                 
106                         //check to make sure all parameters are valid for command
107                         for (it = parameters.begin(); it != parameters.end(); it++) { 
108                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
109                         }
110                         
111                         //initialize outputTypes
112                         vector<string> tempOutNames;
113                         outputTypes["fasta"] = tempOutNames;
114                         outputTypes["name"] = tempOutNames;
115             outputTypes["count"] = tempOutNames;
116                 
117                         //if the user changes the input directory command factory will send this info to us in the output parameter 
118                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
119                         if (inputDir == "not found"){   inputDir = "";          }
120                         else {
121                                 string path;
122                                 it = parameters.find("group");
123                                 //user has given a template file
124                                 if(it != parameters.end()){ 
125                                         path = m->hasPath(it->second);
126                                         //if the user has not given a path then, add inputdir. else leave path alone.
127                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
128                                 }
129                                 
130                                 it = parameters.find("fasta");
131                                 //user has given a template file
132                                 if(it != parameters.end()){ 
133                                         path = m->hasPath(it->second);
134                                         //if the user has not given a path then, add inputdir. else leave path alone.
135                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
136                                 }
137                                 
138                                 it = parameters.find("name");
139                                 //user has given a template file
140                                 if(it != parameters.end()){ 
141                                         path = m->hasPath(it->second);
142                                         //if the user has not given a path then, add inputdir. else leave path alone.
143                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
144                                 }
145                 
146                 it = parameters.find("count");
147                                 //user has given a template file
148                                 if(it != parameters.end()){ 
149                                         path = m->hasPath(it->second);
150                                         //if the user has not given a path then, add inputdir. else leave path alone.
151                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
152                                 }
153                         }
154
155                         
156                         namefile = validParameter.validFile(parameters, "name", true);
157                         if (namefile == "not open") { namefile = ""; abort = true; }
158                         else if (namefile == "not found") { namefile = ""; }    
159                         else { m->setNameFile(namefile); }
160                 
161                         fastafile = validParameter.validFile(parameters, "fasta", true);
162                         if (fastafile == "not open") { abort = true; }
163                         else if (fastafile == "not found") {                    
164                                 fastafile = m->getFastaFile(); 
165                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
166                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
167                         }else { m->setFastaFile(fastafile); }   
168                         
169                         groupfile = validParameter.validFile(parameters, "group", true);
170                         if (groupfile == "not open") {  groupfile = ""; abort = true; } 
171                         else if (groupfile == "not found") { groupfile = "";
172                         }else {  m->setGroupFile(groupfile); }
173             
174             countfile = validParameter.validFile(parameters, "count", true);
175                         if (countfile == "not open") { countfile = ""; abort = true; }
176                         else if (countfile == "not found") { countfile = ""; }  
177                         else { m->setCountTableFile(countfile); }
178             
179             if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
180             
181             if ((countfile != "") && (groupfile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }
182             
183             if ((countfile == "") && (groupfile == "")) {
184                 if (namefile == "") { //check for count then group
185                     countfile = m->getCountTableFile(); 
186                                         if (countfile != "") {  m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
187                                         else { 
188                                                 groupfile = m->getGroupFile(); 
189                         if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
190                         else { 
191                             m->mothurOut("You need to provide a count or group file."); m->mothurOutEndLine(); 
192                             abort = true; 
193                         }       
194                                         }       
195                 }else { //check for group
196                     groupfile = m->getGroupFile(); 
197                     if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
198                     else { 
199                         m->mothurOut("You need to provide a count or group file."); m->mothurOutEndLine(); 
200                         abort = true; 
201                     }   
202                 }
203             }
204                         
205                         groups = validParameter.validFile(parameters, "groups", false);         
206                         if (groups == "not found") { groups = ""; }
207                         else { m->splitAtDash(groups, Groups);  }
208                                                 
209                         //if the user changes the output directory command factory will send this info to us in the output parameter 
210                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
211                 if (groupfile != "") { outputDir = m->hasPath(groupfile); }
212                 else { outputDir = m->hasPath(countfile);  }
213             }
214                         
215             if (countfile == "") {
216                 if (namefile == "") {
217                     vector<string> files; files.push_back(fastafile);
218                     parser.getNameFile(files);
219                 }
220             }
221                 }
222
223         }
224         catch(exception& e) {
225                 m->errorOut(e, "SplitGroupCommand", "SplitAbundCommand");
226                 exit(1);
227         }
228 }
229 //**********************************************************************************************************************
230 int SplitGroupCommand::execute(){
231         try {
232         
233                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
234                 
235         if (countfile == "" ) {  runNameGroup();  }
236         else { runCount();  }
237                                 
238                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } return 0; }
239                 
240                 string current = "";
241                 itTypes = outputTypes.find("fasta");
242                 if (itTypes != outputTypes.end()) {
243                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
244                 }
245                 
246                 itTypes = outputTypes.find("name");
247                 if (itTypes != outputTypes.end()) {
248                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
249                 }
250         
251         itTypes = outputTypes.find("count");
252                 if (itTypes != outputTypes.end()) {
253                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
254                 }
255                 
256                 m->mothurOutEndLine();
257                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
258                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
259                 m->mothurOutEndLine();
260                 
261                 return 0;
262         }
263         catch(exception& e) {
264                 m->errorOut(e, "SplitGroupCommand", "execute");
265                 exit(1);
266         }
267 }
268 //**********************************************************************************************************************
269 int SplitGroupCommand::runNameGroup(){
270         try {
271         SequenceParser* parser;
272                 if (namefile == "") {   parser = new SequenceParser(groupfile, fastafile);                              }
273                 else                            {       parser = new SequenceParser(groupfile, fastafile, namefile);    }
274                 
275                 if (m->control_pressed) { delete parser; return 0; }
276         
277                 vector<string> namesGroups = parser->getNamesOfGroups();
278                 SharedUtil util;  util.setGroups(Groups, namesGroups);  
279                 
280                 string fastafileRoot = outputDir + m->getRootName(m->getSimpleName(fastafile));
281                 string namefileRoot = outputDir + m->getRootName(m->getSimpleName(namefile));
282                 
283                 m->mothurOutEndLine();
284                 for (int i = 0; i < Groups.size(); i++) {
285                         
286                         m->mothurOut("Processing group: " + Groups[i]); m->mothurOutEndLine();
287                         
288             map<string, string> variables; 
289             variables["[filename]"] = fastafileRoot;
290             variables["[group]"] = Groups[i];
291
292                         string newFasta = getOutputFileName("fasta",variables);
293             variables["[filename]"] = namefileRoot;
294                         string newName = getOutputFileName("name",variables);
295                         
296                         parser->getSeqs(Groups[i], newFasta, false);
297                         outputNames.push_back(newFasta); outputTypes["fasta"].push_back(newFasta);
298                         if (m->control_pressed) { delete parser; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);        } return 0; }
299             
300                         if (namefile != "") { 
301                                 parser->getNameMap(Groups[i], newName); 
302                                 outputNames.push_back(newName); outputTypes["name"].push_back(newName);
303                         }
304                         
305                         if (m->control_pressed) { delete parser; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);        } return 0; }
306                 }
307                 
308                 delete parser;
309         
310         return 0;
311
312     }
313         catch(exception& e) {
314                 m->errorOut(e, "SplitGroupCommand", "runNameGroup");
315                 exit(1);
316         }
317 }
318 //**********************************************************************************************************************
319 int SplitGroupCommand::runCount(){
320         try {
321         
322         CountTable ct;
323         ct.readTable(countfile, true);
324         if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, cannot split by group.\n"); m->control_pressed = true; }
325         
326         if (m->control_pressed) { return 0; }
327         
328         vector<string> namesGroups = ct.getNamesOfGroups();
329         SharedUtil util;  util.setGroups(Groups, namesGroups); 
330         
331         //fill filehandles with neccessary ofstreams
332         map<string, ofstream*> ffiles;
333         map<string, ofstream*> cfiles;
334         ofstream* temp;
335         for (int i=0; i<Groups.size(); i++) {
336             temp = new ofstream;
337             ffiles[Groups[i]] = temp;
338             map<string, string> variables; 
339             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
340             variables["[group]"] = Groups[i];
341             string newFasta = getOutputFileName("fasta",variables);
342             outputNames.push_back(newFasta); outputTypes["fasta"].push_back(newFasta);
343             m->openOutputFile(newFasta, (*temp));
344             temp = new ofstream;
345             cfiles[Groups[i]] = temp;
346             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
347             string newCount = getOutputFileName("count",variables);
348             m->openOutputFile(newCount, (*temp));
349             outputNames.push_back(newCount); outputTypes["count"].push_back(newCount);
350             (*temp) << "Representative_Sequence\ttotal\t" << Groups[i] << endl;
351         }
352         
353         ifstream in; 
354         m->openInputFile(fastafile, in);
355         
356         while (!in.eof()) {
357             Sequence seq(in); m->gobble(in);
358             
359             if (m->control_pressed) { break; }
360             if (seq.getName() != "") {
361                 vector<string> thisSeqsGroups = ct.getGroups(seq.getName());
362                 for (int i = 0; i < thisSeqsGroups.size(); i++) {
363                     if (m->inUsersGroups(thisSeqsGroups[i], Groups)) { //if this sequence belongs to a group we want them print
364                         seq.printSequence(*(ffiles[thisSeqsGroups[i]]));
365                         int numSeqs = ct.getGroupCount(seq.getName(), Groups[i]);
366                         (*(cfiles[thisSeqsGroups[i]])) << seq.getName() << '\t' << numSeqs << '\t' << numSeqs << endl;
367                     }
368                 }
369             }
370         }
371         in.close();
372         
373         //close and delete ofstreams
374         for (int i=0; i<Groups.size(); i++) {  
375             (*ffiles[Groups[i]]).close(); delete ffiles[Groups[i]];
376             (*cfiles[Groups[i]]).close(); delete cfiles[Groups[i]];
377         }
378         
379         return 0;
380
381     }
382         catch(exception& e) {
383                 m->errorOut(e, "SplitGroupCommand", "runCount");
384                 exit(1);
385         }
386 }
387 //**********************************************************************************************************************
388
389