]> git.donarmstrong.com Git - mothur.git/blob - splitgroupscommand.cpp
Merge remote-tracking branch 'origin/master'
[mothur.git] / splitgroupscommand.cpp
1 /*
2  *  splitgroupscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/20/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "splitgroupscommand.h"
11 #include "sharedutilities.h"
12 #include "sequenceparser.h"
13 #include "counttable.h"
14
15 //**********************************************************************************************************************
16 vector<string> SplitGroupCommand::setParameters(){      
17         try {           
18                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
19         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
20         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "CountGroup", "none",false,false); parameters.push_back(pcount);
21                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "CountGroup", "none",false,false); parameters.push_back(pgroup);
22                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
23                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
25                 
26                 vector<string> myArray;
27                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
28                 return myArray;
29         }
30         catch(exception& e) {
31                 m->errorOut(e, "SplitGroupCommand", "setParameters");
32                 exit(1);
33         }
34 }
35 //**********************************************************************************************************************
36 string SplitGroupCommand::getHelpString(){      
37         try {
38                 string helpString = "";
39                 helpString += "The split.groups command reads a group or count file, and parses your fasta and names or count files by groups. \n";
40                 helpString += "The split.groups command parameters are fasta, name, group, count and groups.\n";
41                 helpString += "The fasta and group or count parameters are required.\n";
42                 helpString += "The groups parameter allows you to select groups to create files for.  \n";
43                 helpString += "For example if you set groups=A-B-C, you will get a .A.fasta, .A.names, .B.fasta, .B.names, .C.fasta, .C.names files.  \n";
44                 helpString += "If you want .fasta and .names files for all groups, set groups=all.  \n";
45                 helpString += "The split.groups command should be used in the following format: split.group(fasta=yourFasta, group=yourGroupFile).\n";
46                 helpString += "Example: split.groups(fasta=abrecovery.fasta, group=abrecovery.groups).\n";
47                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
48                 return helpString;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "SplitGroupCommand", "getHelpString");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string SplitGroupCommand::getOutputFileNameTag(string type, string inputName=""){       
57         try {
58         string outputFileName = "";
59                 map<string, vector<string> >::iterator it;
60         
61         //is this a type this command creates
62         it = outputTypes.find(type);
63         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
64         else {
65             if (type == "fasta")            {   outputFileName =  "fasta";   }
66             else if (type == "name")        {   outputFileName =  "names";   }
67             else if (type == "count")        {   outputFileName =  "count_table";   }
68             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
69         }
70         return outputFileName;
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "SplitGroupCommand", "getOutputFileNameTag");
74                 exit(1);
75         }
76 }
77 //**********************************************************************************************************************
78 SplitGroupCommand::SplitGroupCommand(){ 
79         try {
80                 abort = true; calledHelp = true; 
81                 setParameters();
82                 vector<string> tempOutNames;
83                 outputTypes["fasta"] = tempOutNames;
84                 outputTypes["name"] = tempOutNames;
85         outputTypes["count"] = tempOutNames;
86         }
87         catch(exception& e) {
88                 m->errorOut(e, "SplitGroupCommand", "SplitGroupCommand");
89                 exit(1);
90         }
91 }
92 //**********************************************************************************************************************
93 SplitGroupCommand::SplitGroupCommand(string option)  {
94         try {
95                 abort = false; calledHelp = false;   
96                         
97                 //allow user to run help
98                 if(option == "help") { help(); abort = true; calledHelp = true; }
99                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
100                 
101                 else {
102                         vector<string> myArray = setParameters();
103                         
104                         OptionParser parser(option);
105                         map<string, string> parameters = parser.getParameters();
106                         
107                         ValidParameters validParameter;
108                         map<string, string>::iterator it;
109                 
110                         //check to make sure all parameters are valid for command
111                         for (it = parameters.begin(); it != parameters.end(); it++) { 
112                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
113                         }
114                         
115                         //initialize outputTypes
116                         vector<string> tempOutNames;
117                         outputTypes["fasta"] = tempOutNames;
118                         outputTypes["name"] = tempOutNames;
119             outputTypes["count"] = tempOutNames;
120                 
121                         //if the user changes the input directory command factory will send this info to us in the output parameter 
122                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
123                         if (inputDir == "not found"){   inputDir = "";          }
124                         else {
125                                 string path;
126                                 it = parameters.find("group");
127                                 //user has given a template file
128                                 if(it != parameters.end()){ 
129                                         path = m->hasPath(it->second);
130                                         //if the user has not given a path then, add inputdir. else leave path alone.
131                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
132                                 }
133                                 
134                                 it = parameters.find("fasta");
135                                 //user has given a template file
136                                 if(it != parameters.end()){ 
137                                         path = m->hasPath(it->second);
138                                         //if the user has not given a path then, add inputdir. else leave path alone.
139                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
140                                 }
141                                 
142                                 it = parameters.find("name");
143                                 //user has given a template file
144                                 if(it != parameters.end()){ 
145                                         path = m->hasPath(it->second);
146                                         //if the user has not given a path then, add inputdir. else leave path alone.
147                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
148                                 }
149                 
150                 it = parameters.find("count");
151                                 //user has given a template file
152                                 if(it != parameters.end()){ 
153                                         path = m->hasPath(it->second);
154                                         //if the user has not given a path then, add inputdir. else leave path alone.
155                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
156                                 }
157                         }
158
159                         
160                         namefile = validParameter.validFile(parameters, "name", true);
161                         if (namefile == "not open") { namefile = ""; abort = true; }
162                         else if (namefile == "not found") { namefile = ""; }    
163                         else { m->setNameFile(namefile); }
164                 
165                         fastafile = validParameter.validFile(parameters, "fasta", true);
166                         if (fastafile == "not open") { abort = true; }
167                         else if (fastafile == "not found") {                    
168                                 fastafile = m->getFastaFile(); 
169                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
170                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
171                         }else { m->setFastaFile(fastafile); }   
172                         
173                         groupfile = validParameter.validFile(parameters, "group", true);
174                         if (groupfile == "not open") {  groupfile = ""; abort = true; } 
175                         else if (groupfile == "not found") { groupfile = "";
176                         }else {  m->setGroupFile(groupfile); }
177             
178             countfile = validParameter.validFile(parameters, "count", true);
179                         if (countfile == "not open") { countfile = ""; abort = true; }
180                         else if (countfile == "not found") { countfile = ""; }  
181                         else { m->setCountTableFile(countfile); }
182             
183             if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
184             
185             if ((countfile != "") && (groupfile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }
186             
187             if ((countfile == "") && (groupfile == "")) {
188                 if (namefile == "") { //check for count then group
189                     countfile = m->getCountTableFile(); 
190                                         if (countfile != "") {  m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
191                                         else { 
192                                                 groupfile = m->getGroupFile(); 
193                         if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
194                         else { 
195                             m->mothurOut("You need to provide a count or group file."); m->mothurOutEndLine(); 
196                             abort = true; 
197                         }       
198                                         }       
199                 }else { //check for group
200                     groupfile = m->getGroupFile(); 
201                     if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
202                     else { 
203                         m->mothurOut("You need to provide a count or group file."); m->mothurOutEndLine(); 
204                         abort = true; 
205                     }   
206                 }
207             }
208                         
209                         groups = validParameter.validFile(parameters, "groups", false);         
210                         if (groups == "not found") { groups = ""; }
211                         else { m->splitAtDash(groups, Groups);  }
212                                                 
213                         //if the user changes the output directory command factory will send this info to us in the output parameter 
214                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
215                 if (groupfile != "") { outputDir = m->hasPath(groupfile); }
216                 else { outputDir = m->hasPath(countfile);  }
217             }
218                         
219             if (countfile == "") {
220                 if (namefile == "") {
221                     vector<string> files; files.push_back(fastafile);
222                     parser.getNameFile(files);
223                 }
224             }
225                 }
226
227         }
228         catch(exception& e) {
229                 m->errorOut(e, "SplitGroupCommand", "SplitAbundCommand");
230                 exit(1);
231         }
232 }
233 //**********************************************************************************************************************
234 int SplitGroupCommand::execute(){
235         try {
236         
237                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
238                 
239         if (countfile == "" ) {  runNameGroup();  }
240         else { runCount();  }
241                                 
242                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } return 0; }
243                 
244                 string current = "";
245                 itTypes = outputTypes.find("fasta");
246                 if (itTypes != outputTypes.end()) {
247                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
248                 }
249                 
250                 itTypes = outputTypes.find("name");
251                 if (itTypes != outputTypes.end()) {
252                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
253                 }
254         
255         itTypes = outputTypes.find("count");
256                 if (itTypes != outputTypes.end()) {
257                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
258                 }
259                 
260                 m->mothurOutEndLine();
261                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
262                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
263                 m->mothurOutEndLine();
264                 
265                 return 0;
266         }
267         catch(exception& e) {
268                 m->errorOut(e, "SplitGroupCommand", "execute");
269                 exit(1);
270         }
271 }
272 //**********************************************************************************************************************
273 int SplitGroupCommand::runNameGroup(){
274         try {
275         SequenceParser* parser;
276                 if (namefile == "") {   parser = new SequenceParser(groupfile, fastafile);                              }
277                 else                            {       parser = new SequenceParser(groupfile, fastafile, namefile);    }
278                 
279                 if (m->control_pressed) { delete parser; return 0; }
280         
281                 vector<string> namesGroups = parser->getNamesOfGroups();
282                 SharedUtil util;  util.setGroups(Groups, namesGroups);  
283                 
284                 string fastafileRoot = outputDir + m->getRootName(m->getSimpleName(fastafile));
285                 string namefileRoot = outputDir + m->getRootName(m->getSimpleName(namefile));
286                 
287                 m->mothurOutEndLine();
288                 for (int i = 0; i < Groups.size(); i++) {
289                         
290                         m->mothurOut("Processing group: " + Groups[i]); m->mothurOutEndLine();
291                         
292                         string newFasta = fastafileRoot + Groups[i] + "." + getOutputFileNameTag("fasta");
293                         string newName = namefileRoot + Groups[i] + "." + getOutputFileNameTag("name");
294                         
295                         parser->getSeqs(Groups[i], newFasta, false);
296                         outputNames.push_back(newFasta); outputTypes["fasta"].push_back(newFasta);
297                         if (m->control_pressed) { delete parser; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);        } return 0; }
298             
299                         if (namefile != "") { 
300                                 parser->getNameMap(Groups[i], newName); 
301                                 outputNames.push_back(newName); outputTypes["name"].push_back(newName);
302                         }
303                         
304                         if (m->control_pressed) { delete parser; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);        } return 0; }
305                 }
306                 
307                 delete parser;
308         
309         return 0;
310
311     }
312         catch(exception& e) {
313                 m->errorOut(e, "SplitGroupCommand", "runNameGroup");
314                 exit(1);
315         }
316 }
317 //**********************************************************************************************************************
318 int SplitGroupCommand::runCount(){
319         try {
320         
321         CountTable ct;
322         ct.readTable(countfile);
323         if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, cannot split by group.\n"); m->control_pressed = true; }
324         
325         if (m->control_pressed) { return 0; }
326         
327         vector<string> namesGroups = ct.getNamesOfGroups();
328         SharedUtil util;  util.setGroups(Groups, namesGroups); 
329         
330         //fill filehandles with neccessary ofstreams
331         map<string, ofstream*> ffiles;
332         map<string, ofstream*> cfiles;
333         ofstream* temp;
334         for (int i=0; i<Groups.size(); i++) {
335             temp = new ofstream;
336             ffiles[Groups[i]] = temp;
337             string newFasta = outputDir + m->getRootName(m->getSimpleName(fastafile)) + Groups[i] + "." + getOutputFileNameTag("fasta");
338             outputNames.push_back(newFasta); outputTypes["fasta"].push_back(newFasta);
339             m->openOutputFile(newFasta, (*temp));
340             temp = new ofstream;
341             cfiles[Groups[i]] = temp;
342             string newCount = outputDir + m->getRootName(m->getSimpleName(countfile)) + Groups[i] + "." + getOutputFileNameTag("count");
343             m->openOutputFile(newCount, (*temp));
344             outputNames.push_back(newCount); outputTypes["count"].push_back(newCount);
345             (*temp) << "Representative_Sequence\ttotal\t" << Groups[i] << endl;
346         }
347         
348         ifstream in; 
349         m->openInputFile(fastafile, in);
350         
351         while (!in.eof()) {
352             Sequence seq(in); m->gobble(in);
353             
354             if (m->control_pressed) { break; }
355             if (seq.getName() != "") {
356                 vector<string> thisSeqsGroups = ct.getGroups(seq.getName());
357                 for (int i = 0; i < thisSeqsGroups.size(); i++) {
358                     if (m->inUsersGroups(thisSeqsGroups[i], Groups)) { //if this sequence belongs to a group we want them print
359                         seq.printSequence(*(ffiles[thisSeqsGroups[i]]));
360                         int numSeqs = ct.getGroupCount(seq.getName(), Groups[i]);
361                         (*(cfiles[thisSeqsGroups[i]])) << seq.getName() << '\t' << numSeqs << '\t' << numSeqs << endl;
362                     }
363                 }
364             }
365         }
366         in.close();
367         
368         //close and delete ofstreams
369         for (int i=0; i<Groups.size(); i++) {  
370             (*ffiles[Groups[i]]).close(); delete ffiles[Groups[i]];
371             (*cfiles[Groups[i]]).close(); delete cfiles[Groups[i]];
372         }
373         
374         return 0;
375
376     }
377         catch(exception& e) {
378                 m->errorOut(e, "SplitGroupCommand", "runCount");
379                 exit(1);
380         }
381 }
382 //**********************************************************************************************************************
383
384