]> git.donarmstrong.com Git - mothur.git/blob - renameseqscommand.cpp
added rename.seqs command.
[mothur.git] / renameseqscommand.cpp
1 //
2 //  renameseqscommand.cpp
3 //  Mothur
4 //
5 //  Created by SarahsWork on 5/28/13.
6 //  Copyright (c) 2013 Schloss Lab. All rights reserved.
7 //
8
9 #include "renameseqscommand.h"
10 #include "sequence.hpp"
11 #include "groupmap.h"
12
13 //**********************************************************************************************************************
14 vector<string> RenameSeqsCommand::setParameters(){
15         try {
16                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
17         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname);
18                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false,true); parameters.push_back(pgroup);
19                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
20                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
21                 
22                 vector<string> myArray;
23                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
24                 return myArray;
25         }
26         catch(exception& e) {
27                 m->errorOut(e, "RenameSeqsCommand", "setParameters");
28                 exit(1);
29         }
30 }
31 //**********************************************************************************************************************
32 string RenameSeqsCommand::getHelpString(){
33         try {
34                 string helpString = "";
35                 helpString += "The rename.seqs command reads a fastafile and groupfile with an optional namefile, and creates files with the sequence names concatenated with the group. For example if a line in the group file is 'seq1   group1', the new sequence name will be seq1_group1.\n";
36                 helpString += "The rename.seqs command parameters are fasta, name and group. Fasta and group are required, unless a current file is available for both.\n";
37                 helpString += "The rename.seqs command should be in the following format: \n";
38                 helpString += "rename.seqs(fasta=yourFastaFile, group=yourGroupFile) \n";
39                 helpString += "Example rename.seqs(fasta=abrecovery.unique.fasta, group=abrecovery.group).\n";
40                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
41                 return helpString;
42         }
43         catch(exception& e) {
44                 m->errorOut(e, "RenameSeqsCommand", "getHelpString");
45                 exit(1);
46         }
47 }
48 //**********************************************************************************************************************
49 string RenameSeqsCommand::getOutputPattern(string type) {
50     try {
51         string pattern = "";
52         
53         if (type == "fasta")        {  pattern = "[filename],renamed,[extension]"; }
54         else if (type == "name")    {  pattern = "[filename],renamed,[extension]"; }
55         else if (type == "group")   {  pattern = "[filename],renamed,[extension]"; }
56         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
57         
58         return pattern;
59     }
60     catch(exception& e) {
61         m->errorOut(e, "RenameSeqsCommand", "getOutputPattern");
62         exit(1);
63     }
64 }
65 //**********************************************************************************************************************
66 RenameSeqsCommand::RenameSeqsCommand(){
67         try {
68                 abort = true; calledHelp = true;
69                 setParameters();
70                 vector<string> tempOutNames;
71                 outputTypes["fasta"] = tempOutNames;
72         outputTypes["name"] = tempOutNames;
73         outputTypes["group"] = tempOutNames;
74         }
75         catch(exception& e) {
76                 m->errorOut(e, "RenameSeqsCommand", "RenameSeqsCommand");
77                 exit(1);
78         }
79 }
80 /**************************************************************************************/
81 RenameSeqsCommand::RenameSeqsCommand(string option)  {
82         try {
83                 abort = false; calledHelp = false;
84                 
85                 //allow user to run help
86                 if(option == "help") { help(); abort = true; calledHelp = true; }
87                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
88                 
89                 else {
90                         vector<string> myArray = setParameters();
91                         
92                         OptionParser parser(option);
93                         map<string,string> parameters = parser.getParameters();
94                         
95                         ValidParameters validParameter;
96                         map<string, string>::iterator it;
97             
98                         //check to make sure all parameters are valid for command
99                         for (it = parameters.begin(); it != parameters.end(); it++) {
100                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
101                         }
102                         
103                         vector<string> tempOutNames;
104             outputTypes["fasta"] = tempOutNames;
105             outputTypes["name"] = tempOutNames;
106             outputTypes["group"] = tempOutNames;
107             
108                         //if the user changes the input directory command factory will send this info to us in the output parameter
109                         string inputDir = validParameter.validFile(parameters, "inputdir", false);
110                         if (inputDir == "not found"){   inputDir = "";          }
111                         else {
112                                 string path;
113                                 it = parameters.find("fasta");
114                                 //user has given a template file
115                                 if(it != parameters.end()){
116                                         path = m->hasPath(it->second);
117                                         //if the user has not given a path then, add inputdir. else leave path alone.
118                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
119                                 }
120                                 
121                                 it = parameters.find("name");
122                                 //user has given a template file
123                                 if(it != parameters.end()){
124                                         path = m->hasPath(it->second);
125                                         //if the user has not given a path then, add inputdir. else leave path alone.
126                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
127                                 }
128                 
129                 it = parameters.find("group");
130                                 //user has given a template file
131                                 if(it != parameters.end()){
132                                         path = m->hasPath(it->second);
133                                         //if the user has not given a path then, add inputdir. else leave path alone.
134                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
135                                 }
136
137                         }
138             
139                         
140                         //check for required parameters
141                         fastaFile = validParameter.validFile(parameters, "fasta", true);
142                         if (fastaFile == "not open") { abort = true; }
143                         else if (fastaFile == "not found") {
144                                 fastaFile = m->getFastaFile();
145                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
146                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
147                         }else { m->setFastaFile(fastaFile); }
148                         
149                         //if the user changes the output directory command factory will send this info to us in the output parameter
150                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){
151                                 outputDir = "";
152                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it
153                         }
154                         
155             groupfile = validParameter.validFile(parameters, "group", true);
156             if (groupfile == "not open") { abort = true; }
157                         else if (groupfile == "not found") {
158                                 groupfile = m->getGroupFile();
159                                 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
160                                 else {  m->mothurOut("You have no current groupfile and the group parameter is required."); m->mothurOutEndLine(); abort = true; }
161                         }else { m->setGroupFile(groupfile); }
162                         
163                         //if the user changes the output directory command factory will send this info to us in the output parameter
164                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){ outputDir = ""; }
165
166                         nameFile = validParameter.validFile(parameters, "name", true);
167                         if (nameFile == "not open") { abort = true; }
168                         else if (nameFile == "not found"){ nameFile =""; }
169                         else { m->setNameFile(nameFile); }
170             
171             if (nameFile == "") {
172                 vector<string> files; files.push_back(fastaFile);
173                 parser.getNameFile(files);
174             }
175
176                 }
177         
178         }
179         catch(exception& e) {
180                 m->errorOut(e, "RenameSeqsCommand", "RenameSeqsCommand");
181                 exit(1);
182         }
183 }
184 /**************************************************************************************/
185 int RenameSeqsCommand::execute() {
186         try {
187                 
188                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
189         
190         GroupMap groupMap(groupfile);
191         groupMap.readMap();
192                 
193                 //prepare filenames and open files
194         string thisOutputDir = outputDir;
195                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastaFile);  }
196                 string outFastaFile = thisOutputDir + m->getRootName(m->getSimpleName(fastaFile));
197         map<string, string> variables;
198         variables["[filename]"] = outFastaFile;
199         variables["[extension]"] = m->getExtension(fastaFile);
200         outFastaFile = getOutputFileName("fasta", variables);
201         outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
202         
203         ofstream outFasta;
204                 m->openOutputFile(outFastaFile, outFasta);
205         
206         ifstream in;
207         m->openInputFile(fastaFile, in);
208         
209         while (!in.eof()) {
210             if (m->control_pressed) { break; }
211             
212             Sequence seq(in); m->gobble(in);
213             string group = groupMap.getGroup(seq.getName());
214             if (group == "not found") {  m->mothurOut("[ERROR]: " + seq.getName() + " is not in your group file, please correct.\n"); m->control_pressed = true; }
215             else {
216                 string newName = seq.getName() + "_" + group;
217                 seq.setName(newName);
218                 seq.printSequence(outFasta);
219             }
220             
221         }
222         in.close();
223         
224         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);  } return 0; }
225         
226         bool notDone = true;
227         if (nameFile != "") {
228             thisOutputDir = outputDir;
229             if (outputDir == "") {  thisOutputDir += m->hasPath(nameFile);  }
230             string outNameFile = thisOutputDir + m->getRootName(m->getSimpleName(nameFile));
231             variables["[filename]"] = outNameFile;
232             variables["[extension]"] = m->getExtension(nameFile);
233             outNameFile = getOutputFileName("group", variables);
234             outputNames.push_back(outNameFile); outputTypes["name"].push_back(outNameFile);
235             
236             ofstream outName;
237             m->openOutputFile(outNameFile, outName);
238             
239             map<string, vector<string> > nameMap;
240             m->readNames(nameFile, nameMap);
241             
242             //process name file changing names
243             for (map<string, vector<string> >::iterator it = nameMap.begin(); it != nameMap.end(); it++) {
244                 for (int i = 0; i < (it->second).size()-1; i++) {
245                     if (m->control_pressed) { break; }
246                     string group = groupMap.getGroup((it->second)[i]);
247                     if (group == "not found") {  m->mothurOut("[ERROR]: " + (it->second)[i] + " is not in your group file, please correct.\n"); m->control_pressed = true;  }
248                     else {
249                         string newName = (it->second)[i] + "_" + group;
250                         groupMap.renameSeq((it->second)[i], newName); //change in group file
251                         (it->second)[i] = newName; //change in namefile
252                     }
253                     if (i == 0) {  outName << (it->second)[i] << '\t' << (it->second)[i] << ','; }
254                     else { outName << (it->second)[i] << ','; }
255                 }
256                 
257                 //print last one
258                 if ((it->second).size() == 1) {
259                     string group = groupMap.getGroup((it->second)[0]);
260                     if (group == "not found") {  m->mothurOut("[ERROR]: " + (it->second)[0] + " is not in your group file, please correct.\n"); m->control_pressed = true;  }
261                     else {
262                         string newName = (it->second)[0] + "_" + group;
263                         groupMap.renameSeq((it->second)[0], newName); //change in group file
264                         (it->second)[0] = newName; //change in namefile
265
266                         outName << (it->second)[0] << '\t' << (it->second)[0] << endl;
267                     }
268                 }
269                 else {
270                     string group = groupMap.getGroup((it->second)[(it->second).size()-1]);
271                     if (group == "not found") {  m->mothurOut("[ERROR]: " + (it->second)[(it->second).size()-1] + " is not in your group file, please correct.\n"); m->control_pressed = true;  }
272                     else {
273                         string newName = (it->second)[(it->second).size()-1] + "_" + group;
274                         groupMap.renameSeq((it->second)[(it->second).size()-1], newName); //change in group file
275                         (it->second)[(it->second).size()-1] = newName; //change in namefile
276
277                         outName << (it->second)[(it->second).size()-1] << endl;
278                     }
279                 }
280             }
281             notDone = false;
282             outName.close();
283         }
284         
285         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);  } return 0; }
286         
287         if (notDone) {
288             vector<string> seqs = groupMap.getNamesSeqs();
289             for (int i = 0; i < seqs.size(); i++) {
290                 if (m->control_pressed) { break; }
291                 string group = groupMap.getGroup(seqs[i]);
292                 string newName = seqs[i] + "_" + group;
293                 groupMap.renameSeq(seqs[i], newName);
294             }
295         }
296         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);  } return 0; }
297         
298         thisOutputDir = outputDir;
299         if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
300                 string outGroupFile = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
301         variables["[filename]"] = outGroupFile;
302         variables["[extension]"] = m->getExtension(groupfile);
303         outGroupFile = getOutputFileName("group", variables);
304         outputNames.push_back(outGroupFile); outputTypes["group"].push_back(outGroupFile);
305         
306         ofstream outGroup;
307                 m->openOutputFile(outGroupFile, outGroup);
308         groupMap.print(outGroup);
309         outGroup.close();
310         
311         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);  } return 0; }
312
313         m->mothurOutEndLine();
314         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
315         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
316         m->mothurOutEndLine();
317         
318         //set fasta file as new current fastafile
319         string current = "";
320         itTypes = outputTypes.find("fasta");
321         if (itTypes != outputTypes.end()) {
322             if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
323         }
324         
325         itTypes = outputTypes.find("name");
326         if (itTypes != outputTypes.end()) {
327             if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
328         }
329         
330         itTypes = outputTypes.find("group");
331         if (itTypes != outputTypes.end()) {
332             if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
333         }
334                                 
335                 return 0;
336         }
337         catch(exception& e) {
338                 m->errorOut(e, "RenameSeqsCommand", "execute");
339                 exit(1);
340         }
341 }
342 /**************************************************************************************/
343