]> git.donarmstrong.com Git - mothur.git/blob - parsefastaqcommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / parsefastaqcommand.cpp
1 /*
2  *  parsefastaqcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/30/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "parsefastaqcommand.h"
11 #include "sequence.hpp"
12
13 //**********************************************************************************************************************
14 vector<string> ParseFastaQCommand::setParameters(){     
15         try {
16                 CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfastq);
17                 CommandParameter pfasta("fasta", "Bool", "", "T", "", "", "",false,false); parameters.push_back(pfasta);
18                 CommandParameter pqual("qfile", "Bool", "", "T", "", "", "",false,false); parameters.push_back(pqual);
19                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
20                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
21                 
22                 vector<string> myArray;
23                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
24                 return myArray;
25         }
26         catch(exception& e) {
27                 m->errorOut(e, "ParseFastaQCommand", "setParameters");
28                 exit(1);
29         }
30 }
31 //**********************************************************************************************************************
32 string ParseFastaQCommand::getHelpString(){     
33         try {
34                 string helpString = "";
35                 helpString += "The fastq.info command reads a fastq file and creates a fasta and quality file.\n";
36                 helpString += "The fastq.info command parameters are fastq, fasta and qfile; fastq is required.\n";
37                 helpString += "The fastq.info command should be in the following format: fastq.info(fastaq=yourFastaQFile).\n";
38         helpString += "The fasta parameter allows you to indicate whether you want a fasta file generated. Default=T.\n";
39         helpString += "The qfile parameter allows you to indicate whether you want a quality file generated. Default=T.\n";
40                 helpString += "Example fastq.info(fastaq=test.fastaq).\n";
41                 helpString += "Note: No spaces between parameter labels (i.e. fastq), '=' and yourFastQFile.\n";
42                 return helpString;
43         }
44         catch(exception& e) {
45                 m->errorOut(e, "ParseFastaQCommand", "getHelpString");
46                 exit(1);
47         }
48 }
49 //**********************************************************************************************************************
50 string ParseFastaQCommand::getOutputFileNameTag(string type, string inputName=""){      
51         try {
52         string outputFileName = "";
53                 map<string, vector<string> >::iterator it;
54         
55         //is this a type this command creates
56         it = outputTypes.find(type);
57         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
58         else {
59             if (type == "fasta") {  outputFileName =  "fasta"; }
60             else if (type == "qfile") {  outputFileName =  "qual"; }
61             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
62         }
63         return outputFileName;
64         }
65         catch(exception& e) {
66                 m->errorOut(e, "ParseFastaQCommand", "getOutputFileNameTag");
67                 exit(1);
68         }
69 }
70
71 //**********************************************************************************************************************
72 ParseFastaQCommand::ParseFastaQCommand(){       
73         try {
74                 abort = true; calledHelp = true; 
75                 setParameters();
76                 vector<string> tempOutNames;
77                 outputTypes["fasta"] = tempOutNames;
78                 outputTypes["qfile"] = tempOutNames;
79         }
80         catch(exception& e) {
81                 m->errorOut(e, "ParseFastaQCommand", "ParseFastaQCommand");
82                 exit(1);
83         }
84 }
85 //**********************************************************************************************************************
86 ParseFastaQCommand::ParseFastaQCommand(string option){
87         try {
88                 abort = false; calledHelp = false;   
89                 
90                 if(option == "help") {  help(); abort = true; calledHelp = true; }
91                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
92                 
93                 else {
94                         vector<string> myArray = setParameters();
95                         
96                         OptionParser parser(option);
97                         map<string,string> parameters = parser.getParameters();
98                         
99                         ValidParameters validParameter;
100                         map<string,string>::iterator it;
101
102                         //check to make sure all parameters are valid for command
103                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
104                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
105                         }
106                         
107                         //initialize outputTypes
108                         vector<string> tempOutNames;
109                         outputTypes["fasta"] = tempOutNames;
110                         outputTypes["qfile"] = tempOutNames;
111                         
112                         //if the user changes the input directory command factory will send this info to us in the output parameter 
113                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
114                         if (inputDir == "not found"){   inputDir = "";          }
115                         else {
116                                 string path;
117                                 it = parameters.find("fastq");
118                                 //user has given a template file
119                                 if(it != parameters.end()){ 
120                                         path = m->hasPath(it->second);
121                                         //if the user has not given a path then, add inputdir. else leave path alone.
122                                         if (path == "") {       parameters["fastq"] = inputDir + it->second;            }
123                                 }
124                         }
125                         
126                         //check for required parameters
127                         fastaQFile = validParameter.validFile(parameters, "fastq", true);
128                         if (fastaQFile == "not found") {        m->mothurOut("fastq is a required parameter for the fastq.info command.");      m->mothurOutEndLine();  abort = true;   }
129                         else if (fastaQFile == "not open")      {       fastaQFile = ""; abort = true;  }       
130                         
131                         //if the user changes the output directory command factory will send this info to us in the output parameter 
132                         outputDir = validParameter.validFile(parameters, "outputdir", false);   if (outputDir == "not found"){  outputDir = m->hasPath(fastaQFile);     }
133                         
134                         string temp;
135                         temp = validParameter.validFile(parameters, "fasta", false);    if(temp == "not found"){        temp = "T";     }
136                         fasta = m->isTrue(temp); 
137
138                         temp = validParameter.validFile(parameters, "qfile", false);    if(temp == "not found"){        temp = "T";     }
139                         qual = m->isTrue(temp); 
140                         
141                         if ((!fasta) && (!qual)) { m->mothurOut("[ERROR]: no outputs selected. Aborting."); m->mothurOutEndLine(); abort=true; }
142
143                 }               
144         }
145         catch(exception& e) {
146                 m->errorOut(e, "ParseFastaQCommand", "ParseFastaQCommand");
147                 exit(1);
148         }
149 }
150 //**********************************************************************************************************************
151
152 int ParseFastaQCommand::execute(){
153         try {
154                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
155                 
156                 //open Output Files
157                 string fastaFile = outputDir + m->getRootName(m->getSimpleName(fastaQFile)) + getOutputFileNameTag("fasta");
158                 string qualFile = outputDir + m->getRootName(m->getSimpleName(fastaQFile)) + getOutputFileNameTag("qfile");
159                 ofstream outFasta, outQual;
160                 
161                 if (fasta) { m->openOutputFile(fastaFile, outFasta);  outputNames.push_back(fastaFile); outputTypes["fasta"].push_back(fastaFile);      }
162                 if (qual) { m->openOutputFile(qualFile, outQual);       outputNames.push_back(qualFile);  outputTypes["qfile"].push_back(qualFile);             }
163                 
164                 ifstream in;
165                 m->openInputFile(fastaQFile, in);
166                 
167                 while (!in.eof()) {
168                         
169                         if (m->control_pressed) { break; }
170                 
171                         //read sequence name
172                         string name = m->getline(in); m->gobble(in);
173                         if (name == "") {  m->mothurOut("[ERROR]: Blank fasta name."); m->mothurOutEndLine(); m->control_pressed = true; break; }
174                         else if (name[0] != '@') { m->mothurOut("[ERROR]: reading " + name + " expected a name with @ as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
175                         else { name = name.substr(1); }
176                         
177                         //read sequence
178                         string sequence = m->getline(in); m->gobble(in);
179                         if (sequence == "") {  m->mothurOut("[ERROR]: missing sequence for " + name); m->mothurOutEndLine(); m->control_pressed = true; break; }
180                         
181                         //read sequence name
182                         string name2 = m->getline(in); m->gobble(in);
183                         if (name2 == "") {  m->mothurOut("[ERROR]: Blank quality name."); m->mothurOutEndLine(); m->control_pressed = true; break; }
184                         else if (name2[0] != '+') { m->mothurOut("[ERROR]: reading " + name2 + " expected a name with + as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
185                         else { name2 = name2.substr(1);  }
186                         
187                         //read quality scores
188                         string quality = m->getline(in); m->gobble(in);
189                         if (quality == "") {  m->mothurOut("[ERROR]: missing quality for " + name2); m->mothurOutEndLine(); m->control_pressed = true; break; }
190                         
191                         //sanity check sequence length and number of quality scores match
192                         if (name2 != "") { if (name != name2) { m->mothurOut("[ERROR]: names do not match. read " + name + " for fasta and " + name2 + " for quality."); m->mothurOutEndLine(); m->control_pressed = true; break; } }
193                         if (quality.length() != sequence.length()) { m->mothurOut("[ERROR]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; break; }
194                         
195                         //print sequence info to files
196                         if (fasta) { outFasta << ">" << name << endl << sequence << endl; }
197                         
198                         if (qual) { 
199                                 vector<int> qualScores = convertQual(quality);
200                                 outQual << ">" << name << endl;
201                                 for (int i = 0; i < qualScores.size(); i++) { outQual << qualScores[i] << " "; }
202                                 outQual << endl;
203                         }
204                 }
205                 
206                 in.close();
207                 if (fasta)      { outFasta.close();     }
208                 if (qual)       { outQual.close();      }
209                 
210                 if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(fastaFile); m->mothurRemove(qualFile); return 0; }
211                 
212                 //set fasta file as new current fastafile
213                 string current = "";
214                 itTypes = outputTypes.find("fasta");
215                 if (itTypes != outputTypes.end()) {
216                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
217                 }
218                 
219                 itTypes = outputTypes.find("qfile");
220                 if (itTypes != outputTypes.end()) {
221                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
222                 }               
223                 
224                 m->mothurOutEndLine();
225                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
226                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
227                 m->mothurOutEndLine();
228
229                 return 0;
230         }
231         catch(exception& e) {
232                 m->errorOut(e, "ParseFastaQCommand", "execute");
233                 exit(1);
234         }
235 }
236 //**********************************************************************************************************************
237 vector<int> ParseFastaQCommand::convertQual(string qual) {
238         try {
239                 vector<int> qualScores;
240                 
241                 int controlChar = int('@');
242                 
243                 for (int i = 0; i < qual.length(); i++) { 
244                         int temp = int(qual[i]);
245                         temp -= controlChar;
246                         
247                         qualScores.push_back(temp);
248                 }
249                 
250                 return qualScores;
251         }
252         catch(exception& e) {
253                 m->errorOut(e, "ParseFastaQCommand", "convertQual");
254                 exit(1);
255         }
256 }
257 //**********************************************************************************************************************
258
259
260