5 // Created by SarahsWork on 10/28/13.
6 // Copyright (c) 2013 Schloss Lab. All rights reserved.
9 #include "sracommand.h"
10 #include "sffinfocommand.h"
11 #include "parsefastaqcommand.h"
13 //**********************************************************************************************************************
// Registers every parameter the sra command accepts: each CommandParameter is
// appended to `parameters`, then every registered name is copied into myArray.
// NOTE(review): only part of this function is visible in this excerpt (no return
// statement is shown); the comments below describe the visible statements only.
14 vector<string> SRACommand::setParameters(){
// Input-file parameters. sff, file and fastq carry the "sffFastQFile" group strings and the "xml" output type; group and oligos carry "groupOligos".
16 CommandParameter psff("sff", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(psff);
17 CommandParameter pgroup("group", "InputTypes", "", "", "groupOligos", "none", "none","",false,false); parameters.push_back(pgroup);
18 CommandParameter poligos("oligos", "InputTypes", "", "", "groupOligos", "none", "none","",false,false); parameters.push_back(poligos);
19 CommandParameter pfile("file", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfile);
20 CommandParameter pfastq("fastq", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfastq);
// contact is the only entry whose trailing flags are (false,true,true) instead of (false,false).
21 CommandParameter pcontact("contact", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pcontact);
22 //choose only one multiple options
// For the two "Multiple" entries the third argument is a '-' separated option list and the fourth is one of those options.
23 CommandParameter pplatform("platform", "Multiple", "_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT", "_LS454", "", "", "","",false,false); parameters.push_back(pplatform);
24 CommandParameter pinstrument("instrument", "Multiple", "454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified", "454_GS", "", "", "","",false,false); parameters.push_back(pinstrument);
// NOTE(review): these three "String" entries put AMPLICON/METAGENOMIC/PCR in the third
// argument and leave the fourth empty, whereas the "Number" entries below put their "0"
// in the fourth argument. Confirm against the CommandParameter constructor which slot
// is the default value.
25 CommandParameter plibstrategy("libstrategy", "String", "AMPLICON", "", "", "", "","",false,false); parameters.push_back(plibstrategy);
26 CommandParameter plibsource("libsource", "String", "METAGENOMIC", "", "", "", "","",false,false); parameters.push_back(plibsource);
27 CommandParameter plibselection("libselection", "String", "PCR", "", "", "", "","",false,false); parameters.push_back(plibselection);
// Mismatch allowances for primer, barcode, linker, spacer and total differences.
29 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
30 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);
31 CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
32 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
33 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
35 //every command must have inputdir and outputdir. This allows mothur users to redirect input and output files.
36 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
37 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
// Copy the registered parameter names, in registration order, into myArray.
39 vector<string> myArray;
40 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Report exception e through m->errorOut, tagged with the class and function name.
44 m->errorOut(e, "SRACommand", "setParameters");
48 //**********************************************************************************************************************
// Builds the help text shown to the user for the sra command, one sentence per
// parameter, by appending to helpString.
// Fix: the platform sentence misspelled "platform" as "platfrom" and ran two
// sentences together; all other text is unchanged.
// NOTE(review): only part of this function is visible in this excerpt (no return
// statement is shown).
49 string SRACommand::getHelpString(){
51 string helpString = "";
52 helpString += "The sra command creates the necessary files for a NCBI submission. The xml file and individual sff or fastq files parsed from the original sff or fastq file.\n";
53 helpString += "The sra command parameters are: sff, fastq, file, oligos, contact, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, group, platform, libstrategy, libsource, libselection and instrument.\n";
// Input files.
54 helpString += "The sff parameter is used to provide the original sff file.\n";
55 helpString += "The fastq parameter is used to provide the original fastq file.\n";
56 helpString += "The contact parameter is used to provide your contact file.\n";
57 helpString += "The oligos parameter is used to provide an oligos file to parse your sff or fastq file by.\n";
58 helpString += "The group parameter is used to provide the group file to parse your sff or fastq file by.\n";
59 helpString += "The file parameter is used to provide a file containing a list of individual fastq or sff files or paired fastq files with a group assignment. File lines can be 2 or 3 columns. The 2 column files are sff file then oligos or fastqfile then oligos. You may have multiple lines in the file. The 3 column files are for paired read libraries. The format is groupName, forwardFastqFile reverseFastqFile.\n";
// Mismatch allowances.
60 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
61 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
62 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
63 helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
64 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
// Controlled-vocabulary values written into the generated XML.
65 helpString += "The platform parameter is used to specify the platform you are using. Choices are: _LS454,ILLUMINA,ION_TORRENT,PACBIO_SMRT. Default=_LS454. This is a controlled vocabulary section in the XML file that will be generated.\n";
66 helpString += "The instrument parameter is used to specify instrument. Choices are 454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified. Default=454_GS. This is a controlled vocabulary section in the XML file that will be generated. \n";
67 helpString += "The libstrategy parameter is used to specify library strategy. Default=AMPLICON. Choices are AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq,OTHER. This is a controlled vocabulary section in the XML file that will be generated. \n";
68 helpString += "The libsource parameter is used to specify library source. Default=METAGENOMIC. Choices are METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA,OTHER. This is a controlled vocabulary section in the XML file that will be generated. \n";
69 helpString += "The libselection parameter is used to specify library selection. Default=PCR. Choices are PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other,unspecified. This is a controlled vocabulary section in the XML file that will be generated. \n";
// Usage line.
71 helpString += "The sra should be in the following format: \n";
72 helpString += "sra(...)\n";
// Report exception e through m->errorOut, tagged with the class and function name.
76 m->errorOut(e, "SRACommand", "getHelpString");
80 //**********************************************************************************************************************
// Maps an output type to its file-name pattern. Only "xml" is defined
// ("[filename],xml"); any other type logs an error and sets m->control_pressed.
// NOTE(review): the declaration of `pattern` and the return statement are not
// visible in this excerpt.
81 string SRACommand::getOutputPattern(string type) {
85 if (type == "xml") { pattern = "[filename],xml"; }
86 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
// Report exception e through m->errorOut, tagged with the class and function name.
91 m->errorOut(e, "SRACommand", "getOutputPattern");
95 //**********************************************************************************************************************
// Default constructor: marks the command as aborted with calledHelp set, and
// registers an empty file list under the "xml" key of outputTypes.
96 SRACommand::SRACommand(){
98 abort = true; calledHelp = true;
100 vector<string> tempOutNames;
101 outputTypes["xml"] = tempOutNames;
// Any std::exception is reported through m->errorOut with the class/function tag.
103 catch(exception& e) {
104 m->errorOut(e, "SRACommand", "SRACommand");
108 //**********************************************************************************************************************
// Option-string constructor: parses `option`, validates every parameter name,
// resolves input file paths against inputdir, and loads all command settings
// into the member variables. Any validation failure sets abort = true.
//
// Fixes relative to the previous version:
//  * The "no input given" check tested sfffile twice and never tested `file`, so a
//    run that supplied only file=... was rejected. It now tests fastqfile, sfffile
//    and file.
//  * The `file` parameter was validated twice in a row with identical code; the
//    redundant second validation was removed.
//
// NOTE(review): only part of this function is visible in this excerpt (several
// closing braces / else lines are not shown); statements are kept in their
// original order.
109 SRACommand::SRACommand(string option) {
111 abort = false; calledHelp = false;
112 libLayout = "single"; //controlled vocab
114 //allow user to run help
115 if(option == "help") { help(); abort = true; calledHelp = true; }
116 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
119 //valid parameters for this command
120 vector<string> myArray = setParameters();
122 OptionParser parser(option);
123 map<string,string> parameters = parser.getParameters();
125 ValidParameters validParameter;
126 map<string,string>::iterator it;
127 //check to make sure all parameters are valid for command
128 for (it = parameters.begin(); it != parameters.end(); it++) {
129 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
// Start with an empty list of xml output files.
132 vector<string> tempOutNames;
133 outputTypes["xml"] = tempOutNames;
135 //if the user changes the input directory command factory will send this info to us in the output parameter
136 string inputDir = validParameter.validFile(parameters, "inputdir", false);
137 if (inputDir == "not found"){ inputDir = ""; }
// For each file-valued parameter below: if the user supplied it without a path
// (m->hasPath returns ""), prefix it with inputDir.
141 it = parameters.find("sff");
142 //user has given an sff file
143 if(it != parameters.end()){
144 path = m->hasPath(it->second);
145 //if the user has not given a path then, add inputdir. else leave path alone.
146 if (path == "") { parameters["sff"] = inputDir + it->second; }
149 it = parameters.find("fastq");
150 //user has given a fastq file
151 if(it != parameters.end()){
152 path = m->hasPath(it->second);
153 //if the user has not given a path then, add inputdir. else leave path alone.
154 if (path == "") { parameters["fastq"] = inputDir + it->second; }
157 it = parameters.find("file");
158 //user has given a file of files
159 if(it != parameters.end()){
160 path = m->hasPath(it->second);
161 //if the user has not given a path then, add inputdir. else leave path alone.
162 if (path == "") { parameters["file"] = inputDir + it->second; }
165 it = parameters.find("group");
166 //user has given a group file
167 if(it != parameters.end()){
168 path = m->hasPath(it->second);
169 //if the user has not given a path then, add inputdir. else leave path alone.
170 if (path == "") { parameters["group"] = inputDir + it->second; }
173 it = parameters.find("oligos");
174 //user has given an oligos file
175 if(it != parameters.end()){
176 path = m->hasPath(it->second);
177 //if the user has not given a path then, add inputdir. else leave path alone.
178 if (path == "") { parameters["oligos"] = inputDir + it->second; }
181 it = parameters.find("contact");
182 //user has given a contact file
183 if(it != parameters.end()){
184 path = m->hasPath(it->second);
185 //if the user has not given a path then, add inputdir. else leave path alone.
186 if (path == "") { parameters["contact"] = inputDir + it->second; }
190 //check for parameters
// validFile yields "not open" when the file cannot be opened (abort) and
// "not found" when the parameter was not supplied (treated as empty).
191 fastqfile = validParameter.validFile(parameters, "fastq", true);
192 if (fastqfile == "not open") { fastqfile = ""; abort = true; }
193 else if (fastqfile == "not found") { fastqfile = ""; }
195 sfffile = validParameter.validFile(parameters, "sff", true);
196 if (sfffile == "not open") { sfffile = ""; abort = true; }
197 else if (sfffile == "not found") { sfffile = ""; }
199 file = validParameter.validFile(parameters, "file", true);
200 if (file == "not open") { file = ""; abort = true; }
201 else if (file == "not found") { file = ""; }
// A usable group file also becomes mothur's current group file.
203 groupfile = validParameter.validFile(parameters, "group", true);
204 if (groupfile == "not open") { groupfile = ""; abort = true; }
205 else if (groupfile == "not found") { groupfile = ""; }
206 else { m->setGroupFile(groupfile); }
// A usable oligos file also becomes mothur's current oligos file.
208 oligosfile = validParameter.validFile(parameters, "oligos", true);
209 if (oligosfile == "not found") { oligosfile = ""; }
210 else if(oligosfile == "not open") { abort = true; }
211 else { m->setOligosFile(oligosfile); }
// The contact file is mandatory.
213 contactfile = validParameter.validFile(parameters, "contact", true);
214 if (contactfile == "not found") { contactfile = ""; m->mothurOut("[ERROR]: You must provide a contact file before you can use the sra command."); m->mothurOutEndLine(); abort = true; }
215 else if(contactfile == "not open") { abort = true; }
// At least one of file, sff or fastq must be supplied.
221 if ((fastqfile == "") && (sfffile == "") && (file == "")) {
222 m->mothurOut("[ERROR]: You must provide a file, sff file or fastq file before you can use the sra command."); m->mothurOutEndLine(); abort = true;
// group and oligos are mutually exclusive.
225 if ((groupfile != "") && (oligosfile != "")) {
226 m->mothurOut("[ERROR]: You may not use a group file and an oligos file, only one."); m->mothurOutEndLine(); abort = true;
// Splitting a single fastq/sff file needs a group or oligos file: when neither was
// given, try mothur's current oligos file, then its current group file, and
// otherwise report an error.
229 if ((fastqfile != "") || (sfffile != "")) {
230 if ((groupfile == "") && (oligosfile == "")) {
231 oligosfile = m->getOligosFile();
232 if (oligosfile != "") { m->mothurOut("Using " + oligosfile + " as input file for the oligos parameter."); m->mothurOutEndLine(); }
234 groupfile = m->getGroupFile();
235 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
237 m->mothurOut("[ERROR]: You must provide groupfile or oligos file if splitting a fastq or sff file."); m->mothurOutEndLine(); abort = true;
243 //use only one Mutliple type _LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
244 platform = validParameter.validFile(parameters, "platform", false); if (platform == "not found") { platform = "_LS454"; }
245 if (!checkCasesPlatforms(platform)) { abort = true; } //error message in function
247 if (!abort) { //don't check instrument model is platform is bad
248 //454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified
249 instrumentModel = validParameter.validFile(parameters, "instrument", false); if (instrumentModel == "not found") { instrumentModel = "454_GS"; }
250 if (!checkCasesInstrumentModels(instrumentModel)) { abort = true; } //error message in function
// The controlled-vocabulary values are entered with '_' in place of spaces; after
// validation each is converted back to spaces.
252 //turn _ to spaces mothur's work around
253 for (int i = 0; i < instrumentModel.length(); i++) { if (instrumentModel[i] == '_') { instrumentModel[i] = ' '; } }
255 libStrategy = validParameter.validFile(parameters, "libstrategy", false); if (libStrategy == "not found") { libStrategy = "AMPLICON"; }
256 if (!checkCasesLibStrategy(libStrategy)) { abort = true; } //error message in function
258 //turn _ to spaces mothur's work around
259 for (int i = 0; i < libStrategy.length(); i++) { if (libStrategy[i] == '_') { libStrategy[i] = ' '; } }
261 libSource = validParameter.validFile(parameters, "libsource", false); if (libSource == "not found") { libSource = "METAGENOMIC"; }
262 if (!checkCasesLibSource(libSource)) { abort = true; } //error message in function
264 //turn _ to spaces mothur's work around
265 for (int i = 0; i < libSource.length(); i++) { if (libSource[i] == '_') { libSource[i] = ' '; } }
267 libSelection = validParameter.validFile(parameters, "libselection", false); if (libSelection == "not found") { libSelection = "PCR"; }
268 if (!checkCasesLibSelection(libSelection)) { abort = true; } //error message in function
270 //turn _ to spaces mothur's work around
271 for (int i = 0; i < libSelection.length(); i++) { if (libSelection[i] == '_') { libSelection[i] = ' '; } }
// Mismatch allowances; each defaults to 0 when not supplied.
274 string temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found"){ temp = "0"; }
275 m->mothurConvert(temp, bdiffs);
277 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found"){ temp = "0"; }
278 m->mothurConvert(temp, pdiffs);
280 temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
281 m->mothurConvert(temp, ldiffs);
283 temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }
284 m->mothurConvert(temp, sdiffs);
// tdiffs defaults to the sum of the four individual allowances.
286 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); }
287 m->mothurConvert(temp, tdiffs);
// An explicit tdiffs=0 is also replaced by the sum.
289 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }
294 catch(exception& e) {
295 m->errorOut(e, "SRACommand", "SRACommand");
299 //**********************************************************************************************************************
// Runs the sra command: loads the groups, splits or reads the input files per
// sample, then writes the NCBI submission XML (<Submission> ... </Submission>) to
// the "xml" output file.
// Returns 0 when help was requested or m->control_pressed is set, 2 when the
// command was aborted during parameter validation.
//
// Fixes relative to the previous version:
//  * <Contact> was emitted as `<Contact> email="...">` (tag closed before its
//    attribute); it is now `<Contact email="...">`.
//  * <Last> was filled with firstName; it now uses lastName.
//  * Three BioSample lines ended in a literal 'n' instead of the "\n" escape,
//    writing a stray 'n' into the XML.
//  * Exceptions were reported under the function name "SRACommand"; now "execute".
//
// NOTE(review): only part of this function is visible in this excerpt (the
// declaration of `out`, several closing braces and the final return are not
// shown); statements are kept in their original order.
300 int SRACommand::execute(){
303 if (abort == true) { if (calledHelp) { return 0; } return 2; }
// Load the sample (group) names from the oligos or group file; "scrap" is added as an extra group.
306 if (oligosfile != "") { readOligos(); Groups.push_back("scrap"); }
307 if (groupfile != "") { GroupMap groupmap(groupfile); groupmap.readMap(); Groups = groupmap.getNamesOfGroups(); Groups.push_back("scrap"); }
309 if (m->control_pressed) { return 0; }
// Sample name -> files holding that sample's reads.
312 map<string, vector<string> > filesBySample;
// Precedence of inputs: file, then sff, then fastq.
315 if (file != "") { readFile(filesBySample); }
316 else if (sfffile != "") { parseSffFile(filesBySample); }
317 else if (fastqfile != "") { parseFastqFile(filesBySample); }
319 //checks groups and files returned from parse - removes any groups that did not get reads assigned to them, orders files.
320 checkGroups(filesBySample);
// Output name: outputDir (or the input file's path when outputDir is empty) plus the input file's root name.
323 string thisOutputDir = outputDir;
324 if (outputDir == "") { thisOutputDir += m->hasPath(inputfile); }
325 map<string, string> variables;
326 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(inputfile));
327 string outputFileName = getOutputFileName("xml", variables);
328 outputNames.push_back(outputFileName); outputTypes["xml"].push_back(outputFileName);
330 m->openOutputFile(outputFileName, out);
// ---- Submission header: submitter, organization and contact details ----
333 ////////////////////////////////////////////////////////
334 out << "<Submission>\n";
335 out << "\t<Description>\n";
336 out << "\t\t<Comment> New Submission. Generated by mothur version " + m->getVersion() + " </Comment> \n";
337 out << "\t\t<Submitter user_name=\"" + submissionName + "\"/>\n";
338 out << "\t\t<Organization type=\"" + centerType + "\">\n";
339 out << "\t\t<Name>" + centerName + "</Name>\n";
340 out << "\t\t<Contact email=\"" + email + "\">\n";
341 out << "\t\t\t<Name>\n";
342 out << "\t\t\t\t<First>" + firstName + "</First>\n";
343 out << "\t\t\t\t<Last>" + lastName + "</Last>\n";
344 out << "\t\t\t</Name>\n";
345 out << "\t\t</Contact>\n";
346 out << "\t\t</Organization>\n";
347 out << "\t</Description>\n";
348 ////////////////////////////////////////////////////////
// ---- BioProject action. Lines commented out with a run of slashes are fields that
// are not emitted yet. ----
351 ////////////////////////////////////////////////////////
352 out << "\t<Action>\n";
353 out << "\t\t<AddData target_db=\"BioProject\">\n";
354 out << "\t\t\t<Data content_type=\"XML\">\n";
355 out << "\t\t\t\t<XmlContent>\n";
356 out << "\t\t\t\t\t<Project schema_version=\"2.0\">\n";
357 out << "\t\t\t\t\t\t<ProjectID>\n";
358 ///////////////////////out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"Institute name\">" + ProjectID + " </SPUID> \n";
359 out << "\t\t\t\t\t\t</ProjectID>\n";
360 out << "\t\t\t\t\t\t<Descriptor>\n";
361 ////////////////////out << "\t\t\t\t\t\t\t<Title>" + title + " </Title> \n";
362 out << "\t\t\t\t\t\t\t<Description><p>" + description + "</p></Description> \n";
363 out << "\t\t\t\t\t\t\t<ExternalLink label=\"Website name\">\n";
364 /////////////////////////out << "\t\t\t\t\t\t\t\t<URL>" + website + "</URL>\n";
365 out << "\t\t\t\t\t\t\t</ExternalLink>\n";
366 out << "\t\t\t\t\t\t\t<Relevance>\n";
367 //////////////////////out << "\t\t\t\t\t\t\t\t<Medical>" + medicalRelevance + "</Medical>\n";
368 out << "\t\t\t\t\t\t\t</Relevance>\n";
369 out << "\t\t\t\t\t\t</Descriptor>\n";
370 out << "\t\t\t\t\t\t<ProjectType>\n";
// NOTE(review): the opening <ProjectTypeSubmission> tag below is commented out while
// its closing tag is still written further down, so the emitted XML is unbalanced.
// Left as-is because the intended sample_scope value is undecided - confirm.
371 /////////////////////////out << "\t\t\t\t\t\t\t<ProjectTypeSubmission sample_scope=\"eMultiisolate\">\n"; //<!-- controlled vocabulary? -->
372 out << "\t\t\t\t\t\t\t\t<Organism>\n";
373 ////////////////////out << "\t\t\t\t\t\t\t\t\t<OrganismName>" + scientificName + " </OrganismName> \n";
374 out << "\t\t\t\t\t\t\t\t</Organism>\n";
375 out << "\t\t\t\t\t\t\t\t<IntendedDataTypeSet>\n";
376 ////////////////////out << "\t\t\t\t\t\t\t\t\t<DataType>" + dataType + " </DataType> \n"; <!-- controlled vocabulary? -->
377 out << "\t\t\t\t\t\t\t\t</IntendedDataTypeSet>\n";
378 out << "\t\t\t\t\t\t\t</ProjectTypeSubmission>\n";
379 out << "\t\t\t\t\t\t</ProjectType>\n";
380 out << "\t\t\t\t\t</Project>\n";
381 out << "\t\t\t\t</XmlContent>\n";
382 out << "\t\t\t</Data>\n";
383 out << "\t\t\t<Identifier>\n";
384 ////////////////////////////out << "\t\t\t\t<SPUID spuid_namespace=\"Institute name\">" + ProjectID + " </SPUID>\n";
385 out << "\t\t\t</Identifier>\n";
386 out << "\t\t</AddData>\n";
387 out << "\t</Action>\n";
388 ////////////////////////////////////////////////////////
// ---- One BioSample action per file of every group ----
391 ////////////////////////////////////////////////////////
392 for (int i = 0; i < Groups.size(); i++) {
394 vector<string> thisGroupsFiles = filesBySample[Groups[i]];
395 string barcodeForThisSample = Group2Barcode[Groups[i]];
397 for (int j = 0; j < thisGroupsFiles.size(); j++) {
398 if (m->control_pressed) { break; }
399 out << "\t<Action>\n";
400 out << "\t\t<AddData target_db=\"BioSample\">\n";
401 out << "\t\t\t<Data content_type=\"XML\">\n";
402 out << "\t\t\t\t<XmlContent>\n";
403 out << "\t\t\t\t\t<BioSample schema_version=\"2.0\">\n";
404 out << "\t\t\t\t\t\t<SampleId>\n";
405 out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"Institute name\">" + Groups[i] + " </SPUID> \n";
406 out << "\t\t\t\t\t\t</SampleId>\n";
407 out << "\t\t\t\t\t\t<Descriptor>\n";
408 ////////////////////out << "\t\t\t\t\t\t\t<Title>" + title + " </Title> \n";
409 out << "\t\t\t\t\t\t</Descriptor>\n";
410 out << "\t\t\t\t\t\t<Organism>\n";
411 ////////////////////out << "\t\t\t\t\t\t\t<OrganismName>" + scientificName + " </OrganismName> \n";
412 out << "\t\t\t\t\t\t</Organism>\n";
413 out << "\t\t\t\t\t\t<BioProject>\n";
414 ///////////////////////out << "\t\t\t\t\t\t\t<SPUID spuid_namespace=\"Institute name\">" + BioProject + " </SPUID> \n";
415 out << "\t\t\t\t\t\t</BioProject>\n";
416 out << "\t\t\t\t\t\t<Package>MIMARKS.specimen</Package>\n";
417 out << "\t\t\t\t\t\t<Attributes>\n";
418 //add biosample required attributes
419 ///////////////////////////////////////////////////////////////////////
421 out << "\t\t\t\t\t\t</Attributes>\n";
422 out << "\t\t\t\t\t</BioSample>\n";
423 out << "\t\t\t\t</XmlContent>\n";
424 out << "\t\t\t</Data>\n";
// Library id = file name + the sample's barcode.
427 out << "\t\t\t<Identifier>\n";
428 string libId = thisGroupsFiles[j] + barcodeForThisSample;
429 if (libLayout == "paired") { //adjust the libID because the thisGroupsFiles[j] contains two filenames
430 vector<string> pieces = m->splitWhiteSpace(thisGroupsFiles[j]);
431 libId = pieces[0] + barcodeForThisSample;
433 out << "\t\t\t\t<SPUID spuid_namespace=\"Institute name\">" + libId + " </SPUID>\n";
434 out << "\t\t\t</Identifier>\n";
436 out << "\t\t</AddData>\n";
437 out << "\t</Action>\n";
// ---- One SRA AddFiles action per file of every group ----
441 for (int i = 0; i < Groups.size(); i++) {
443 vector<string> thisGroupsFiles = filesBySample[Groups[i]];
444 string barcodeForThisSample = Group2Barcode[Groups[i]];
446 for (int j = 0; j < thisGroupsFiles.size(); j++) {
447 if (m->control_pressed) { break; }
448 out << "\t<Action>\n";
449 out << "\t\t<AddFiles target_db=\"SRA\">\n";
// Paired entries hold "forward reverse" in one string; each file gets its own <File>.
450 if (libLayout == "paired") { //adjust the libID because the thisGroupsFiles[j] contains two filenames
451 vector<string> pieces = m->splitWhiteSpace(thisGroupsFiles[j]);
452 out << "\t\t\t<File file_path=\"" + pieces[0] + "\">\n";
453 ////////////////////out << "\t\t\t\t<DataType>fastq</DataType> \n"; //since its paired we know its fastq, is the dataType the fileType???
454 out << "\t\t\t</File>\n";
455 out << "\t\t\t<File file_path=\"" + pieces[1] + "\">\n";
456 ////////////////////out << "\t\t\t\t<DataType>fastq</DataType> \n"; //since its paired we know its fastq, is the dataType the fileType???
457 out << "\t\t\t</File>\n";
459 out << "\t\t\t<File file_path=\"" + thisGroupsFiles[j] + "\">\n";
460 string dataType = "fastq";
461 if (isSFF) { dataType = "sff"; }
462 ////////////////////out << "\t\t\t\t<DataType>" + dataType + " </DataType> \n"; //is the dataType the fileType???
463 out << "\t\t\t</File>\n";
// Controlled-vocabulary attributes taken from the command parameters.
466 out << "\t\t\t<Attribute name=\"instrument_model\">" + instrumentModel + "</Attribute>\n";
467 out << "\t\t\t<Attribute name=\"library_strategy\">" + libStrategy + "</Attribute>\n";
468 out << "\t\t\t<Attribute name=\"library_source\">" + libSource + "</Attribute>\n";
469 out << "\t\t\t<Attribute name=\"library_selection\">" + libSelection + "</Attribute>\n";
470 out << "\t\t\t<Attribute name=\"library_layout\">" + libLayout + "</Attribute>\n";
472 //////////////////bioSample info
473 ///////////////////bioProject info
476 out << "\t\t\t<Identifier>\n";
477 string libId = thisGroupsFiles[j] + barcodeForThisSample;
478 if (libLayout == "paired") { //adjust the libID because the thisGroupsFiles[j] contains two filenames
479 vector<string> pieces = m->splitWhiteSpace(thisGroupsFiles[j]);
480 libId = pieces[0] + barcodeForThisSample;
482 out << "\t\t\t\t<SPUID spuid_namespace=\"Institute name\">" + libId + " </SPUID>\n";
483 out << "\t\t\t</Identifier>\n";
484 out << "\t\t</AddFiles>\n";
485 out << "\t</Action>\n";
489 ////////////////////////////////////////////////////////
490 out << "</Submission>\n";
// If the run was interrupted, delete everything written so far.
493 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
495 //output files created by command
496 m->mothurOutEndLine();
497 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
498 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
499 m->mothurOutEndLine();
503 catch(exception& e) {
504 m->errorOut(e, "SRACommand", "execute");
508 //**********************************************************************************************************************
// Reads the contact file (contactfile) as key/value lines and fills the submitter
// fields: submissionName, lastName, firstName, email, centerName, centerType and
// description. Every one of these fields is required; a missing one logs an error
// and sets m->control_pressed.
// NOTE(review): only part of this function is visible in this excerpt (the stream
// and key/value declarations, the read loop header and the return are not shown).
509 int SRACommand::readContactFile(){
// Reset all fields so the "missing" checks at the end are reliable.
511 lastName = ""; firstName = ""; submissionName = ""; email = ""; centerName = ""; centerType = ""; description = "";
514 m->openInputFile(contactfile, in);
518 if (m->control_pressed) { break; }
// First whitespace-delimited token is the key; the rest of the line is the value.
521 in >> key; m->gobble(in);
522 value = m->getline(in); m->gobble(in);
// Keys are matched case-insensitively by upper-casing them first.
524 for (int i = 0; i < key.length(); i++) { key[i] = toupper(key[i]); }
526 if (key == "USERNAME") { submissionName = value; }
527 else if (key == "LAST") { lastName = value; }
528 else if (key == "FIRST") { firstName = value; }
529 else if (key == "EMAIL") { email = value; }
530 else if (key == "CENTER") { centerName = value; }
531 else if (key == "TYPE") {
// centerType is lower-cased and checked against the allowed vocabulary.
// NOTE(review): the statement that copies `value` into centerType is not visible in
// this excerpt - confirm it precedes this loop.
533 for (int i = 0; i < centerType.length(); i++) { centerType[i] = tolower(centerType[i]); }
534 if ((centerType == "consortium") || (centerType == "center") || (centerType == "institute") || (centerType == "lab")) {}
535 else { m->mothurOut("[ERROR]: " + centerType + " is not a center type option. Valid center type options are consortium, center, institute and lab. This is a controlled vocabulary section in the XML file that will be generated."); m->mothurOutEndLine(); m->control_pressed = true; }
536 }else if (key == "DESCRIPTION") { description = value; }
// Required-field checks: each missing field is reported separately.
540 if (lastName == "") { m->mothurOut("[ERROR]: missing last name from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
541 if (firstName == "") { m->mothurOut("[ERROR]: missing first name from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
542 if (submissionName == "") { m->mothurOut("[ERROR]: missing submission name from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
543 if (email == "") { m->mothurOut("[ERROR]: missing email from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
544 if (centerName == "") { m->mothurOut("[ERROR]: missing center name from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
545 if (centerType == "") { m->mothurOut("[ERROR]: missing center type from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
546 if (description == "") { m->mothurOut("[ERROR]: missing description from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
// Any std::exception is reported through m->errorOut with the class/function tag.
550 catch(exception& e) {
551 m->errorOut(e, "SRACommand", "readContactFile");
556 //**********************************************************************************************************************
557 // going to have to rework this to allow for other options --
567 fastqfile1 oligosfile1
568 fastqfile2 oligosfile2
573 fastqfile fastqfile group
574 fastqfile fastqfile group
575 fastqfile fastqfile group
// Reads the `file` parameter: a list with one entry per line, either
//   2 columns: <sff or fastq file> <oligos file>
//   3 columns: <group> <forward fastq> <reverse fastq>   (sets libLayout = "paired")
// Each listed file is looked for as given, then under mothur's default path, then
// under the output directory. Two-column lines are handed on for splitting;
// three-column pairs are stored in `files` under their group name as
// "forward reverse".
//
// Fixes relative to the previous version:
//  * The 3-column branch declared a new block-local `group`, shadowing the `group`
//    used by the debug output and the paired-read bookkeeping further down, so the
//    group name from the file was never used. It now assigns to the existing
//    variable.
//  * The sff/fastq test indexed theseFiles[0], but nothing in the visible lines ever
//    adds to theseFiles (undefined behaviour on an empty vector); the file being
//    classified is thisFileName1, so that name is searched for ".sff" instead.
//
// NOTE(review): only part of this function is visible in this excerpt (stream
// declarations, the read loop header, several else/closing lines and the return
// are not shown); statements are kept in their original order.
580 int SRACommand::readFile(map<string, vector<string> >& files){
582 vector<string> theseFiles;
587 m->openInputFile(file, in);
591 if (m->control_pressed) { return 0; }
// One line of the list, split on whitespace.
593 string line = m->getline(in); m->gobble(in);
594 vector<string> pieces = m->splitWhiteSpace(line);
597 string thisFileName1, thisFileName2; thisFileName1 = ""; thisFileName2 = "";
598 if (pieces.size() == 2) {
599 thisFileName1 = pieces[0];
600 thisFileName2 = pieces[1];
601 }else if (pieces.size() == 3) {
602 thisFileName1 = pieces[1];
603 thisFileName2 = pieces[2];
604 group = pieces[0];
605 libLayout = "paired";
607 m->mothurOut("[ERROR]: file lines can be 2 or 3 columns. The 2 column files are sff file then oligos or fastqfile then oligos. You may have multiple lines in the file. The 3 column files are for paired read libraries. The format is groupName, forwardFastqFile reverseFastqFile. \n"); m->control_pressed = true;
610 if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", thisFileName1 = " + thisFileName1 + ", thisFileName2 = " + thisFileName2 + ".\n"); }
612 //check to make sure both are able to be opened
// A result of 1 from openInputFile is treated below as "could not open".
614 int openForward = m->openInputFile(thisFileName1, in2, "noerror");
616 //if you can't open it, try default location
617 if (openForward == 1) {
618 if (m->getDefaultPath() != "") { //default path is set
619 string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName1);
620 m->mothurOut("Unable to open " + thisFileName1 + ". Trying default " + tryPath); m->mothurOutEndLine();
622 openForward = m->openInputFile(tryPath, in3, "noerror");
624 thisFileName1 = tryPath;
628 //if you can't open it, try output location
629 if (openForward == 1) {
630 if (m->getOutputDir() != "") { //output directory is set
631 string tryPath = m->getOutputDir() + m->getSimpleName(thisFileName1);
632 m->mothurOut("Unable to open " + thisFileName1 + ". Trying output directory " + tryPath); m->mothurOutEndLine();
634 openForward = m->openInputFile(tryPath, in4, "noerror");
635 thisFileName1 = tryPath;
640 if (openForward == 1) { //can't find it
641 m->mothurOut("[WARNING]: can't find " + thisFileName1 + ", ignoring.\n");
642 }else{ in2.close(); }
// Same three-step lookup for the second file (oligos file or reverse reads).
645 int openReverse = m->openInputFile(thisFileName2, in3, "noerror");
647 //if you can't open it, try default location
648 if (openReverse == 1) {
649 if (m->getDefaultPath() != "") { //default path is set
650 string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName2);
651 m->mothurOut("Unable to open " + thisFileName2 + ". Trying default " + tryPath); m->mothurOutEndLine();
653 openReverse = m->openInputFile(tryPath, in3, "noerror");
655 thisFileName2 = tryPath;
659 //if you can't open it, try output location
660 if (openReverse == 1) {
661 if (m->getOutputDir() != "") { //output directory is set
662 string tryPath = m->getOutputDir() + m->getSimpleName(thisFileName2);
663 m->mothurOut("Unable to open " + thisFileName2 + ". Trying output directory " + tryPath); m->mothurOutEndLine();
665 openReverse = m->openInputFile(tryPath, in4, "noerror");
666 thisFileName2 = tryPath;
671 if (openReverse == 1) { //can't find it
672 m->mothurOut("[WARNING]: can't find " + thisFileName2 + ", ignoring pair.\n");
673 }else{ in3.close(); }
677 if ((pieces.size() == 2) && (openForward != 1) && (openReverse != 1)) { //good pair and sff or fastq and oligos
// Classify the data file by name: containing ".sff" means sff, otherwise fastq.
679 int pos = thisFileName1.find(".sff");
680 if (pos != string::npos) {//these files are sff files
682 sfffile = thisFileName1; oligosfile = thisFileName2;
687 fastqfile = thisFileName1; oligosfile = thisFileName2;
689 parseFastqFile(files);
692 }else if((pieces.size() == 3) && (openForward != 1) && (openReverse != 1)) { //good pair and paired read
// Append "forward reverse" to this group's list, creating the list on first use.
693 map<string, vector<string> >::iterator it = files.find(group);
694 if (it == files.end()) {
695 vector<string> temp; temp.push_back(thisFileName1 + " " + thisFileName2); files[group] = temp;
697 files[group].push_back(thisFileName1 + " " + thisFileName2);
707 catch(exception& e) {
708 m->errorOut(e, "SRACommand", "readFile");
712 //**********************************************************************************************************************
// Splits the sff file named by the sfffile member into per-sample sff files by running the
// sffinfo command, then passes the files it reports to mapGroupToFile so they are recorded in `files`.
// Sets libLayout to "single". If sffinfo reports no "sff" output files, m->control_pressed is set.
// NOTE(review): some short lines of this function are not visible in this view; statements on
// those lines are not described here.
713 int SRACommand::parseSffFile(map<string, vector<string> >& files){
715 vector<string> theseFiles;
717 libLayout = "single"; //controlled vocab
720 //run sffinfo to parse sff file into individual sampled sff files
721 string commandString = "sff=" + sfffile;
// a group file takes precedence; the oligos file and its mismatch settings are only used without one
722 if (groupfile != "") { commandString += ", group=" + groupfile; }
723 else if (oligosfile != "") {
724 commandString += ", oligos=" + oligosfile;
725 //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs
// only non-zero values are appended to the command string
726 if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); }
727 if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); }
728 if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
729 if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
730 if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
732 m->mothurOutEndLine();
733 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
734 m->mothurOut("Running command: sffinfo(" + commandString + ")"); m->mothurOutEndLine();
// mothurCalling is raised for the duration of the nested command and lowered again below
735 m->mothurCalling = true;
737 Command* sffinfoCommand = new SffInfoCommand(commandString);
738 sffinfoCommand->execute();
// collect the files sffinfo registered under the "sff" output type
740 map<string, vector<string> > filenames = sffinfoCommand->getOutputFiles();
741 map<string, vector<string> >::iterator it = filenames.find("sff");
742 if (it != filenames.end()) { theseFiles = it->second; }
743 else { m->control_pressed = true; } // error in sffinfo
745 delete sffinfoCommand;
746 m->mothurCalling = false;
747 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
749 mapGroupToFile(files, theseFiles);
753 catch(exception& e) {
// NOTE(review): the function label passed to errorOut is "readFile", not "parseSffFile" -
// looks like a copy/paste slip; confirm and correct.
754 m->errorOut(e, "SRACommand", "readFile");
759 //**********************************************************************************************************************
760 int SRACommand::parseFastqFile(map<string, vector<string> >& files){
762 vector<string> theseFiles;
763 inputfile = fastqfile;
764 libLayout = "single"; //controlled vocab
766 //run sffinfo to parse sff file into individual sampled sff files
767 string commandString = "fastq=" + fastqfile;
768 if (groupfile != "") { commandString += ", group=" + groupfile; }
769 else if (oligosfile != "") {
770 commandString += ", oligos=" + oligosfile;
771 //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs
772 if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); }
773 if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); }
774 if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
775 if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
776 if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
778 m->mothurOutEndLine();
779 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
780 m->mothurOut("Running command: fastq.info(" + commandString + ")"); m->mothurOutEndLine();
781 m->mothurCalling = true;
783 Command* fastqinfoCommand = new ParseFastaQCommand(commandString);
784 fastqinfoCommand->execute();
786 map<string, vector<string> > filenames = fastqinfoCommand->getOutputFiles();
787 map<string, vector<string> >::iterator it = filenames.find("fastq");
788 if (it != filenames.end()) { theseFiles = it->second; }
789 else { m->control_pressed = true; } // error in sffinfo
791 delete fastqinfoCommand;
792 m->mothurCalling = false;
793 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
795 mapGroupToFile(files, theseFiles);
799 catch(exception& e) {
800 m->errorOut(e, "SRACommand", "readFile");
804 //***************************************************************************************************************
806 int SRACommand::mapGroupToFile(map<string, vector<string> >& files, vector<string> theseFiles){
809 for (int i = 0; i < Groups.size(); i++) {
812 for (int j = 0; j < theseFiles.size(); j++) {
813 int pos = theseFiles[j].find(Groups[i]);
814 if (pos != string::npos) { //you have a potential match, make sure you dont have a case of partial name
815 if (theseFiles[j][pos+Groups[i].length()] == '.') { //final.soil.sff vs final.soil2.sff both would match soil.
821 if(matches.size() == 1) {
822 map<string, vector<string> >::iterator it = files.find(Groups[i]);
823 if (it == files.end()) {
824 vector<string> temp; temp.push_back(theseFiles[*matches.begin()]); files[Groups[i]] = temp;
826 files[Groups[i]].push_back(theseFiles[*matches.begin()]);
832 catch(exception& e) {
833 m->errorOut(e, "SRACommand", "checkGroups");
838 //***************************************************************************************************************
839 //checks groups and files returned from parse - removes any groups that did not get reads assigned to them, orders files.
// Walks the Groups member and collects, in their existing order, the names that have an entry in `files`.
// NOTE(review): the lines that consume newGroups (presumably assigning it back to Groups) and the
// return statement are not visible in this view - confirm against the full file.
840 int SRACommand::checkGroups(map<string, vector<string> >& files){
842 vector<string> newGroups;
843 for (int i = 0; i < Groups.size(); i++) {
845 map<string, vector<string> >::iterator it = files.find(Groups[i]);
846 //no files for this group, remove it
// groups without an entry are simply not copied into newGroups
847 if (it == files.end()) { }
848 else { newGroups.push_back(Groups[i]); }
855 catch(exception& e) {
856 m->errorOut(e, "SRACommand", "checkGroups");
860 //***************************************************************************************************************
// Parses the oligos file named by the oligosfile member and derives the list of sample group names.
// From the lines visible here: each record has a type keyword (matched case-insensitively) and an
// oligo, which is upper-cased with U rewritten to T. The types handled are FORWARD, PRIMER, REVERSE,
// BARCODE, LINKER and SPACER; anything else is reported with a warning.
// After the file is read, barcode and primer names are combined into group names, which are stored
// as keys of Group2Barcode and appended to Groups.
// NOTE(review): several short statements of this function are not visible in this view (for example
// where type/oligo/group are read and where hasPrimer is assigned), so the comments below describe
// only what the visible lines establish.
861 int SRACommand::readOligos(){
864 m->openInputFile(oligosfile, inOligos);
866 string type, oligo, roligo, group;
867 bool hasPrimer = false; bool hasPairedBarcodes = false; pairedOligos = false;
// running indexes used as positions into barcodeNameVector / primerNameVector
870 int indexBarcode = 0;
871 int indexPairedPrimer = 0;
872 int indexPairedBarcode = 0;
// forward+reverse concatenations already seen, used to report duplicate pairs
873 set<string> uniquePrimers;
874 set<string> uniqueBarcodes;
876 while(!inOligos.eof()){
880 if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
883 while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
888 //make type case insensitive
889 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
893 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
// normalise the oligo: upper case, RNA U written as DNA T
895 for(int i=0;i<oligo.length();i++){
896 oligo[i] = toupper(oligo[i]);
897 if(oligo[i] == 'U') { oligo[i] = 'T'; }
// FORWARD: unpaired forward primer, stored in primers (sequence -> index) with its name in primerNameVector
900 if(type == "FORWARD"){
903 // get rest of line in case there is a primer name
904 while (!inOligos.eof()) {
905 char c = inOligos.get();
906 if (c == 10 || c == 13 || c == -1){ break; }
907 else if (c == 32 || c == 9){;} //space or tab
911 //check for repeat barcodes
912 map<string, int>::iterator itPrime = primers.find(oligo);
913 if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
915 if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); } }
917 primers[oligo] = indexPrimer; indexPrimer++;
918 primerNameVector.push_back(group);
// PRIMER: paired primer; the reverse oligo is normalised like the forward one and then reverse complemented
920 else if (type == "PRIMER"){
925 for(int i=0;i<roligo.length();i++){
926 roligo[i] = toupper(roligo[i]);
927 if(roligo[i] == 'U') { roligo[i] = 'T'; }
929 roligo = reverseOligo(roligo);
933 // get rest of line in case there is a primer name
934 while (!inOligos.eof()) {
935 char c = inOligos.get();
936 if (c == 10 || c == 13 || c == -1){ break; }
937 else if (c == 32 || c == 9){;} //space or tab
941 oligosPair newPrimer(oligo, roligo);
943 if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
945 //check for repeat barcodes
946 string tempPair = oligo+roligo;
947 if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); }
948 else { uniquePrimers.insert(tempPair); }
950 if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } }
952 pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
953 primerNameVector.push_back(group);
// REVERSE: unpaired reverse primer, stored reverse complemented
956 else if(type == "REVERSE"){
957 //Sequence oligoRC("reverse", oligo);
958 //oligoRC.reverseComplement();
959 string oligoRC = reverseOligo(oligo);
960 revPrimer.push_back(oligoRC);
962 else if(type == "BARCODE"){
965 //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs
966 //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info
969 while (!inOligos.eof()) {
970 char c = inOligos.get();
971 if (c == 10 || c == 13 || c == -1){ break; }
972 else if (c == 32 || c == 9){;} //space or tab
976 //then this is illumina data with 4 columns
978 hasPairedBarcodes = true;
// the value read into group is treated as the reverse barcode sequence here
979 string reverseBarcode = group; //reverseOligo(group); //reverse barcode
982 for(int i=0;i<reverseBarcode.length();i++){
983 reverseBarcode[i] = toupper(reverseBarcode[i]);
984 if(reverseBarcode[i] == 'U') { reverseBarcode[i] = 'T'; }
987 reverseBarcode = reverseOligo(reverseBarcode);
988 oligosPair newPair(oligo, reverseBarcode);
990 if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
991 //check for repeat barcodes
992 string tempPair = oligo+reverseBarcode;
993 if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); }
994 else { uniqueBarcodes.insert(tempPair); }
996 pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
997 barcodeNameVector.push_back(group);
// unpaired barcode: stored in barcodes (sequence -> index) with its name in barcodeNameVector
999 //check for repeat barcodes
1000 map<string, int>::iterator itBar = barcodes.find(oligo);
1001 if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1003 barcodes[oligo]=indexBarcode; indexBarcode++;
1004 barcodeNameVector.push_back(group);
1006 }else if(type == "LINKER"){
1007 linker.push_back(oligo);
1008 }else if(type == "SPACER"){
1009 spacer.push_back(oligo);
// NOTE(review): the warning lists only forward, reverse and barcode as choices, although PRIMER,
// LINKER and SPACER are also handled above - consider updating the message.
1011 else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1013 m->gobble(inOligos);
// paired primers/barcodes cannot be combined with unpaired oligos, linkers or spacers
1017 if (hasPairedBarcodes || hasPrimer) {
1018 pairedOligos = true;
1019 if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine(); return 0; }
1023 //add in potential combos
1024 if(barcodeNameVector.size() == 0){
1025 barcodeNameVector.push_back("");
1028 if(primerNameVector.size() == 0){
1029 primerNameVector.push_back("");
1032 set<string> uniqueNames; //used to cleanup outputFileNames
// paired oligos: every barcode pair x primer pair combination yields a candidate group name
1034 for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
1035 for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
1037 string primerName = primerNameVector[itPrimer->first];
1038 string barcodeName = barcodeNameVector[itBar->first];
1040 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1042 string comboGroupName = "";
// NOTE(review): fastqFileName is not used on any line visible in this view
1043 string fastqFileName = "";
1045 if(primerName == ""){
1046 comboGroupName = barcodeNameVector[itBar->first];
1049 if(barcodeName == ""){
1050 comboGroupName = primerNameVector[itPrimer->first];
1053 comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
1056 uniqueNames.insert(comboGroupName);
1057 Group2Barcode[comboGroupName] = (itBar->second).forward+"."+(itBar->second).reverse;
// unpaired oligos: same combination scheme, with the barcode sequence itself stored in Group2Barcode
1062 for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1063 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1065 string primerName = primerNameVector[itPrimer->second];
1066 string barcodeName = barcodeNameVector[itBar->second];
1068 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1070 string comboGroupName = "";
// NOTE(review): fastqFileName is not used on any line visible in this view
1071 string fastqFileName = "";
1073 if(primerName == ""){
1074 comboGroupName = barcodeNameVector[itBar->second];
1077 if(barcodeName == ""){
1078 comboGroupName = primerNameVector[itPrimer->second];
1081 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1084 uniqueNames.insert(comboGroupName);
1085 Group2Barcode[comboGroupName] = itBar->first;
1092 if (m->debug) { int count = 0; for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } }
// publish the de-duplicated group names (in set order) to the Groups member
1094 for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { Groups.push_back(*it); }
1099 catch(exception& e) {
1100 m->errorOut(e, "SRACommand", "readOligos");
1104 //********************************************************************/
1105 string SRACommand::reverseOligo(string oligo){
1107 string reverse = "";
1109 for(int i=oligo.length()-1;i>=0;i--){
1111 if(oligo[i] == 'A') { reverse += 'T'; }
1112 else if(oligo[i] == 'T'){ reverse += 'A'; }
1113 else if(oligo[i] == 'U'){ reverse += 'A'; }
1115 else if(oligo[i] == 'G'){ reverse += 'C'; }
1116 else if(oligo[i] == 'C'){ reverse += 'G'; }
1118 else if(oligo[i] == 'R'){ reverse += 'Y'; }
1119 else if(oligo[i] == 'Y'){ reverse += 'R'; }
1121 else if(oligo[i] == 'M'){ reverse += 'K'; }
1122 else if(oligo[i] == 'K'){ reverse += 'M'; }
1124 else if(oligo[i] == 'W'){ reverse += 'W'; }
1125 else if(oligo[i] == 'S'){ reverse += 'S'; }
1127 else if(oligo[i] == 'B'){ reverse += 'V'; }
1128 else if(oligo[i] == 'V'){ reverse += 'B'; }
1130 else if(oligo[i] == 'D'){ reverse += 'H'; }
1131 else if(oligo[i] == 'H'){ reverse += 'D'; }
1133 else { reverse += 'N'; }
1139 catch(exception& e) {
1140 m->errorOut(e, "SRACommand", "reverseOligo");
1144 //********************************************************************/
1145 //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
1146 bool SRACommand::checkCasesPlatforms(string& platform){
1148 string original = platform;
1151 //remove users possible case errors
1152 for (int i = 0; i < platform.size(); i++) { platform[i] = toupper(platform[i]); }
1154 //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
1156 if ((platform == "_LS454") || (platform == "ILLUMINA") || (platform == "ION_TORRENT") || (platform == "PACBIO_SMRT") || (platform == "454")) { }
1157 else { isOkay = false; }
1160 if (platform == "454") { platform = "_LS454"; }
1162 m->mothurOut("[ERROR]: " + original + " is not a valid platform option. Valid platform options are _LS454, ILLUMINA-ION, TORRENT or PACBIO_SMRT."); m->mothurOutEndLine(); abort = true;
1167 catch(exception& e) {
1168 m->errorOut(e, "SRACommand", "checkCasesPlatforms");
1172 //********************************************************************/
1173 //454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified
1174 bool SRACommand::checkCasesInstrumentModels(string& instrumentModel){
1176 string original = instrumentModel;
1179 //remove users possible case errors
1180 for (int i = 0; i < instrumentModel.size(); i++) { instrumentModel[i] = toupper(instrumentModel[i]); }
1182 //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
1183 if (platform == "_LS454") { //instrument model options are 454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-unspecified
1184 if ((instrumentModel == "454_GS") || (instrumentModel == "454_GS_20") || (instrumentModel == "454_GS_FLX") || (instrumentModel == "454_GS_FLX_TITANIUM") || (instrumentModel == "454_GS_JUNIOR") || (instrumentModel == "UNSPECIFIED")) { }
1185 else { isOkay = false; }
1187 if (instrumentModel == "454_GS_FLX_TITANIUM") { instrumentModel = "454_GS_FLX_Titanium"; }
1188 if (instrumentModel == "454_GS_JUNIOR") { instrumentModel = "454_GS_Junior"; }
1189 if (instrumentModel == "UNSPECIFIED") { instrumentModel = "unspecified"; }
1191 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform. Valid instrument options are 454_GS, 454_GS_20, 454_GS_FLX, 454_GS_FLX_Titanium, 454_GS_Junior or unspecified."); m->mothurOutEndLine(); abort = true;
1194 }else if (platform == "ILLUMINA") { //instrument model options are Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-unspecified
1195 if ((instrumentModel == "ILLUMINA_GENOME_ANALYZER") || (instrumentModel == "ILLUMINA_GENOME_ANALYZER_II") || (instrumentModel == "ILLUMINA_GENOME_ANALYZER_IIX") || (instrumentModel == "ILLUMINA_HISEQ_2000") || (instrumentModel == "ILLUMINA_HISEQ_1000") || (instrumentModel == "ILLUMINA_MISEQ") || (instrumentModel == "UNSPECIFIED")) { }
1196 else { isOkay = false; }
1199 if (instrumentModel == "ILLUMINA_GENOME_ANALYZER") { instrumentModel = "Illumina_Genome_Analyzer"; }
1200 if (instrumentModel == "ILLUMINA_GENOME_ANALYZER_II") { instrumentModel = "Illumina_Genome_Analyzer_II"; }
1201 if (instrumentModel == "ILLUMINA_GENOME_ANALYZER_IIX") { instrumentModel = "Illumina_Genome_Analyzer_IIx"; }
1202 if (instrumentModel == "ILLUMINA_HISEQ_2000") { instrumentModel = "Illumina_HiSeq_2000"; }
1203 if (instrumentModel == "ILLUMINA_HISEQ_1000") { instrumentModel = "Illumina_HiSeq_1000"; }
1204 if (instrumentModel == "ILLUMINA_MISEQ") { instrumentModel = "Illumina_MiSeq"; }
1205 if (instrumentModel == "UNSPECIFIED") { instrumentModel = "unspecified"; }
1207 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform. Valid instrument options are Illumina_Genome_Analyzer, Illumina_Genome_Analyzer_II, Illumina_Genome_Analyzer_IIx, Illumina_HiSeq_2000, Illumina_HiSeq_1000, Illumina_MiSeq or unspecified."); m->mothurOutEndLine(); abort = true;
1210 }else if (platform == "ION_TORRENT") { //instrument model options are Ion_Torrent_PGM-unspecified
1211 if ((instrumentModel == "ION_TORRENT_PGM") || (instrumentModel == "UNSPECIFIED")) { }
1212 else { isOkay = false; }
1215 if (instrumentModel == "ION_TORRENT_PGM") { instrumentModel = "Ion_Torrent_PGM"; }
1216 if (instrumentModel == "UNSPECIFIED") { instrumentModel = "unspecified"; }
1218 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform. Valid instrument options are Ion_Torrent_PGM or unspecified."); m->mothurOutEndLine(); abort = true;
1220 }else if (platform == "PACBIO_SMRT") { //instrument model options are PacBio_RS-unspecified
1221 if ((instrumentModel == "PACBIO_RS") || (instrumentModel == "UNSPECIFIED")) { }
1222 else { isOkay = false; }
1225 if (instrumentModel == "PACBIO_RS") { instrumentModel = "PacBio_RS"; }
1226 if (instrumentModel == "UNSPECIFIED") { instrumentModel = "unspecified"; }
1228 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform. Valid instrument options are PacBio_RS or unspecified."); m->mothurOutEndLine(); abort = true;
1233 catch(exception& e) {
1234 m->errorOut(e, "SRACommand", "checkCasesInstrumentModels");
1238 //**********************************************************************************************************************
1239 //AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq,OTHER
1240 bool SRACommand::checkCasesLibStrategy(string& libStrategy){
1242 string original = libStrategy;
1245 //remove users possible case errors
1246 for (int i = 0; i < libStrategy.size(); i++) { libStrategy[i] = toupper(libStrategy[i]); }
1248 if ((libStrategy == "AMPLICON") || (libStrategy == "WGA") || (libStrategy == "WGS") || (libStrategy == "WGX") || (libStrategy == "RNA-SEQ") || (libStrategy == "MIRNA-SEQ") || (libStrategy == "WCS") || (libStrategy == "CLONE") || (libStrategy == "POOLCLONE") || (libStrategy == "CLONEEND") || (libStrategy == "FINISHING") || (libStrategy == "CHIP-SEQ") || (libStrategy == "MNASE-SEQ") || (libStrategy == "DNASE-HYPERSENSITIVITY") || (libStrategy == "BISULFITE-SEQ") || (libStrategy == "TN-SEQ") || (libStrategy == "EST") || (libStrategy == "FL-CDNA") || (libStrategy == "CTS") || (libStrategy == "MRE-SEQ")|| (libStrategy == "MEDIP-SEQ") || (libStrategy == "MBD-SEQ") || (libStrategy == "OTHER")) { }
1249 else { isOkay = false; }
1252 if (libStrategy == "RNA-SEQ") { libStrategy = "RNA-Seq"; }
1253 if (libStrategy == "MIRNA-SEQ") { libStrategy = "miRNA-Seq"; }
1254 if (libStrategy == "CHIP-SEQ") { libStrategy = "ChIP-Seq"; }
1255 if (libStrategy == "MNASE-SEQ") { libStrategy = "MNase-Seq"; }
1256 if (libStrategy == "DNASE-HYPERSENSITIVITY") { libStrategy = "DNase-Hypersensitivity"; }
1257 if (libStrategy == "BISULFITE-SEQ") { libStrategy = "Bisulfite-Seq"; }
1258 if (libStrategy == "TN-SEQ") { libStrategy = "Tn-Seq"; }
1259 if (libStrategy == "FL-CDNA") { libStrategy = "FL-cDNA"; }
1260 if (libStrategy == "MRE-SEQ") { libStrategy = "MRE-Seq"; }
1261 if (libStrategy == "MEDIP-SEQ") { libStrategy = "MeDIP-Seq"; }
1263 m->mothurOut("[ERROR]: " + original + " is not a valid libstrategy option. Valid libstrategy options are AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq or OTHER."); m->mothurOutEndLine(); abort = true;
1268 catch(exception& e) {
1269 m->errorOut(e, "SRACommand", "checkCasesLibStrategy");
1273 //**********************************************************************************************************************
1274 //METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA,OTHER
1275 bool SRACommand::checkCasesLibSource(string& libSource){
1277 string original = libSource;
1280 //remove users possible case errors
1281 for (int i = 0; i < libSource.size(); i++) { libSource[i] = toupper(libSource[i]); }
1283 if ((libSource == "METAGENOMIC") || (libSource == "GENOMIC") || (libSource == "TRANSCRIPTOMIC") || (libSource == "METATRANSCRIPTOMIC") || (libSource == "SYNTHETIC") || (libSource == "VIRAL_RNA") || (libSource == "OTHER")) { }
1284 else { isOkay = false; }
1289 m->mothurOut("[ERROR]: " + original + " is not a valid libsource option. Valid libsource options are METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA or OTHER."); m->mothurOutEndLine(); abort = true;
1294 catch(exception& e) {
1295 m->errorOut(e, "SRACommand", "checkCasesLibStrategy");
1300 //**********************************************************************************************************************
1301 //PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other,unspecified
1302 bool SRACommand::checkCasesLibSelection(string& libSelection){
1304 string original = libSelection;
1307 //remove users possible case errors
1308 for (int i = 0; i < libSelection.size(); i++) { libSelection[i] = toupper(libSelection[i]); }
1310 if ((libSelection == "PCR") || (libSelection == "RANDOM") || (libSelection == "RANDOM_PCR") || (libSelection == "RT-PCR") || (libSelection == "HMPR") || (libSelection == "MF") || (libSelection == "CF-S") || (libSelection == "CF-H") || (libSelection == "CF-T") || (libSelection == "CF-M") || (libSelection == "MDA") || (libSelection == "MSLL") || (libSelection == "CDNA") || (libSelection == "CHIP") || (libSelection == "MNASE") || (libSelection == "DNASE") || (libSelection == "HYBRID_SELECTION") || (libSelection == "REDUCED_REPRESENTATION") || (libSelection == "RESTRICTION_DIGEST") || (libSelection == "5-METHYLCYTIDINE_ANTIBODY") || (libSelection == "MBD2_PROTEIN_METHYL-CPG_BINDING_DOMAIN") || (libSelection == "CAGE") || (libSelection == "RACE") || (libSelection == "SIZE_FRACTIONATION") || (libSelection == "PADLOCK_PROBES_CAPTURE_METHOD") || (libSelection == "OTHER") || (libSelection == "UNSPECIFIED")) { }
1311 else { isOkay = false; }
1314 if (libSelection == "CDNA") { libSelection = "cDNA"; }
1315 if (libSelection == "CHIP") { libSelection = "ChIP"; }
1316 if (libSelection == "MNASE") { libSelection = "MNase"; }
1317 if (libSelection == "DNASE") { libSelection = "DNAse"; }
1318 if (libSelection == "HYBRID_SELECTION") { libSelection = "Hybrid_Selection"; }
1319 if (libSelection == "REDUCED_REPRESENTATION") { libSelection = "Reduced_Representation"; }
1320 if (libSelection == "RESTRICTION_DIGEST") { libSelection = "Restriction_Digest"; }
1321 if (libSelection == "5-METHYLCYTIDINE_ANTIBODY") { libSelection = "5-methylcytidine_antibody"; }
1322 if (libSelection == "MBD2_PROTEIN_METHYL-CPG_BINDING_DOMAIN") { libSelection = "MBD2_protein_methyl-CpG_binding_domain"; }
1323 if (libSelection == "SIZE_FRACTIONATION") { libSelection = "size_fractionation"; }
1324 if (libSelection == "PADLOCK_PROBES_CAPTURE_METHOD") { libSelection = "Padlock_probes_capture_method"; }
1325 if (libSelection == "OTHER") { libSelection = "other"; }
1326 if (libSelection == "UNSPECIFIED") { libSelection = "unspecified"; }
1329 m->mothurOut("[ERROR]: " + original + " is not a valid libselection option. Valid libselection options are PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other or unspecified."); m->mothurOutEndLine(); abort = true;
1334 catch(exception& e) {
1335 m->errorOut(e, "SRACommand", "checkCasesLibSelection");
1340 //**********************************************************************************************************************