5 // Created by SarahsWork on 10/28/13.
6 // Copyright (c) 2013 Schloss Lab. All rights reserved.
9 #include "sracommand.h"
10 #include "sffinfocommand.h"
11 #include "parsefastaqcommand.h"
13 //**********************************************************************************************************************
14 vector<string> SRACommand::setParameters(){
16 CommandParameter psff("sff", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(psff);
17 CommandParameter poligos("oligos", "InputTypes", "", "", "oligos", "none", "none","",false,false,true); parameters.push_back(poligos);
18 CommandParameter pfile("file", "InputTypes", "", "", "sffFastQFile-oligos", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfile);
19 CommandParameter pfastq("fastq", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfastq);
20 CommandParameter pcontact("project", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pcontact);
21 CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
22 CommandParameter pmimark("mimark", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pmimark);
23 //choose only one multiple options
24 CommandParameter pplatform("platform", "Multiple", "_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT", "_LS454", "", "", "","",false,false); parameters.push_back(pplatform);
25 CommandParameter pinstrument("instrument", "Multiple", "454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified", "454_GS", "", "", "","",false,false); parameters.push_back(pinstrument);
26 CommandParameter plibstrategy("libstrategy", "String", "AMPLICON", "", "", "", "","",false,false); parameters.push_back(plibstrategy);
27 CommandParameter pdatatype("datatype", "String", "METAGENOME", "", "", "", "","",false,false); parameters.push_back(pdatatype);
28 CommandParameter plibsource("libsource", "String", "METAGENOMIC", "", "", "", "","",false,false); parameters.push_back(plibsource);
29 CommandParameter plibselection("libselection", "String", "PCR", "", "", "", "","",false,false); parameters.push_back(plibselection);
30 CommandParameter porientation("orientation", "Multiple", "forward-reverse", "forward", "", "", "","",false,false); parameters.push_back(porientation);
31 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
32 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);
33 CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
34 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
35 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
37 //every command must have inputdir and outputdir. This allows mothur users to redirect input and output files.
38 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
39 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
41 vector<string> myArray;
42 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
46 m->errorOut(e, "SRACommand", "setParameters");
50 //**********************************************************************************************************************
51 string SRACommand::getHelpString(){
53 string helpString = "";
54 helpString += "The sra command creates the necessary files for a NCBI submission. The xml file and individual sff or fastq files parsed from the original sff or fastq file.\n";
55 helpString += "The sra command parameters are: sff, fastq, file, oligos, project, mimarksfile, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, checkorient, platform, orientation, libstrategy, datatype, libsource, libselection and instrument.\n";
56 helpString += "The sff parameter is used to provide the original sff file.\n";
57 helpString += "The fastq parameter is used to provide the original fastq file.\n";
58 helpString += "The project parameter is used to provide your project file.\n";
59 helpString += "The oligos parameter is used to provide an oligos file to parse your sff or fastq file by. It is required and must contain barcodes and primers, or you must provide a file option. \n";
60 helpString += "The mimark parameter is used to provide your mimarks file. You can create the template for this file using the get.mimarkspackage command.\n";
61 helpString += "The file parameter is used to provide a file containing a list of individual fastq or sff files or paired fastq files with a group assignment. File lines can be 2 or 3 columns. The 2 column files are sff file then oligos or fastqfile then oligos. You may have multiple lines in the file. The 3 column files are for paired read libraries. The format is groupName, forwardFastqFile reverseFastqFile.\n";
62 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
63 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
64 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
65 helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
66 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
67 helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. The default is false.\n";
68 helpString += "The platform parameter is used to specify platform you are using choices are: _LS454,ILLUMINA,ION_TORRENT,PACBIO_SMRT. Default=_LS454. This is a controlled vocabulary section in the XML file that will be generated.\n";
69 helpString += "The orientation parameter is used to specify sequence orientation. Choices are: forward and reverse. Default=forward. This is a controlled vocabulary section in the XML file that will be generated.\n";
70 helpString += "The instrument parameter is used to specify instrument. Choices are 454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified. Default=454_GS. This is a controlled vocabulary section in the XML file that will be generated. \n";
71 helpString += "The libstrategy parameter is used to specify library strategy. Default=AMPLICON. Choices are AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq,OTHER. This is a controlled vocabulary section in the XML file that will be generated. \n";
72 helpString += "The libsource parameter is used to specify library source. Default=METAGENOMIC. Choices are METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA,OTHER. This is a controlled vocabulary section in the XML file that will be generated. \n";
73 helpString += "The libselection parameter is used to specify library selection. Default=PCR. Choices are PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other,unspecified. This is a controlled vocabulary section in the XML file that will be generated. \n";
74 helpString += "The datatype parameter is used to specify datatype. Default=METAGENOME. Choices are METAGENOME,GENOME_SEQUENCING,METAGENOMIC_ASSEMBLY,ASSEMBLY,TRANSCRIPTOME,PROTEOMIC,MAP,CLONE_ENDS,TARGETED_LOCI,RANDOM_SURVEY,EXOME,VARIATION,EPIGENOMICS,PHENOTYPE,GENOTYPE,OTHER. This is a controlled vocabulary section in the XML file that will be generated. \n";
75 helpString += "sra(sff=sff=GHL4YHV01.sff, GHL4YHV01.oligos, project=test.project, mimark=MIMarksData.txt)\n";
79 m->errorOut(e, "SRACommand", "getHelpString");
83 //**********************************************************************************************************************
84 string SRACommand::getOutputPattern(string type) {
88 if (type == "xml") { pattern = "[filename],xml"; }
89 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
94 m->errorOut(e, "SRACommand", "getOutputPattern");
98 //**********************************************************************************************************************
99 SRACommand::SRACommand(){
101 abort = true; calledHelp = true;
103 vector<string> tempOutNames;
104 outputTypes["xml"] = tempOutNames;
106 catch(exception& e) {
107 m->errorOut(e, "SRACommand", "SRACommand");
111 //**********************************************************************************************************************
112 SRACommand::SRACommand(string option) {
114 abort = false; calledHelp = false;
115 libLayout = "single"; //controlled vocab
117 //allow user to run help
118 if(option == "help") { help(); abort = true; calledHelp = true; }
119 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
122 //valid paramters for this command
123 vector<string> myArray = setParameters();
125 OptionParser parser(option);
126 map<string,string> parameters = parser.getParameters();
128 ValidParameters validParameter;
129 map<string,string>::iterator it;
130 //check to make sure all parameters are valid for command
131 for (it = parameters.begin(); it != parameters.end(); it++) {
132 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
135 vector<string> tempOutNames;
136 outputTypes["xml"] = tempOutNames;
138 //if the user changes the input directory command factory will send this info to us in the output parameter
139 inputDir = validParameter.validFile(parameters, "inputdir", false);
140 if (inputDir == "not found"){ inputDir = ""; }
144 it = parameters.find("sff");
145 //user has given a template file
146 if(it != parameters.end()){
147 path = m->hasPath(it->second);
148 //if the user has not given a path then, add inputdir. else leave path alone.
149 if (path == "") { parameters["sff"] = inputDir + it->second; }
152 it = parameters.find("fastq");
153 //user has given a template file
154 if(it != parameters.end()){
155 path = m->hasPath(it->second);
156 //if the user has not given a path then, add inputdir. else leave path alone.
157 if (path == "") { parameters["fastq"] = inputDir + it->second; }
160 it = parameters.find("file");
161 //user has given a template file
162 if(it != parameters.end()){
163 path = m->hasPath(it->second);
164 //if the user has not given a path then, add inputdir. else leave path alone.
165 if (path == "") { parameters["file"] = inputDir + it->second; }
168 it = parameters.find("oligos");
169 //user has given a template file
170 if(it != parameters.end()){
171 path = m->hasPath(it->second);
172 //if the user has not given a path then, add inputdir. else leave path alone.
173 if (path == "") { parameters["oligos"] = inputDir + it->second; }
176 it = parameters.find("project");
177 //user has given a template file
178 if(it != parameters.end()){
179 path = m->hasPath(it->second);
180 //if the user has not given a path then, add inputdir. else leave path alone.
181 if (path == "") { parameters["project"] = inputDir + it->second; }
184 it = parameters.find("mimark");
185 //user has given a template file
186 if(it != parameters.end()){
187 path = m->hasPath(it->second);
188 //if the user has not given a path then, add inputdir. else leave path alone.
189 if (path == "") { parameters["mimark"] = inputDir + it->second; }
193 //check for parameters
194 fastqfile = validParameter.validFile(parameters, "fastq", true);
195 if (fastqfile == "not open") { fastqfile = ""; abort = true; }
196 else if (fastqfile == "not found") { fastqfile = ""; }
198 sfffile = validParameter.validFile(parameters, "sff", true);
199 if (sfffile == "not open") { sfffile = ""; abort = true; }
200 else if (sfffile == "not found") { sfffile = ""; }
202 file = validParameter.validFile(parameters, "file", true);
203 if (file == "not open") { file = ""; abort = true; }
204 else if (file == "not found") { file = ""; }
206 oligosfile = validParameter.validFile(parameters, "oligos", true);
207 if (oligosfile == "not found") { oligosfile = ""; }
208 else if(oligosfile == "not open") { abort = true; }
209 else { m->setOligosFile(oligosfile); }
211 contactfile = validParameter.validFile(parameters, "project", true);
212 if (contactfile == "not found") { contactfile = ""; m->mothurOut("[ERROR]: You must provide a project file before you can use the sra command."); m->mothurOutEndLine(); abort = true; }
213 else if(contactfile == "not open") { abort = true; }
215 mimarksfile = validParameter.validFile(parameters, "mimark", true);
216 if (mimarksfile == "not found") { mimarksfile = ""; m->mothurOut("[ERROR]: You must provide a mimark file before you can use the sra command. You can create a template for this file using the get.mimarkspackage command."); m->mothurOutEndLine(); abort = true; }
217 else if(mimarksfile == "not open") { abort = true; }
219 file = validParameter.validFile(parameters, "file", true);
220 if (file == "not open") { file = ""; abort = true; }
221 else if (file == "not found") { file = ""; }
223 if ((file == "") && (oligosfile == "")) {
224 m->mothurOut("[ERROR]: You must provide an oligos file or file with oligos files in them before you can use the sra command."); m->mothurOutEndLine(); abort = true;
227 if ((fastqfile == "") && (file == "") && (sfffile == "")) {
228 m->mothurOut("[ERROR]: You must provide a file, sff file or fastq file before you can use the sra command."); m->mothurOutEndLine(); abort = true;
231 //use only one Mutliple type _LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
232 platform = validParameter.validFile(parameters, "platform", false); if (platform == "not found") { platform = "_LS454"; }
233 if (!checkCasesPlatforms(platform)) { abort = true; } //error message in function
235 if (!abort) { //don't check instrument model is platform is bad
236 //454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified
237 instrumentModel = validParameter.validFile(parameters, "instrument", false); if (instrumentModel == "not found") { instrumentModel = "454_GS"; }
238 if (!checkCasesInstrumentModels(instrumentModel)) { abort = true; } //error message in function
240 //turn _ to spaces mothur's work around
241 for (int i = 0; i < instrumentModel.length(); i++) { if (instrumentModel[i] == '_') { instrumentModel[i] = ' '; } }
243 libStrategy = validParameter.validFile(parameters, "libstrategy", false); if (libStrategy == "not found") { libStrategy = "AMPLICON"; }
244 if (!checkCasesLibStrategy(libStrategy)) { abort = true; } //error message in function
246 //turn _ to spaces mothur's work around
247 for (int i = 0; i < libStrategy.length(); i++) { if (libStrategy[i] == '_') { libStrategy[i] = ' '; } }
249 libSource = validParameter.validFile(parameters, "libsource", false); if (libSource == "not found") { libSource = "METAGENOMIC"; }
250 if (!checkCasesLibSource(libSource)) { abort = true; } //error message in function
252 //turn _ to spaces mothur's work around
253 for (int i = 0; i < libSource.length(); i++) { if (libSource[i] == '_') { libSource[i] = ' '; } }
255 libSelection = validParameter.validFile(parameters, "libselection", false); if (libSelection == "not found") { libSelection = "PCR"; }
256 if (!checkCasesLibSelection(libSelection)) { abort = true; } //error message in function
258 //turn _ to spaces mothur's work around
259 for (int i = 0; i < libSelection.length(); i++) { if (libSelection[i] == '_') { libSelection[i] = ' '; } }
261 dataType = validParameter.validFile(parameters, "datatype", false); if (dataType == "not found") { dataType = "METAGENOME"; }
262 if (!checkCasesDataType(dataType)) { abort = true; } //error message in function
264 //turn _ to spaces mothur's work around
265 for (int i = 0; i < dataType.length(); i++) { if (dataType[i] == '_') { dataType[i] = ' '; } }
267 orientation = validParameter.validFile(parameters, "orientation", false); if (orientation == "not found") { orientation = "forward"; }
269 if ((orientation == "forward") || (orientation == "reverse")) { }
270 else { m->mothurOut("[ERROR]: " + orientation + " is not a valid orientation option. Choices are: forward and reverse.\n"); m->mothurOutEndLine(); abort = true; }
273 string temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found"){ temp = "0"; }
274 m->mothurConvert(temp, bdiffs);
276 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found"){ temp = "0"; }
277 m->mothurConvert(temp, pdiffs);
279 temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
280 m->mothurConvert(temp, ldiffs);
282 temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }
283 m->mothurConvert(temp, sdiffs);
285 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); }
286 m->mothurConvert(temp, tdiffs);
288 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }
290 checkorient = validParameter.validFile(parameters, "checkorient", false); if (temp == "not found") { temp = "F"; }
295 catch(exception& e) {
296 m->errorOut(e, "SRACommand", "SRACommand");
300 //**********************************************************************************************************************
301 int SRACommand::execute(){
304 if (abort == true) { if (calledHelp) { return 0; } return 2; }
308 if (oligosfile != "") { readOligos(); Groups.push_back("scrap"); }
310 if (m->control_pressed) { return 0; }
313 map<string, vector<string> > filesBySample;
316 if (file != "") { readFile(filesBySample); }
317 else if (sfffile != "") { parseSffFile(filesBySample); }
318 else if (fastqfile != "") { parseFastqFile(filesBySample); }
320 for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { Groups.push_back(*it); }
322 sanityCheckMiMarksGroups();
324 //checks groups and files returned from parse - removes any groups that did not get reads assigned to them, orders files.
325 checkGroups(filesBySample);
328 string thisOutputDir = outputDir;
329 if (outputDir == "") { thisOutputDir += m->hasPath(inputfile); }
330 map<string, string> variables;
331 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(inputfile));
332 string outputFileName = getOutputFileName("xml", variables);
333 outputNames.push_back(outputFileName); outputTypes["xml"].push_back(outputFileName);
335 m->openOutputFile(outputFileName, out);
338 ////////////////////////////////////////////////////////
339 out << "<Submission>\n";
340 out << "\t<Description>\n";
341 out << "\t\t<Comment> New Submission. Generated by mothur version " + m->getVersion() + " </Comment> \n";
342 out << "\t\t<Submitter user_name=\"" + submissionName + "\"/>\n";
343 out << "\t\t<Organization type=\"" + centerType + "\">\n";
344 out << "\t\t<Name>" + centerName + "</Name>\n";
345 out << "\t\t<Contact> email=\"" + email + "\">\n";
346 out << "\t\t\t<Name>\n";
347 out << "\t\t\t\t<First>" + firstName + "</First>\n";
348 out << "\t\t\t\t<Last>" + firstName + "</Last>\n";
349 out << "\t\t\t</Name>\n";
350 out << "\t\t</Contact>\n";
351 out << "\t\t</Organization>\n";
352 out << "\t</Description>\n";
353 ////////////////////////////////////////////////////////
356 ////////////////////////////////////////////////////////
357 out << "\t<Action>\n";
358 out << "\t\t<AddData target_db=\"BioProject\">\n";
359 out << "\t\t\t<Data content_type=\"XML\">\n";
360 out << "\t\t\t\t<XmlContent>\n";
361 out << "\t\t\t\t\t<Project schema_version=\"2.0\">\n";
362 out << "\t\t\t\t\t\t<ProjectID>\n";
363 out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + projectName + " </SPUID> \n";
364 out << "\t\t\t\t\t\t</ProjectID>\n";
365 out << "\t\t\t\t\t\t<Descriptor>\n";
366 out << "\t\t\t\t\t\t\t<Title>" + projectTitle + " </Title> \n";
367 out << "\t\t\t\t\t\t\t<Description><p>" + description + "</p></Description> \n";
369 out << "\t\t\t\t\t\t\t<ExternalLink label=\"Website name\">\n";
370 out << "\t\t\t\t\t\t\t\t<URL>" + website + "</URL>\n";
371 out << "\t\t\t\t\t\t\t</ExternalLink>\n";
373 out << "\t\t\t\t\t\t</Descriptor>\n";
374 out << "\t\t\t\t\t\t<ProjectType>\n";
375 out << "\t\t\t\t\t\t\t<ProjectTypeSubmission sample_scope=\"eEnvironment\">\n";
376 out << "\t\t\t\t\t\t\t\t<IntendedDataTypeSet>\n";
377 out << "\t\t\t\t\t\t\t\t\t<DataType>" + dataType + " </DataType> \n";
378 out << "\t\t\t\t\t\t\t\t</IntendedDataTypeSet>\n";
379 out << "\t\t\t\t\t\t\t</ProjectTypeSubmission>\n";
380 out << "\t\t\t\t\t\t</ProjectType>\n";
381 out << "\t\t\t\t\t</Project>\n";
382 out << "\t\t\t\t</XmlContent>\n";
383 out << "\t\t\t</Data>\n";
384 out << "\t\t\t<Identifier>\n";
385 out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + projectName + " </SPUID> \n";
386 out << "\t\t\t</Identifier>\n";
387 out << "\t\t</AddData>\n";
388 out << "\t</Action>\n";
389 ////////////////////////////////////////////////////////
392 ////////////////////////////////////////////////////////
393 for (int i = 0; i < Groups.size(); i++) {
395 string barcodeForThisSample = Group2Barcode[Groups[i]][0];
397 if (m->control_pressed) { break; }
398 out << "\t<Action>\n";
399 out << "\t\t<AddData target_db=\"BioSample\">\n";
400 out << "\t\t\t<Data content_type=\"XML\">\n";
401 out << "\t\t\t\t<XmlContent>\n";
402 out << "\t\t\t\t\t<BioSample schema_version=\"2.0\">\n";
403 out << "\t\t\t\t\t\t<SampleId>\n";
404 out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + Groups[i] + " </SPUID> \n";
405 out << "\t\t\t\t\t\t</SampleId>\n";
406 out << "\t\t\t\t\t\t<Organism>\n";
407 string organismName = "metagenome";
408 map<string, string>::iterator itOrganism = Group2Organism.find(Groups[i]);
409 if (itOrganism != Group2Organism.end()) { organismName = itOrganism->second; } //user supplied acceptable organism, so use it.
410 out << "\t\t\t\t\t\t\t<OrganismName>" + organismName + " </OrganismName> \n";
411 out << "\t\t\t\t\t\t</Organism>\n";
412 out << "\t\t\t\t\t\t<Package>" + packageType + "</Package>n";
413 out << "\t\t\t\t\t\t<Attributes>n";
414 //add biosample required attributes
415 map<string, map<string, string> >:: iterator it = mimarks.find(Groups[i]);
416 if (it != mimarks.end()) {
417 map<string, string> categories = it->second;
418 for (map<string, string>:: iterator it2 = categories.begin(); it2 != categories.end(); it2++) {
419 if (m->control_pressed) { break; }
420 out << "\t\t\t\t\t\t\t<Attribute attribute_name=\"" + it2->first + "\">\"" + it2->second + "\"</Attribute>\n";
423 out << "\t\t\t\t\t\t</Attributes>n";
424 out << "\t\t\t\t\t</BioSample>\n";
425 out << "\t\t\t\t</XmlContent>\n";
426 out << "\t\t\t</Data>\n";
427 out << "\t\t\t<Identifier>\n";
428 out << "\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + Groups[i] + " </SPUID>\n";
429 out << "\t\t\t</Identifier>\n";
430 out << "\t\t</AddData>\n";
431 out << "\t</Action>\n";
435 ////////////////////////////////////////////////////////
436 for (int i = 0; i < Groups.size(); i++) {
438 vector<string> thisGroupsFiles = filesBySample[Groups[i]];
439 string barcodeForThisSample = Group2Barcode[Groups[i]][0];
441 for (int j = 0; j < thisGroupsFiles.size(); j++) {
442 string libId = thisGroupsFiles[j] + "." + barcodeForThisSample;
444 if (m->control_pressed) { break; }
445 out << "\t<Action>\n";
446 out << "\t\t<AddFiles target_db=\"SRA\">\n";
447 if (libLayout == "paired") { //adjust the libID because the thisGroupsFiles[j] contains two filenames
448 vector<string> pieces = m->splitWhiteSpace(thisGroupsFiles[j]);
449 libId = pieces[0] + barcodeForThisSample;
450 out << "\t\t\t<File file_path=\"" + pieces[0] + "\">\n";
451 out << "\t\t\t\t<DataType>generic-data</DataType> \n";
452 out << "\t\t\t</File>\n";
453 vector<string> thisBarcodes; m->splitAtChar(Group2Barcode[Groups[i]][0], thisBarcodes, '.');
454 string forwardBarcode = thisBarcodes[0];
455 string reverseBarcode = thisBarcodes[1];
456 vector<string> thisPrimers; m->splitAtChar(Group2Primer[Groups[i]][0], thisPrimers, '.');
457 string forwardPrimer = thisPrimers[0];
458 string reversePrimer = thisPrimers[1];
460 out << "\t\t\t<Attribute name=\"title\">" + mimarks[Groups[i]]["title"] + "</Attribute>\n";
461 out << "\t\t\t<Attribute name=\"BarCode\">" + forwardBarcode + "</Attribute>\n";
462 out << "\t\t\t<Attribute name=\"primer\">" + forwardPrimer + "</Attribute>\n";
463 out << "\t\t\t<Attribute name=\"read_type\">forward</Attribute>\n";
464 out << "\t\t\t<Attribute name=\"library_name\">" + libId + "</Attribute>\n";
465 out << "\t\t\t<Attribute name=\"library_strategy\">" + libStrategy + "</Attribute>\n";
466 out << "\t\t\t<Attribute name=\"library_source\">" + libSource + "</Attribute>\n";
467 out << "\t\t\t<Attribute name=\"library_selection\">" + libSelection + "</Attribute>\n";
468 out << "\t\t\t<Attribute name=\"library_layout\">" + libLayout + "</Attribute>\n";
469 out << "\t\t\t<Attribute name=\"instrument_model\">" + instrumentModel + "</Attribute>\n";
470 out << "\t\t\t<Attribute name=\"library_construction_protocol\">" + mimarks[Groups[i]]["seq_methods"] + "</Attribute>\n";
472 out << "\t\t\t<File file_path=\"" + pieces[1] + "\">\n";
473 out << "\t\t\t\t<DataType>generic-data</DataType> \n";
474 out << "\t\t\t</File>\n";
475 out << "\t\t\t<Attribute name=\"title\">" + mimarks[Groups[i]]["title"] + "</Attribute>\n";
476 out << "\t\t\t<Attribute name=\"BarCode\">" + reverseBarcode + "</Attribute>\n";
477 out << "\t\t\t<Attribute name=\"primer\">" + reversePrimer + "</Attribute>\n";
478 out << "\t\t\t<Attribute name=\"read_type\">reverse</Attribute>\n";
479 out << "\t\t\t<Attribute name=\"library_name\">" + libId + "</Attribute>\n";
480 out << "\t\t\t<Attribute name=\"library_strategy\">" + libStrategy + "</Attribute>\n";
481 out << "\t\t\t<Attribute name=\"library_source\">" + libSource + "</Attribute>\n";
482 out << "\t\t\t<Attribute name=\"library_selection\">" + libSelection + "</Attribute>\n";
483 out << "\t\t\t<Attribute name=\"library_layout\">" + libLayout + "</Attribute>\n";
484 out << "\t\t\t<Attribute name=\"instrument_model\">" + instrumentModel + "</Attribute>\n";
485 out << "\t\t\t<Attribute name=\"library_construction_protocol\">" + mimarks[Groups[i]]["seq_methods"] + "</Attribute>\n";
488 out << "\t\t\t<File file_path=\"" + thisGroupsFiles[j] + "\">\n";
489 out << "\t\t\t\t<DataType>generic-data</DataType> \n";
490 out << "\t\t\t</File>\n";
492 out << "\t\t\t<Attribute name=\"title\">" + mimarks[Groups[i]]["title"] + "</Attribute>\n";
493 out << "\t\t\t<Attribute name=\"BarCode\">" + Group2Barcode[Groups[i]][0] + "</Attribute>\n";
494 out << "\t\t\t<Attribute name=\"primer\">" + Group2Primer[Groups[i]][0] + "</Attribute>\n";
495 out << "\t\t\t<Attribute name=\"read_type\">" + orientation + "</Attribute>\n";
496 out << "\t\t\t<Attribute name=\"library_name\">" + libId + "</Attribute>\n";
497 out << "\t\t\t<Attribute name=\"library_strategy\">" + libStrategy + "</Attribute>\n";
498 out << "\t\t\t<Attribute name=\"library_source\">" + libSource + "</Attribute>\n";
499 out << "\t\t\t<Attribute name=\"library_selection\">" + libSelection + "</Attribute>\n";
500 out << "\t\t\t<Attribute name=\"library_layout\">" + libLayout + "</Attribute>\n";
501 out << "\t\t\t<Attribute name=\"instrument_model\">" + instrumentModel + "</Attribute>\n";
502 out << "\t\t\t<Attribute name=\"library_construction_protocol\">" + mimarks[Groups[i]]["seq_methods"] + "</Attribute>\n";
505 ///////////////////bioProject info
506 out << "\t\t\t<AttributeRefId name=\"BioProject\">\n";
507 out << "\t\t\t\t<RefId>\n";
508 out << "\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + projectName + " </SPUID> \n";
509 out << "\t\t\t\t</RefId>\n";
510 out << "\t\t\t</AttributeRefId>\n";
511 //////////////////bioSample info
512 out << "\t\t\t<AttributeRefId name=\"BioSample\">\n";
513 out << "\t\t\t\t<RefId>\n";
514 out << "\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + Groups[i] + " </SPUID>\n";
515 out << "\t\t\t\t</RefId>\n";
516 out << "\t\t\t</AttributeRefId>\n";
518 out << "\t\t\t<Identifier>\n";
519 if (libLayout == "paired") { //adjust the libID because the thisGroupsFiles[j] contains two filenames
520 vector<string> pieces = m->splitWhiteSpace(thisGroupsFiles[j]);
521 libId = pieces[0] + barcodeForThisSample;
523 out << "\t\t\t\t<LocalId>" + libId + " </LocalId>\n";
524 out << "\t\t\t</Identifier>\n";
525 out << "\t\t</AddFiles>\n";
526 out << "\t</Action>\n";
529 out << "</Submission>\n";
532 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
534 //output files created by command
535 m->mothurOutEndLine();
536 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
537 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
538 m->mothurOutEndLine();
542 catch(exception& e) {
543 m->errorOut(e, "SRACommand", "SRACommand");
547 //**********************************************************************************************************************
// Parses the project (contact) file named by `contactfile` into the submitter / project members.
// Each record is read as a whitespace-delimited key followed by the rest of that line as its value.
// Keys are upper-cased before matching, so the file may use any letter case for them.
// After parsing, every field checked at the bottom of this function that is still empty is reported
// with an [ERROR] message and flags the run as failed through m->control_pressed.
// NOTE(review): the declarations of `in`, `key` and `value`, and the enclosing read loop, are not
// visible in this excerpt -- confirm them against the full file.
548 int SRACommand::readContactFile(){
// reset every field before parsing
550 lastName = ""; firstName = ""; submissionName = ""; email = ""; centerName = ""; centerType = ""; description = ""; website = ""; projectName = "";
551 projectTitle = ""; grantAgency = ""; grantId = ""; grantTitle = "";
554 m->openInputFile(contactfile, in);
// stop reading as soon as an error or abort has been flagged
558 if (m->control_pressed) { break; }
// the key is the first token; the value is whatever remains on that line
561 in >> key; m->gobble(in);
562 value = m->getline(in); m->gobble(in);
// upper-case the key so the comparisons below are case-insensitive
564 for (int i = 0; i < key.length(); i++) { key[i] = toupper(key[i]); }
566 if (key == "USERNAME") { submissionName = value; }
567 else if (key == "LAST") { lastName = value; }
568 else if (key == "FIRST") { firstName = value; }
569 else if (key == "EMAIL") { email = value; }
570 else if (key == "CENTER") { centerName = value; }
571 else if (key == "TYPE") {
// center type is a controlled vocabulary: lower-case it, then accept only consortium, center, institute or lab
// NOTE(review): the assignment of `value` to `centerType` is not visible in this excerpt -- confirm it precedes this loop
573 for (int i = 0; i < centerType.length(); i++) { centerType[i] = tolower(centerType[i]); }
574 if ((centerType == "consortium") || (centerType == "center") || (centerType == "institute") || (centerType == "lab")) {}
575 else { m->mothurOut("[ERROR]: " + centerType + " is not a center type option. Valid center type options are consortium, center, institute and lab. This is a controlled vocabulary section in the XML file that will be generated."); m->mothurOutEndLine(); m->control_pressed = true; }
576 }else if (key == "DESCRIPTION") { description = value; }
// website and the three grant fields are stored, but none of them is among the required-field checks below
577 else if (key == "WEBSITE") { website = value; }
578 else if (key == "PROJECTNAME") { projectName = value; }
579 else if (key == "PROJECTTITLE") { projectTitle = value; }
580 else if (key == "GRANTID") { grantId = value; }
581 else if (key == "GRANTTITLE") { grantTitle = value; }
582 else if (key == "GRANTAGENCY") { grantAgency = value; }
// required-field validation: each blank field prints its own error and sets m->control_pressed,
// so a file with several omissions reports all of them in one pass
586 if (lastName == "") { m->mothurOut("[ERROR]: missing last name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
587 if (firstName == "") { m->mothurOut("[ERROR]: missing first name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
588 if (submissionName == "") { m->mothurOut("[ERROR]: missing submission name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
589 if (email == "") { m->mothurOut("[ERROR]: missing email from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
590 if (centerName == "") { m->mothurOut("[ERROR]: missing center name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
591 if (centerType == "") { m->mothurOut("[ERROR]: missing center type from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
592 if (description == "") { m->mothurOut("[ERROR]: missing description from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
593 if (projectTitle == "") { m->mothurOut("[ERROR]: missing project title from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
594 if (projectName == "") { m->mothurOut("[ERROR]: missing project name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
// any std::exception raised above is reported under this function's name
598 catch(exception& e) {
599 m->errorOut(e, "SRACommand", "readContactFile");
603 //**********************************************************************************************************************
604 //air, host_associated, human_associated, human_gut, human_oral, human_skin, human_vaginal, microbial, miscellaneous, plant_associated, sediment, soil, wastewater or water
605 //all packages require: *sample_name *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon
607 //host_associated, human_associated, human_gut, human_oral, human_skin, human_vaginal, plant_associated: *host
608 //microbial, sediment, soil: *depth *elev
610 int SRACommand::readMIMarksFile(){
612 //acceptable organisms
613 vector<string> acceptableOrganisms;
614 bool organismError = false;
616 acceptableOrganisms.push_back("activated carbon metagenome"); acceptableOrganisms.push_back("activated sludge metagenome"); acceptableOrganisms.push_back("air metagenome"); acceptableOrganisms.push_back("anaerobic digester metagenome"); acceptableOrganisms.push_back("ant fungus garden metagenome"); acceptableOrganisms.push_back("aquatic metagenome"); acceptableOrganisms.push_back("activated carbon metagenome"); acceptableOrganisms.push_back("activated sludge metagenome"); acceptableOrganisms.push_back("beach sand metagenome"); acceptableOrganisms.push_back("biofilm metagenome"); acceptableOrganisms.push_back("biofilter metagenome"); acceptableOrganisms.push_back("biogas fermenter metagenome"); acceptableOrganisms.push_back("bioreactor metagenome"); acceptableOrganisms.push_back("bioreactor sludge metagenome"); acceptableOrganisms.push_back("clinical metagenome"); acceptableOrganisms.push_back("coal metagenome"); acceptableOrganisms.push_back("compost metagenome"); acceptableOrganisms.push_back("dust metagenome"); acceptableOrganisms.push_back("fermentation metagenome"); acceptableOrganisms.push_back("food fermentation metagenome"); acceptableOrganisms.push_back("food metagenome"); acceptableOrganisms.push_back("freshwater metagenome"); acceptableOrganisms.push_back("freshwater sediment metagenome"); acceptableOrganisms.push_back("groundwater metagenome"); acceptableOrganisms.push_back("halite metagenome"); acceptableOrganisms.push_back("hot springs metagenome"); acceptableOrganisms.push_back("hydrocarbon metagenome"); acceptableOrganisms.push_back("hydrothermal vent metagenome"); acceptableOrganisms.push_back("hypersaline lake metagenome"); acceptableOrganisms.push_back("ice metagenome"); acceptableOrganisms.push_back("indoor metagenome"); acceptableOrganisms.push_back("industrial waste metagenome"); acceptableOrganisms.push_back("mangrove metagenome"); acceptableOrganisms.push_back("marine metagenome"); acceptableOrganisms.push_back("marine sediment 
metagenome"); acceptableOrganisms.push_back("microbial mat metagenome"); acceptableOrganisms.push_back("mine drainage metagenome"); acceptableOrganisms.push_back("mixed culture metagenome"); acceptableOrganisms.push_back("oil production facility metagenome"); acceptableOrganisms.push_back("paper pulp metagenome"); acceptableOrganisms.push_back("permafrost metagenome"); acceptableOrganisms.push_back("plastisphere metagenome"); acceptableOrganisms.push_back("power plant metagenome"); acceptableOrganisms.push_back("retting rhizosphere metagenome"); acceptableOrganisms.push_back("rock metagenome"); acceptableOrganisms.push_back("salt lake metagenome"); acceptableOrganisms.push_back("saltern metagenome"); acceptableOrganisms.push_back("sediment metagenome"); acceptableOrganisms.push_back("snow metagenome"); acceptableOrganisms.push_back("soil metagenome"); acceptableOrganisms.push_back("stromatolite metagenome"); acceptableOrganisms.push_back("terrestrial metagenome"); acceptableOrganisms.push_back("tomb wall metagenome"); acceptableOrganisms.push_back("wastewater metagenome"); acceptableOrganisms.push_back("wetland metagenome"); acceptableOrganisms.push_back("whale fall metagenome");
618 acceptableOrganisms.push_back("algae metagenome"); acceptableOrganisms.push_back("ant metagenome"); acceptableOrganisms.push_back("bat metagenome"); acceptableOrganisms.push_back("beetle metagenome"); acceptableOrganisms.push_back("bovine gut metagenome"); acceptableOrganisms.push_back("bovine metagenome"); acceptableOrganisms.push_back("chicken gut metagenome"); acceptableOrganisms.push_back("coral metagenome"); acceptableOrganisms.push_back("echinoderm metagenome"); acceptableOrganisms.push_back("endophyte metagenome"); acceptableOrganisms.push_back("epibiont metagenome"); acceptableOrganisms.push_back("fish metagenome"); acceptableOrganisms.push_back("fossil metagenome"); acceptableOrganisms.push_back("gill metagenome"); acceptableOrganisms.push_back("gut metagenome"); acceptableOrganisms.push_back("honeybee metagenome"); acceptableOrganisms.push_back("human gut metagenome"); acceptableOrganisms.push_back("human lung metagenome"); acceptableOrganisms.push_back("human metagenome"); acceptableOrganisms.push_back("human nasal/pharyngeal metagenome"); acceptableOrganisms.push_back("human oral metagenome"); acceptableOrganisms.push_back("human skin metagenome"); acceptableOrganisms.push_back("insect gut metagenome"); acceptableOrganisms.push_back("insect metagenome"); acceptableOrganisms.push_back("mollusc metagenome"); acceptableOrganisms.push_back("mosquito metagenome"); acceptableOrganisms.push_back("mouse gut metagenome"); acceptableOrganisms.push_back("mouse metagenome"); acceptableOrganisms.push_back("mouse skin metagenome"); acceptableOrganisms.push_back("nematode metagenome"); acceptableOrganisms.push_back("oral metagenome"); acceptableOrganisms.push_back("phyllosphere metagenome"); acceptableOrganisms.push_back("pig metagenome"); acceptableOrganisms.push_back("plant metagenome"); acceptableOrganisms.push_back("primate metagenome"); acceptableOrganisms.push_back("rat metagenome"); acceptableOrganisms.push_back("root metagenome"); 
acceptableOrganisms.push_back("sea squirt metagenome"); acceptableOrganisms.push_back("seed metagenome"); acceptableOrganisms.push_back("shoot metagenome"); acceptableOrganisms.push_back("skin metagenome"); acceptableOrganisms.push_back("snake metagenome"); acceptableOrganisms.push_back("sponge metagenome"); acceptableOrganisms.push_back("stomach metagenome"); acceptableOrganisms.push_back("symbiont metagenome"); acceptableOrganisms.push_back("termite gut metagenome"); acceptableOrganisms.push_back("termite metagenome"); acceptableOrganisms.push_back("upper respiratory tract metagenome"); acceptableOrganisms.push_back("urine metagenome"); acceptableOrganisms.push_back("viral metagenome"); acceptableOrganisms.push_back("wallaby gut metagenome"); acceptableOrganisms.push_back("wasp metagenome"); acceptableOrganisms.push_back("sythetic metagenome"); acceptableOrganisms.push_back("metagenome");
620 vector<string> requiredFieldsForPackage;
621 requiredFieldsForPackage.push_back("sample_name"); requiredFieldsForPackage.push_back("organism");
622 requiredFieldsForPackage.push_back("collection_date"); requiredFieldsForPackage.push_back("biome");
623 requiredFieldsForPackage.push_back("feature"); requiredFieldsForPackage.push_back("material");
624 requiredFieldsForPackage.push_back("geo_loc_name"); requiredFieldsForPackage.push_back("lat_lon");
625 requiredFieldsForPackage.push_back("seq_methods"); requiredFieldsForPackage.push_back("title");
626 vector<string> chooseAtLeastOneForPackage;
629 m->openInputFile(mimarksfile, in);
632 string temp; packageType = "";
635 if (m->control_pressed) { break; }
636 temp = m->getline(in); m->gobble(in);
638 if (m->debug) { m->mothurOut("[DEBUG]: " + temp + "\n"); }
640 if (temp[0] == '#') {
641 int pos = temp.find("Environmental");
642 if (pos != string::npos) {
643 for (int i = pos+14; i < temp.length(); i++) {
644 if (!isspace(temp[i])) { packageType += temp[i]; }
645 else { i+= temp.length(); }
649 else{ break; } //hit headers line
652 vector<string> headers; m->splitAtChar(temp, headers, '\t');
653 m->removeBlanks(headers);
654 //remove * from required's
655 for (int i = 0; i < headers.size(); i++) {
656 if (headers[i][0] == '*') { headers[i] = headers[i].substr(1); }
657 if (headers[i][0] == '*') { headers[i] = headers[i].substr(1); chooseAtLeastOneForPackage.push_back(headers[i]); } //secondary condition
658 if (m->debug) { m->mothurOut("[DEBUG]: " + headers[i] + "\n"); }
661 if (m->debug) { m->mothurOut("[DEBUG]: packageType = '" + packageType + "'\n"); }
663 //check to make sure package has all its required parts
664 //MIMARKS.specimen.water.3.0
665 if (packageType == "MIMARKS.specimen.air.3.0") { requiredFieldsForPackage.push_back("altitude"); }
666 else if ((packageType == "MIMARKS.specimen.host-associated.3.0") || (packageType == "MIMARKS.specimen.human-associated.3.0") || (packageType == "MIMARKS.specimen.human-gut.3.0") || (packageType == "MIMARKS.specimen.human-oral.3.0") || (packageType == "MIMARKS.specimen.human-skin.3.0") || (packageType == "MIMARKS.specimen.human-vaginal.3.0") || (packageType == "MIMARKS.specimen.plant-associated.3.0")) { requiredFieldsForPackage.push_back("host"); }
667 else if ((packageType == "MIMARKS.specimen.microbial.3.0") || (packageType == "MIMARKS.specimen.sediment.3.0") || (packageType == "soil")) { requiredFieldsForPackage.push_back("depth"); requiredFieldsForPackage.push_back("elev"); }
668 else if (packageType == "MIMARKS.specimen.water.3.0") { requiredFieldsForPackage.push_back("depth"); }
669 else if ((packageType == "MIMARKS.specimen.miscellaneous.3.0") || (packageType == "wastewater")) { }
671 m->mothurOut("[ERROR]: unknown package " + packageType + ", please correct.\n"); m->control_pressed = true; in.close(); return 0;
674 if (!m->isSubset(headers, requiredFieldsForPackage)){
675 string requiredFields = "";
676 for (int i = 0; i < requiredFieldsForPackage.size()-1; i++) { requiredFields += requiredFieldsForPackage[i] + ", "; } requiredFields += requiredFieldsForPackage[requiredFieldsForPackage.size()-1];
677 m->mothurOut("[ERROR]: missing required fields for package, please correct. Required fields are " + requiredFields + ".\n"); m->control_pressed = true; in.close(); return 0;
680 if (m->debug) { m->mothurOut("[DEBUG]: chooseAtLeastOneForPackage.size() = " + toString(chooseAtLeastOneForPackage.size()) + "\n"); }
682 if (!m->inUsersGroups(chooseAtLeastOneForPackage, headers)){ //returns true if any of the choose at least ones are in headers
683 string requiredFields = "";
684 for (int i = 0; i < chooseAtLeastOneForPackage.size()-1; i++) { requiredFields += chooseAtLeastOneForPackage[i] + ", "; cout << chooseAtLeastOneForPackage[i] << endl; }
685 if (chooseAtLeastOneForPackage.size() < 1) { requiredFields += chooseAtLeastOneForPackage[chooseAtLeastOneForPackage.size()-1]; }
686 m->mothurOut("[ERROR]: missing a choose at least one fields for the package, please correct. These are marked with '**'. Required fields are " + requiredFields + ".\n"); m->control_pressed = true; in.close(); return 0;
689 map<string, bool> allNA; for (int i = 1; i < headers.size(); i++) { allNA[headers[i]] = true; }
692 if (m->control_pressed) { break; }
694 temp = m->getline(in); m->gobble(in);
696 if (m->debug) { m->mothurOut("[DEBUG]: " + temp + "\n"); }
698 string original = temp;
699 vector<string> linePieces; m->splitAtChar(temp, linePieces, '\t');
700 m->removeBlanks(linePieces);
702 if (linePieces.size() != headers.size()) { m->mothurOut("[ERROR]: line: " + original + " contains " + toString(linePieces.size()) + " columns, but you have " + toString(headers.size()) + " column headers, please correct.\n"); m->control_pressed = true; }
704 map<string, map<string, string> >:: iterator it = mimarks.find(linePieces[0]);
706 if (it == mimarks.end()) {
707 map<string, string> categories;
708 //start after *sample_name
709 for (int i = 1; i < headers.size(); i++) {
710 categories[headers[i]] = linePieces[i];
711 //check the users inputs for appropriate organisms
712 if (headers[i] == "organism") {
713 if (!m->inUsersGroups(linePieces[i], acceptableOrganisms)) { //not an acceptable organism
714 organismError = true;
715 m->mothurOut("[WARNING]: " + linePieces[i]+ " is not an acceptable organism, changing to acceptable 'metagenome'. NCBI will allow you to modify the organism after submission.\n"); linePieces[i] = "metagenome"; categories[headers[i]] = linePieces[i];
717 Group2Organism[linePieces[0]] = linePieces[i];
719 if (linePieces[i] != "NA") { allNA[headers[i]] = false; }
722 //does this sample already match an existing sample?
723 bool isOkaySample = true;
724 for (map<string, map<string, string> >:: iterator it2 = mimarks.begin(); it2 != mimarks.end(); it2++) {
725 if (m->control_pressed) { break; }
727 for (int i = 1; i < headers.size(); i++) {
728 if ((it2->second)[headers[i]] != categories[headers[i]]) { allSame = false; }
730 if (allSame) { m->mothurOut("[ERROR]: " + linePieces[0]+ " is a duplicate sample to " + it2->first + ". It has all the same attributes in the MIMarks file. Samples must have distinguishing features to be uploaded to the NCBI library, please correct.\n"); m->control_pressed = true; isOkaySample = false; }
732 if (isOkaySample) { mimarks[linePieces[0]] = categories; }
734 m->mothurOut("[ERROR]: " + linePieces[0]+ " is a duplicate sampleName. Sample names must be unique, please correct.\n"); m->control_pressed = true;
740 //add in values for "scrap" group
741 map<string, string> categories;
742 //start after *sample_name
743 for (int i = 1; i < headers.size(); i++) {
744 categories[headers[i]] = "NA";
745 if (headers[i] == "organism") { categories[headers[i]] = "metagenome"; }
746 if (headers[i] == "seq_methods") { categories[headers[i]] = "these sequences were scrapped"; }
747 if (headers[i] == "title") { categories[headers[i]] = "these sequences were scrapped"; }
749 mimarks["scrap"] = categories;
750 Group2Organism["scrap"] = "metagenome";
753 string organismTypes = "";
754 for (int i = 0; i < acceptableOrganisms.size()-1; i++) { organismTypes += acceptableOrganisms[i] + ", "; }
755 organismTypes += acceptableOrganisms[acceptableOrganisms.size()-1];
756 m->mothurOut("\n[WARNING]: The acceptable organism choices are: " + organismTypes + ".\n\n\n");
761 catch(exception& e) {
762 m->errorOut(e, "SRACommand", "readMIMarksFile");
767 //**********************************************************************************************************************
768 // going to have to rework this to allow for other options --
778 fastqfile1 oligosfile1
779 fastqfile2 oligosfile2
784 fastqfile fastqfile group
785 fastqfile fastqfile group
786 fastqfile fastqfile group
// Reads the multi-file listing named by `file`, one whitespace-separated record per line, and fills `files`
// (group name -> file names).
//   2 columns: <sff or fastq file> <oligos file>        -> parsed through parseSffFile / parseFastqFile
//   3 columns: <group> <forward fastq> <reverse fastq>  -> stored as "forward reverse" under the group
// Any other column count prints an [ERROR] and sets m->control_pressed.
// NOTE(review): several lines of this function (stream declarations, loop header, closing logic) are not
// visible in this excerpt; the notes below mark places that need checking against the full file.
791 int SRACommand::readFile(map<string, vector<string> >& files){
793 //vector<string> theseFiles;
798 m->openInputFile(file, in);
802 if (m->control_pressed) { return 0; }
804 string line = m->getline(in); m->gobble(in);
805 vector<string> pieces = m->splitWhiteSpace(line);
808 string thisFileName1, thisFileName2; thisFileName1 = ""; thisFileName2 = "";
809 if (pieces.size() == 2) {
810 thisFileName1 = pieces[0];
811 thisFileName2 = pieces[1];
812 }else if (pieces.size() == 3) {
813 thisFileName1 = pieces[1];
814 thisFileName2 = pieces[2];
// NOTE(review): this declares a NEW `group` scoped to this branch. The debug line and files.find(group)
// further down use a `group` outside this branch, so they presumably see a different variable that never
// receives pieces[0] -- confirm whether this should be a plain assignment.
815 string group = pieces[0];
816 libLayout = "paired";
818 m->mothurOut("[ERROR]: file lines can be 2 or 3 columns. The 2 column files are sff file then oligos or fastqfile then oligos. You may have multiple lines in the file. The 3 column files are for paired read libraries. The format is groupName, forwardFastqFile reverseFastqFile. \n"); m->control_pressed = true;
821 if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", thisFileName1 = " + thisFileName1 + ", thisFileName2 = " + thisFileName2 + ".\n"); }
// names given without a path are looked for in the user's input directory first
823 if (inputDir != "") {
824 string path = m->hasPath(thisFileName1);
825 if (path == "") { thisFileName1 = inputDir + thisFileName1; }
827 path = m->hasPath(thisFileName2);
828 if (path == "") { thisFileName2 = inputDir + thisFileName2; }
831 //check to make sure both are able to be opened
// the comparisons below treat a return value of 1 from openInputFile as "could not open"
833 int openForward = m->openInputFile(thisFileName1, in2, "noerror");
835 //if you can't open it, try default location
836 if (openForward == 1) {
838 if (m->getDefaultPath() != "") { //default path is set
839 string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName1);
840 m->mothurOut("Unable to open " + thisFileName1 + ". Trying default " + tryPath); m->mothurOutEndLine();
// NOTE(review): the retry opens through `in3`, not `in2`, yet the success path below closes `in2` -- confirm the stream handling
842 openForward = m->openInputFile(tryPath, in3, "noerror");
844 thisFileName1 = tryPath;
848 //if you can't open it, try output location
849 if (openForward == 1) {
850 if (m->getOutputDir() != "") { //default path is set
851 string tryPath = m->getOutputDir() + m->getSimpleName(thisFileName1);
852 m->mothurOut("Unable to open " + thisFileName1 + ". Trying output directory " + tryPath); m->mothurOutEndLine();
854 openForward = m->openInputFile(tryPath, in4, "noerror");
// NOTE(review): the name appears to be replaced by the retry path whether or not the retry succeeded,
// so the warning below would print the last path tried rather than the name the user supplied
855 thisFileName1 = tryPath;
860 if (openForward == 1) { //can't find it
861 m->mothurOut("[WARNING]: can't find " + thisFileName1 + ", ignoring.\n");
862 }else{ in2.close(); }
// same three-step search (as given, default path, output directory) for the second file
867 openReverse = m->openInputFile(thisFileName2, in3, "noerror");
869 //if you can't open it, try default location
870 if (openReverse == 1) {
871 if (m->getDefaultPath() != "") { //default path is set
872 string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName2);
873 m->mothurOut("Unable to open " + thisFileName2 + ". Trying default " + tryPath); m->mothurOutEndLine();
875 openReverse = m->openInputFile(tryPath, in3, "noerror");
877 thisFileName2 = tryPath;
881 //if you can't open it, try output location
882 if (openReverse == 1) {
883 if (m->getOutputDir() != "") { //default path is set
884 string tryPath = m->getOutputDir() + m->getSimpleName(thisFileName2);
885 m->mothurOut("Unable to open " + thisFileName2 + ". Trying output directory " + tryPath); m->mothurOutEndLine();
887 openReverse = m->openInputFile(tryPath, in4, "noerror");
888 thisFileName2 = tryPath;
893 if (openReverse == 1) { //can't find it
894 m->mothurOut("[WARNING]: can't find " + thisFileName2 + ", ignoring pair.\n");
895 }else{ in3.close(); }
// a record is only used when BOTH of its files could be opened
898 if ((pieces.size() == 2) && (openForward != 1) && (openReverse != 1)) { //good pair and sff or fastq and oligos
// the first file is treated as sff when ".sff" occurs anywhere in its name, otherwise as fastq
900 int pos = thisFileName1.find(".sff");
901 if (pos != string::npos) {//these files are sff files
903 sfffile = thisFileName1; oligosfile = thisFileName2;
904 if (m->debug) { m->mothurOut("[DEBUG]: about to read oligos\n"); }
906 if (m->debug) { m->mothurOut("[DEBUG]: about to parse\n"); }
908 if (m->debug) { m->mothurOut("[DEBUG]: done parsing " + sfffile + "\n"); }
911 fastqfile = thisFileName1; oligosfile = thisFileName2;
912 if (m->debug) { m->mothurOut("[DEBUG]: about to read oligos\n"); }
914 if (m->debug) { m->mothurOut("[DEBUG]: about to parse\n"); }
915 parseFastqFile(files);
916 if (m->debug) { m->mothurOut("[DEBUG]: done parsing " + fastqfile + "\n"); }
// paired reads: append "forward reverse" to the list kept for this group, creating the list on first use
919 }else if((pieces.size() == 3) && (openForward != 1) && (openReverse != 1)) { //good pair and paired read
920 map<string, vector<string> >::iterator it = files.find(group);
921 if (it == files.end()) {
922 vector<string> temp; temp.push_back(thisFileName1 + " " + thisFileName2); files[group] = temp;
924 files[group].push_back(thisFileName1 + " " + thisFileName2);
934 catch(exception& e) {
935 m->errorOut(e, "SRACommand", "readFile");
939 //**********************************************************************************************************************
// Splits `sfffile` into per-sample sff files by running mothur's sffinfo command with `oligosfile`,
// then records the resulting files in `files` through mapGroupToFile.
// Sets m->control_pressed when sffinfo reports no output of type "sff".
// NOTE(review): a few lines of this function are not visible in this excerpt. parseFastqFile stores its
// input in `inputfile`; no equivalent assignment is visible here -- confirm against the full file.
940 int SRACommand::parseSffFile(map<string, vector<string> >& files){
942 vector<string> theseFiles;
944 libLayout = "single"; //controlled vocab
947 //run sffinfo to parse sff file into individual sampled sff files
948 string commandString = "sff=" + sfffile;
950 commandString += ", oligos=" + oligosfile;
951 //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs
// a mismatch allowance is only passed along when it is non-zero
952 if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); }
953 if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); }
954 if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
955 if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
956 if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
957 if (m->isTrue(checkorient)) { commandString += ", checkorient=" + checkorient; }
959 m->mothurOutEndLine();
960 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
961 m->mothurOut("Running command: sffinfo(" + commandString + ")"); m->mothurOutEndLine();
// mothurCalling is raised for the duration of the nested command and lowered again afterwards
962 m->mothurCalling = true;
964 Command* sffinfoCommand = new SffInfoCommand(commandString);
965 sffinfoCommand->execute();
// keep only the outputs sffinfo registered under the "sff" type
967 map<string, vector<string> > filenames = sffinfoCommand->getOutputFiles();
968 map<string, vector<string> >::iterator it = filenames.find("sff");
969 if (it != filenames.end()) { theseFiles = it->second; }
970 else { m->control_pressed = true; } // error in sffinfo
972 delete sffinfoCommand;
973 m->mothurCalling = false;
974 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
976 mapGroupToFile(files, theseFiles);
980 catch(exception& e) {
// NOTE(review): the label passed here is "readFile", so an exception raised in this function is
// reported under the wrong function name -- presumably it should read "parseSffFile"
981 m->errorOut(e, "SRACommand", "readFile");
986 //**********************************************************************************************************************
987 int SRACommand::parseFastqFile(map<string, vector<string> >& files){
989 vector<string> theseFiles;
990 inputfile = fastqfile;
991 libLayout = "single"; //controlled vocab
993 //run sffinfo to parse sff file into individual sampled sff files
994 string commandString = "fastq=" + fastqfile;
996 commandString += ", oligos=" + oligosfile;
997 //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs
998 if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); }
999 if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); }
1000 if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
1001 if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
1002 if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
1003 if (m->isTrue(checkorient)) { commandString += ", checkorient=" + checkorient; }
1005 m->mothurOutEndLine();
1006 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1007 m->mothurOut("Running command: fastq.info(" + commandString + ")"); m->mothurOutEndLine();
1008 m->mothurCalling = true;
1010 Command* fastqinfoCommand = new ParseFastaQCommand(commandString);
1011 fastqinfoCommand->execute();
1013 map<string, vector<string> > filenames = fastqinfoCommand->getOutputFiles();
1014 map<string, vector<string> >::iterator it = filenames.find("fastq");
1015 if (it != filenames.end()) { theseFiles = it->second; }
1016 else { m->control_pressed = true; } // error in sffinfo
1018 delete fastqinfoCommand;
1019 m->mothurCalling = false;
1020 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1022 mapGroupToFile(files, theseFiles);
1026 catch(exception& e) {
1027 m->errorOut(e, "SRACommand", "readFile");
1031 //***************************************************************************************************************
1032 //maps group to file
1033 int SRACommand::mapGroupToFile(map<string, vector<string> >& files, vector<string> theseFiles){
1036 for (int i = 0; i < Groups.size(); i++) {
1039 for (int j = 0; j < theseFiles.size(); j++) {
1040 int pos = theseFiles[j].find(Groups[i]);
1041 if (pos != string::npos) { //you have a potential match, make sure you dont have a case of partial name
1042 if (theseFiles[j][pos+Groups[i].length()] == '.') { //final.soil.sff vs final.soil2.sff both would match soil.
1048 if(matches.size() == 1) {
1049 map<string, vector<string> >::iterator it = files.find(Groups[i]);
1050 if (it == files.end()) {
1051 vector<string> temp; temp.push_back(theseFiles[*matches.begin()]); files[Groups[i]] = temp;
1053 files[Groups[i]].push_back(theseFiles[*matches.begin()]);
1059 catch(exception& e) {
1060 m->errorOut(e, "SRACommand", "checkGroups");
1065 //***************************************************************************************************************
1066 //checks groups and files returned from parse - removes any groups that did not get reads assigned to them, orders files.
// Walks Groups and collects into newGroups every group that has an entry in `files`.
// NOTE(review): what is done with newGroups afterwards (presumably it replaces Groups) and the return
// value are not visible in this excerpt -- confirm against the full file.
1067 int SRACommand::checkGroups(map<string, vector<string> >& files){
1069 vector<string> newGroups;
1070 for (int i = 0; i < Groups.size(); i++) {
1072 map<string, vector<string> >::iterator it = files.find(Groups[i]);
1073 //no files for this group, remove it
// groups are dropped by omission: only groups that have files are copied into newGroups
1074 if (it == files.end()) { }
1075 else { newGroups.push_back(Groups[i]); }
1082 catch(exception& e) {
1083 m->errorOut(e, "SRACommand", "checkGroups");
1087 //***************************************************************************************************************
// Loads `oligosfile` through the Oligos class and, for every barcode/primer combination, records which
// barcode and primer sequences belong to the resulting group name:
//   Group2Barcode[group] -> barcode strings,  Group2Primer[group] -> primer strings
// The group name is the barcode name, the primer name, or "<barcode>.<primer>" when both are set.
// Combinations where either name is "ignore", or where both names are empty, are skipped.
// NOTE(review): the branch structure (paired vs. unpaired oligos), the closing logic and the final return
// are not visible in this excerpt -- confirm against the full file.
1088 int SRACommand::readOligos(){
1090 Oligos oligos(oligosfile);
// NOTE(review): the function is declared int but returns `false` here (i.e. 0)
1092 if (m->control_pressed) { return false; } //error in reading oligos
1094 if (oligos.hasPairedBarcodes()) { pairedOligos = true; }
1095 else { pairedOligos = false; }
1097 set<string> uniqueNames; //used to cleanup outputFileNames
// paired form: barcodes and primers are keyed by an int index and hold a forward/reverse pair
1099 map<int, oligosPair> barcodes = oligos.getPairedBarcodes();
1100 map<int, oligosPair> primers = oligos.getPairedPrimers();
1101 for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1102 for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1104 string primerName = oligos.getPrimerName(itPrimer->first);
1105 string barcodeName = oligos.getBarcodeName(itBar->first);
1107 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1108 else if ((primerName == "") && (barcodeName == "")) { } //do nothing
1110 string comboGroupName = "";
// the four file-name strings below are initialised but not used in any line visible here
1111 string fastaFileName = "";
1112 string qualFileName = "";
1113 string nameFileName = "";
1114 string countFileName = "";
1116 if(primerName == ""){
1117 comboGroupName = barcodeName;
1119 if(barcodeName == ""){
1120 comboGroupName = primerName;
1123 comboGroupName = barcodeName + "." + primerName;
1126 uniqueNames.insert(comboGroupName);
// paired oligos are stored as "forward.reverse"; the list for a group is created on first use
1128 map<string, vector<string> >::iterator itGroup2Barcode = Group2Barcode.find(comboGroupName);
1129 if (itGroup2Barcode == Group2Barcode.end()) {
1130 vector<string> tempBarcodes; tempBarcodes.push_back((itBar->second).forward+"."+(itBar->second).reverse);
1131 Group2Barcode[comboGroupName] = tempBarcodes;
1133 Group2Barcode[comboGroupName].push_back((itBar->second).forward+"."+(itBar->second).reverse);
// the same iterator variable is reused for the lookup in Group2Primer
1136 itGroup2Barcode = Group2Primer.find(comboGroupName);
1137 if (itGroup2Barcode == Group2Primer.end()) {
1138 vector<string> tempPrimers; tempPrimers.push_back((itPrimer->second).forward+"."+(itPrimer->second).reverse);
1139 Group2Primer[comboGroupName] = tempPrimers;
1141 Group2Primer[comboGroupName].push_back((itPrimer->second).forward+"."+(itPrimer->second).reverse);
// unpaired form: the maps are keyed by a string and hold an int; the int is what the name lookups take,
// and the string key is what gets stored per group
1147 map<string, int> barcodes = oligos.getBarcodes() ;
1148 map<string, int> primers = oligos.getPrimers();
1149 for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1150 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1152 string primerName = oligos.getPrimerName(itPrimer->second);
1153 string barcodeName = oligos.getBarcodeName(itBar->second);
1155 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1156 else if ((primerName == "") && (barcodeName == "")) { } //do nothing
1158 string comboGroupName = "";
1159 string fastaFileName = "";
1160 string qualFileName = "";
1161 string nameFileName = "";
1162 string countFileName = "";
1164 if(primerName == ""){
1165 comboGroupName = barcodeName;
1167 if(barcodeName == ""){
1168 comboGroupName = primerName;
1171 comboGroupName = barcodeName + "." + primerName;
1174 uniqueNames.insert(comboGroupName);
1176 map<string, vector<string> >::iterator itGroup2Barcode = Group2Barcode.find(comboGroupName);
1177 if (itGroup2Barcode == Group2Barcode.end()) {
1178 vector<string> tempBarcodes; tempBarcodes.push_back(itBar->first);
1179 Group2Barcode[comboGroupName] = tempBarcodes;
1181 Group2Barcode[comboGroupName].push_back(itBar->first);
1184 itGroup2Barcode = Group2Primer.find(comboGroupName);
1185 if (itGroup2Barcode == Group2Primer.end()) {
1186 vector<string> tempPrimers; tempPrimers.push_back(itPrimer->first);
1187 Group2Primer[comboGroupName] = tempPrimers;
1189 Group2Primer[comboGroupName].push_back(itPrimer->first);
// debug aid: list every distinct group name that was produced, numbered from 0
1196 if (m->debug) { int count = 0; for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } }
1201 catch(exception& e) {
1202 m->errorOut(e, "SRACommand", "readOligos");
1206 //********************************************************************/
1207 string SRACommand::reverseOligo(string oligo){
1209 string reverse = "";
1211 for(int i=oligo.length()-1;i>=0;i--){
1213 if(oligo[i] == 'A') { reverse += 'T'; }
1214 else if(oligo[i] == 'T'){ reverse += 'A'; }
1215 else if(oligo[i] == 'U'){ reverse += 'A'; }
1217 else if(oligo[i] == 'G'){ reverse += 'C'; }
1218 else if(oligo[i] == 'C'){ reverse += 'G'; }
1220 else if(oligo[i] == 'R'){ reverse += 'Y'; }
1221 else if(oligo[i] == 'Y'){ reverse += 'R'; }
1223 else if(oligo[i] == 'M'){ reverse += 'K'; }
1224 else if(oligo[i] == 'K'){ reverse += 'M'; }
1226 else if(oligo[i] == 'W'){ reverse += 'W'; }
1227 else if(oligo[i] == 'S'){ reverse += 'S'; }
1229 else if(oligo[i] == 'B'){ reverse += 'V'; }
1230 else if(oligo[i] == 'V'){ reverse += 'B'; }
1232 else if(oligo[i] == 'D'){ reverse += 'H'; }
1233 else if(oligo[i] == 'H'){ reverse += 'D'; }
1235 else { reverse += 'N'; }
1241 catch(exception& e) {
1242 m->errorOut(e, "SRACommand", "reverseOligo");
1246 //********************************************************************/
1247 //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
1248 bool SRACommand::checkCasesPlatforms(string& platform){
1250 string original = platform;
1253 //remove users possible case errors
1254 for (int i = 0; i < platform.size(); i++) { platform[i] = toupper(platform[i]); }
1256 //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
1258 if ((platform == "_LS454") || (platform == "ILLUMINA") || (platform == "ION_TORRENT") || (platform == "PACBIO_SMRT") || (platform == "454")) { }
1259 else { isOkay = false; }
1262 if (platform == "454") { platform = "_LS454"; }
1264 m->mothurOut("[ERROR]: " + original + " is not a valid platform option. Valid platform options are _LS454, ILLUMINA-ION, TORRENT or PACBIO_SMRT."); m->mothurOutEndLine(); abort = true;
1269 catch(exception& e) {
1270 m->errorOut(e, "SRACommand", "checkCasesPlatforms");
1274 //********************************************************************/
1275 //454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified
1276 bool SRACommand::checkCasesInstrumentModels(string& instrumentModel){
1278 string original = instrumentModel;
1281 //remove users possible case errors
1282 for (int i = 0; i < instrumentModel.size(); i++) { instrumentModel[i] = toupper(instrumentModel[i]); }
1284 //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
1285 if (platform == "_LS454") { //instrument model options are 454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-unspecified
1286 if ((instrumentModel == "454_GS") || (instrumentModel == "454_GS_20") || (instrumentModel == "454_GS_FLX") || (instrumentModel == "454_GS_FLX_TITANIUM") || (instrumentModel == "454_GS_JUNIOR") || (instrumentModel == "UNSPECIFIED")) { }
1287 else { isOkay = false; }
1289 if (instrumentModel == "454_GS_FLX_TITANIUM") { instrumentModel = "454_GS_FLX_Titanium"; }
1290 if (instrumentModel == "454_GS_JUNIOR") { instrumentModel = "454_GS_Junior"; }
1291 if (instrumentModel == "UNSPECIFIED") { instrumentModel = "unspecified"; }
1293 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform. Valid instrument options are 454_GS, 454_GS_20, 454_GS_FLX, 454_GS_FLX_Titanium, 454_GS_Junior or unspecified."); m->mothurOutEndLine(); abort = true;
1296 }else if (platform == "ILLUMINA") { //instrument model options are Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-unspecified
1297 if ((instrumentModel == "ILLUMINA_GENOME_ANALYZER") || (instrumentModel == "ILLUMINA_GENOME_ANALYZER_II") || (instrumentModel == "ILLUMINA_GENOME_ANALYZER_IIX") || (instrumentModel == "ILLUMINA_HISEQ_2000") || (instrumentModel == "ILLUMINA_HISEQ_1000") || (instrumentModel == "ILLUMINA_MISEQ") || (instrumentModel == "UNSPECIFIED")) { }
1298 else { isOkay = false; }
1301 if (instrumentModel == "ILLUMINA_GENOME_ANALYZER") { instrumentModel = "Illumina_Genome_Analyzer"; }
1302 if (instrumentModel == "ILLUMINA_GENOME_ANALYZER_II") { instrumentModel = "Illumina_Genome_Analyzer_II"; }
1303 if (instrumentModel == "ILLUMINA_GENOME_ANALYZER_IIX") { instrumentModel = "Illumina_Genome_Analyzer_IIx"; }
1304 if (instrumentModel == "ILLUMINA_HISEQ_2000") { instrumentModel = "Illumina_HiSeq_2000"; }
1305 if (instrumentModel == "ILLUMINA_HISEQ_1000") { instrumentModel = "Illumina_HiSeq_1000"; }
1306 if (instrumentModel == "ILLUMINA_MISEQ") { instrumentModel = "Illumina_MiSeq"; }
1307 if (instrumentModel == "UNSPECIFIED") { instrumentModel = "unspecified"; }
1309 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform. Valid instrument options are Illumina_Genome_Analyzer, Illumina_Genome_Analyzer_II, Illumina_Genome_Analyzer_IIx, Illumina_HiSeq_2000, Illumina_HiSeq_1000, Illumina_MiSeq or unspecified."); m->mothurOutEndLine(); abort = true;
1312 }else if (platform == "ION_TORRENT") { //instrument model options are Ion_Torrent_PGM-unspecified
1313 if ((instrumentModel == "ION_TORRENT_PGM") || (instrumentModel == "UNSPECIFIED")) { }
1314 else { isOkay = false; }
1317 if (instrumentModel == "ION_TORRENT_PGM") { instrumentModel = "Ion_Torrent_PGM"; }
1318 if (instrumentModel == "UNSPECIFIED") { instrumentModel = "unspecified"; }
1320 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform. Valid instrument options are Ion_Torrent_PGM or unspecified."); m->mothurOutEndLine(); abort = true;
1322 }else if (platform == "PACBIO_SMRT") { //instrument model options are PacBio_RS-unspecified
1323 if ((instrumentModel == "PACBIO_RS") || (instrumentModel == "UNSPECIFIED")) { }
1324 else { isOkay = false; }
1327 if (instrumentModel == "PACBIO_RS") { instrumentModel = "PacBio_RS"; }
1328 if (instrumentModel == "UNSPECIFIED") { instrumentModel = "unspecified"; }
1330 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform. Valid instrument options are PacBio_RS or unspecified."); m->mothurOutEndLine(); abort = true;
1335 catch(exception& e) {
1336 m->errorOut(e, "SRACommand", "checkCasesInstrumentModels");
1340 //**********************************************************************************************************************
1341 //AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq,OTHER
1342 bool SRACommand::checkCasesLibStrategy(string& libStrategy){
1344 string original = libStrategy;
1347 //remove users possible case errors
1348 for (int i = 0; i < libStrategy.size(); i++) { libStrategy[i] = toupper(libStrategy[i]); }
1350 if ((libStrategy == "AMPLICON") || (libStrategy == "WGA") || (libStrategy == "WGS") || (libStrategy == "WGX") || (libStrategy == "RNA-SEQ") || (libStrategy == "MIRNA-SEQ") || (libStrategy == "WCS") || (libStrategy == "CLONE") || (libStrategy == "POOLCLONE") || (libStrategy == "CLONEEND") || (libStrategy == "FINISHING") || (libStrategy == "CHIP-SEQ") || (libStrategy == "MNASE-SEQ") || (libStrategy == "DNASE-HYPERSENSITIVITY") || (libStrategy == "BISULFITE-SEQ") || (libStrategy == "TN-SEQ") || (libStrategy == "EST") || (libStrategy == "FL-CDNA") || (libStrategy == "CTS") || (libStrategy == "MRE-SEQ")|| (libStrategy == "MEDIP-SEQ") || (libStrategy == "MBD-SEQ") || (libStrategy == "OTHER")) { }
1351 else { isOkay = false; }
1354 if (libStrategy == "RNA-SEQ") { libStrategy = "RNA-Seq"; }
1355 if (libStrategy == "MIRNA-SEQ") { libStrategy = "miRNA-Seq"; }
1356 if (libStrategy == "CHIP-SEQ") { libStrategy = "ChIP-Seq"; }
1357 if (libStrategy == "MNASE-SEQ") { libStrategy = "MNase-Seq"; }
1358 if (libStrategy == "DNASE-HYPERSENSITIVITY") { libStrategy = "DNase-Hypersensitivity"; }
1359 if (libStrategy == "BISULFITE-SEQ") { libStrategy = "Bisulfite-Seq"; }
1360 if (libStrategy == "TN-SEQ") { libStrategy = "Tn-Seq"; }
1361 if (libStrategy == "FL-CDNA") { libStrategy = "FL-cDNA"; }
1362 if (libStrategy == "MRE-SEQ") { libStrategy = "MRE-Seq"; }
1363 if (libStrategy == "MEDIP-SEQ") { libStrategy = "MeDIP-Seq"; }
1365 m->mothurOut("[ERROR]: " + original + " is not a valid libstrategy option. Valid libstrategy options are AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq or OTHER."); m->mothurOutEndLine(); abort = true;
1370 catch(exception& e) {
1371 m->errorOut(e, "SRACommand", "checkCasesLibStrategy");
1376 //**********************************************************************************************************************
1377 //METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA,OTHER
1378 bool SRACommand::checkCasesLibSource(string& libSource){
1380 string original = libSource;
1383 //remove users possible case errors
1384 for (int i = 0; i < libSource.size(); i++) { libSource[i] = toupper(libSource[i]); }
1386 if ((libSource == "METAGENOMIC") || (libSource == "GENOMIC") || (libSource == "TRANSCRIPTOMIC") || (libSource == "METATRANSCRIPTOMIC") || (libSource == "SYNTHETIC") || (libSource == "VIRAL_RNA") || (libSource == "OTHER")) { }
1387 else { isOkay = false; }
1392 m->mothurOut("[ERROR]: " + original + " is not a valid libsource option. Valid libsource options are METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA or OTHER."); m->mothurOutEndLine(); abort = true;
1397 catch(exception& e) {
1398 m->errorOut(e, "SRACommand", "checkCasesLibStrategy");
1403 //**********************************************************************************************************************
1404 //PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other,unspecified
1405 bool SRACommand::checkCasesLibSelection(string& libSelection){
1407 string original = libSelection;
1410 //remove users possible case errors
1411 for (int i = 0; i < libSelection.size(); i++) { libSelection[i] = toupper(libSelection[i]); }
1413 if ((libSelection == "PCR") || (libSelection == "RANDOM") || (libSelection == "RANDOM_PCR") || (libSelection == "RT-PCR") || (libSelection == "HMPR") || (libSelection == "MF") || (libSelection == "CF-S") || (libSelection == "CF-H") || (libSelection == "CF-T") || (libSelection == "CF-M") || (libSelection == "MDA") || (libSelection == "MSLL") || (libSelection == "CDNA") || (libSelection == "CHIP") || (libSelection == "MNASE") || (libSelection == "DNASE") || (libSelection == "HYBRID_SELECTION") || (libSelection == "REDUCED_REPRESENTATION") || (libSelection == "RESTRICTION_DIGEST") || (libSelection == "5-METHYLCYTIDINE_ANTIBODY") || (libSelection == "MBD2_PROTEIN_METHYL-CPG_BINDING_DOMAIN") || (libSelection == "CAGE") || (libSelection == "RACE") || (libSelection == "SIZE_FRACTIONATION") || (libSelection == "PADLOCK_PROBES_CAPTURE_METHOD") || (libSelection == "OTHER") || (libSelection == "UNSPECIFIED")) { }
1414 else { isOkay = false; }
1417 if (libSelection == "CDNA") { libSelection = "cDNA"; }
1418 if (libSelection == "CHIP") { libSelection = "ChIP"; }
1419 if (libSelection == "MNASE") { libSelection = "MNase"; }
1420 if (libSelection == "DNASE") { libSelection = "DNAse"; }
1421 if (libSelection == "HYBRID_SELECTION") { libSelection = "Hybrid_Selection"; }
1422 if (libSelection == "REDUCED_REPRESENTATION") { libSelection = "Reduced_Representation"; }
1423 if (libSelection == "RESTRICTION_DIGEST") { libSelection = "Restriction_Digest"; }
1424 if (libSelection == "5-METHYLCYTIDINE_ANTIBODY") { libSelection = "5-methylcytidine_antibody"; }
1425 if (libSelection == "MBD2_PROTEIN_METHYL-CPG_BINDING_DOMAIN") { libSelection = "MBD2_protein_methyl-CpG_binding_domain"; }
1426 if (libSelection == "SIZE_FRACTIONATION") { libSelection = "size_fractionation"; }
1427 if (libSelection == "PADLOCK_PROBES_CAPTURE_METHOD") { libSelection = "Padlock_probes_capture_method"; }
1428 if (libSelection == "OTHER") { libSelection = "other"; }
1429 if (libSelection == "UNSPECIFIED") { libSelection = "unspecified"; }
1432 m->mothurOut("[ERROR]: " + original + " is not a valid libselection option. Valid libselection options are PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other or unspecified."); m->mothurOutEndLine(); abort = true;
1437 catch(exception& e) {
1438 m->errorOut(e, "SRACommand", "checkCasesLibSelection");
1442 //**********************************************************************************************************************
1443 //METAGENOME,GENOME_SEQUENCING,METAGENOMIC_ASSEMBLY,ASSEMBLY,TRANSCRIPTOME,PROTEOMIC,MAP,CLONE_ENDS,TARGETED_LOCI,RANDOM_SURVEY,EXOME,VARIATION,EPIGENOMICS,PHENOTYPE,GENOTYPE,OTHER
1444 bool SRACommand::checkCasesDataType(string& dataType){
1446 string original = dataType;
1449 //remove users possible case errors
1450 for (int i = 0; i < dataType.size(); i++) { dataType[i] = toupper(dataType[i]); }
1452 if ((dataType == "METAGENOME") || (dataType == "GENOME_SEQUENCING") || (dataType == "METAGENOMIC_ASSEMBLY") || (dataType == "ASSEMBLY") || (dataType == "TRANSCRIPTOME") || (dataType == "PROTEOMIC") || (dataType == "MAP") || (dataType == "CLONE_ENDS") || (dataType == "TARGETED_LOCI") || (dataType == "RANDOM_SURVEY") || (dataType == "EXOME") || (dataType == "VARIATION") || (dataType == "EPIGENOMICS") || (dataType == "PHENOTYPE") || (dataType == "GENOTYPE") || (dataType == "OTHER")) { }
1453 else { isOkay = false; }
1458 m->mothurOut("[ERROR]: " + original + " is not a valid datatype option. Valid datatype options are METAGENOME,GENOME_SEQUENCING,METAGENOMIC_ASSEMBLY,ASSEMBLY,TRANSCRIPTOME,PROTEOMIC,MAP,CLONE_ENDS,TARGETED_LOCI,RANDOM_SURVEY,EXOME,VARIATION,EPIGENOMICS,PHENOTYPE,GENOTYPE,OTHER."); m->mothurOutEndLine(); abort = true;
1463 catch(exception& e) {
1464 m->errorOut(e, "SRACommand", "checkCasesDataType");
1468 //**********************************************************************************************************************
1469 bool SRACommand::sanityCheckMiMarksGroups(){
1473 for (int i = 0; i < Groups.size(); i++) {
1474 if (m->control_pressed) { break; }
1476 map<string, map<string, string> >::iterator it = mimarks.find(Groups[i]);
1477 if (it == mimarks.end()) {
1479 m->mothurOut("[ERROR]: MIMarks file is missing group " + Groups[i] + ", please correct.\n");
1483 if (!isOkay) { m->control_pressed = true; }
1487 catch(exception& e) {
1488 m->errorOut(e, "SRACommand", "sanityCheckMiMarksGroups");
1493 //**********************************************************************************************************************