// Created by SarahsWork on 10/28/13.
// Copyright (c) 2013 Schloss Lab. All rights reserved.
#include "sracommand.h"

#include <cctype>
#include <cstdlib>
#include <fstream>

#include "sffinfocommand.h"
#include "parsefastaqcommand.h"
//**********************************************************************************************************************
14 vector<string> SRACommand::setParameters(){
16 CommandParameter psff("sff", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(psff);
17 CommandParameter poligos("oligos", "InputTypes", "", "", "oligos", "none", "none","",false,false,true); parameters.push_back(poligos);
18 CommandParameter pfile("file", "InputTypes", "", "", "sffFastQFile-oligos", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfile);
19 CommandParameter pfastq("fastq", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfastq);
20 CommandParameter pcontact("project", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pcontact);
21 CommandParameter pmimark("mimark", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pmimark);
22 //choose only one multiple options
23 CommandParameter pplatform("platform", "Multiple", "_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT", "_LS454", "", "", "","",false,false); parameters.push_back(pplatform);
24 CommandParameter pinstrument("instrument", "Multiple", "454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified", "454_GS", "", "", "","",false,false); parameters.push_back(pinstrument);
25 CommandParameter plibstrategy("libstrategy", "String", "AMPLICON", "", "", "", "","",false,false); parameters.push_back(plibstrategy);
26 CommandParameter pdatatype("datatype", "String", "METAGENOME", "", "", "", "","",false,false); parameters.push_back(pdatatype);
27 CommandParameter plibsource("libsource", "String", "METAGENOMIC", "", "", "", "","",false,false); parameters.push_back(plibsource);
28 CommandParameter plibselection("libselection", "String", "PCR", "", "", "", "","",false,false); parameters.push_back(plibselection);
29 CommandParameter porientation("orientation", "Multiple", "forward-reverse", "forward", "", "", "","",false,false); parameters.push_back(porientation);
30 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
31 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);
32 CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
33 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
34 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
36 //every command must have inputdir and outputdir. This allows mothur users to redirect input and output files.
37 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
38 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
40 vector<string> myArray;
41 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
45 m->errorOut(e, "SRACommand", "setParameters");
//**********************************************************************************************************************
50 string SRACommand::getHelpString(){
52 string helpString = "";
53 helpString += "The sra command creates the necessary files for a NCBI submission. The xml file and individual sff or fastq files parsed from the original sff or fastq file.\n";
54 helpString += "The sra command parameters are: sff, fastq, file, oligos, project, mimarksfile, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, platform, orientation, libstrategy, datatype, libsource, libselection and instrument.\n";
55 helpString += "The sff parameter is used to provide the original sff file.\n";
56 helpString += "The fastq parameter is used to provide the original fastq file.\n";
57 helpString += "The project parameter is used to provide your project file.\n";
58 helpString += "The oligos parameter is used to provide an oligos file to parse your sff or fastq file by. It is required and must contain barcodes and primers, or you must provide a file option. \n";
59 helpString += "The mimark parameter is used to provide your mimarks file. You can create the template for this file using the get.mimarkspackage command.\n";
60 helpString += "The file parameter is used to provide a file containing a list of individual fastq or sff files or paired fastq files with a group assignment. File lines can be 2 or 3 columns. The 2 column files are sff file then oligos or fastqfile then oligos. You may have multiple lines in the file. The 3 column files are for paired read libraries. The format is groupName, forwardFastqFile reverseFastqFile.\n";
61 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
62 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
63 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
64 helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
65 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
66 helpString += "The platform parameter is used to specify platform you are using choices are: _LS454,ILLUMINA,ION_TORRENT,PACBIO_SMRT. Default=_LS454. This is a controlled vocabulary section in the XML file that will be generated.\n";
67 helpString += "The orientation parameter is used to specify sequence orientation. Choices are: forward and reverse. Default=forward. This is a controlled vocabulary section in the XML file that will be generated.\n";
68 helpString += "The instrument parameter is used to specify instrument. Choices are 454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified. Default=454_GS. This is a controlled vocabulary section in the XML file that will be generated. \n";
69 helpString += "The libstrategy parameter is used to specify library strategy. Default=AMPLICON. Choices are AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq,OTHER. This is a controlled vocabulary section in the XML file that will be generated. \n";
70 helpString += "The libsource parameter is used to specify library source. Default=METAGENOMIC. Choices are METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA,OTHER. This is a controlled vocabulary section in the XML file that will be generated. \n";
71 helpString += "The libselection parameter is used to specify library selection. Default=PCR. Choices are PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other,unspecified. This is a controlled vocabulary section in the XML file that will be generated. \n";
72 helpString += "The datatype parameter is used to specify datatype. Default=METAGENOME. Choices are METAGENOME,GENOME_SEQUENCING,METAGENOMIC_ASSEMBLY,ASSEMBLY,TRANSCRIPTOME,PROTEOMIC,MAP,CLONE_ENDS,TARGETED_LOCI,RANDOM_SURVEY,EXOME,VARIATION,EPIGENOMICS,PHENOTYPE,GENOTYPE,OTHER. This is a controlled vocabulary section in the XML file that will be generated. \n";
73 helpString += "sra(sff=sff=GHL4YHV01.sff, GHL4YHV01.oligos, project=test.project, mimark=MIMarksData.txt)\n";
77 m->errorOut(e, "SRACommand", "getHelpString");
//**********************************************************************************************************************
82 string SRACommand::getOutputPattern(string type) {
86 if (type == "xml") { pattern = "[filename],xml"; }
87 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
92 m->errorOut(e, "SRACommand", "getOutputPattern");
//**********************************************************************************************************************
97 SRACommand::SRACommand(){
99 abort = true; calledHelp = true;
101 vector<string> tempOutNames;
102 outputTypes["xml"] = tempOutNames;
104 catch(exception& e) {
105 m->errorOut(e, "SRACommand", "SRACommand");
//**********************************************************************************************************************
110 SRACommand::SRACommand(string option) {
112 abort = false; calledHelp = false;
113 libLayout = "single"; //controlled vocab
115 //allow user to run help
116 if(option == "help") { help(); abort = true; calledHelp = true; }
117 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
120 //valid paramters for this command
121 vector<string> myArray = setParameters();
123 OptionParser parser(option);
124 map<string,string> parameters = parser.getParameters();
126 ValidParameters validParameter;
127 map<string,string>::iterator it;
128 //check to make sure all parameters are valid for command
129 for (it = parameters.begin(); it != parameters.end(); it++) {
130 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
133 vector<string> tempOutNames;
134 outputTypes["xml"] = tempOutNames;
136 //if the user changes the input directory command factory will send this info to us in the output parameter
137 string inputDir = validParameter.validFile(parameters, "inputdir", false);
138 if (inputDir == "not found"){ inputDir = ""; }
142 it = parameters.find("sff");
143 //user has given a template file
144 if(it != parameters.end()){
145 path = m->hasPath(it->second);
146 //if the user has not given a path then, add inputdir. else leave path alone.
147 if (path == "") { parameters["sff"] = inputDir + it->second; }
150 it = parameters.find("fastq");
151 //user has given a template file
152 if(it != parameters.end()){
153 path = m->hasPath(it->second);
154 //if the user has not given a path then, add inputdir. else leave path alone.
155 if (path == "") { parameters["fastq"] = inputDir + it->second; }
158 it = parameters.find("file");
159 //user has given a template file
160 if(it != parameters.end()){
161 path = m->hasPath(it->second);
162 //if the user has not given a path then, add inputdir. else leave path alone.
163 if (path == "") { parameters["file"] = inputDir + it->second; }
166 it = parameters.find("oligos");
167 //user has given a template file
168 if(it != parameters.end()){
169 path = m->hasPath(it->second);
170 //if the user has not given a path then, add inputdir. else leave path alone.
171 if (path == "") { parameters["oligos"] = inputDir + it->second; }
174 it = parameters.find("project");
175 //user has given a template file
176 if(it != parameters.end()){
177 path = m->hasPath(it->second);
178 //if the user has not given a path then, add inputdir. else leave path alone.
179 if (path == "") { parameters["project"] = inputDir + it->second; }
182 it = parameters.find("mimark");
183 //user has given a template file
184 if(it != parameters.end()){
185 path = m->hasPath(it->second);
186 //if the user has not given a path then, add inputdir. else leave path alone.
187 if (path == "") { parameters["mimark"] = inputDir + it->second; }
191 //check for parameters
192 fastqfile = validParameter.validFile(parameters, "fastq", true);
193 if (fastqfile == "not open") { fastqfile = ""; abort = true; }
194 else if (fastqfile == "not found") { fastqfile = ""; }
196 sfffile = validParameter.validFile(parameters, "sff", true);
197 if (sfffile == "not open") { sfffile = ""; abort = true; }
198 else if (sfffile == "not found") { sfffile = ""; }
200 file = validParameter.validFile(parameters, "file", true);
201 if (file == "not open") { file = ""; abort = true; }
202 else if (file == "not found") { file = ""; }
204 oligosfile = validParameter.validFile(parameters, "oligos", true);
205 if (oligosfile == "not found") { oligosfile = ""; }
206 else if(oligosfile == "not open") { abort = true; }
207 else { m->setOligosFile(oligosfile); }
209 contactfile = validParameter.validFile(parameters, "project", true);
210 if (contactfile == "not found") { contactfile = ""; m->mothurOut("[ERROR]: You must provide a project file before you can use the sra command."); m->mothurOutEndLine(); abort = true; }
211 else if(contactfile == "not open") { abort = true; }
213 mimarksfile = validParameter.validFile(parameters, "mimark", true);
214 if (mimarksfile == "not found") { mimarksfile = ""; m->mothurOut("[ERROR]: You must provide a mimark file before you can use the sra command. You can create a template for this file using the get.mimarkspackage command."); m->mothurOutEndLine(); abort = true; }
215 else if(mimarksfile == "not open") { abort = true; }
217 file = validParameter.validFile(parameters, "file", true);
218 if (file == "not open") { file = ""; abort = true; }
219 else if (file == "not found") { file = ""; }
221 if ((file == "") && (oligosfile == "")) {
222 m->mothurOut("[ERROR]: You must provide an oligos file or file with oligos files in them before you can use the sra command."); m->mothurOutEndLine(); abort = true;
225 if ((fastqfile == "") && (file == "") && (sfffile == "")) {
226 m->mothurOut("[ERROR]: You must provide a file, sff file or fastq file before you can use the sra command."); m->mothurOutEndLine(); abort = true;
229 //use only one Mutliple type _LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
230 platform = validParameter.validFile(parameters, "platform", false); if (platform == "not found") { platform = "_LS454"; }
231 if (!checkCasesPlatforms(platform)) { abort = true; } //error message in function
233 if (!abort) { //don't check instrument model is platform is bad
234 //454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified
235 instrumentModel = validParameter.validFile(parameters, "instrument", false); if (instrumentModel == "not found") { instrumentModel = "454_GS"; }
236 if (!checkCasesInstrumentModels(instrumentModel)) { abort = true; } //error message in function
238 //turn _ to spaces mothur's work around
239 for (int i = 0; i < instrumentModel.length(); i++) { if (instrumentModel[i] == '_') { instrumentModel[i] = ' '; } }
241 libStrategy = validParameter.validFile(parameters, "libstrategy", false); if (libStrategy == "not found") { libStrategy = "AMPLICON"; }
242 if (!checkCasesLibStrategy(libStrategy)) { abort = true; } //error message in function
244 //turn _ to spaces mothur's work around
245 for (int i = 0; i < libStrategy.length(); i++) { if (libStrategy[i] == '_') { libStrategy[i] = ' '; } }
247 libSource = validParameter.validFile(parameters, "libsource", false); if (libSource == "not found") { libSource = "METAGENOMIC"; }
248 if (!checkCasesLibSource(libSource)) { abort = true; } //error message in function
250 //turn _ to spaces mothur's work around
251 for (int i = 0; i < libSource.length(); i++) { if (libSource[i] == '_') { libSource[i] = ' '; } }
253 libSelection = validParameter.validFile(parameters, "libselection", false); if (libSelection == "not found") { libSelection = "PCR"; }
254 if (!checkCasesLibSelection(libSelection)) { abort = true; } //error message in function
256 //turn _ to spaces mothur's work around
257 for (int i = 0; i < libSelection.length(); i++) { if (libSelection[i] == '_') { libSelection[i] = ' '; } }
259 dataType = validParameter.validFile(parameters, "datatype", false); if (dataType == "not found") { dataType = "METAGENOME"; }
260 if (!checkCasesDataType(dataType)) { abort = true; } //error message in function
262 //turn _ to spaces mothur's work around
263 for (int i = 0; i < dataType.length(); i++) { if (dataType[i] == '_') { dataType[i] = ' '; } }
265 orientation = validParameter.validFile(parameters, "orientation", false); if (orientation == "not found") { orientation = "forward"; }
267 if ((orientation == "forward") || (orientation == "reverse")) { }
268 else { m->mothurOut("[ERROR]: " + orientation + " is not a valid orientation option. Choices are: forward and reverse.\n"); m->mothurOutEndLine(); abort = true; }
271 string temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found"){ temp = "0"; }
272 m->mothurConvert(temp, bdiffs);
274 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found"){ temp = "0"; }
275 m->mothurConvert(temp, pdiffs);
277 temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
278 m->mothurConvert(temp, ldiffs);
280 temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }
281 m->mothurConvert(temp, sdiffs);
283 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); }
284 m->mothurConvert(temp, tdiffs);
286 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }
291 catch(exception& e) {
292 m->errorOut(e, "SRACommand", "SRACommand");
//**********************************************************************************************************************
297 int SRACommand::execute(){
300 if (abort == true) { if (calledHelp) { return 0; } return 2; }
304 if (oligosfile != "") { readOligos(); Groups.push_back("scrap"); }
306 if (m->control_pressed) { return 0; }
309 map<string, vector<string> > filesBySample;
312 if (file != "") { readFile(filesBySample); }
313 else if (sfffile != "") { parseSffFile(filesBySample); }
314 else if (fastqfile != "") { parseFastqFile(filesBySample); }
316 sanityCheckMiMarksGroups();
318 //checks groups and files returned from parse - removes any groups that did not get reads assigned to them, orders files.
319 checkGroups(filesBySample);
322 string thisOutputDir = outputDir;
323 if (outputDir == "") { thisOutputDir += m->hasPath(inputfile); }
324 map<string, string> variables;
325 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(inputfile));
326 string outputFileName = getOutputFileName("xml", variables);
327 outputNames.push_back(outputFileName); outputTypes["xml"].push_back(outputFileName);
329 m->openOutputFile(outputFileName, out);
332 ////////////////////////////////////////////////////////
333 out << "<Submission>\n";
334 out << "\t<Description>\n";
335 out << "\t\t<Comment> New Submission. Generated by mothur version " + m->getVersion() + " </Comment> \n";
336 out << "\t\t<Submitter user_name=\"" + submissionName + "\"/>\n";
337 out << "\t\t<Organization type=\"" + centerType + "\">\n";
338 out << "\t\t<Name>" + centerName + "</Name>\n";
339 out << "\t\t<Contact> email=\"" + email + "\">\n";
340 out << "\t\t\t<Name>\n";
341 out << "\t\t\t\t<First>" + firstName + "</First>\n";
342 out << "\t\t\t\t<Last>" + firstName + "</Last>\n";
343 out << "\t\t\t</Name>\n";
344 out << "\t\t</Contact>\n";
345 out << "\t\t</Organization>\n";
346 out << "\t</Description>\n";
347 ////////////////////////////////////////////////////////
350 ////////////////////////////////////////////////////////
351 out << "\t<Action>\n";
352 out << "\t\t<AddData target_db=\"BioProject\">\n";
353 out << "\t\t\t<Data content_type=\"XML\">\n";
354 out << "\t\t\t\t<XmlContent>\n";
355 out << "\t\t\t\t\t<Project schema_version=\"2.0\">\n";
356 out << "\t\t\t\t\t\t<ProjectID>\n";
357 out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + projectName + " </SPUID> \n";
358 out << "\t\t\t\t\t\t</ProjectID>\n";
359 out << "\t\t\t\t\t\t<Descriptor>\n";
360 out << "\t\t\t\t\t\t\t<Title>" + projectTitle + " </Title> \n";
361 out << "\t\t\t\t\t\t\t<Description><p>" + description + "</p></Description> \n";
363 out << "\t\t\t\t\t\t\t<ExternalLink label=\"Website name\">\n";
364 out << "\t\t\t\t\t\t\t\t<URL>" + website + "</URL>\n";
365 out << "\t\t\t\t\t\t\t</ExternalLink>\n";
367 out << "\t\t\t\t\t\t</Descriptor>\n";
368 out << "\t\t\t\t\t\t<ProjectType>\n";
369 out << "\t\t\t\t\t\t\t<ProjectTypeSubmission sample_scope=\"eEnvironment\">\n";
370 out << "\t\t\t\t\t\t\t\t<IntendedDataTypeSet>\n";
371 out << "\t\t\t\t\t\t\t\t\t<DataType>" + dataType + " </DataType> \n";
372 out << "\t\t\t\t\t\t\t\t</IntendedDataTypeSet>\n";
373 out << "\t\t\t\t\t\t\t</ProjectTypeSubmission>\n";
374 out << "\t\t\t\t\t\t</ProjectType>\n";
375 out << "\t\t\t\t\t</Project>\n";
376 out << "\t\t\t\t</XmlContent>\n";
377 out << "\t\t\t</Data>\n";
378 out << "\t\t\t<Identifier>\n";
379 out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + projectName + " </SPUID> \n";
380 out << "\t\t\t</Identifier>\n";
381 out << "\t\t</AddData>\n";
382 out << "\t</Action>\n";
383 ////////////////////////////////////////////////////////
386 ////////////////////////////////////////////////////////
387 for (int i = 0; i < Groups.size(); i++) {
389 string barcodeForThisSample = Group2Barcode[Groups[i]];
391 if (m->control_pressed) { break; }
392 out << "\t<Action>\n";
393 out << "\t\t<AddData target_db=\"BioSample\">\n";
394 out << "\t\t\t<Data content_type=\"XML\">\n";
395 out << "\t\t\t\t<XmlContent>\n";
396 out << "\t\t\t\t\t<BioSample schema_version=\"2.0\">\n";
397 out << "\t\t\t\t\t\t<SampleId>\n";
398 out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + Groups[i] + " </SPUID> \n";
399 out << "\t\t\t\t\t\t</SampleId>\n";
400 out << "\t\t\t\t\t\t<Organism>\n";
401 string organismName = "metagenome";
402 map<string, string>::iterator itOrganism = Group2Organism.find(Groups[i]);
403 if (itOrganism != Group2Organism.end()) { organismName = itOrganism->second; } //user supplied acceptable organism, so use it.
404 out << "\t\t\t\t\t\t\t<OrganismName>" + organismName + " </OrganismName> \n";
405 out << "\t\t\t\t\t\t</Organism>\n";
406 out << "\t\t\t\t\t\t<Package>" + packageType + "</Package>n";
407 out << "\t\t\t\t\t\t<Attributes>n";
408 //add biosample required attributes
409 map<string, map<string, string> >:: iterator it = mimarks.find(Groups[i]);
410 if (it != mimarks.end()) {
411 map<string, string> categories = it->second;
412 for (map<string, string>:: iterator it2 = categories.begin(); it2 != categories.end(); it2++) {
413 if (m->control_pressed) { break; }
414 out << "\t\t\t\t\t\t\t<Attribute attribute_name=\"" + it2->first + "\">\"" + it2->second + "\"</Attribute>\n";
417 out << "\t\t\t\t\t\t</Attributes>n";
418 out << "\t\t\t\t\t</BioSample>\n";
419 out << "\t\t\t\t</XmlContent>\n";
420 out << "\t\t\t</Data>\n";
421 out << "\t\t\t<Identifier>\n";
422 out << "\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + Groups[i] + " </SPUID>\n";
423 out << "\t\t\t</Identifier>\n";
424 out << "\t\t</AddData>\n";
425 out << "\t</Action>\n";
429 ////////////////////////////////////////////////////////
430 for (int i = 0; i < Groups.size(); i++) {
432 vector<string> thisGroupsFiles = filesBySample[Groups[i]];
433 string barcodeForThisSample = Group2Barcode[Groups[i]];
435 for (int j = 0; j < thisGroupsFiles.size(); j++) {
436 string libId = thisGroupsFiles[j] + "." + barcodeForThisSample;
438 if (m->control_pressed) { break; }
439 out << "\t<Action>\n";
440 out << "\t\t<AddFiles target_db=\"SRA\">\n";
441 if (libLayout == "paired") { //adjust the libID because the thisGroupsFiles[j] contains two filenames
442 vector<string> pieces = m->splitWhiteSpace(thisGroupsFiles[j]);
443 libId = pieces[0] + barcodeForThisSample;
444 out << "\t\t\t<File file_path=\"" + pieces[0] + "\">\n";
445 out << "\t\t\t\t<DataType>generic-data</DataType> \n";
446 out << "\t\t\t</File>\n";
447 vector<string> thisBarcodes; m->splitAtChar(Group2Barcode[Groups[i]], thisBarcodes, '.');
448 string forwardBarcode = thisBarcodes[0];
449 string reverseBarcode = thisBarcodes[1];
450 vector<string> thisPrimers; m->splitAtChar(Group2Primer[Groups[i]], thisPrimers, '.');
451 string forwardPrimer = thisPrimers[0];
452 string reversePrimer = thisPrimers[1];
454 out << "\t\t\t<Attribute name=\"title\">" + mimarks[Groups[i]]["title"] + "</Attribute>\n";
455 out << "\t\t\t<Attribute name=\"BarCode\">" + forwardBarcode + "</Attribute>\n";
456 out << "\t\t\t<Attribute name=\"primer\">" + forwardPrimer + "</Attribute>\n";
457 out << "\t\t\t<Attribute name=\"read_type\">forward</Attribute>\n";
458 out << "\t\t\t<Attribute name=\"library_name\">" + libId + "</Attribute>\n";
459 out << "\t\t\t<Attribute name=\"library_strategy\">" + libStrategy + "</Attribute>\n";
460 out << "\t\t\t<Attribute name=\"library_source\">" + libSource + "</Attribute>\n";
461 out << "\t\t\t<Attribute name=\"library_selection\">" + libSelection + "</Attribute>\n";
462 out << "\t\t\t<Attribute name=\"library_layout\">" + libLayout + "</Attribute>\n";
463 out << "\t\t\t<Attribute name=\"instrument_model\">" + instrumentModel + "</Attribute>\n";
464 out << "\t\t\t<Attribute name=\"library_construction_protocol\">" + mimarks[Groups[i]]["seq_methods"] + "</Attribute>\n";
466 out << "\t\t\t<File file_path=\"" + pieces[1] + "\">\n";
467 out << "\t\t\t\t<DataType>generic-data</DataType> \n";
468 out << "\t\t\t</File>\n";
469 out << "\t\t\t<Attribute name=\"title\">" + mimarks[Groups[i]]["title"] + "</Attribute>\n";
470 out << "\t\t\t<Attribute name=\"BarCode\">" + reverseBarcode + "</Attribute>\n";
471 out << "\t\t\t<Attribute name=\"primer\">" + reversePrimer + "</Attribute>\n";
472 out << "\t\t\t<Attribute name=\"read_type\">reverse</Attribute>\n";
473 out << "\t\t\t<Attribute name=\"library_name\">" + libId + "</Attribute>\n";
474 out << "\t\t\t<Attribute name=\"library_strategy\">" + libStrategy + "</Attribute>\n";
475 out << "\t\t\t<Attribute name=\"library_source\">" + libSource + "</Attribute>\n";
476 out << "\t\t\t<Attribute name=\"library_selection\">" + libSelection + "</Attribute>\n";
477 out << "\t\t\t<Attribute name=\"library_layout\">" + libLayout + "</Attribute>\n";
478 out << "\t\t\t<Attribute name=\"instrument_model\">" + instrumentModel + "</Attribute>\n";
479 out << "\t\t\t<Attribute name=\"library_construction_protocol\">" + mimarks[Groups[i]]["seq_methods"] + "</Attribute>\n";
482 out << "\t\t\t<File file_path=\"" + thisGroupsFiles[j] + "\">\n";
483 out << "\t\t\t\t<DataType>generic-data</DataType> \n";
484 out << "\t\t\t</File>\n";
486 out << "\t\t\t<Attribute name=\"title\">" + mimarks[Groups[i]]["title"] + "</Attribute>\n";
487 out << "\t\t\t<Attribute name=\"BarCode\">" + Group2Barcode[Groups[i]] + "</Attribute>\n";
488 out << "\t\t\t<Attribute name=\"primer\">" + Group2Primer[Groups[i]] + "</Attribute>\n";
489 out << "\t\t\t<Attribute name=\"read_type\">" + orientation + "</Attribute>\n";
490 out << "\t\t\t<Attribute name=\"library_name\">" + libId + "</Attribute>\n";
491 out << "\t\t\t<Attribute name=\"library_strategy\">" + libStrategy + "</Attribute>\n";
492 out << "\t\t\t<Attribute name=\"library_source\">" + libSource + "</Attribute>\n";
493 out << "\t\t\t<Attribute name=\"library_selection\">" + libSelection + "</Attribute>\n";
494 out << "\t\t\t<Attribute name=\"library_layout\">" + libLayout + "</Attribute>\n";
495 out << "\t\t\t<Attribute name=\"instrument_model\">" + instrumentModel + "</Attribute>\n";
496 out << "\t\t\t<Attribute name=\"library_construction_protocol\">" + mimarks[Groups[i]]["seq_methods"] + "</Attribute>\n";
499 ///////////////////bioProject info
500 out << "\t\t\t<AttributeRefId name=\"BioProject\">\n";
501 out << "\t\t\t\t<RefId>\n";
502 out << "\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + projectName + " </SPUID> \n";
503 out << "\t\t\t\t</RefId>\n";
504 out << "\t\t\t</AttributeRefId>\n";
505 //////////////////bioSample info
506 out << "\t\t\t<AttributeRefId name=\"BioSample\">\n";
507 out << "\t\t\t\t<RefId>\n";
508 out << "\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + Groups[i] + " </SPUID>\n";
509 out << "\t\t\t\t</RefId>\n";
510 out << "\t\t\t</AttributeRefId>\n";
512 out << "\t\t\t<Identifier>\n";
513 if (libLayout == "paired") { //adjust the libID because the thisGroupsFiles[j] contains two filenames
514 vector<string> pieces = m->splitWhiteSpace(thisGroupsFiles[j]);
515 libId = pieces[0] + barcodeForThisSample;
517 out << "\t\t\t\t<LocalId>" + libId + " </LocalId>\n";
518 out << "\t\t\t</Identifier>\n";
519 out << "\t\t</AddFiles>\n";
520 out << "\t</Action>\n";
523 out << "</Submission>\n";
526 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
528 //output files created by command
529 m->mothurOutEndLine();
530 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
531 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
532 m->mothurOutEndLine();
536 catch(exception& e) {
537 m->errorOut(e, "SRACommand", "SRACommand");
//**********************************************************************************************************************
542 int SRACommand::readContactFile(){
544 lastName = ""; firstName = ""; submissionName = ""; email = ""; centerName = ""; centerType = ""; description = ""; website = ""; projectName = "";
545 projectTitle = ""; grantAgency = ""; grantId = ""; grantTitle = "";
548 m->openInputFile(contactfile, in);
552 if (m->control_pressed) { break; }
555 in >> key; m->gobble(in);
556 value = m->getline(in); m->gobble(in);
558 for (int i = 0; i < key.length(); i++) { key[i] = toupper(key[i]); }
560 if (key == "USERNAME") { submissionName = value; }
561 else if (key == "LAST") { lastName = value; }
562 else if (key == "FIRST") { firstName = value; }
563 else if (key == "EMAIL") { email = value; }
564 else if (key == "CENTER") { centerName = value; }
565 else if (key == "TYPE") {
567 for (int i = 0; i < centerType.length(); i++) { centerType[i] = tolower(centerType[i]); }
568 if ((centerType == "consortium") || (centerType == "center") || (centerType == "institute") || (centerType == "lab")) {}
569 else { m->mothurOut("[ERROR]: " + centerType + " is not a center type option. Valid center type options are consortium, center, institute and lab. This is a controlled vocabulary section in the XML file that will be generated."); m->mothurOutEndLine(); m->control_pressed = true; }
570 }else if (key == "DESCRIPTION") { description = value; }
571 else if (key == "WEBSITE") { website = value; }
572 else if (key == "PROJECTNAME") { projectName = value; }
573 else if (key == "PROJECTTITLE") { projectTitle = value; }
574 else if (key == "GRANTID") { grantId = value; }
575 else if (key == "GRANTTITLE") { grantTitle = value; }
576 else if (key == "GRANTAGENCY") { grantAgency = value; }
580 if (lastName == "") { m->mothurOut("[ERROR]: missing last name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
581 if (firstName == "") { m->mothurOut("[ERROR]: missing first name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
582 if (submissionName == "") { m->mothurOut("[ERROR]: missing submission name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
583 if (email == "") { m->mothurOut("[ERROR]: missing email from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
584 if (centerName == "") { m->mothurOut("[ERROR]: missing center name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
585 if (centerType == "") { m->mothurOut("[ERROR]: missing center type from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
586 if (description == "") { m->mothurOut("[ERROR]: missing description from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
587 if (projectTitle == "") { m->mothurOut("[ERROR]: missing project title from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
588 if (projectName == "") { m->mothurOut("[ERROR]: missing project name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
592 catch(exception& e) {
593 m->errorOut(e, "SRACommand", "readContactFile");
597 //**********************************************************************************************************************
598 //air, host_associated, human_associated, human_gut, human_oral, human_skin, human_vaginal, microbial, miscellaneous, plant_associated, sediment, soil, wastewater or water
599 //all packages require: *sample_name *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon
601 //host_associated, human_associated, human_gut, human_oral, human_skin, human_vaginal, plant_associated: *host
602 //microbial, sediment, soil: *depth *elev
604 int SRACommand::readMIMarksFile(){
606 //acceptable organisms
607 vector<string> acceptableOrganisms;
608 bool organismError = false;
610 acceptableOrganisms.push_back("activated carbon metagenome"); acceptableOrganisms.push_back("activated sludge metagenome"); acceptableOrganisms.push_back("air metagenome"); acceptableOrganisms.push_back("anaerobic digester metagenome"); acceptableOrganisms.push_back("ant fungus garden metagenome"); acceptableOrganisms.push_back("aquatic metagenome"); acceptableOrganisms.push_back("activated carbon metagenome"); acceptableOrganisms.push_back("activated sludge metagenome"); acceptableOrganisms.push_back("beach sand metagenome"); acceptableOrganisms.push_back("biofilm metagenome"); acceptableOrganisms.push_back("biofilter metagenome"); acceptableOrganisms.push_back("biogas fermenter metagenome"); acceptableOrganisms.push_back("bioreactor metagenome"); acceptableOrganisms.push_back("bioreactor sludge metagenome"); acceptableOrganisms.push_back("clinical metagenome"); acceptableOrganisms.push_back("coal metagenome"); acceptableOrganisms.push_back("compost metagenome"); acceptableOrganisms.push_back("dust metagenome"); acceptableOrganisms.push_back("fermentation metagenome"); acceptableOrganisms.push_back("food fermentation metagenome"); acceptableOrganisms.push_back("food metagenome"); acceptableOrganisms.push_back("freshwater metagenome"); acceptableOrganisms.push_back("freshwater sediment metagenome"); acceptableOrganisms.push_back("groundwater metagenome"); acceptableOrganisms.push_back("halite metagenome"); acceptableOrganisms.push_back("hot springs metagenome"); acceptableOrganisms.push_back("hydrocarbon metagenome"); acceptableOrganisms.push_back("hydrothermal vent metagenome"); acceptableOrganisms.push_back("hypersaline lake metagenome"); acceptableOrganisms.push_back("ice metagenome"); acceptableOrganisms.push_back("indoor metagenome"); acceptableOrganisms.push_back("industrial waste metagenome"); acceptableOrganisms.push_back("mangrove metagenome"); acceptableOrganisms.push_back("marine metagenome"); acceptableOrganisms.push_back("marine sediment 
metagenome"); acceptableOrganisms.push_back("microbial mat metagenome"); acceptableOrganisms.push_back("mine drainage metagenome"); acceptableOrganisms.push_back("mixed culture metagenome"); acceptableOrganisms.push_back("oil production facility metagenome"); acceptableOrganisms.push_back("paper pulp metagenome"); acceptableOrganisms.push_back("permafrost metagenome"); acceptableOrganisms.push_back("plastisphere metagenome"); acceptableOrganisms.push_back("power plant metagenome"); acceptableOrganisms.push_back("retting rhizosphere metagenome"); acceptableOrganisms.push_back("rock metagenome"); acceptableOrganisms.push_back("salt lake metagenome"); acceptableOrganisms.push_back("saltern metagenome"); acceptableOrganisms.push_back("sediment metagenome"); acceptableOrganisms.push_back("snow metagenome"); acceptableOrganisms.push_back("soil metagenome"); acceptableOrganisms.push_back("stromatolite metagenome"); acceptableOrganisms.push_back("terrestrial metagenome"); acceptableOrganisms.push_back("tomb wall metagenome"); acceptableOrganisms.push_back("wastewater metagenome"); acceptableOrganisms.push_back("wetland metagenome"); acceptableOrganisms.push_back("whale fall metagenome");
612 acceptableOrganisms.push_back("algae metagenome"); acceptableOrganisms.push_back("ant metagenome"); acceptableOrganisms.push_back("bat metagenome"); acceptableOrganisms.push_back("beetle metagenome"); acceptableOrganisms.push_back("bovine gut metagenome"); acceptableOrganisms.push_back("bovine metagenome"); acceptableOrganisms.push_back("chicken gut metagenome"); acceptableOrganisms.push_back("coral metagenome"); acceptableOrganisms.push_back("echinoderm metagenome"); acceptableOrganisms.push_back("endophyte metagenome"); acceptableOrganisms.push_back("epibiont metagenome"); acceptableOrganisms.push_back("fish metagenome"); acceptableOrganisms.push_back("fossil metagenome"); acceptableOrganisms.push_back("gill metagenome"); acceptableOrganisms.push_back("gut metagenome"); acceptableOrganisms.push_back("honeybee metagenome"); acceptableOrganisms.push_back("human gut metagenome"); acceptableOrganisms.push_back("human lung metagenome"); acceptableOrganisms.push_back("human metagenome"); acceptableOrganisms.push_back("human nasal/pharyngeal metagenome"); acceptableOrganisms.push_back("human oral metagenome"); acceptableOrganisms.push_back("human skin metagenome"); acceptableOrganisms.push_back("insect gut metagenome"); acceptableOrganisms.push_back("insect metagenome"); acceptableOrganisms.push_back("mollusc metagenome"); acceptableOrganisms.push_back("mosquito metagenome"); acceptableOrganisms.push_back("mouse gut metagenome"); acceptableOrganisms.push_back("mouse metagenome"); acceptableOrganisms.push_back("mouse skin metagenome"); acceptableOrganisms.push_back("nematode metagenome"); acceptableOrganisms.push_back("oral metagenome"); acceptableOrganisms.push_back("phyllosphere metagenome"); acceptableOrganisms.push_back("pig metagenome"); acceptableOrganisms.push_back("plant metagenome"); acceptableOrganisms.push_back("primate metagenome"); acceptableOrganisms.push_back("rat metagenome"); acceptableOrganisms.push_back("root metagenome"); 
acceptableOrganisms.push_back("sea squirt metagenome"); acceptableOrganisms.push_back("seed metagenome"); acceptableOrganisms.push_back("shoot metagenome"); acceptableOrganisms.push_back("skin metagenome"); acceptableOrganisms.push_back("snake metagenome"); acceptableOrganisms.push_back("sponge metagenome"); acceptableOrganisms.push_back("stomach metagenome"); acceptableOrganisms.push_back("symbiont metagenome"); acceptableOrganisms.push_back("termite gut metagenome"); acceptableOrganisms.push_back("termite metagenome"); acceptableOrganisms.push_back("upper respiratory tract metagenome"); acceptableOrganisms.push_back("urine metagenome"); acceptableOrganisms.push_back("viral metagenome"); acceptableOrganisms.push_back("wallaby gut metagenome"); acceptableOrganisms.push_back("wasp metagenome"); acceptableOrganisms.push_back("sythetic metagenome"); acceptableOrganisms.push_back("metagenome");
614 vector<string> requiredFieldsForPackage;
615 requiredFieldsForPackage.push_back("sample_name"); requiredFieldsForPackage.push_back("organism");
616 requiredFieldsForPackage.push_back("collection_date"); requiredFieldsForPackage.push_back("biome");
617 requiredFieldsForPackage.push_back("feature"); requiredFieldsForPackage.push_back("material");
618 requiredFieldsForPackage.push_back("geo_loc_name"); requiredFieldsForPackage.push_back("lat_lon");
619 requiredFieldsForPackage.push_back("seq_methods"); requiredFieldsForPackage.push_back("title");
620 vector<string> chooseAtLeastOneForPackage;
623 m->openInputFile(mimarksfile, in);
626 string temp; packageType = "";
629 if (m->control_pressed) { break; }
630 temp = m->getline(in); m->gobble(in);
632 if (m->debug) { m->mothurOut("[DEBUG]: " + temp + "\n"); }
634 if (temp[0] == '#') {
635 int pos = temp.find("Environmental");
636 if (pos != string::npos) {
637 for (int i = pos+14; i < temp.length(); i++) {
638 if (!isspace(temp[i])) { packageType += temp[i]; }
639 else { i+= temp.length(); }
643 else{ break; } //hit headers line
646 vector<string> headers; m->splitAtChar(temp, headers, '\t');
647 m->removeBlanks(headers);
648 //remove * from required's
649 for (int i = 0; i < headers.size(); i++) {
650 if (headers[i][0] == '*') { headers[i] = headers[i].substr(1); }
651 if (headers[i][0] == '*') { headers[i] = headers[i].substr(1); chooseAtLeastOneForPackage.push_back(headers[i]); } //secondary condition
652 if (m->debug) { m->mothurOut("[DEBUG]: " + headers[i] + "\n"); }
655 if (m->debug) { m->mothurOut("[DEBUG]: packageType = '" + packageType + "'\n"); }
657 //check to make sure package has all its required parts
658 //MIMARKS.specimen.water.3.0
659 if (packageType == "MIMARKS.specimen.air.3.0") { requiredFieldsForPackage.push_back("altitude"); }
660 else if ((packageType == "MIMARKS.specimen.host-associated.3.0") || (packageType == "MIMARKS.specimen.human-associated.3.0") || (packageType == "MIMARKS.specimen.human-gut.3.0") || (packageType == "MIMARKS.specimen.human-oral.3.0") || (packageType == "MIMARKS.specimen.human-skin.3.0") || (packageType == "MIMARKS.specimen.human-vaginal.3.0") || (packageType == "MIMARKS.specimen.plant-associated.3.0")) { requiredFieldsForPackage.push_back("host"); }
661 else if ((packageType == "MIMARKS.specimen.microbial.3.0") || (packageType == "MIMARKS.specimen.sediment.3.0") || (packageType == "soil")) { requiredFieldsForPackage.push_back("depth"); requiredFieldsForPackage.push_back("elev"); }
662 else if (packageType == "MIMARKS.specimen.water.3.0") { requiredFieldsForPackage.push_back("depth"); }
663 else if ((packageType == "MIMARKS.specimen.miscellaneous.3.0") || (packageType == "wastewater")) { }
665 m->mothurOut("[ERROR]: unknown package " + packageType + ", please correct.\n"); m->control_pressed = true; in.close(); return 0;
668 if (!m->isSubset(headers, requiredFieldsForPackage)){
669 string requiredFields = "";
670 for (int i = 0; i < requiredFieldsForPackage.size()-1; i++) { requiredFields += requiredFieldsForPackage[i] + ", "; } requiredFields += requiredFieldsForPackage[requiredFieldsForPackage.size()-1];
671 m->mothurOut("[ERROR]: missing required fields for package, please correct. Required fields are " + requiredFields + ".\n"); m->control_pressed = true; in.close(); return 0;
674 if (m->debug) { m->mothurOut("[DEBUG]: chooseAtLeastOneForPackage.size() = " + toString(chooseAtLeastOneForPackage.size()) + "\n"); }
676 if (!m->inUsersGroups(chooseAtLeastOneForPackage, headers)){ //returns true if any of the choose at least ones are in headers
677 string requiredFields = "";
678 for (int i = 0; i < chooseAtLeastOneForPackage.size()-1; i++) { requiredFields += chooseAtLeastOneForPackage[i] + ", "; cout << chooseAtLeastOneForPackage[i] << endl; }
679 if (chooseAtLeastOneForPackage.size() < 1) { requiredFields += chooseAtLeastOneForPackage[chooseAtLeastOneForPackage.size()-1]; }
680 m->mothurOut("[ERROR]: missing a choose at least one fields for the package, please correct. These are marked with '**'. Required fields are " + requiredFields + ".\n"); m->control_pressed = true; in.close(); return 0;
683 map<string, bool> allNA; for (int i = 1; i < headers.size(); i++) { allNA[headers[i]] = true; }
686 if (m->control_pressed) { break; }
688 temp = m->getline(in); m->gobble(in);
690 if (m->debug) { m->mothurOut("[DEBUG]: " + temp + "\n"); }
692 string original = temp;
693 vector<string> linePieces; m->splitAtChar(temp, linePieces, '\t');
694 m->removeBlanks(linePieces);
696 if (linePieces.size() != headers.size()) { m->mothurOut("[ERROR]: line: " + original + " contains " + toString(linePieces.size()) + " columns, but you have " + toString(headers.size()) + " column headers, please correct.\n"); m->control_pressed = true; }
698 map<string, map<string, string> >:: iterator it = mimarks.find(linePieces[0]);
700 if (it == mimarks.end()) {
701 map<string, string> categories;
702 //start after *sample_name
703 for (int i = 1; i < headers.size(); i++) {
704 categories[headers[i]] = linePieces[i];
705 //check the users inputs for appropriate organisms
706 if (headers[i] == "organism") {
707 if (!m->inUsersGroups(linePieces[i], acceptableOrganisms)) { //not an acceptable organism
708 organismError = true;
709 m->mothurOut("[WARNING]: " + linePieces[i]+ " is not an acceptable organism, changing to metagenome. You can correct the issue and rerun the command, or NCBI will allow you to modify the organism after submission.\n"); linePieces[i] = "metagenome"; categories[headers[i]] = linePieces[i];
711 Group2Organism[linePieces[0]] = linePieces[i];
713 if (linePieces[i] != "NA") { allNA[headers[i]] = false; }
716 //does this sample already match an existing sample?
717 bool isOkaySample = true;
718 for (map<string, map<string, string> >:: iterator it2 = mimarks.begin(); it2 != mimarks.end(); it2++) {
719 if (m->control_pressed) { break; }
721 for (int i = 1; i < headers.size(); i++) {
722 if ((it2->second)[headers[i]] != categories[headers[i]]) { allSame = false; }
724 if (allSame) { m->mothurOut("[ERROR]: " + linePieces[0]+ " is a duplicate sample to " + it2->first + ". It has all the same attributes in the MIMarks file. Samples must have distinguishing features to be uploaded to the NCBI library, please correct.\n"); m->control_pressed = true; isOkaySample = false; }
726 if (isOkaySample) { mimarks[linePieces[0]] = categories; }
728 m->mothurOut("[ERROR]: " + linePieces[0]+ " is a duplicate sampleName. Sample names must be unique, please correct.\n"); m->control_pressed = true;
734 //add in values for "scrap" group
735 map<string, string> categories;
736 //start after *sample_name
737 for (int i = 1; i < headers.size(); i++) {
738 categories[headers[i]] = "NA";
739 if (headers[i] == "organism") { categories[headers[i]] = "metagenome"; }
740 if (headers[i] == "seq_methods") { categories[headers[i]] = "these sequences were scrapped"; }
741 if (headers[i] == "title") { categories[headers[i]] = "these sequences were scrapped"; }
743 mimarks["scrap"] = categories;
744 Group2Organism["scrap"] = "metagenome";
747 string organismTypes = "";
748 for (int i = 0; i < acceptableOrganisms.size()-1; i++) { organismTypes += acceptableOrganisms[i] + ", "; }
749 organismTypes += acceptableOrganisms[acceptableOrganisms.size()-1];
750 m->mothurOut("[WARNING]: The acceptable organism choices are: " + organismTypes + ".\n");
755 catch(exception& e) {
756 m->errorOut(e, "SRACommand", "readMIMarksFile");
761 //**********************************************************************************************************************
762 // going to have to rework this to allow for other options --
772 fastqfile1 oligosfile1
773 fastqfile2 oligosfile2
778 fastqfile fastqfile group
779 fastqfile fastqfile group
780 fastqfile fastqfile group
// Reads the file named by the file member, one line per data set, and fills files
// (group name -> list of file names).
//
// Line formats handled by the visible code:
//   2 columns: an sff or fastq file followed by its oligos file. The pair is stored in the
//              sfffile / fastqfile and oligosfile members and parsed right away (the fastq path
//              calls parseFastqFile; the corresponding sff call is not visible in this listing).
//   3 columns: group, forward fastq, reverse fastq. The two names are joined with a space and
//              appended to files[group], and libLayout becomes paired.
//   anything else: an error is reported and m->control_pressed is set.
//
// Each file name is tried as given, then under the default path, then under the output
// directory; a file that cannot be found only produces a warning and the line is ignored.
// Returns 0 as soon as m->control_pressed is seen at the top of the read loop.
785 int SRACommand::readFile(map<string, vector<string> >& files){
787 //vector<string> theseFiles;
792 m->openInputFile(file, in);
796 if (m->control_pressed) { return 0; }
798 string line = m->getline(in); m->gobble(in);
799 vector<string> pieces = m->splitWhiteSpace(line);
802 string thisFileName1, thisFileName2; thisFileName1 = ""; thisFileName2 = "";
803 if (pieces.size() == 2) {
804 thisFileName1 = pieces[0];
805 thisFileName2 = pieces[1];
806 }else if (pieces.size() == 3) {
807 thisFileName1 = pieces[1];
808 thisFileName2 = pieces[2];
// Assign to the group variable that the debug output and the files lookup further down read.
// Declaring a new local here would shadow it and leave the paired files keyed under an unset
// group name.
809 group = pieces[0];
810 libLayout = "paired";
812 m->mothurOut("[ERROR]: file lines can be 2 or 3 columns. The 2 column files are sff file then oligos or fastqfile then oligos. You may have multiple lines in the file. The 3 column files are for paired read libraries. The format is groupName, forwardFastqFile reverseFastqFile. \n"); m->control_pressed = true;
815 if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", thisFileName1 = " + thisFileName1 + ", thisFileName2 = " + thisFileName2 + ".\n"); }
817 //check to make sure both are able to be opened
// openInputFile is compared against 1 throughout this function to detect failure
819 int openForward = m->openInputFile(thisFileName1, in2, "noerror");
821 //if you can't open it, try default location
822 if (openForward == 1) {
823 if (m->getDefaultPath() != "") { //default path is set
824 string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName1);
825 m->mothurOut("Unable to open " + thisFileName1 + ". Trying default " + tryPath); m->mothurOutEndLine();
827 openForward = m->openInputFile(tryPath, in3, "noerror");
829 thisFileName1 = tryPath;
833 //if you can't open it, try output location
834 if (openForward == 1) {
835 if (m->getOutputDir() != "") { //output directory is set
836 string tryPath = m->getOutputDir() + m->getSimpleName(thisFileName1);
837 m->mothurOut("Unable to open " + thisFileName1 + ". Trying output directory " + tryPath); m->mothurOutEndLine();
839 openForward = m->openInputFile(tryPath, in4, "noerror");
840 thisFileName1 = tryPath;
845 if (openForward == 1) { //can't find it
846 m->mothurOut("[WARNING]: can't find " + thisFileName1 + ", ignoring.\n");
847 }else{ in2.close(); }
// same three-step search for the second file name on the line
852 openReverse = m->openInputFile(thisFileName2, in3, "noerror");
854 //if you can't open it, try default location
855 if (openReverse == 1) {
856 if (m->getDefaultPath() != "") { //default path is set
857 string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName2);
858 m->mothurOut("Unable to open " + thisFileName2 + ". Trying default " + tryPath); m->mothurOutEndLine();
860 openReverse = m->openInputFile(tryPath, in3, "noerror");
862 thisFileName2 = tryPath;
866 //if you can't open it, try output location
867 if (openReverse == 1) {
868 if (m->getOutputDir() != "") { //output directory is set
869 string tryPath = m->getOutputDir() + m->getSimpleName(thisFileName2);
870 m->mothurOut("Unable to open " + thisFileName2 + ". Trying output directory " + tryPath); m->mothurOutEndLine();
872 openReverse = m->openInputFile(tryPath, in4, "noerror");
873 thisFileName2 = tryPath;
878 if (openReverse == 1) { //can't find it
879 m->mothurOut("[WARNING]: can't find " + thisFileName2 + ", ignoring pair.\n");
880 }else{ in3.close(); }
883 if ((pieces.size() == 2) && (openForward != 1) && (openReverse != 1)) { //good pair and sff or fastq and oligos
// the data file type is decided purely by whether the name contains .sff
885 int pos = thisFileName1.find(".sff");
886 if (pos != string::npos) {//these files are sff files
888 sfffile = thisFileName1; oligosfile = thisFileName2;
889 if (m->debug) { m->mothurOut("[DEBUG]: about to read oligos\n"); }
891 if (m->debug) { m->mothurOut("[DEBUG]: about to parse\n"); }
893 if (m->debug) { m->mothurOut("[DEBUG]: done parsing " + sfffile + "\n"); }
896 fastqfile = thisFileName1; oligosfile = thisFileName2;
897 if (m->debug) { m->mothurOut("[DEBUG]: about to read oligos\n"); }
899 if (m->debug) { m->mothurOut("[DEBUG]: about to parse\n"); }
900 parseFastqFile(files);
901 if (m->debug) { m->mothurOut("[DEBUG]: done parsing " + fastqfile + "\n"); }
904 }else if((pieces.size() == 3) && (openForward != 1) && (openReverse != 1)) { //good pair and paired read
// paired reads are stored as one space-separated entry per group
905 map<string, vector<string> >::iterator it = files.find(group);
906 if (it == files.end()) {
907 vector<string> temp; temp.push_back(thisFileName1 + " " + thisFileName2); files[group] = temp;
909 files[group].push_back(thisFileName1 + " " + thisFileName2);
919 catch(exception& e) {
920 m->errorOut(e, "SRACommand", "readFile");
924 //**********************************************************************************************************************
// Splits the sff file named by the sfffile member into per-sample sff files by running the
// sffinfo command with the oligos file and any non-zero pdiffs / bdiffs / ldiffs / sdiffs /
// tdiffs settings, then hands the resulting file names to mapGroupToFile so they are added to
// files under their group names.
// Sets libLayout to single. If sffinfo reports no sff output, m->control_pressed is set.
925 int SRACommand::parseSffFile(map<string, vector<string> >& files){
927 vector<string> theseFiles;
929 libLayout = "single"; //controlled vocab
932 //run sffinfo to parse sff file into individual sampled sff files
933 string commandString = "sff=" + sfffile;
935 commandString += ", oligos=" + oligosfile;
936 //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs
// only non-zero values are passed along, so sffinfo keeps its own defaults otherwise
937 if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); }
938 if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); }
939 if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
940 if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
941 if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
943 m->mothurOutEndLine();
944 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
945 m->mothurOut("Running command: sffinfo(" + commandString + ")"); m->mothurOutEndLine();
// mothurCalling is raised for the duration of the nested command and cleared afterwards
946 m->mothurCalling = true;
948 Command* sffinfoCommand = new SffInfoCommand(commandString);
949 sffinfoCommand->execute();
// collect the per-sample sff files that sffinfo registered under the sff output type
951 map<string, vector<string> > filenames = sffinfoCommand->getOutputFiles();
952 map<string, vector<string> >::iterator it = filenames.find("sff");
953 if (it != filenames.end()) { theseFiles = it->second; }
954 else { m->control_pressed = true; } // error in sffinfo
956 delete sffinfoCommand;
957 m->mothurCalling = false;
958 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
960 mapGroupToFile(files, theseFiles);
964 catch(exception& e) {
// report this function by its own name (it previously claimed to be readFile)
965 m->errorOut(e, "SRACommand", "parseSffFile");
970 //**********************************************************************************************************************
// Splits the fastq file named by the fastqfile member into per-sample fastq files by running
// the fastq.info command (ParseFastaQCommand) with the oligos file and any non-zero pdiffs /
// bdiffs / ldiffs / sdiffs / tdiffs settings, then hands the resulting file names to
// mapGroupToFile so they are added to files under their group names.
// Sets inputfile to fastqfile and libLayout to single. If fastq.info reports no fastq output,
// m->control_pressed is set.
971 int SRACommand::parseFastqFile(map<string, vector<string> >& files){
973 vector<string> theseFiles;
974 inputfile = fastqfile;
975 libLayout = "single"; //controlled vocab
977 //run fastq.info to parse fastq file into individual sampled fastq files
978 string commandString = "fastq=" + fastqfile;
980 commandString += ", oligos=" + oligosfile;
981 //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs
// only non-zero values are passed along, so fastq.info keeps its own defaults otherwise
982 if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); }
983 if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); }
984 if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
985 if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
986 if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
988 m->mothurOutEndLine();
989 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
990 m->mothurOut("Running command: fastq.info(" + commandString + ")"); m->mothurOutEndLine();
// mothurCalling is raised for the duration of the nested command and cleared afterwards
991 m->mothurCalling = true;
993 Command* fastqinfoCommand = new ParseFastaQCommand(commandString);
994 fastqinfoCommand->execute();
// collect the per-sample fastq files that fastq.info registered under the fastq output type
996 map<string, vector<string> > filenames = fastqinfoCommand->getOutputFiles();
997 map<string, vector<string> >::iterator it = filenames.find("fastq");
998 if (it != filenames.end()) { theseFiles = it->second; }
999 else { m->control_pressed = true; } // error in fastq.info
1001 delete fastqinfoCommand;
1002 m->mothurCalling = false;
1003 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1005 mapGroupToFile(files, theseFiles);
1009 catch(exception& e) {
// report this function by its own name (it previously claimed to be readFile)
1010 m->errorOut(e, "SRACommand", "parseFastqFile");
1014 //***************************************************************************************************************
1015 //maps group to file
// Associates each entry of the Groups member with the parsed file that belongs to it.
// For every group, theseFiles is searched for names that contain the group name immediately
// followed by a dot; when exactly one file matches, that file name is appended to files[group]
// (creating the entry if needed).
//   files      - group name -> file names, updated in place
//   theseFiles - file names produced by the parse step
1016 int SRACommand::mapGroupToFile(map<string, vector<string> >& files, vector<string> theseFiles){
1019 for (int i = 0; i < Groups.size(); i++) {
1022 for (int j = 0; j < theseFiles.size(); j++) {
1023 int pos = theseFiles[j].find(Groups[i]);
1024 if (pos != string::npos) { //you have a potential match, make sure you dont have a case of partial name
// the character right after the group name must be a dot for the match to count
1025 if (theseFiles[j][pos+Groups[i].length()] == '.') { //final.soil.sff vs final.soil2.sff both would match soil.
// matches holds indexes into theseFiles; only an unambiguous single match is used
1031 if(matches.size() == 1) {
1032 map<string, vector<string> >::iterator it = files.find(Groups[i]);
1033 if (it == files.end()) {
1034 vector<string> temp; temp.push_back(theseFiles[*matches.begin()]); files[Groups[i]] = temp;
1036 files[Groups[i]].push_back(theseFiles[*matches.begin()]);
1042 catch(exception& e) {
// report this function by its own name (it previously claimed to be checkGroups)
1043 m->errorOut(e, "SRACommand", "mapGroupToFile");
1048 //***************************************************************************************************************
1049 //checks groups and files returned from parse - removes any groups that did not get reads assigned to them, orders files.
// Filters the Groups member against the parse results.
//   files - group name -> file names produced by the parse step (only looked up here)
// Every group that has an entry in files is copied to newGroups; groups without files are
// skipped. NOTE(review): the statement that stores newGroups back and the return value are not
// visible in this listing - confirm against the full source.
1050 int SRACommand::checkGroups(map<string, vector<string> >& files){
1052 vector<string> newGroups;
1053 for (int i = 0; i < Groups.size(); i++) {
1055 map<string, vector<string> >::iterator it = files.find(Groups[i]);
1056 //no files for this group, remove it
1057 if (it == files.end()) { }
1058 else { newGroups.push_back(Groups[i]); }
1065 catch(exception& e) {
1066 m->errorOut(e, "SRACommand", "checkGroups");
1070 //***************************************************************************************************************
// Parses the oligos file named by the oligosfile member.
//
// Each record starts with a type keyword (compared case-insensitively) followed by an oligo:
//   FORWARD - a single primer with an optional group name
//   PRIMER  - a forward and reverse primer pair with an optional group name
//   REVERSE - a reverse primer, stored after being passed through reverseOligo
//   BARCODE - a single barcode, or a forward and reverse barcode pair, with a group name
//   LINKER, SPACER - stored as given
// Any other keyword produces a warning and the oligo is ignored.
// Oligos are upper-cased and U is converted to T before they are stored.
//
// After reading, barcode and primer names are combined into group names, which are recorded in
// Group2Barcode and Group2Primer and appended to the Groups member.
// Mixing paired primers or barcodes with unpaired ones, linkers or spacers is an error: it sets
// m->control_pressed and returns 0.
1071 int SRACommand::readOligos(){
1074 m->openInputFile(oligosfile, inOligos);
1076 string type, oligo, roligo, group;
1077 bool hasPrimer = false; bool hasPairedBarcodes = false; pairedOligos = false;
// paired entries are keyed by their running index, unpaired entries by the oligo string
1078 map<int, oligosPair> pairedBarcodes;
1079 map<int, oligosPair> pairedPrimers;
1080 map<string, int> barcodes;
1081 map<string, int> primers;
1082 vector<string> linker;
1083 vector<string> spacer, revPrimer;
// running indexes; they line up with positions in primerNameVector and barcodeNameVector
1084 int indexPrimer = 0;
1085 int indexBarcode = 0;
1086 int indexPairedPrimer = 0;
1087 int indexPairedBarcode = 0;
// forward+reverse concatenations already seen, used only to report repeats
1088 set<string> uniquePrimers;
1089 set<string> uniqueBarcodes;
1091 while(!inOligos.eof()){
1095 if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
1098 while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // discard the remainder of the line
1099 m->gobble(inOligos);
1102 m->gobble(inOligos);
1103 //make type case insensitive
1104 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
1108 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
// normalise the oligo: upper case, RNA U becomes T
1110 for(int i=0;i<oligo.length();i++){
1111 oligo[i] = toupper(oligo[i]);
1112 if(oligo[i] == 'U') { oligo[i] = 'T'; }
1115 if(type == "FORWARD"){
1118 // get rest of line in case there is a primer name
1119 while (!inOligos.eof()) {
1120 char c = inOligos.get();
// 10, 13 and -1 are line feed, carriage return and end of file
1121 if (c == 10 || c == 13 || c == -1){ break; }
1122 else if (c == 32 || c == 9){;} //space or tab
1123 else { group += c; }
1126 //check for repeat primers
1127 map<string, int>::iterator itPrime = primers.find(oligo);
1128 if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1130 if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); } }
// a repeated primer is only reported; it still overwrites the stored index
1132 primers[oligo] = indexPrimer; indexPrimer++;
1133 primerNameVector.push_back(group);
1135 else if (type == "PRIMER"){
1136 m->gobble(inOligos);
// normalise the reverse primer the same way, then pass it through reverseOligo
1140 for(int i=0;i<roligo.length();i++){
1141 roligo[i] = toupper(roligo[i]);
1142 if(roligo[i] == 'U') { roligo[i] = 'T'; }
1144 roligo = reverseOligo(roligo);
1148 // get rest of line in case there is a primer name
1149 while (!inOligos.eof()) {
1150 char c = inOligos.get();
1151 if (c == 10 || c == 13 || c == -1){ break; }
1152 else if (c == 32 || c == 9){;} //space or tab
1153 else { group += c; }
1156 oligosPair newPrimer(oligo, roligo);
1158 if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
1160 //check for repeat primer pairs
1161 string tempPair = oligo+roligo;
1162 if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); }
1163 else { uniquePrimers.insert(tempPair); }
1165 if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } }
1167 pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
1168 primerNameVector.push_back(group);
1171 else if(type == "REVERSE"){
1172 //Sequence oligoRC("reverse", oligo);
1173 //oligoRC.reverseComplement();
1174 string oligoRC = reverseOligo(oligo);
1175 revPrimer.push_back(oligoRC);
1177 else if(type == "BARCODE"){
1180 //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs
1181 //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info
// read the remainder of the line to decide which of the two layouts this record uses
1184 while (!inOligos.eof()) {
1185 char c = inOligos.get();
1186 if (c == 10 || c == 13 || c == -1){ break; }
1187 else if (c == 32 || c == 9){;} //space or tab
1191 //then this is illumina data with 4 columns
1193 hasPairedBarcodes = true;
// in the paired layout the token read into group is the reverse barcode
1194 string reverseBarcode = group; //reverseOligo(group); //reverse barcode
1197 for(int i=0;i<reverseBarcode.length();i++){
1198 reverseBarcode[i] = toupper(reverseBarcode[i]);
1199 if(reverseBarcode[i] == 'U') { reverseBarcode[i] = 'T'; }
1202 reverseBarcode = reverseOligo(reverseBarcode);
1203 oligosPair newPair(oligo, reverseBarcode);
1205 if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
1206 //check for repeat barcodes
1207 string tempPair = oligo+reverseBarcode;
1208 if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); }
1209 else { uniqueBarcodes.insert(tempPair); }
1211 pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
1212 barcodeNameVector.push_back(group);
// single-barcode layout
1214 //check for repeat barcodes
1215 map<string, int>::iterator itBar = barcodes.find(oligo);
1216 if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1218 barcodes[oligo]=indexBarcode; indexBarcode++;
1219 barcodeNameVector.push_back(group);
1221 }else if(type == "LINKER"){
1222 linker.push_back(oligo);
1223 }else if(type == "SPACER"){
1224 spacer.push_back(oligo);
1226 else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1228 m->gobble(inOligos);
// paired oligos cannot be combined with any unpaired entry
1232 if (hasPairedBarcodes || hasPrimer) {
1233 pairedOligos = true;
1234 if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine(); return 0; }
1238 //add in potential combos
// an empty name is pushed so the name vectors are never empty
1239 if(barcodeNameVector.size() == 0){
1240 barcodeNameVector.push_back("");
1243 if(primerNameVector.size() == 0){
1244 primerNameVector.push_back("");
1247 set<string> uniqueNames; //used to cleanup outputFileNames
// paired case: every barcode pair is combined with every primer pair
1249 for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
1250 for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
1252 string primerName = primerNameVector[itPrimer->first];
1253 string barcodeName = barcodeNameVector[itBar->first];
1255 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
// group name is the barcode name, the primer name, or barcode.primer when both are present
1257 string comboGroupName = "";
1258 string fastqFileName = "";
1260 if(primerName == ""){
1261 comboGroupName = barcodeNameVector[itBar->first];
1264 if(barcodeName == ""){
1265 comboGroupName = primerNameVector[itPrimer->first];
1268 comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
1271 uniqueNames.insert(comboGroupName);
// paired oligos are recorded as forward.reverse
1272 Group2Barcode[comboGroupName] = (itBar->second).forward+"."+(itBar->second).reverse;
1273 Group2Primer[comboGroupName] = (itPrimer->second).forward+"."+(itPrimer->second).reverse;
// unpaired case: same combination logic, keyed by the oligo strings
1278 for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1279 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1281 string primerName = primerNameVector[itPrimer->second];
1282 string barcodeName = barcodeNameVector[itBar->second];
1284 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1286 string comboGroupName = "";
1287 string fastqFileName = "";
1289 if(primerName == ""){
1290 comboGroupName = barcodeNameVector[itBar->second];
1293 if(barcodeName == ""){
1294 comboGroupName = primerNameVector[itPrimer->second];
1297 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1300 uniqueNames.insert(comboGroupName);
1301 Group2Barcode[comboGroupName] = itBar->first;
1302 Group2Primer[comboGroupName] = itPrimer->first;
1309 if (m->debug) { int count = 0; for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } }
// publish the de-duplicated group names
1311 for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { Groups.push_back(*it); }
1316 catch(exception& e) {
1317 m->errorOut(e, "SRACommand", "readOligos");
1321 //********************************************************************/
1322 string SRACommand::reverseOligo(string oligo){
1324 string reverse = "";
1326 for(int i=oligo.length()-1;i>=0;i--){
1328 if(oligo[i] == 'A') { reverse += 'T'; }
1329 else if(oligo[i] == 'T'){ reverse += 'A'; }
1330 else if(oligo[i] == 'U'){ reverse += 'A'; }
1332 else if(oligo[i] == 'G'){ reverse += 'C'; }
1333 else if(oligo[i] == 'C'){ reverse += 'G'; }
1335 else if(oligo[i] == 'R'){ reverse += 'Y'; }
1336 else if(oligo[i] == 'Y'){ reverse += 'R'; }
1338 else if(oligo[i] == 'M'){ reverse += 'K'; }
1339 else if(oligo[i] == 'K'){ reverse += 'M'; }
1341 else if(oligo[i] == 'W'){ reverse += 'W'; }
1342 else if(oligo[i] == 'S'){ reverse += 'S'; }
1344 else if(oligo[i] == 'B'){ reverse += 'V'; }
1345 else if(oligo[i] == 'V'){ reverse += 'B'; }
1347 else if(oligo[i] == 'D'){ reverse += 'H'; }
1348 else if(oligo[i] == 'H'){ reverse += 'D'; }
1350 else { reverse += 'N'; }
1356 catch(exception& e) {
1357 m->errorOut(e, "SRACommand", "reverseOligo");
1361 //********************************************************************/
1362 //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
1363 bool SRACommand::checkCasesPlatforms(string& platform){
1365 string original = platform;
1368 //remove users possible case errors
1369 for (int i = 0; i < platform.size(); i++) { platform[i] = toupper(platform[i]); }
1371 //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
1373 if ((platform == "_LS454") || (platform == "ILLUMINA") || (platform == "ION_TORRENT") || (platform == "PACBIO_SMRT") || (platform == "454")) { }
1374 else { isOkay = false; }
1377 if (platform == "454") { platform = "_LS454"; }
1379 m->mothurOut("[ERROR]: " + original + " is not a valid platform option. Valid platform options are _LS454, ILLUMINA-ION, TORRENT or PACBIO_SMRT."); m->mothurOutEndLine(); abort = true;
1384 catch(exception& e) {
1385 m->errorOut(e, "SRACommand", "checkCasesPlatforms");
1389 //********************************************************************/
1390 //454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified
1391 bool SRACommand::checkCasesInstrumentModels(string& instrumentModel){
1393 string original = instrumentModel;
1396 //remove users possible case errors
1397 for (int i = 0; i < instrumentModel.size(); i++) { instrumentModel[i] = toupper(instrumentModel[i]); }
1399 //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
1400 if (platform == "_LS454") { //instrument model options are 454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-unspecified
1401 if ((instrumentModel == "454_GS") || (instrumentModel == "454_GS_20") || (instrumentModel == "454_GS_FLX") || (instrumentModel == "454_GS_FLX_TITANIUM") || (instrumentModel == "454_GS_JUNIOR") || (instrumentModel == "UNSPECIFIED")) { }
1402 else { isOkay = false; }
1404 if (instrumentModel == "454_GS_FLX_TITANIUM") { instrumentModel = "454_GS_FLX_Titanium"; }
1405 if (instrumentModel == "454_GS_JUNIOR") { instrumentModel = "454_GS_Junior"; }
1406 if (instrumentModel == "UNSPECIFIED") { instrumentModel = "unspecified"; }
1408 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform. Valid instrument options are 454_GS, 454_GS_20, 454_GS_FLX, 454_GS_FLX_Titanium, 454_GS_Junior or unspecified."); m->mothurOutEndLine(); abort = true;
1411 }else if (platform == "ILLUMINA") { //instrument model options are Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-unspecified
1412 if ((instrumentModel == "ILLUMINA_GENOME_ANALYZER") || (instrumentModel == "ILLUMINA_GENOME_ANALYZER_II") || (instrumentModel == "ILLUMINA_GENOME_ANALYZER_IIX") || (instrumentModel == "ILLUMINA_HISEQ_2000") || (instrumentModel == "ILLUMINA_HISEQ_1000") || (instrumentModel == "ILLUMINA_MISEQ") || (instrumentModel == "UNSPECIFIED")) { }
1413 else { isOkay = false; }
1416 if (instrumentModel == "ILLUMINA_GENOME_ANALYZER") { instrumentModel = "Illumina_Genome_Analyzer"; }
1417 if (instrumentModel == "ILLUMINA_GENOME_ANALYZER_II") { instrumentModel = "Illumina_Genome_Analyzer_II"; }
1418 if (instrumentModel == "ILLUMINA_GENOME_ANALYZER_IIX") { instrumentModel = "Illumina_Genome_Analyzer_IIx"; }
1419 if (instrumentModel == "ILLUMINA_HISEQ_2000") { instrumentModel = "Illumina_HiSeq_2000"; }
1420 if (instrumentModel == "ILLUMINA_HISEQ_1000") { instrumentModel = "Illumina_HiSeq_1000"; }
1421 if (instrumentModel == "ILLUMINA_MISEQ") { instrumentModel = "Illumina_MiSeq"; }
1422 if (instrumentModel == "UNSPECIFIED") { instrumentModel = "unspecified"; }
1424 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform. Valid instrument options are Illumina_Genome_Analyzer, Illumina_Genome_Analyzer_II, Illumina_Genome_Analyzer_IIx, Illumina_HiSeq_2000, Illumina_HiSeq_1000, Illumina_MiSeq or unspecified."); m->mothurOutEndLine(); abort = true;
1427 }else if (platform == "ION_TORRENT") { //instrument model options are Ion_Torrent_PGM-unspecified
1428 if ((instrumentModel == "ION_TORRENT_PGM") || (instrumentModel == "UNSPECIFIED")) { }
1429 else { isOkay = false; }
1432 if (instrumentModel == "ION_TORRENT_PGM") { instrumentModel = "Ion_Torrent_PGM"; }
1433 if (instrumentModel == "UNSPECIFIED") { instrumentModel = "unspecified"; }
1435 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform. Valid instrument options are Ion_Torrent_PGM or unspecified."); m->mothurOutEndLine(); abort = true;
1437 }else if (platform == "PACBIO_SMRT") { //instrument model options are PacBio_RS-unspecified
1438 if ((instrumentModel == "PACBIO_RS") || (instrumentModel == "UNSPECIFIED")) { }
1439 else { isOkay = false; }
1442 if (instrumentModel == "PACBIO_RS") { instrumentModel = "PacBio_RS"; }
1443 if (instrumentModel == "UNSPECIFIED") { instrumentModel = "unspecified"; }
1445 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform. Valid instrument options are PacBio_RS or unspecified."); m->mothurOutEndLine(); abort = true;
1450 catch(exception& e) {
1451 m->errorOut(e, "SRACommand", "checkCasesInstrumentModels");
1455 //**********************************************************************************************************************
1456 //AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq,OTHER
1457 bool SRACommand::checkCasesLibStrategy(string& libStrategy){
1459 string original = libStrategy;
1462 //remove users possible case errors
1463 for (int i = 0; i < libStrategy.size(); i++) { libStrategy[i] = toupper(libStrategy[i]); }
1465 if ((libStrategy == "AMPLICON") || (libStrategy == "WGA") || (libStrategy == "WGS") || (libStrategy == "WGX") || (libStrategy == "RNA-SEQ") || (libStrategy == "MIRNA-SEQ") || (libStrategy == "WCS") || (libStrategy == "CLONE") || (libStrategy == "POOLCLONE") || (libStrategy == "CLONEEND") || (libStrategy == "FINISHING") || (libStrategy == "CHIP-SEQ") || (libStrategy == "MNASE-SEQ") || (libStrategy == "DNASE-HYPERSENSITIVITY") || (libStrategy == "BISULFITE-SEQ") || (libStrategy == "TN-SEQ") || (libStrategy == "EST") || (libStrategy == "FL-CDNA") || (libStrategy == "CTS") || (libStrategy == "MRE-SEQ")|| (libStrategy == "MEDIP-SEQ") || (libStrategy == "MBD-SEQ") || (libStrategy == "OTHER")) { }
1466 else { isOkay = false; }
1469 if (libStrategy == "RNA-SEQ") { libStrategy = "RNA-Seq"; }
1470 if (libStrategy == "MIRNA-SEQ") { libStrategy = "miRNA-Seq"; }
1471 if (libStrategy == "CHIP-SEQ") { libStrategy = "ChIP-Seq"; }
1472 if (libStrategy == "MNASE-SEQ") { libStrategy = "MNase-Seq"; }
1473 if (libStrategy == "DNASE-HYPERSENSITIVITY") { libStrategy = "DNase-Hypersensitivity"; }
1474 if (libStrategy == "BISULFITE-SEQ") { libStrategy = "Bisulfite-Seq"; }
1475 if (libStrategy == "TN-SEQ") { libStrategy = "Tn-Seq"; }
1476 if (libStrategy == "FL-CDNA") { libStrategy = "FL-cDNA"; }
1477 if (libStrategy == "MRE-SEQ") { libStrategy = "MRE-Seq"; }
1478 if (libStrategy == "MEDIP-SEQ") { libStrategy = "MeDIP-Seq"; }
1480 m->mothurOut("[ERROR]: " + original + " is not a valid libstrategy option. Valid libstrategy options are AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq or OTHER."); m->mothurOutEndLine(); abort = true;
1485 catch(exception& e) {
1486 m->errorOut(e, "SRACommand", "checkCasesLibStrategy");
1491 //**********************************************************************************************************************
1492 //METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA,OTHER
1493 bool SRACommand::checkCasesLibSource(string& libSource){
1495 string original = libSource;
1498 //remove users possible case errors
1499 for (int i = 0; i < libSource.size(); i++) { libSource[i] = toupper(libSource[i]); }
1501 if ((libSource == "METAGENOMIC") || (libSource == "GENOMIC") || (libSource == "TRANSCRIPTOMIC") || (libSource == "METATRANSCRIPTOMIC") || (libSource == "SYNTHETIC") || (libSource == "VIRAL_RNA") || (libSource == "OTHER")) { }
1502 else { isOkay = false; }
1507 m->mothurOut("[ERROR]: " + original + " is not a valid libsource option. Valid libsource options are METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA or OTHER."); m->mothurOutEndLine(); abort = true;
1512 catch(exception& e) {
1513 m->errorOut(e, "SRACommand", "checkCasesLibStrategy");
1518 //**********************************************************************************************************************
1519 //PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other,unspecified
1520 bool SRACommand::checkCasesLibSelection(string& libSelection){
1522 string original = libSelection;
1525 //remove users possible case errors
1526 for (int i = 0; i < libSelection.size(); i++) { libSelection[i] = toupper(libSelection[i]); }
1528 if ((libSelection == "PCR") || (libSelection == "RANDOM") || (libSelection == "RANDOM_PCR") || (libSelection == "RT-PCR") || (libSelection == "HMPR") || (libSelection == "MF") || (libSelection == "CF-S") || (libSelection == "CF-H") || (libSelection == "CF-T") || (libSelection == "CF-M") || (libSelection == "MDA") || (libSelection == "MSLL") || (libSelection == "CDNA") || (libSelection == "CHIP") || (libSelection == "MNASE") || (libSelection == "DNASE") || (libSelection == "HYBRID_SELECTION") || (libSelection == "REDUCED_REPRESENTATION") || (libSelection == "RESTRICTION_DIGEST") || (libSelection == "5-METHYLCYTIDINE_ANTIBODY") || (libSelection == "MBD2_PROTEIN_METHYL-CPG_BINDING_DOMAIN") || (libSelection == "CAGE") || (libSelection == "RACE") || (libSelection == "SIZE_FRACTIONATION") || (libSelection == "PADLOCK_PROBES_CAPTURE_METHOD") || (libSelection == "OTHER") || (libSelection == "UNSPECIFIED")) { }
1529 else { isOkay = false; }
1532 if (libSelection == "CDNA") { libSelection = "cDNA"; }
1533 if (libSelection == "CHIP") { libSelection = "ChIP"; }
1534 if (libSelection == "MNASE") { libSelection = "MNase"; }
1535 if (libSelection == "DNASE") { libSelection = "DNAse"; }
1536 if (libSelection == "HYBRID_SELECTION") { libSelection = "Hybrid_Selection"; }
1537 if (libSelection == "REDUCED_REPRESENTATION") { libSelection = "Reduced_Representation"; }
1538 if (libSelection == "RESTRICTION_DIGEST") { libSelection = "Restriction_Digest"; }
1539 if (libSelection == "5-METHYLCYTIDINE_ANTIBODY") { libSelection = "5-methylcytidine_antibody"; }
1540 if (libSelection == "MBD2_PROTEIN_METHYL-CPG_BINDING_DOMAIN") { libSelection = "MBD2_protein_methyl-CpG_binding_domain"; }
1541 if (libSelection == "SIZE_FRACTIONATION") { libSelection = "size_fractionation"; }
1542 if (libSelection == "PADLOCK_PROBES_CAPTURE_METHOD") { libSelection = "Padlock_probes_capture_method"; }
1543 if (libSelection == "OTHER") { libSelection = "other"; }
1544 if (libSelection == "UNSPECIFIED") { libSelection = "unspecified"; }
1547 m->mothurOut("[ERROR]: " + original + " is not a valid libselection option. Valid libselection options are PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other or unspecified."); m->mothurOutEndLine(); abort = true;
1552 catch(exception& e) {
1553 m->errorOut(e, "SRACommand", "checkCasesLibSelection");
1557 //**********************************************************************************************************************
1558 //METAGENOME,GENOME_SEQUENCING,METAGENOMIC_ASSEMBLY,ASSEMBLY,TRANSCRIPTOME,PROTEOMIC,MAP,CLONE_ENDS,TARGETED_LOCI,RANDOM_SURVEY,EXOME,VARIATION,EPIGENOMICS,PHENOTYPE,GENOTYPE,OTHER
1559 bool SRACommand::checkCasesDataType(string& dataType){
1561 string original = dataType;
1564 //remove users possible case errors
1565 for (int i = 0; i < dataType.size(); i++) { dataType[i] = toupper(dataType[i]); }
1567 if ((dataType == "METAGENOME") || (dataType == "GENOME_SEQUENCING") || (dataType == "METAGENOMIC_ASSEMBLY") || (dataType == "ASSEMBLY") || (dataType == "TRANSCRIPTOME") || (dataType == "PROTEOMIC") || (dataType == "MAP") || (dataType == "CLONE_ENDS") || (dataType == "TARGETED_LOCI") || (dataType == "RANDOM_SURVEY") || (dataType == "EXOME") || (dataType == "VARIATION") || (dataType == "EPIGENOMICS") || (dataType == "PHENOTYPE") || (dataType == "GENOTYPE") || (dataType == "OTHER")) { }
1568 else { isOkay = false; }
1573 m->mothurOut("[ERROR]: " + original + " is not a valid datatype option. Valid datatype options are METAGENOME,GENOME_SEQUENCING,METAGENOMIC_ASSEMBLY,ASSEMBLY,TRANSCRIPTOME,PROTEOMIC,MAP,CLONE_ENDS,TARGETED_LOCI,RANDOM_SURVEY,EXOME,VARIATION,EPIGENOMICS,PHENOTYPE,GENOTYPE,OTHER."); m->mothurOutEndLine(); abort = true;
1578 catch(exception& e) {
1579 m->errorOut(e, "SRACommand", "checkCasesDataType");
1583 //**********************************************************************************************************************
1584 bool SRACommand::sanityCheckMiMarksGroups(){
1588 for (int i = 0; i < Groups.size(); i++) {
1589 if (m->control_pressed) { break; }
1591 map<string, map<string, string> >::iterator it = mimarks.find(Groups[i]);
1592 if (it == mimarks.end()) {
1594 m->mothurOut("[ERROR]: MIMarks file is missing group " + Groups[i] + ", please correct.\n");
1598 if (!isOkay) { m->control_pressed = true; }
1602 catch(exception& e) {
1603 m->errorOut(e, "SRACommand", "sanityCheckMiMarksGroups");
1608 //**********************************************************************************************************************