5 // Created by SarahsWork on 10/28/13.
6 // Copyright (c) 2013 Schloss Lab. All rights reserved.
9 #include "sracommand.h"
10 #include "sffinfocommand.h"
11 #include "parsefastaqcommand.h"
13 //**********************************************************************************************************************
// Declares every option the sra command accepts.
// One CommandParameter is built per option and appended to `parameters`; the
// option names are then copied into myArray. SRACommand(string option) checks
// the user's options against the list this function produces.
// NOTE(review): the meaning of the positional CommandParameter arguments is
// defined in the CommandParameter class, which is not visible here. From the
// calls below the first argument is the option name and the second its type;
// the "Multiple" and "Number" options carry their default in the fourth
// argument while the "String" options carry a value in the third - confirm
// against the class before relying on either slot.
14 vector<string> SRACommand::setParameters(){
// Input files: sff / fastq / file, the oligos file, the project file and the mimark file.
16 CommandParameter psff("sff", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(psff);
17 CommandParameter poligos("oligos", "InputTypes", "", "", "oligos", "none", "none","",false,false,true); parameters.push_back(poligos);
18 CommandParameter pfile("file", "InputTypes", "", "", "sffFastQFile-oligos", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfile);
19 CommandParameter pfastq("fastq", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfastq);
// The variable is called pcontact, but the option name users type is "project".
20 CommandParameter pcontact("project", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pcontact);
21 CommandParameter pmimark("mimark", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pmimark);
22 //choose only one multiple options
// The choices of a "Multiple" option are written as a single '-' separated string.
23 CommandParameter pplatform("platform", "Multiple", "_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT", "_LS454", "", "", "","",false,false); parameters.push_back(pplatform);
24 CommandParameter pinstrument("instrument", "Multiple", "454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified", "454_GS", "", "", "","",false,false); parameters.push_back(pinstrument);
// Library descriptors; the accepted values are listed in getHelpString().
25 CommandParameter plibstrategy("libstrategy", "String", "AMPLICON", "", "", "", "","",false,false); parameters.push_back(plibstrategy);
26 CommandParameter pdatatype("datatype", "String", "METAGENOME", "", "", "", "","",false,false); parameters.push_back(pdatatype);
27 CommandParameter plibsource("libsource", "String", "METAGENOMIC", "", "", "", "","",false,false); parameters.push_back(plibsource);
28 CommandParameter plibselection("libselection", "String", "PCR", "", "", "", "","",false,false); parameters.push_back(plibselection);
29 CommandParameter porientation("orientation", "Multiple", "forward-reverse", "forward", "", "", "","",false,false); parameters.push_back(porientation);
// Allowed mismatch counts for primer, barcode, linker, spacer and in total.
30 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
31 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);
32 CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
33 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
34 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
36 //every command must have inputdir and outputdir. This allows mothur users to redirect input and output files.
37 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
38 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
// Collect just the option names, in registration order.
40 vector<string> myArray;
41 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Exception path: report the failure tagged with this class and function name.
45 m->errorOut(e, "SRACommand", "setParameters");
49 //**********************************************************************************************************************
// Builds the help text for the sra command: one sentence per parameter,
// followed by an example invocation.
// The parameter names quoted in the text must match the names registered in
// setParameters(); the mimarks file option is registered as "mimark", and the
// example passes every file as an explicit key=value pair.
50 string SRACommand::getHelpString(){
52 string helpString = "";
53 helpString += "The sra command creates the necessary files for a NCBI submission. The xml file and individual sff or fastq files parsed from the original sff or fastq file.\n";
// Parameter list: names exactly as the user must type them.
54 helpString += "The sra command parameters are: sff, fastq, file, oligos, project, mimark, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, platform, orientation, libstrategy, datatype, libsource, libselection and instrument.\n";
55 helpString += "The sff parameter is used to provide the original sff file.\n";
56 helpString += "The fastq parameter is used to provide the original fastq file.\n";
57 helpString += "The project parameter is used to provide your project file.\n";
58 helpString += "The oligos parameter is used to provide an oligos file to parse your sff or fastq file by. It is required and must contain barcodes and primers, or you must provide a file option. \n";
59 helpString += "The mimark parameter is used to provide your mimarks file. You can create the template for this file using the get.mimarkspackage command.\n";
60 helpString += "The file parameter is used to provide a file containing a list of individual fastq or sff files or paired fastq files with a group assignment. File lines can be 2 or 3 columns. The 2 column files are sff file then oligos or fastqfile then oligos. You may have multiple lines in the file. The 3 column files are for paired read libraries. The format is groupName, forwardFastqFile reverseFastqFile.\n";
61 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
62 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
63 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
64 helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
65 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
// Controlled-vocabulary options: the listed choices are what ends up in the generated XML.
66 helpString += "The platform parameter is used to specify platform you are using choices are: _LS454,ILLUMINA,ION_TORRENT,PACBIO_SMRT. Default=_LS454. This is a controlled vocabulary section in the XML file that will be generated.\n";
67 helpString += "The orientation parameter is used to specify sequence orientation. Choices are: forward and reverse. Default=forward. This is a controlled vocabulary section in the XML file that will be generated.\n";
68 helpString += "The instrument parameter is used to specify instrument. Choices are 454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified. Default=454_GS. This is a controlled vocabulary section in the XML file that will be generated. \n";
69 helpString += "The libstrategy parameter is used to specify library strategy. Default=AMPLICON. Choices are AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq,OTHER. This is a controlled vocabulary section in the XML file that will be generated. \n";
70 helpString += "The libsource parameter is used to specify library source. Default=METAGENOMIC. Choices are METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA,OTHER. This is a controlled vocabulary section in the XML file that will be generated. \n";
71 helpString += "The libselection parameter is used to specify library selection. Default=PCR. Choices are PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other,unspecified. This is a controlled vocabulary section in the XML file that will be generated. \n";
72 helpString += "The datatype parameter is used to specify datatype. Default=METAGENOME. Choices are METAGENOME,GENOME_SEQUENCING,METAGENOMIC_ASSEMBLY,ASSEMBLY,TRANSCRIPTOME,PROTEOMIC,MAP,CLONE_ENDS,TARGETED_LOCI,RANDOM_SURVEY,EXOME,VARIATION,EPIGENOMICS,PHENOTYPE,GENOTYPE,OTHER. This is a controlled vocabulary section in the XML file that will be generated. \n";
// Example invocation; each file is given with its own parameter name.
73 helpString += "sra(sff=GHL4YHV01.sff, oligos=GHL4YHV01.oligos, project=test.project, mimark=MIMarksData.txt)\n";
// Exception path: report the failure tagged with this class and function name.
77 m->errorOut(e, "SRACommand", "getHelpString");
81 //**********************************************************************************************************************
// Selects the output file name pattern for an output type.
// @param type  output type tag; "xml" is the only type handled here.
// For "xml" the pattern is "[filename],xml". Any other type logs an [ERROR]
// message and sets m->control_pressed.
82 string SRACommand::getOutputPattern(string type) {
86 if (type == "xml") { pattern = "[filename],xml"; }
87 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
// Exception path: report the failure tagged with this class and function name.
92 m->errorOut(e, "SRACommand", "getOutputPattern");
96 //**********************************************************************************************************************
// Default constructor. Marks the command as aborted with calledHelp set, so
// execute() returns immediately, and registers an empty "xml" entry in
// outputTypes.
97 SRACommand::SRACommand(){
99 abort = true; calledHelp = true;
// Start the "xml" output type with no file names.
101 vector<string> tempOutNames;
102 outputTypes["xml"] = tempOutNames;
104 catch(exception& e) {
105 m->errorOut(e, "SRACommand", "SRACommand");
109 //**********************************************************************************************************************
// Parses and validates the option string for the sra command.
// @param option  "help", "citation", or the user's parameter string.
// "help" and "citation" print their text and mark the command as aborted.
// Otherwise every supplied option must be one of the names from
// setParameters(), file options without a path are prefixed with inputdir,
// each input file is validated, and the controlled-vocabulary and diffs
// options are read with their defaults. Any failure sets abort = true; the
// error text is printed either here or by the helper that detected it.
110 SRACommand::SRACommand(string option) {
112 abort = false; calledHelp = false;
113 libLayout = "single"; //controlled vocab
115 //allow user to run help
116 if(option == "help") { help(); abort = true; calledHelp = true; }
117 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
120 //valid parameters for this command
121 vector<string> myArray = setParameters();
123 OptionParser parser(option);
124 map<string,string> parameters = parser.getParameters();
126 ValidParameters validParameter;
127 map<string,string>::iterator it;
128 //check to make sure all parameters are valid for command
129 for (it = parameters.begin(); it != parameters.end(); it++) {
130 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
133 vector<string> tempOutNames;
134 outputTypes["xml"] = tempOutNames;
136 //if the user changes the input directory command factory will send this info to us in the output parameter
137 string inputDir = validParameter.validFile(parameters, "inputdir", false);
138 if (inputDir == "not found"){ inputDir = ""; }
// For each file option below: when the supplied name has no path component,
// prepend inputDir so the file is looked up in the redirected input directory.
142 it = parameters.find("sff");
143 //user has given an sff file
144 if(it != parameters.end()){
145 path = m->hasPath(it->second);
146 //if the user has not given a path then, add inputdir. else leave path alone.
147 if (path == "") { parameters["sff"] = inputDir + it->second; }
150 it = parameters.find("fastq");
151 //user has given a fastq file
152 if(it != parameters.end()){
153 path = m->hasPath(it->second);
154 //if the user has not given a path then, add inputdir. else leave path alone.
155 if (path == "") { parameters["fastq"] = inputDir + it->second; }
158 it = parameters.find("file");
159 //user has given a file listing the input files
160 if(it != parameters.end()){
161 path = m->hasPath(it->second);
162 //if the user has not given a path then, add inputdir. else leave path alone.
163 if (path == "") { parameters["file"] = inputDir + it->second; }
166 it = parameters.find("oligos");
167 //user has given an oligos file
168 if(it != parameters.end()){
169 path = m->hasPath(it->second);
170 //if the user has not given a path then, add inputdir. else leave path alone.
171 if (path == "") { parameters["oligos"] = inputDir + it->second; }
174 it = parameters.find("project");
175 //user has given a project file
176 if(it != parameters.end()){
177 path = m->hasPath(it->second);
178 //if the user has not given a path then, add inputdir. else leave path alone.
179 if (path == "") { parameters["project"] = inputDir + it->second; }
182 it = parameters.find("mimark");
183 //user has given a mimark file
184 if(it != parameters.end()){
185 path = m->hasPath(it->second);
186 //if the user has not given a path then, add inputdir. else leave path alone.
187 if (path == "") { parameters["mimark"] = inputDir + it->second; }
191 //check for parameters
// validFile yields "not found" when the option was not supplied and "not open"
// when the named file could not be opened; only the latter aborts for the
// optional inputs.
192 fastqfile = validParameter.validFile(parameters, "fastq", true);
193 if (fastqfile == "not open") { fastqfile = ""; abort = true; }
194 else if (fastqfile == "not found") { fastqfile = ""; }
196 sfffile = validParameter.validFile(parameters, "sff", true);
197 if (sfffile == "not open") { sfffile = ""; abort = true; }
198 else if (sfffile == "not found") { sfffile = ""; }
// "file" is validated exactly once, here; the checks further down reuse this value.
200 file = validParameter.validFile(parameters, "file", true);
201 if (file == "not open") { file = ""; abort = true; }
202 else if (file == "not found") { file = ""; }
204 oligosfile = validParameter.validFile(parameters, "oligos", true);
205 if (oligosfile == "not found") { oligosfile = ""; }
206 else if(oligosfile == "not open") { abort = true; }
207 else { m->setOligosFile(oligosfile); }
// The project and mimark files are mandatory: a missing option is an error.
209 contactfile = validParameter.validFile(parameters, "project", true);
210 if (contactfile == "not found") { contactfile = ""; m->mothurOut("[ERROR]: You must provide a project file before you can use the sra command."); m->mothurOutEndLine(); abort = true; }
211 else if(contactfile == "not open") { abort = true; }
213 mimarksfile = validParameter.validFile(parameters, "mimark", true);
214 if (mimarksfile == "not found") { mimarksfile = ""; m->mothurOut("[ERROR]: You must provide a mimark file before you can use the sra command. You can create a template for this file using the get.mimarkspackage command."); m->mothurOutEndLine(); abort = true; }
215 else if(mimarksfile == "not open") { abort = true; }
// Oligos must come either from the oligos option or from the file option.
221 if ((file == "") && (oligosfile == "")) {
222 m->mothurOut("[ERROR]: You must provide an oligos file or file with oligos files in them before you can use the sra command."); m->mothurOutEndLine(); abort = true;
// At least one source of sequence data is required.
225 if ((fastqfile == "") && (file == "") && (sfffile == "")) {
226 m->mothurOut("[ERROR]: You must provide a file, sff file or fastq file before you can use the sra command."); m->mothurOutEndLine(); abort = true;
229 //use only one Multiple type _LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
230 platform = validParameter.validFile(parameters, "platform", false); if (platform == "not found") { platform = "_LS454"; }
231 if (!checkCasesPlatforms(platform)) { abort = true; } //error message in function
233 if (!abort) { //don't check instrument model if platform is bad
234 //454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified
235 instrumentModel = validParameter.validFile(parameters, "instrument", false); if (instrumentModel == "not found") { instrumentModel = "454_GS"; }
236 if (!checkCasesInstrumentModels(instrumentModel)) { abort = true; } //error message in function
// Users type '_' in these values; each '_' is replaced by a space after the
// value has been checked, and the spaced form is what execute() writes out.
238 //turn _ to spaces mothur's work around
239 for (int i = 0; i < instrumentModel.length(); i++) { if (instrumentModel[i] == '_') { instrumentModel[i] = ' '; } }
241 libStrategy = validParameter.validFile(parameters, "libstrategy", false); if (libStrategy == "not found") { libStrategy = "AMPLICON"; }
242 if (!checkCasesLibStrategy(libStrategy)) { abort = true; } //error message in function
244 //turn _ to spaces mothur's work around
245 for (int i = 0; i < libStrategy.length(); i++) { if (libStrategy[i] == '_') { libStrategy[i] = ' '; } }
247 libSource = validParameter.validFile(parameters, "libsource", false); if (libSource == "not found") { libSource = "METAGENOMIC"; }
248 if (!checkCasesLibSource(libSource)) { abort = true; } //error message in function
250 //turn _ to spaces mothur's work around
251 for (int i = 0; i < libSource.length(); i++) { if (libSource[i] == '_') { libSource[i] = ' '; } }
253 libSelection = validParameter.validFile(parameters, "libselection", false); if (libSelection == "not found") { libSelection = "PCR"; }
254 if (!checkCasesLibSelection(libSelection)) { abort = true; } //error message in function
256 //turn _ to spaces mothur's work around
257 for (int i = 0; i < libSelection.length(); i++) { if (libSelection[i] == '_') { libSelection[i] = ' '; } }
259 dataType = validParameter.validFile(parameters, "datatype", false); if (dataType == "not found") { dataType = "METAGENOME"; }
260 if (!checkCasesDataType(dataType)) { abort = true; } //error message in function
262 //turn _ to spaces mothur's work around
263 for (int i = 0; i < dataType.length(); i++) { if (dataType[i] == '_') { dataType[i] = ' '; } }
265 orientation = validParameter.validFile(parameters, "orientation", false); if (orientation == "not found") { orientation = "forward"; }
267 if ((orientation == "forward") || (orientation == "reverse")) { }
268 else { m->mothurOut("[ERROR]: " + orientation + " is not a valid orientation option. Choices are: forward and reverse.\n"); m->mothurOutEndLine(); abort = true; }
// Allowed mismatch counts; each defaults to 0 when not supplied.
271 string temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found"){ temp = "0"; }
272 m->mothurConvert(temp, bdiffs);
274 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found"){ temp = "0"; }
275 m->mothurConvert(temp, pdiffs);
277 temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
278 m->mothurConvert(temp, ldiffs);
280 temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }
281 m->mothurConvert(temp, sdiffs);
// tdiffs defaults to the sum of the individual limits, both when it is not
// supplied and when it is explicitly given as 0.
283 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); }
284 m->mothurConvert(temp, tdiffs);
286 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }
291 catch(exception& e) {
292 m->errorOut(e, "SRACommand", "SRACommand");
296 //**********************************************************************************************************************
// Runs the sra command and writes the NCBI submission XML.
// Steps visible in this function:
//   1. Return early if the constructor flagged an abort (0 after help/citation, 2 otherwise).
//   2. Read the oligos file when one was given, then take the input from the
//      first of: the file option, the sff file, the fastq file.
//   3. Write <Submission>: the submitter <Description>, one BioProject action,
//      one BioSample action per group, and one SRA AddFiles action per file of
//      each group.
//   4. If m->control_pressed is set, remove the files in outputNames and
//      return 0; otherwise print the output file names.
// submissionName, centerName, centerType, email, firstName, lastName,
// projectName, projectTitle, description and website are the members that
// readContactFile() fills from the project file.
297 int SRACommand::execute(){
300 if (abort == true) { if (calledHelp) { return 0; } return 2; }
// "scrap" is added as a group whenever an oligos file is read.
304 if (oligosfile != "") { readOligos(); Groups.push_back("scrap"); }
306 if (m->control_pressed) { return 0; }
// Filled by the read/parse helpers below and indexed by group name further down.
309 map<string, vector<string> > filesBySample;
312 if (file != "") { readFile(filesBySample); }
313 else if (sfffile != "") { parseSffFile(filesBySample); }
314 else if (fastqfile != "") { parseFastqFile(filesBySample); }
316 for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { Groups.push_back(*it); }
318 sanityCheckMiMarksGroups();
320 //checks groups and files returned from parse - removes any groups that did not get reads assigned to them, orders files.
321 checkGroups(filesBySample);
// The xml file goes to outputDir when set, otherwise next to the input file.
324 string thisOutputDir = outputDir;
325 if (outputDir == "") { thisOutputDir += m->hasPath(inputfile); }
326 map<string, string> variables;
327 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(inputfile));
328 string outputFileName = getOutputFileName("xml", variables);
329 outputNames.push_back(outputFileName); outputTypes["xml"].push_back(outputFileName);
331 m->openOutputFile(outputFileName, out);
// ---- Submitter description ----
334 ////////////////////////////////////////////////////////
335 out << "<Submission>\n";
336 out << "\t<Description>\n";
337 out << "\t\t<Comment> New Submission. Generated by mothur version " + m->getVersion() + " </Comment> \n";
338 out << "\t\t<Submitter user_name=\"" + submissionName + "\"/>\n";
339 out << "\t\t<Organization type=\"" + centerType + "\">\n";
340 out << "\t\t<Name>" + centerName + "</Name>\n";
// email is an attribute of the Contact element, so it sits inside the opening tag.
341 out << "\t\t<Contact email=\"" + email + "\">\n";
342 out << "\t\t\t<Name>\n";
343 out << "\t\t\t\t<First>" + firstName + "</First>\n";
// The contact's last name (mandatory in readContactFile) belongs in <Last>.
344 out << "\t\t\t\t<Last>" + lastName + "</Last>\n";
345 out << "\t\t\t</Name>\n";
346 out << "\t\t</Contact>\n";
347 out << "\t\t</Organization>\n";
348 out << "\t</Description>\n";
349 ////////////////////////////////////////////////////////
// ---- BioProject action ----
352 ////////////////////////////////////////////////////////
353 out << "\t<Action>\n";
354 out << "\t\t<AddData target_db=\"BioProject\">\n";
355 out << "\t\t\t<Data content_type=\"XML\">\n";
356 out << "\t\t\t\t<XmlContent>\n";
357 out << "\t\t\t\t\t<Project schema_version=\"2.0\">\n";
358 out << "\t\t\t\t\t\t<ProjectID>\n";
359 out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + projectName + " </SPUID> \n";
360 out << "\t\t\t\t\t\t</ProjectID>\n";
361 out << "\t\t\t\t\t\t<Descriptor>\n";
362 out << "\t\t\t\t\t\t\t<Title>" + projectTitle + " </Title> \n";
363 out << "\t\t\t\t\t\t\t<Description><p>" + description + "</p></Description> \n";
365 out << "\t\t\t\t\t\t\t<ExternalLink label=\"Website name\">\n";
366 out << "\t\t\t\t\t\t\t\t<URL>" + website + "</URL>\n";
367 out << "\t\t\t\t\t\t\t</ExternalLink>\n";
369 out << "\t\t\t\t\t\t</Descriptor>\n";
370 out << "\t\t\t\t\t\t<ProjectType>\n";
371 out << "\t\t\t\t\t\t\t<ProjectTypeSubmission sample_scope=\"eEnvironment\">\n";
372 out << "\t\t\t\t\t\t\t\t<IntendedDataTypeSet>\n";
373 out << "\t\t\t\t\t\t\t\t\t<DataType>" + dataType + " </DataType> \n";
374 out << "\t\t\t\t\t\t\t\t</IntendedDataTypeSet>\n";
375 out << "\t\t\t\t\t\t\t</ProjectTypeSubmission>\n";
376 out << "\t\t\t\t\t\t</ProjectType>\n";
377 out << "\t\t\t\t\t</Project>\n";
378 out << "\t\t\t\t</XmlContent>\n";
379 out << "\t\t\t</Data>\n";
380 out << "\t\t\t<Identifier>\n";
381 out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + projectName + " </SPUID> \n";
382 out << "\t\t\t</Identifier>\n";
383 out << "\t\t</AddData>\n";
384 out << "\t</Action>\n";
385 ////////////////////////////////////////////////////////
// ---- One BioSample action per group ----
// The barcode is not part of the BioSample record, so it is not looked up in this loop.
388 ////////////////////////////////////////////////////////
389 for (int i = 0; i < Groups.size(); i++) {
393 if (m->control_pressed) { break; }
394 out << "\t<Action>\n";
395 out << "\t\t<AddData target_db=\"BioSample\">\n";
396 out << "\t\t\t<Data content_type=\"XML\">\n";
397 out << "\t\t\t\t<XmlContent>\n";
398 out << "\t\t\t\t\t<BioSample schema_version=\"2.0\">\n";
399 out << "\t\t\t\t\t\t<SampleId>\n";
400 out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + Groups[i] + " </SPUID> \n";
401 out << "\t\t\t\t\t\t</SampleId>\n";
402 out << "\t\t\t\t\t\t<Organism>\n";
// Default organism unless Group2Organism has an entry for this group.
403 string organismName = "metagenome";
404 map<string, string>::iterator itOrganism = Group2Organism.find(Groups[i]);
405 if (itOrganism != Group2Organism.end()) { organismName = itOrganism->second; } //user supplied acceptable organism, so use it.
406 out << "\t\t\t\t\t\t\t<OrganismName>" + organismName + " </OrganismName> \n";
407 out << "\t\t\t\t\t\t</Organism>\n";
408 out << "\t\t\t\t\t\t<Package>" + packageType + "</Package>\n";
409 out << "\t\t\t\t\t\t<Attributes>\n";
410 //add biosample required attributes
411 map<string, map<string, string> >:: iterator it = mimarks.find(Groups[i]);
412 if (it != mimarks.end()) {
413 map<string, string> categories = it->second;
414 for (map<string, string>:: iterator it2 = categories.begin(); it2 != categories.end(); it2++) {
415 if (m->control_pressed) { break; }
416 out << "\t\t\t\t\t\t\t<Attribute attribute_name=\"" + it2->first + "\">\"" + it2->second + "\"</Attribute>\n";
419 out << "\t\t\t\t\t\t</Attributes>\n";
420 out << "\t\t\t\t\t</BioSample>\n";
421 out << "\t\t\t\t</XmlContent>\n";
422 out << "\t\t\t</Data>\n";
423 out << "\t\t\t<Identifier>\n";
424 out << "\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + Groups[i] + " </SPUID>\n";
425 out << "\t\t\t</Identifier>\n";
426 out << "\t\t</AddData>\n";
427 out << "\t</Action>\n";
// ---- One SRA AddFiles action per file of each group ----
431 ////////////////////////////////////////////////////////
432 for (int i = 0; i < Groups.size(); i++) {
434 vector<string> thisGroupsFiles = filesBySample[Groups[i]];
// NOTE(review): operator[] followed by [0] assumes every remaining group has
// at least one barcode; confirm checkGroups() guarantees this (e.g. for "scrap").
435 string barcodeForThisSample = Group2Barcode[Groups[i]][0];
437 for (int j = 0; j < thisGroupsFiles.size(); j++) {
438 string libId = thisGroupsFiles[j] + "." + barcodeForThisSample;
440 if (m->control_pressed) { break; }
441 out << "\t<Action>\n";
442 out << "\t\t<AddFiles target_db=\"SRA\">\n";
// Paired layout: the entry holds two whitespace-separated file names, and the
// barcode and primer strings hold forward and reverse parts separated by '.'.
443 if (libLayout == "paired") { //adjust the libID because the thisGroupsFiles[j] contains two filenames
444 vector<string> pieces = m->splitWhiteSpace(thisGroupsFiles[j]);
// NOTE(review): unlike the single-file libId above, no "." separates the file name and barcode here - confirm intended.
445 libId = pieces[0] + barcodeForThisSample;
446 out << "\t\t\t<File file_path=\"" + pieces[0] + "\">\n";
447 out << "\t\t\t\t<DataType>generic-data</DataType> \n";
448 out << "\t\t\t</File>\n";
449 vector<string> thisBarcodes; m->splitAtChar(Group2Barcode[Groups[i]][0], thisBarcodes, '.');
450 string forwardBarcode = thisBarcodes[0];
451 string reverseBarcode = thisBarcodes[1];
452 vector<string> thisPrimers; m->splitAtChar(Group2Primer[Groups[i]][0], thisPrimers, '.');
453 string forwardPrimer = thisPrimers[0];
454 string reversePrimer = thisPrimers[1];
// Attributes of the forward read file.
456 out << "\t\t\t<Attribute name=\"title\">" + mimarks[Groups[i]]["title"] + "</Attribute>\n";
457 out << "\t\t\t<Attribute name=\"BarCode\">" + forwardBarcode + "</Attribute>\n";
458 out << "\t\t\t<Attribute name=\"primer\">" + forwardPrimer + "</Attribute>\n";
459 out << "\t\t\t<Attribute name=\"read_type\">forward</Attribute>\n";
460 out << "\t\t\t<Attribute name=\"library_name\">" + libId + "</Attribute>\n";
461 out << "\t\t\t<Attribute name=\"library_strategy\">" + libStrategy + "</Attribute>\n";
462 out << "\t\t\t<Attribute name=\"library_source\">" + libSource + "</Attribute>\n";
463 out << "\t\t\t<Attribute name=\"library_selection\">" + libSelection + "</Attribute>\n";
464 out << "\t\t\t<Attribute name=\"library_layout\">" + libLayout + "</Attribute>\n";
465 out << "\t\t\t<Attribute name=\"instrument_model\">" + instrumentModel + "</Attribute>\n";
466 out << "\t\t\t<Attribute name=\"library_construction_protocol\">" + mimarks[Groups[i]]["seq_methods"] + "</Attribute>\n";
// Attributes of the reverse read file.
468 out << "\t\t\t<File file_path=\"" + pieces[1] + "\">\n";
469 out << "\t\t\t\t<DataType>generic-data</DataType> \n";
470 out << "\t\t\t</File>\n";
471 out << "\t\t\t<Attribute name=\"title\">" + mimarks[Groups[i]]["title"] + "</Attribute>\n";
472 out << "\t\t\t<Attribute name=\"BarCode\">" + reverseBarcode + "</Attribute>\n";
473 out << "\t\t\t<Attribute name=\"primer\">" + reversePrimer + "</Attribute>\n";
474 out << "\t\t\t<Attribute name=\"read_type\">reverse</Attribute>\n";
475 out << "\t\t\t<Attribute name=\"library_name\">" + libId + "</Attribute>\n";
476 out << "\t\t\t<Attribute name=\"library_strategy\">" + libStrategy + "</Attribute>\n";
477 out << "\t\t\t<Attribute name=\"library_source\">" + libSource + "</Attribute>\n";
478 out << "\t\t\t<Attribute name=\"library_selection\">" + libSelection + "</Attribute>\n";
479 out << "\t\t\t<Attribute name=\"library_layout\">" + libLayout + "</Attribute>\n";
480 out << "\t\t\t<Attribute name=\"instrument_model\">" + instrumentModel + "</Attribute>\n";
481 out << "\t\t\t<Attribute name=\"library_construction_protocol\">" + mimarks[Groups[i]]["seq_methods"] + "</Attribute>\n";
// Single-file entry: read_type comes from the orientation option.
484 out << "\t\t\t<File file_path=\"" + thisGroupsFiles[j] + "\">\n";
485 out << "\t\t\t\t<DataType>generic-data</DataType> \n";
486 out << "\t\t\t</File>\n";
488 out << "\t\t\t<Attribute name=\"title\">" + mimarks[Groups[i]]["title"] + "</Attribute>\n";
489 out << "\t\t\t<Attribute name=\"BarCode\">" + Group2Barcode[Groups[i]][0] + "</Attribute>\n";
490 out << "\t\t\t<Attribute name=\"primer\">" + Group2Primer[Groups[i]][0] + "</Attribute>\n";
491 out << "\t\t\t<Attribute name=\"read_type\">" + orientation + "</Attribute>\n";
492 out << "\t\t\t<Attribute name=\"library_name\">" + libId + "</Attribute>\n";
493 out << "\t\t\t<Attribute name=\"library_strategy\">" + libStrategy + "</Attribute>\n";
494 out << "\t\t\t<Attribute name=\"library_source\">" + libSource + "</Attribute>\n";
495 out << "\t\t\t<Attribute name=\"library_selection\">" + libSelection + "</Attribute>\n";
496 out << "\t\t\t<Attribute name=\"library_layout\">" + libLayout + "</Attribute>\n";
497 out << "\t\t\t<Attribute name=\"instrument_model\">" + instrumentModel + "</Attribute>\n";
498 out << "\t\t\t<Attribute name=\"library_construction_protocol\">" + mimarks[Groups[i]]["seq_methods"] + "</Attribute>\n";
// Link the files to the BioProject and BioSample records written above (same SPUID values).
501 ///////////////////bioProject info
502 out << "\t\t\t<AttributeRefId name=\"BioProject\">\n";
503 out << "\t\t\t\t<RefId>\n";
504 out << "\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + projectName + " </SPUID> \n";
505 out << "\t\t\t\t</RefId>\n";
506 out << "\t\t\t</AttributeRefId>\n";
507 //////////////////bioSample info
508 out << "\t\t\t<AttributeRefId name=\"BioSample\">\n";
509 out << "\t\t\t\t<RefId>\n";
510 out << "\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + Groups[i] + " </SPUID>\n";
511 out << "\t\t\t\t</RefId>\n";
512 out << "\t\t\t</AttributeRefId>\n";
514 out << "\t\t\t<Identifier>\n";
515 if (libLayout == "paired") { //adjust the libID because the thisGroupsFiles[j] contains two filenames
516 vector<string> pieces = m->splitWhiteSpace(thisGroupsFiles[j]);
517 libId = pieces[0] + barcodeForThisSample;
519 out << "\t\t\t\t<LocalId>" + libId + " </LocalId>\n";
520 out << "\t\t\t</Identifier>\n";
521 out << "\t\t</AddFiles>\n";
522 out << "\t</Action>\n";
525 out << "</Submission>\n";
// Interrupted or failed run: delete whatever was written and stop.
528 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
530 //output files created by command
531 m->mothurOutEndLine();
532 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
533 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
534 m->mothurOutEndLine();
// Report failures under this function's own name so the error points at execute().
538 catch(exception& e) {
539 m->errorOut(e, "SRACommand", "execute");
543 //**********************************************************************************************************************
// Reads the project (contact) file named by contactfile into the submitter and
// project members that execute() writes into the XML.
// Each entry is a key token followed by a value; keys are matched
// case-insensitively. Missing mandatory fields, or an unrecognised center
// type, log an [ERROR] message and set m->control_pressed.
544 int SRACommand::readContactFile(){
// Reset every field so the mandatory-field checks below see "" for anything the file omits.
546 lastName = ""; firstName = ""; submissionName = ""; email = ""; centerName = ""; centerType = ""; description = ""; website = ""; projectName = "";
547 projectTitle = ""; grantAgency = ""; grantId = ""; grantTitle = "";
550 m->openInputFile(contactfile, in);
554 if (m->control_pressed) { break; }
// First whitespace-delimited token is the key; the value is obtained with
// m->getline (presumably the remainder of the line - confirm in the helper).
557 in >> key; m->gobble(in);
558 value = m->getline(in); m->gobble(in);
// Upper-case the key so the comparisons below ignore the case used in the file.
560 for (int i = 0; i < key.length(); i++) { key[i] = toupper(key[i]); }
562 if (key == "USERNAME") { submissionName = value; }
563 else if (key == "LAST") { lastName = value; }
564 else if (key == "FIRST") { firstName = value; }
565 else if (key == "EMAIL") { email = value; }
566 else if (key == "CENTER") { centerName = value; }
567 else if (key == "TYPE") {
// Center type is compared in lower case against the four accepted values.
569 for (int i = 0; i < centerType.length(); i++) { centerType[i] = tolower(centerType[i]); }
570 if ((centerType == "consortium") || (centerType == "center") || (centerType == "institute") || (centerType == "lab")) {}
571 else { m->mothurOut("[ERROR]: " + centerType + " is not a center type option. Valid center type options are consortium, center, institute and lab. This is a controlled vocabulary section in the XML file that will be generated."); m->mothurOutEndLine(); m->control_pressed = true; }
572 }else if (key == "DESCRIPTION") { description = value; }
573 else if (key == "WEBSITE") { website = value; }
574 else if (key == "PROJECTNAME") { projectName = value; }
575 else if (key == "PROJECTTITLE") { projectTitle = value; }
576 else if (key == "GRANTID") { grantId = value; }
577 else if (key == "GRANTTITLE") { grantTitle = value; }
578 else if (key == "GRANTAGENCY") { grantAgency = value; }
// Mandatory fields. website and the three grant fields are not checked here.
582 if (lastName == "") { m->mothurOut("[ERROR]: missing last name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
583 if (firstName == "") { m->mothurOut("[ERROR]: missing first name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
584 if (submissionName == "") { m->mothurOut("[ERROR]: missing submission name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
585 if (email == "") { m->mothurOut("[ERROR]: missing email from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
586 if (centerName == "") { m->mothurOut("[ERROR]: missing center name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
587 if (centerType == "") { m->mothurOut("[ERROR]: missing center type from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
588 if (description == "") { m->mothurOut("[ERROR]: missing description from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
589 if (projectTitle == "") { m->mothurOut("[ERROR]: missing project title from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
590 if (projectName == "") { m->mothurOut("[ERROR]: missing project name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
594 catch(exception& e) {
595 m->errorOut(e, "SRACommand", "readContactFile");
599 //**********************************************************************************************************************
600 //air, host_associated, human_associated, human_gut, human_oral, human_skin, human_vaginal, microbial, miscellaneous, plant_associated, sediment, soil, wastewater or water
601 //all packages require: *sample_name *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon
603 //host_associated, human_associated, human_gut, human_oral, human_skin, human_vaginal, plant_associated: *host
604 //microbial, sediment, soil: *depth *elev
// Reads the MIMarks file (mimarksfile): determines the package type from the '#' comment lines,
// validates the header row against the fields required for that package, then stores one
// attribute map per sample in mimarks (keyed by the first column) and the sample's organism in
// Group2Organism. A synthetic "scrap" sample is added at the end.
// Errors are reported with m->mothurOut and flagged by setting m->control_pressed.
// NOTE(review): several lines of this function (try, stream declaration, loop headers, closing
// braces, the final return) are not visible in this excerpt.
606 int SRACommand::readMIMarksFile(){
608 //acceptable organisms
609 vector<string> acceptableOrganisms;
610 bool organismError = false;
// first batch of accepted organism names
// NOTE(review): "activated carbon metagenome" and "activated sludge metagenome" are pushed twice in this list.
612 acceptableOrganisms.push_back("activated carbon metagenome"); acceptableOrganisms.push_back("activated sludge metagenome"); acceptableOrganisms.push_back("air metagenome"); acceptableOrganisms.push_back("anaerobic digester metagenome"); acceptableOrganisms.push_back("ant fungus garden metagenome"); acceptableOrganisms.push_back("aquatic metagenome"); acceptableOrganisms.push_back("activated carbon metagenome"); acceptableOrganisms.push_back("activated sludge metagenome"); acceptableOrganisms.push_back("beach sand metagenome"); acceptableOrganisms.push_back("biofilm metagenome"); acceptableOrganisms.push_back("biofilter metagenome"); acceptableOrganisms.push_back("biogas fermenter metagenome"); acceptableOrganisms.push_back("bioreactor metagenome"); acceptableOrganisms.push_back("bioreactor sludge metagenome"); acceptableOrganisms.push_back("clinical metagenome"); acceptableOrganisms.push_back("coal metagenome"); acceptableOrganisms.push_back("compost metagenome"); acceptableOrganisms.push_back("dust metagenome"); acceptableOrganisms.push_back("fermentation metagenome"); acceptableOrganisms.push_back("food fermentation metagenome"); acceptableOrganisms.push_back("food metagenome"); acceptableOrganisms.push_back("freshwater metagenome"); acceptableOrganisms.push_back("freshwater sediment metagenome"); acceptableOrganisms.push_back("groundwater metagenome"); acceptableOrganisms.push_back("halite metagenome"); acceptableOrganisms.push_back("hot springs metagenome"); acceptableOrganisms.push_back("hydrocarbon metagenome"); acceptableOrganisms.push_back("hydrothermal vent metagenome"); acceptableOrganisms.push_back("hypersaline lake metagenome"); acceptableOrganisms.push_back("ice metagenome"); acceptableOrganisms.push_back("indoor metagenome"); acceptableOrganisms.push_back("industrial waste metagenome"); acceptableOrganisms.push_back("mangrove metagenome"); acceptableOrganisms.push_back("marine metagenome"); acceptableOrganisms.push_back("marine sediment 
metagenome"); acceptableOrganisms.push_back("microbial mat metagenome"); acceptableOrganisms.push_back("mine drainage metagenome"); acceptableOrganisms.push_back("mixed culture metagenome"); acceptableOrganisms.push_back("oil production facility metagenome"); acceptableOrganisms.push_back("paper pulp metagenome"); acceptableOrganisms.push_back("permafrost metagenome"); acceptableOrganisms.push_back("plastisphere metagenome"); acceptableOrganisms.push_back("power plant metagenome"); acceptableOrganisms.push_back("retting rhizosphere metagenome"); acceptableOrganisms.push_back("rock metagenome"); acceptableOrganisms.push_back("salt lake metagenome"); acceptableOrganisms.push_back("saltern metagenome"); acceptableOrganisms.push_back("sediment metagenome"); acceptableOrganisms.push_back("snow metagenome"); acceptableOrganisms.push_back("soil metagenome"); acceptableOrganisms.push_back("stromatolite metagenome"); acceptableOrganisms.push_back("terrestrial metagenome"); acceptableOrganisms.push_back("tomb wall metagenome"); acceptableOrganisms.push_back("wastewater metagenome"); acceptableOrganisms.push_back("wetland metagenome"); acceptableOrganisms.push_back("whale fall metagenome");
// second batch of accepted organism names, ending with the generic "metagenome"
// NOTE(review): "sythetic metagenome" looks like a misspelling of "synthetic" - confirm against the NCBI list.
614 acceptableOrganisms.push_back("algae metagenome"); acceptableOrganisms.push_back("ant metagenome"); acceptableOrganisms.push_back("bat metagenome"); acceptableOrganisms.push_back("beetle metagenome"); acceptableOrganisms.push_back("bovine gut metagenome"); acceptableOrganisms.push_back("bovine metagenome"); acceptableOrganisms.push_back("chicken gut metagenome"); acceptableOrganisms.push_back("coral metagenome"); acceptableOrganisms.push_back("echinoderm metagenome"); acceptableOrganisms.push_back("endophyte metagenome"); acceptableOrganisms.push_back("epibiont metagenome"); acceptableOrganisms.push_back("fish metagenome"); acceptableOrganisms.push_back("fossil metagenome"); acceptableOrganisms.push_back("gill metagenome"); acceptableOrganisms.push_back("gut metagenome"); acceptableOrganisms.push_back("honeybee metagenome"); acceptableOrganisms.push_back("human gut metagenome"); acceptableOrganisms.push_back("human lung metagenome"); acceptableOrganisms.push_back("human metagenome"); acceptableOrganisms.push_back("human nasal/pharyngeal metagenome"); acceptableOrganisms.push_back("human oral metagenome"); acceptableOrganisms.push_back("human skin metagenome"); acceptableOrganisms.push_back("insect gut metagenome"); acceptableOrganisms.push_back("insect metagenome"); acceptableOrganisms.push_back("mollusc metagenome"); acceptableOrganisms.push_back("mosquito metagenome"); acceptableOrganisms.push_back("mouse gut metagenome"); acceptableOrganisms.push_back("mouse metagenome"); acceptableOrganisms.push_back("mouse skin metagenome"); acceptableOrganisms.push_back("nematode metagenome"); acceptableOrganisms.push_back("oral metagenome"); acceptableOrganisms.push_back("phyllosphere metagenome"); acceptableOrganisms.push_back("pig metagenome"); acceptableOrganisms.push_back("plant metagenome"); acceptableOrganisms.push_back("primate metagenome"); acceptableOrganisms.push_back("rat metagenome"); acceptableOrganisms.push_back("root metagenome"); 
acceptableOrganisms.push_back("sea squirt metagenome"); acceptableOrganisms.push_back("seed metagenome"); acceptableOrganisms.push_back("shoot metagenome"); acceptableOrganisms.push_back("skin metagenome"); acceptableOrganisms.push_back("snake metagenome"); acceptableOrganisms.push_back("sponge metagenome"); acceptableOrganisms.push_back("stomach metagenome"); acceptableOrganisms.push_back("symbiont metagenome"); acceptableOrganisms.push_back("termite gut metagenome"); acceptableOrganisms.push_back("termite metagenome"); acceptableOrganisms.push_back("upper respiratory tract metagenome"); acceptableOrganisms.push_back("urine metagenome"); acceptableOrganisms.push_back("viral metagenome"); acceptableOrganisms.push_back("wallaby gut metagenome"); acceptableOrganisms.push_back("wasp metagenome"); acceptableOrganisms.push_back("sythetic metagenome"); acceptableOrganisms.push_back("metagenome");
// fields every package must have; package-specific fields are appended further down
616 vector<string> requiredFieldsForPackage;
617 requiredFieldsForPackage.push_back("sample_name"); requiredFieldsForPackage.push_back("organism");
618 requiredFieldsForPackage.push_back("collection_date"); requiredFieldsForPackage.push_back("biome");
619 requiredFieldsForPackage.push_back("feature"); requiredFieldsForPackage.push_back("material");
620 requiredFieldsForPackage.push_back("geo_loc_name"); requiredFieldsForPackage.push_back("lat_lon");
621 requiredFieldsForPackage.push_back("seq_methods"); requiredFieldsForPackage.push_back("title");
// headers marked "**" in the file; filled in while the header row is parsed
622 vector<string> chooseAtLeastOneForPackage;
625 m->openInputFile(mimarksfile, in);
628 string temp; packageType = "";
631 if (m->control_pressed) { break; }
632 temp = m->getline(in); m->gobble(in);
634 if (m->debug) { m->mothurOut("[DEBUG]: " + temp + "\n"); }
// '#' lines are comments; the one containing "Environmental" carries the package type
636 if (temp[0] == '#') {
637 int pos = temp.find("Environmental");
638 if (pos != string::npos) {
// pos+14 skips the 13-letter word plus one following character; copy up to the next whitespace
639 for (int i = pos+14; i < temp.length(); i++) {
640 if (!isspace(temp[i])) { packageType += temp[i]; }
// pushes i past the end of the line so the loop stops at the first whitespace
641 else { i+= temp.length(); }
645 else{ break; } //hit headers line
// temp now holds the header row: tab separated column names
648 vector<string> headers; m->splitAtChar(temp, headers, '\t');
649 m->removeBlanks(headers);
650 //remove * from required's
651 for (int i = 0; i < headers.size(); i++) {
652 if (headers[i][0] == '*') { headers[i] = headers[i].substr(1); }
// a second leading '*' (i.e. the header started with "**") marks a choose-at-least-one field
653 if (headers[i][0] == '*') { headers[i] = headers[i].substr(1); chooseAtLeastOneForPackage.push_back(headers[i]); } //secondary condition
654 if (m->debug) { m->mothurOut("[DEBUG]: " + headers[i] + "\n"); }
657 if (m->debug) { m->mothurOut("[DEBUG]: packageType = '" + packageType + "'\n"); }
659 //check to make sure package has all its required parts
660 //MIMARKS.specimen.water.3.0
661 if (packageType == "MIMARKS.specimen.air.3.0") { requiredFieldsForPackage.push_back("altitude"); }
662 else if ((packageType == "MIMARKS.specimen.host-associated.3.0") || (packageType == "MIMARKS.specimen.human-associated.3.0") || (packageType == "MIMARKS.specimen.human-gut.3.0") || (packageType == "MIMARKS.specimen.human-oral.3.0") || (packageType == "MIMARKS.specimen.human-skin.3.0") || (packageType == "MIMARKS.specimen.human-vaginal.3.0") || (packageType == "MIMARKS.specimen.plant-associated.3.0")) { requiredFieldsForPackage.push_back("host"); }
// NOTE(review): "soil" (here) and "wastewater" (two lines below) are compared as bare words while every
// other package uses the "MIMARKS.specimen.<name>.3.0" form - confirm whether the full names were intended.
663 else if ((packageType == "MIMARKS.specimen.microbial.3.0") || (packageType == "MIMARKS.specimen.sediment.3.0") || (packageType == "soil")) { requiredFieldsForPackage.push_back("depth"); requiredFieldsForPackage.push_back("elev"); }
664 else if (packageType == "MIMARKS.specimen.water.3.0") { requiredFieldsForPackage.push_back("depth"); }
665 else if ((packageType == "MIMARKS.specimen.miscellaneous.3.0") || (packageType == "wastewater")) { }
667 m->mothurOut("[ERROR]: unknown package " + packageType + ", please correct.\n"); m->control_pressed = true; in.close(); return 0;
// every required field must appear among the headers; otherwise list them all and stop
670 if (!m->isSubset(headers, requiredFieldsForPackage)){
671 string requiredFields = "";
672 for (int i = 0; i < requiredFieldsForPackage.size()-1; i++) { requiredFields += requiredFieldsForPackage[i] + ", "; } requiredFields += requiredFieldsForPackage[requiredFieldsForPackage.size()-1];
673 m->mothurOut("[ERROR]: missing required fields for package, please correct. Required fields are " + requiredFields + ".\n"); m->control_pressed = true; in.close(); return 0;
676 if (m->debug) { m->mothurOut("[DEBUG]: chooseAtLeastOneForPackage.size() = " + toString(chooseAtLeastOneForPackage.size()) + "\n"); }
678 if (!m->inUsersGroups(chooseAtLeastOneForPackage, headers)){ //returns true if any of the choose at least ones are in headers
679 string requiredFields = "";
// NOTE(review): this loop also writes each name straight to cout, bypassing m->mothurOut.
// NOTE(review): size()-1 is unsigned arithmetic, and the "< 1" test on the next line indexes element
// size()-1 only when the vector is empty; ">= 1" may have been intended - confirm.
680 for (int i = 0; i < chooseAtLeastOneForPackage.size()-1; i++) { requiredFields += chooseAtLeastOneForPackage[i] + ", "; cout << chooseAtLeastOneForPackage[i] << endl; }
681 if (chooseAtLeastOneForPackage.size() < 1) { requiredFields += chooseAtLeastOneForPackage[chooseAtLeastOneForPackage.size()-1]; }
682 m->mothurOut("[ERROR]: missing a choose at least one fields for the package, please correct. These are marked with '**'. Required fields are " + requiredFields + ".\n"); m->control_pressed = true; in.close(); return 0;
// allNA[column] stays true until some sample supplies a value other than "NA" (first column excluded)
685 map<string, bool> allNA; for (int i = 1; i < headers.size(); i++) { allNA[headers[i]] = true; }
688 if (m->control_pressed) { break; }
// one sample per line: tab separated values in header order
690 temp = m->getline(in); m->gobble(in);
692 if (m->debug) { m->mothurOut("[DEBUG]: " + temp + "\n"); }
694 string original = temp;
695 vector<string> linePieces; m->splitAtChar(temp, linePieces, '\t');
696 m->removeBlanks(linePieces);
// NOTE(review): a column-count mismatch sets control_pressed, but the statements visible below still
// index linePieces by header position - confirm nothing reads past the end of a short line.
698 if (linePieces.size() != headers.size()) { m->mothurOut("[ERROR]: line: " + original + " contains " + toString(linePieces.size()) + " columns, but you have " + toString(headers.size()) + " column headers, please correct.\n"); m->control_pressed = true; }
// the first column is the sample name, used as the key into mimarks
700 map<string, map<string, string> >:: iterator it = mimarks.find(linePieces[0]);
702 if (it == mimarks.end()) {
703 map<string, string> categories;
704 //start after *sample_name
705 for (int i = 1; i < headers.size(); i++) {
706 categories[headers[i]] = linePieces[i];
707 //check the users inputs for appropriate organisms
708 if (headers[i] == "organism") {
709 if (!m->inUsersGroups(linePieces[i], acceptableOrganisms)) { //not an acceptable organism
710 organismError = true;
711 m->mothurOut("[WARNING]: " + linePieces[i]+ " is not an acceptable organism, changing to acceptable 'metagenome'. NCBI will allow you to modify the organism after submission.\n"); linePieces[i] = "metagenome"; categories[headers[i]] = linePieces[i];
713 Group2Organism[linePieces[0]] = linePieces[i];
715 if (linePieces[i] != "NA") { allNA[headers[i]] = false; }
718 //does this sample already match an existing sample?
719 bool isOkaySample = true;
720 for (map<string, map<string, string> >:: iterator it2 = mimarks.begin(); it2 != mimarks.end(); it2++) {
721 if (m->control_pressed) { break; }
// allSame is declared on a line not visible in this excerpt; any differing attribute clears it
723 for (int i = 1; i < headers.size(); i++) {
724 if ((it2->second)[headers[i]] != categories[headers[i]]) { allSame = false; }
726 if (allSame) { m->mothurOut("[ERROR]: " + linePieces[0]+ " is a duplicate sample to " + it2->first + ". It has all the same attributes in the MIMarks file. Samples must have distinguishing features to be uploaded to the NCBI library, please correct.\n"); m->control_pressed = true; isOkaySample = false; }
728 if (isOkaySample) { mimarks[linePieces[0]] = categories; }
// sample name was already present in mimarks
730 m->mothurOut("[ERROR]: " + linePieces[0]+ " is a duplicate sampleName. Sample names must be unique, please correct.\n"); m->control_pressed = true;
736 //add in values for "scrap" group
737 map<string, string> categories;
738 //start after *sample_name
739 for (int i = 1; i < headers.size(); i++) {
// every attribute defaults to "NA"; organism, seq_methods and title get fixed placeholder text
740 categories[headers[i]] = "NA";
741 if (headers[i] == "organism") { categories[headers[i]] = "metagenome"; }
742 if (headers[i] == "seq_methods") { categories[headers[i]] = "these sequences were scrapped"; }
743 if (headers[i] == "title") { categories[headers[i]] = "these sequences were scrapped"; }
745 mimarks["scrap"] = categories;
746 Group2Organism["scrap"] = "metagenome";
// prints the full list of acceptable organisms
// NOTE(review): presumably guarded by organismError on a line not shown here - confirm.
749 string organismTypes = "";
750 for (int i = 0; i < acceptableOrganisms.size()-1; i++) { organismTypes += acceptableOrganisms[i] + ", "; }
751 organismTypes += acceptableOrganisms[acceptableOrganisms.size()-1];
752 m->mothurOut("\n[WARNING]: The acceptable organism choices are: " + organismTypes + ".\n\n\n");
// any std::exception is reported with this class and function name
757 catch(exception& e) {
758 m->errorOut(e, "SRACommand", "readMIMarksFile");
763 //**********************************************************************************************************************
764 // going to have to rework this to allow for other options --
774 fastqfile1 oligosfile1
775 fastqfile2 oligosfile2
780 fastqfile fastqfile group
781 fastqfile fastqfile group
782 fastqfile fastqfile group
// Reads the "file" input: each line is split on whitespace into 2 or 3 columns.
//   2 columns: first file name, second file name; a first name containing ".sff" takes the sff
//              branch, anything else the fastq branch, with the second name used as the oligos file.
//   3 columns: group name, forward file, reverse file (libLayout is set to "paired").
// Each file name is tried as given, then under the default path, then under the output directory.
// Paired entries are appended to files[group] as "forward reverse".
// @param files  map from group name to file entries; filled in by this function and by the parse helpers.
// NOTE(review): several lines of this function (try, stream declarations, loop header, closing
// braces, some calls in the 2-column branch, the final return) are not visible in this excerpt.
787 int SRACommand::readFile(map<string, vector<string> >& files){
789 //vector<string> theseFiles;
794 m->openInputFile(file, in);
798 if (m->control_pressed) { return 0; }
800 string line = m->getline(in); m->gobble(in);
801 vector<string> pieces = m->splitWhiteSpace(line);
804 string thisFileName1, thisFileName2; thisFileName1 = ""; thisFileName2 = "";
805 if (pieces.size() == 2) {
806 thisFileName1 = pieces[0];
807 thisFileName2 = pieces[1];
808 }else if (pieces.size() == 3) {
809 thisFileName1 = pieces[1];
810 thisFileName2 = pieces[2];
// NOTE(review): group is declared inside this branch but is used again after it (debug output and
// files.find(group) below). If an outer group is declared on a line not shown, this declaration
// shadows it and the outer variable never receives pieces[0] - confirm.
811 string group = pieces[0];
812 libLayout = "paired";
814 m->mothurOut("[ERROR]: file lines can be 2 or 3 columns. The 2 column files are sff file then oligos or fastqfile then oligos. You may have multiple lines in the file. The 3 column files are for paired read libraries. The format is groupName, forwardFastqFile reverseFastqFile. \n"); m->control_pressed = true;
817 if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", thisFileName1 = " + thisFileName1 + ", thisFileName2 = " + thisFileName2 + ".\n"); }
819 //check to make sure both are able to be opened
// a result of 1 from openInputFile is treated as "could not open" throughout this function
821 int openForward = m->openInputFile(thisFileName1, in2, "noerror");
823 //if you can't open it, try default location
824 if (openForward == 1) {
825 if (m->getDefaultPath() != "") { //default path is set
826 string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName1);
827 m->mothurOut("Unable to open " + thisFileName1 + ". Trying default " + tryPath); m->mothurOutEndLine();
// NOTE(review): the retries open in3 / in4, while the close after the checks is on in2 - confirm
// which stream is meant to be closed.
829 openForward = m->openInputFile(tryPath, in3, "noerror");
831 thisFileName1 = tryPath;
835 //if you can't open it, try output location
836 if (openForward == 1) {
837 if (m->getOutputDir() != "") { //output directory is set
838 string tryPath = m->getOutputDir() + m->getSimpleName(thisFileName1);
839 m->mothurOut("Unable to open " + thisFileName1 + ". Trying output directory " + tryPath); m->mothurOutEndLine();
841 openForward = m->openInputFile(tryPath, in4, "noerror");
842 thisFileName1 = tryPath;
847 if (openForward == 1) { //can't find it
848 m->mothurOut("[WARNING]: can't find " + thisFileName1 + ", ignoring.\n");
849 }else{ in2.close(); }
// same three-step search for the second file
854 openReverse = m->openInputFile(thisFileName2, in3, "noerror");
856 //if you can't open it, try default location
857 if (openReverse == 1) {
858 if (m->getDefaultPath() != "") { //default path is set
859 string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName2);
860 m->mothurOut("Unable to open " + thisFileName2 + ". Trying default " + tryPath); m->mothurOutEndLine();
862 openReverse = m->openInputFile(tryPath, in3, "noerror");
864 thisFileName2 = tryPath;
868 //if you can't open it, try output location
869 if (openReverse == 1) {
870 if (m->getOutputDir() != "") { //output directory is set
871 string tryPath = m->getOutputDir() + m->getSimpleName(thisFileName2);
872 m->mothurOut("Unable to open " + thisFileName2 + ". Trying output directory " + tryPath); m->mothurOutEndLine();
874 openReverse = m->openInputFile(tryPath, in4, "noerror");
875 thisFileName2 = tryPath;
880 if (openReverse == 1) { //can't find it
881 m->mothurOut("[WARNING]: can't find " + thisFileName2 + ", ignoring pair.\n");
882 }else{ in3.close(); }
885 if ((pieces.size() == 2) && (openForward != 1) && (openReverse != 1)) { //good pair and sff or fastq and oligos
// ".sff" anywhere in the first file name selects the sff branch
887 int pos = thisFileName1.find(".sff");
888 if (pos != string::npos) {//these files are sff files
890 sfffile = thisFileName1; oligosfile = thisFileName2;
// NOTE(review): the calls announced by these debug messages (presumably readOligos and
// parseSffFile) are on lines not shown here.
891 if (m->debug) { m->mothurOut("[DEBUG]: about to read oligos\n"); }
893 if (m->debug) { m->mothurOut("[DEBUG]: about to parse\n"); }
895 if (m->debug) { m->mothurOut("[DEBUG]: done parsing " + sfffile + "\n"); }
// otherwise the first file is treated as a fastq file
898 fastqfile = thisFileName1; oligosfile = thisFileName2;
899 if (m->debug) { m->mothurOut("[DEBUG]: about to read oligos\n"); }
901 if (m->debug) { m->mothurOut("[DEBUG]: about to parse\n"); }
902 parseFastqFile(files);
903 if (m->debug) { m->mothurOut("[DEBUG]: done parsing " + fastqfile + "\n"); }
906 }else if((pieces.size() == 3) && (openForward != 1) && (openReverse != 1)) { //good pair and paired read
// append "forward reverse" to this group's entry, creating the entry on first use
907 map<string, vector<string> >::iterator it = files.find(group);
908 if (it == files.end()) {
909 vector<string> temp; temp.push_back(thisFileName1 + " " + thisFileName2); files[group] = temp;
911 files[group].push_back(thisFileName1 + " " + thisFileName2);
// any std::exception is reported with this class and function name
921 catch(exception& e) {
922 m->errorOut(e, "SRACommand", "readFile");
926 //**********************************************************************************************************************
// Splits sfffile by sample by running SffInfoCommand with the oligos file and any non-zero
// pdiffs/bdiffs/ldiffs/sdiffs/tdiffs settings, then hands the resulting "sff" output files to
// mapGroupToFile. Sets libLayout to "single". If the command produced no "sff" outputs,
// m->control_pressed is set.
// @param files  map from group name to file names, passed through to mapGroupToFile.
// NOTE(review): some lines of this function (try, closing braces, the return) are not visible in
// this excerpt.
927 int SRACommand::parseSffFile(map<string, vector<string> >& files){
929 vector<string> theseFiles;
931 libLayout = "single"; //controlled vocab
934 //run sffinfo to parse sff file into individual sampled sff files
935 string commandString = "sff=" + sfffile;
937 commandString += ", oligos=" + oligosfile;
938 //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs
// only non-zero values are added to the command string
939 if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); }
940 if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); }
941 if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
942 if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
943 if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
945 m->mothurOutEndLine();
946 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
947 m->mothurOut("Running command: sffinfo(" + commandString + ")"); m->mothurOutEndLine();
// mothurCalling is switched on for the duration of the nested command and off again afterwards
948 m->mothurCalling = true;
950 Command* sffinfoCommand = new SffInfoCommand(commandString);
951 sffinfoCommand->execute();
// collect the per-sample sff files reported under the "sff" output type
953 map<string, vector<string> > filenames = sffinfoCommand->getOutputFiles();
954 map<string, vector<string> >::iterator it = filenames.find("sff");
955 if (it != filenames.end()) { theseFiles = it->second; }
956 else { m->control_pressed = true; } // error in sffinfo
958 delete sffinfoCommand;
959 m->mothurCalling = false;
960 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
962 mapGroupToFile(files, theseFiles);
// NOTE(review): the handler reports "readFile" as the function name although this is
// parseSffFile - confirm whether that is intended.
966 catch(exception& e) {
967 m->errorOut(e, "SRACommand", "readFile");
972 //**********************************************************************************************************************
// Splits fastqfile by sample by running ParseFastaQCommand (fastq.info) with the oligos file and
// any non-zero pdiffs/bdiffs/ldiffs/sdiffs/tdiffs settings, then hands the resulting "fastq"
// output files to mapGroupToFile. Sets inputfile to fastqfile and libLayout to "single". If the
// command produced no "fastq" outputs, m->control_pressed is set.
// @param files  map from group name to file names, passed through to mapGroupToFile.
// NOTE(review): some lines of this function (try, closing braces, the return) are not visible in
// this excerpt.
973 int SRACommand::parseFastqFile(map<string, vector<string> >& files){
975 vector<string> theseFiles;
976 inputfile = fastqfile;
977 libLayout = "single"; //controlled vocab
979 //run fastq.info to parse the fastq file into individual per-sample fastq files
980 string commandString = "fastq=" + fastqfile;
982 commandString += ", oligos=" + oligosfile;
983 //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs
// only non-zero values are added to the command string
984 if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); }
985 if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); }
986 if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
987 if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
988 if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
990 m->mothurOutEndLine();
991 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
992 m->mothurOut("Running command: fastq.info(" + commandString + ")"); m->mothurOutEndLine();
// mothurCalling is switched on for the duration of the nested command and off again afterwards
993 m->mothurCalling = true;
995 Command* fastqinfoCommand = new ParseFastaQCommand(commandString);
996 fastqinfoCommand->execute();
// collect the per-sample fastq files reported under the "fastq" output type
998 map<string, vector<string> > filenames = fastqinfoCommand->getOutputFiles();
999 map<string, vector<string> >::iterator it = filenames.find("fastq");
1000 if (it != filenames.end()) { theseFiles = it->second; }
1001 else { m->control_pressed = true; } // error in fastq.info
1003 delete fastqinfoCommand;
1004 m->mothurCalling = false;
1005 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1007 mapGroupToFile(files, theseFiles);
// NOTE(review): the handler reports "readFile" as the function name although this is
// parseFastqFile - confirm whether that is intended.
1011 catch(exception& e) {
1012 m->errorOut(e, "SRACommand", "readFile");
1016 //***************************************************************************************************************
1017 //maps group to file
// For each name in Groups, looks through theseFiles for file names that contain the group name
// immediately followed by '.', and, when exactly one candidate was recorded in matches, appends
// that file to files[group] (creating the entry on first use).
// @param files       map from group name to file names; extended in place.
// @param theseFiles  candidate file names (taken by value).
// NOTE(review): the declaration of matches, the statement that records a match, the closing
// braces and the return are on lines not visible in this excerpt.
1018 int SRACommand::mapGroupToFile(map<string, vector<string> >& files, vector<string> theseFiles){
1021 for (int i = 0; i < Groups.size(); i++) {
1024 for (int j = 0; j < theseFiles.size(); j++) {
1025 int pos = theseFiles[j].find(Groups[i]);
1026 if (pos != string::npos) { //you have a potential match, make sure you dont have a case of partial name
// the character right after the group name must be '.', which rejects longer names sharing a prefix
1027 if (theseFiles[j][pos+Groups[i].length()] == '.') { //final.soil.sff vs final.soil2.sff both would match soil.
// matches holds positions into theseFiles (it is dereferenced as an index below)
1033 if(matches.size() == 1) {
1034 map<string, vector<string> >::iterator it = files.find(Groups[i]);
1035 if (it == files.end()) {
1036 vector<string> temp; temp.push_back(theseFiles[*matches.begin()]); files[Groups[i]] = temp;
1038 files[Groups[i]].push_back(theseFiles[*matches.begin()]);
// NOTE(review): the handler reports "checkGroups" as the function name although this is
// mapGroupToFile - confirm whether that is intended.
1044 catch(exception& e) {
1045 m->errorOut(e, "SRACommand", "checkGroups");
1050 //***************************************************************************************************************
1051 //checks groups and files returned from parse - removes any groups that did not get reads assigned to them, orders files.
// Collects into newGroups every name in Groups that has an entry in files; names without an entry
// are left out.
// @param files  map from group name to file names, only looked up in the lines visible here.
// NOTE(review): what is done with newGroups afterwards, the closing braces and the return are on
// lines not visible in this excerpt.
1052 int SRACommand::checkGroups(map<string, vector<string> >& files){
1054 vector<string> newGroups;
1055 for (int i = 0; i < Groups.size(); i++) {
1057 map<string, vector<string> >::iterator it = files.find(Groups[i]);
1058 //no files for this group, remove it
1059 if (it == files.end()) { }
1060 else { newGroups.push_back(Groups[i]); }
// any std::exception is reported with this class and function name
1067 catch(exception& e) {
1068 m->errorOut(e, "SRACommand", "checkGroups");
1072 //***************************************************************************************************************
1073 int SRACommand::readOligos(){
1076 m->openInputFile(oligosfile, inOligos);
1078 string type, oligo, roligo, group;
1079 bool hasPrimer = false; bool hasPairedBarcodes = false; pairedOligos = false;
1080 map<int, oligosPair> pairedBarcodes;
1081 map<int, oligosPair> pairedPrimers;
1082 map<string, int> barcodes;
1083 map<string, int> primers;
1084 vector<string> linker;
1085 vector<string> spacer, revPrimer;
1086 int indexPrimer = 0;
1087 int indexBarcode = 0;
1088 int indexPairedPrimer = 0;
1089 int indexPairedBarcode = 0;
1090 set<string> uniquePrimers;
1091 set<string> uniqueBarcodes;
1093 while(!inOligos.eof()){
1097 if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
1100 while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
1101 m->gobble(inOligos);
1104 m->gobble(inOligos);
1105 //make type case insensitive
1106 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
1110 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
1112 for(int i=0;i<oligo.length();i++){
1113 oligo[i] = toupper(oligo[i]);
1114 if(oligo[i] == 'U') { oligo[i] = 'T'; }
1117 if(type == "FORWARD"){
1120 // get rest of line in case there is a primer name
1121 while (!inOligos.eof()) {
1122 char c = inOligos.get();
1123 if (c == 10 || c == 13 || c == -1){ break; }
1124 else if (c == 32 || c == 9){;} //space or tab
1125 else { group += c; }
1128 //check for repeat barcodes
1129 map<string, int>::iterator itPrime = primers.find(oligo);
1130 if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1132 if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); } }
1134 primers[oligo] = indexPrimer; indexPrimer++;
1135 primerNameVector.push_back(group);
1137 else if (type == "PRIMER"){
1138 m->gobble(inOligos);
1142 for(int i=0;i<roligo.length();i++){
1143 roligo[i] = toupper(roligo[i]);
1144 if(roligo[i] == 'U') { roligo[i] = 'T'; }
1146 roligo = reverseOligo(roligo);
1150 // get rest of line in case there is a primer name
1151 while (!inOligos.eof()) {
1152 char c = inOligos.get();
1153 if (c == 10 || c == 13 || c == -1){ break; }
1154 else if (c == 32 || c == 9){;} //space or tab
1155 else { group += c; }
1158 oligosPair newPrimer(oligo, roligo);
1160 if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
1162 //check for repeat barcodes
1163 string tempPair = oligo+roligo;
1164 if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); }
1165 else { uniquePrimers.insert(tempPair); }
1167 if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } }
1169 pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
1170 primerNameVector.push_back(group);
1173 else if(type == "REVERSE"){
1174 //Sequence oligoRC("reverse", oligo);
1175 //oligoRC.reverseComplement();
1176 string oligoRC = reverseOligo(oligo);
1177 revPrimer.push_back(oligoRC);
1179 else if(type == "BARCODE"){
1182 //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs
1183 //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info
1186 while (!inOligos.eof()) {
1187 char c = inOligos.get();
1188 if (c == 10 || c == 13 || c == -1){ break; }
1189 else if (c == 32 || c == 9){;} //space or tab
1193 //then this is illumina data with 4 columns
1195 hasPairedBarcodes = true;
1196 string reverseBarcode = group; //reverseOligo(group); //reverse barcode
1199 for(int i=0;i<reverseBarcode.length();i++){
1200 reverseBarcode[i] = toupper(reverseBarcode[i]);
1201 if(reverseBarcode[i] == 'U') { reverseBarcode[i] = 'T'; }
1204 reverseBarcode = reverseOligo(reverseBarcode);
1205 oligosPair newPair(oligo, reverseBarcode);
1207 if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
1208 //check for repeat barcodes
1209 string tempPair = oligo+reverseBarcode;
1210 if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); }
1211 else { uniqueBarcodes.insert(tempPair); }
1213 pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
1214 barcodeNameVector.push_back(group);
1216 //check for repeat barcodes
1217 map<string, int>::iterator itBar = barcodes.find(oligo);
1218 if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
1220 barcodes[oligo]=indexBarcode; indexBarcode++;
1221 barcodeNameVector.push_back(group);
1223 }else if(type == "LINKER"){
1224 linker.push_back(oligo);
1225 }else if(type == "SPACER"){
1226 spacer.push_back(oligo);
1228 else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1230 m->gobble(inOligos);
1234 if (hasPairedBarcodes || hasPrimer) {
1235 pairedOligos = true;
1236 if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine(); return 0; }
1240 //add in potential combos
1241 if(barcodeNameVector.size() == 0){
1242 barcodeNameVector.push_back("");
1245 if(primerNameVector.size() == 0){
1246 primerNameVector.push_back("");
1250 for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
1251 for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
1253 string primerName = primerNameVector[itPrimer->first];
1254 string barcodeName = barcodeNameVector[itBar->first];
1256 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1258 string comboGroupName = "";
1259 string fastqFileName = "";
1261 if(primerName == ""){
1262 comboGroupName = barcodeNameVector[itBar->first];
1265 if(barcodeName == ""){
1266 comboGroupName = primerNameVector[itPrimer->first];
1269 comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
1272 uniqueNames.insert(comboGroupName);
1274 map<string, vector<string> >::iterator itGroup2Barcode = Group2Barcode.find(comboGroupName);
1275 if (itGroup2Barcode == Group2Barcode.end()) {
1276 vector<string> tempBarcodes; tempBarcodes.push_back((itBar->second).forward+"."+(itBar->second).reverse);
1277 Group2Barcode[comboGroupName] = tempBarcodes;
1279 Group2Barcode[comboGroupName].push_back((itBar->second).forward+"."+(itBar->second).reverse);
1282 itGroup2Barcode = Group2Primer.find(comboGroupName);
1283 if (itGroup2Barcode == Group2Primer.end()) {
1284 vector<string> tempPrimers; tempPrimers.push_back((itPrimer->second).forward+"."+(itPrimer->second).reverse);
1285 Group2Primer[comboGroupName] = tempPrimers;
1287 Group2Primer[comboGroupName].push_back((itPrimer->second).forward+"."+(itPrimer->second).reverse);
1293 for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1294 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1296 string primerName = primerNameVector[itPrimer->second];
1297 string barcodeName = barcodeNameVector[itBar->second];
1299 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1301 string comboGroupName = "";
1302 string fastqFileName = "";
1304 if(primerName == ""){
1305 comboGroupName = barcodeNameVector[itBar->second];
1308 if(barcodeName == ""){
1309 comboGroupName = primerNameVector[itPrimer->second];
1312 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1315 uniqueNames.insert(comboGroupName);
1317 map<string, vector<string> >::iterator itGroup2Barcode = Group2Barcode.find(comboGroupName);
1318 if (itGroup2Barcode == Group2Barcode.end()) {
1319 vector<string> tempBarcodes; tempBarcodes.push_back(itBar->first);
1320 Group2Barcode[comboGroupName] = tempBarcodes;
1322 Group2Barcode[comboGroupName].push_back(itBar->first);
1325 itGroup2Barcode = Group2Primer.find(comboGroupName);
1326 if (itGroup2Barcode == Group2Primer.end()) {
1327 vector<string> tempPrimers; tempPrimers.push_back(itPrimer->first);
1328 Group2Primer[comboGroupName] = tempPrimers;
1330 Group2Primer[comboGroupName].push_back(itPrimer->first);
1338 if (m->debug) { int count = 0; for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } }
1344 catch(exception& e) {
1345 m->errorOut(e, "SRACommand", "readOligos");
1349 //********************************************************************/
1350 string SRACommand::reverseOligo(string oligo){
1352 string reverse = "";
1354 for(int i=oligo.length()-1;i>=0;i--){
1356 if(oligo[i] == 'A') { reverse += 'T'; }
1357 else if(oligo[i] == 'T'){ reverse += 'A'; }
1358 else if(oligo[i] == 'U'){ reverse += 'A'; }
1360 else if(oligo[i] == 'G'){ reverse += 'C'; }
1361 else if(oligo[i] == 'C'){ reverse += 'G'; }
1363 else if(oligo[i] == 'R'){ reverse += 'Y'; }
1364 else if(oligo[i] == 'Y'){ reverse += 'R'; }
1366 else if(oligo[i] == 'M'){ reverse += 'K'; }
1367 else if(oligo[i] == 'K'){ reverse += 'M'; }
1369 else if(oligo[i] == 'W'){ reverse += 'W'; }
1370 else if(oligo[i] == 'S'){ reverse += 'S'; }
1372 else if(oligo[i] == 'B'){ reverse += 'V'; }
1373 else if(oligo[i] == 'V'){ reverse += 'B'; }
1375 else if(oligo[i] == 'D'){ reverse += 'H'; }
1376 else if(oligo[i] == 'H'){ reverse += 'D'; }
1378 else { reverse += 'N'; }
1384 catch(exception& e) {
1385 m->errorOut(e, "SRACommand", "reverseOligo");
1389 //********************************************************************/
1390 //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
1391 bool SRACommand::checkCasesPlatforms(string& platform){
1393 string original = platform;
1396 //remove users possible case errors
1397 for (int i = 0; i < platform.size(); i++) { platform[i] = toupper(platform[i]); }
1399 //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
1401 if ((platform == "_LS454") || (platform == "ILLUMINA") || (platform == "ION_TORRENT") || (platform == "PACBIO_SMRT") || (platform == "454")) { }
1402 else { isOkay = false; }
1405 if (platform == "454") { platform = "_LS454"; }
1407 m->mothurOut("[ERROR]: " + original + " is not a valid platform option. Valid platform options are _LS454, ILLUMINA-ION, TORRENT or PACBIO_SMRT."); m->mothurOutEndLine(); abort = true;
1412 catch(exception& e) {
1413 m->errorOut(e, "SRACommand", "checkCasesPlatforms");
1417 //********************************************************************/
1418 //454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified
1419 bool SRACommand::checkCasesInstrumentModels(string& instrumentModel){
1421 string original = instrumentModel;
1424 //remove users possible case errors
1425 for (int i = 0; i < instrumentModel.size(); i++) { instrumentModel[i] = toupper(instrumentModel[i]); }
1427 //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
1428 if (platform == "_LS454") { //instrument model options are 454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-unspecified
1429 if ((instrumentModel == "454_GS") || (instrumentModel == "454_GS_20") || (instrumentModel == "454_GS_FLX") || (instrumentModel == "454_GS_FLX_TITANIUM") || (instrumentModel == "454_GS_JUNIOR") || (instrumentModel == "UNSPECIFIED")) { }
1430 else { isOkay = false; }
1432 if (instrumentModel == "454_GS_FLX_TITANIUM") { instrumentModel = "454_GS_FLX_Titanium"; }
1433 if (instrumentModel == "454_GS_JUNIOR") { instrumentModel = "454_GS_Junior"; }
1434 if (instrumentModel == "UNSPECIFIED") { instrumentModel = "unspecified"; }
1436 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform. Valid instrument options are 454_GS, 454_GS_20, 454_GS_FLX, 454_GS_FLX_Titanium, 454_GS_Junior or unspecified."); m->mothurOutEndLine(); abort = true;
1439 }else if (platform == "ILLUMINA") { //instrument model options are Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-unspecified
1440 if ((instrumentModel == "ILLUMINA_GENOME_ANALYZER") || (instrumentModel == "ILLUMINA_GENOME_ANALYZER_II") || (instrumentModel == "ILLUMINA_GENOME_ANALYZER_IIX") || (instrumentModel == "ILLUMINA_HISEQ_2000") || (instrumentModel == "ILLUMINA_HISEQ_1000") || (instrumentModel == "ILLUMINA_MISEQ") || (instrumentModel == "UNSPECIFIED")) { }
1441 else { isOkay = false; }
1444 if (instrumentModel == "ILLUMINA_GENOME_ANALYZER") { instrumentModel = "Illumina_Genome_Analyzer"; }
1445 if (instrumentModel == "ILLUMINA_GENOME_ANALYZER_II") { instrumentModel = "Illumina_Genome_Analyzer_II"; }
1446 if (instrumentModel == "ILLUMINA_GENOME_ANALYZER_IIX") { instrumentModel = "Illumina_Genome_Analyzer_IIx"; }
1447 if (instrumentModel == "ILLUMINA_HISEQ_2000") { instrumentModel = "Illumina_HiSeq_2000"; }
1448 if (instrumentModel == "ILLUMINA_HISEQ_1000") { instrumentModel = "Illumina_HiSeq_1000"; }
1449 if (instrumentModel == "ILLUMINA_MISEQ") { instrumentModel = "Illumina_MiSeq"; }
1450 if (instrumentModel == "UNSPECIFIED") { instrumentModel = "unspecified"; }
1452 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform. Valid instrument options are Illumina_Genome_Analyzer, Illumina_Genome_Analyzer_II, Illumina_Genome_Analyzer_IIx, Illumina_HiSeq_2000, Illumina_HiSeq_1000, Illumina_MiSeq or unspecified."); m->mothurOutEndLine(); abort = true;
1455 }else if (platform == "ION_TORRENT") { //instrument model options are Ion_Torrent_PGM-unspecified
1456 if ((instrumentModel == "ION_TORRENT_PGM") || (instrumentModel == "UNSPECIFIED")) { }
1457 else { isOkay = false; }
1460 if (instrumentModel == "ION_TORRENT_PGM") { instrumentModel = "Ion_Torrent_PGM"; }
1461 if (instrumentModel == "UNSPECIFIED") { instrumentModel = "unspecified"; }
1463 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform. Valid instrument options are Ion_Torrent_PGM or unspecified."); m->mothurOutEndLine(); abort = true;
1465 }else if (platform == "PACBIO_SMRT") { //instrument model options are PacBio_RS-unspecified
1466 if ((instrumentModel == "PACBIO_RS") || (instrumentModel == "UNSPECIFIED")) { }
1467 else { isOkay = false; }
1470 if (instrumentModel == "PACBIO_RS") { instrumentModel = "PacBio_RS"; }
1471 if (instrumentModel == "UNSPECIFIED") { instrumentModel = "unspecified"; }
1473 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform. Valid instrument options are PacBio_RS or unspecified."); m->mothurOutEndLine(); abort = true;
1478 catch(exception& e) {
1479 m->errorOut(e, "SRACommand", "checkCasesInstrumentModels");
1483 //**********************************************************************************************************************
1484 //AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq,OTHER
1485 bool SRACommand::checkCasesLibStrategy(string& libStrategy){
1487 string original = libStrategy;
1490 //remove users possible case errors
1491 for (int i = 0; i < libStrategy.size(); i++) { libStrategy[i] = toupper(libStrategy[i]); }
1493 if ((libStrategy == "AMPLICON") || (libStrategy == "WGA") || (libStrategy == "WGS") || (libStrategy == "WGX") || (libStrategy == "RNA-SEQ") || (libStrategy == "MIRNA-SEQ") || (libStrategy == "WCS") || (libStrategy == "CLONE") || (libStrategy == "POOLCLONE") || (libStrategy == "CLONEEND") || (libStrategy == "FINISHING") || (libStrategy == "CHIP-SEQ") || (libStrategy == "MNASE-SEQ") || (libStrategy == "DNASE-HYPERSENSITIVITY") || (libStrategy == "BISULFITE-SEQ") || (libStrategy == "TN-SEQ") || (libStrategy == "EST") || (libStrategy == "FL-CDNA") || (libStrategy == "CTS") || (libStrategy == "MRE-SEQ")|| (libStrategy == "MEDIP-SEQ") || (libStrategy == "MBD-SEQ") || (libStrategy == "OTHER")) { }
1494 else { isOkay = false; }
1497 if (libStrategy == "RNA-SEQ") { libStrategy = "RNA-Seq"; }
1498 if (libStrategy == "MIRNA-SEQ") { libStrategy = "miRNA-Seq"; }
1499 if (libStrategy == "CHIP-SEQ") { libStrategy = "ChIP-Seq"; }
1500 if (libStrategy == "MNASE-SEQ") { libStrategy = "MNase-Seq"; }
1501 if (libStrategy == "DNASE-HYPERSENSITIVITY") { libStrategy = "DNase-Hypersensitivity"; }
1502 if (libStrategy == "BISULFITE-SEQ") { libStrategy = "Bisulfite-Seq"; }
1503 if (libStrategy == "TN-SEQ") { libStrategy = "Tn-Seq"; }
1504 if (libStrategy == "FL-CDNA") { libStrategy = "FL-cDNA"; }
1505 if (libStrategy == "MRE-SEQ") { libStrategy = "MRE-Seq"; }
1506 if (libStrategy == "MEDIP-SEQ") { libStrategy = "MeDIP-Seq"; }
1508 m->mothurOut("[ERROR]: " + original + " is not a valid libstrategy option. Valid libstrategy options are AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq or OTHER."); m->mothurOutEndLine(); abort = true;
1513 catch(exception& e) {
1514 m->errorOut(e, "SRACommand", "checkCasesLibStrategy");
1519 //**********************************************************************************************************************
1520 //METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA,OTHER
1521 bool SRACommand::checkCasesLibSource(string& libSource){
1523 string original = libSource;
1526 //remove users possible case errors
1527 for (int i = 0; i < libSource.size(); i++) { libSource[i] = toupper(libSource[i]); }
1529 if ((libSource == "METAGENOMIC") || (libSource == "GENOMIC") || (libSource == "TRANSCRIPTOMIC") || (libSource == "METATRANSCRIPTOMIC") || (libSource == "SYNTHETIC") || (libSource == "VIRAL_RNA") || (libSource == "OTHER")) { }
1530 else { isOkay = false; }
1535 m->mothurOut("[ERROR]: " + original + " is not a valid libsource option. Valid libsource options are METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA or OTHER."); m->mothurOutEndLine(); abort = true;
1540 catch(exception& e) {
1541 m->errorOut(e, "SRACommand", "checkCasesLibStrategy");
1546 //**********************************************************************************************************************
1547 //PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other,unspecified
1548 bool SRACommand::checkCasesLibSelection(string& libSelection){
1550 string original = libSelection;
1553 //remove users possible case errors
1554 for (int i = 0; i < libSelection.size(); i++) { libSelection[i] = toupper(libSelection[i]); }
1556 if ((libSelection == "PCR") || (libSelection == "RANDOM") || (libSelection == "RANDOM_PCR") || (libSelection == "RT-PCR") || (libSelection == "HMPR") || (libSelection == "MF") || (libSelection == "CF-S") || (libSelection == "CF-H") || (libSelection == "CF-T") || (libSelection == "CF-M") || (libSelection == "MDA") || (libSelection == "MSLL") || (libSelection == "CDNA") || (libSelection == "CHIP") || (libSelection == "MNASE") || (libSelection == "DNASE") || (libSelection == "HYBRID_SELECTION") || (libSelection == "REDUCED_REPRESENTATION") || (libSelection == "RESTRICTION_DIGEST") || (libSelection == "5-METHYLCYTIDINE_ANTIBODY") || (libSelection == "MBD2_PROTEIN_METHYL-CPG_BINDING_DOMAIN") || (libSelection == "CAGE") || (libSelection == "RACE") || (libSelection == "SIZE_FRACTIONATION") || (libSelection == "PADLOCK_PROBES_CAPTURE_METHOD") || (libSelection == "OTHER") || (libSelection == "UNSPECIFIED")) { }
1557 else { isOkay = false; }
1560 if (libSelection == "CDNA") { libSelection = "cDNA"; }
1561 if (libSelection == "CHIP") { libSelection = "ChIP"; }
1562 if (libSelection == "MNASE") { libSelection = "MNase"; }
1563 if (libSelection == "DNASE") { libSelection = "DNAse"; }
1564 if (libSelection == "HYBRID_SELECTION") { libSelection = "Hybrid_Selection"; }
1565 if (libSelection == "REDUCED_REPRESENTATION") { libSelection = "Reduced_Representation"; }
1566 if (libSelection == "RESTRICTION_DIGEST") { libSelection = "Restriction_Digest"; }
1567 if (libSelection == "5-METHYLCYTIDINE_ANTIBODY") { libSelection = "5-methylcytidine_antibody"; }
1568 if (libSelection == "MBD2_PROTEIN_METHYL-CPG_BINDING_DOMAIN") { libSelection = "MBD2_protein_methyl-CpG_binding_domain"; }
1569 if (libSelection == "SIZE_FRACTIONATION") { libSelection = "size_fractionation"; }
1570 if (libSelection == "PADLOCK_PROBES_CAPTURE_METHOD") { libSelection = "Padlock_probes_capture_method"; }
1571 if (libSelection == "OTHER") { libSelection = "other"; }
1572 if (libSelection == "UNSPECIFIED") { libSelection = "unspecified"; }
1575 m->mothurOut("[ERROR]: " + original + " is not a valid libselection option. Valid libselection options are PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other or unspecified."); m->mothurOutEndLine(); abort = true;
1580 catch(exception& e) {
1581 m->errorOut(e, "SRACommand", "checkCasesLibSelection");
1585 //**********************************************************************************************************************
1586 //METAGENOME,GENOME_SEQUENCING,METAGENOMIC_ASSEMBLY,ASSEMBLY,TRANSCRIPTOME,PROTEOMIC,MAP,CLONE_ENDS,TARGETED_LOCI,RANDOM_SURVEY,EXOME,VARIATION,EPIGENOMICS,PHENOTYPE,GENOTYPE,OTHER
1587 bool SRACommand::checkCasesDataType(string& dataType){
1589 string original = dataType;
1592 //remove users possible case errors
1593 for (int i = 0; i < dataType.size(); i++) { dataType[i] = toupper(dataType[i]); }
1595 if ((dataType == "METAGENOME") || (dataType == "GENOME_SEQUENCING") || (dataType == "METAGENOMIC_ASSEMBLY") || (dataType == "ASSEMBLY") || (dataType == "TRANSCRIPTOME") || (dataType == "PROTEOMIC") || (dataType == "MAP") || (dataType == "CLONE_ENDS") || (dataType == "TARGETED_LOCI") || (dataType == "RANDOM_SURVEY") || (dataType == "EXOME") || (dataType == "VARIATION") || (dataType == "EPIGENOMICS") || (dataType == "PHENOTYPE") || (dataType == "GENOTYPE") || (dataType == "OTHER")) { }
1596 else { isOkay = false; }
1601 m->mothurOut("[ERROR]: " + original + " is not a valid datatype option. Valid datatype options are METAGENOME,GENOME_SEQUENCING,METAGENOMIC_ASSEMBLY,ASSEMBLY,TRANSCRIPTOME,PROTEOMIC,MAP,CLONE_ENDS,TARGETED_LOCI,RANDOM_SURVEY,EXOME,VARIATION,EPIGENOMICS,PHENOTYPE,GENOTYPE,OTHER."); m->mothurOutEndLine(); abort = true;
1606 catch(exception& e) {
1607 m->errorOut(e, "SRACommand", "checkCasesDataType");
1611 //**********************************************************************************************************************
1612 bool SRACommand::sanityCheckMiMarksGroups(){
1616 for (int i = 0; i < Groups.size(); i++) {
1617 if (m->control_pressed) { break; }
1619 map<string, map<string, string> >::iterator it = mimarks.find(Groups[i]);
1620 if (it == mimarks.end()) {
1622 m->mothurOut("[ERROR]: MIMarks file is missing group " + Groups[i] + ", please correct.\n");
1626 if (!isOkay) { m->control_pressed = true; }
1630 catch(exception& e) {
1631 m->errorOut(e, "SRACommand", "sanityCheckMiMarksGroups");
1636 //**********************************************************************************************************************