]> git.donarmstrong.com Git - mothur.git/blob - sracommand.cpp
02e98991c892e922bc7c5a8f8234739e8b42c40a
[mothur.git] / sracommand.cpp
1 //
2 //  sracommand.cpp
3 //  Mothur
4 //
5 //  Created by SarahsWork on 10/28/13.
6 //  Copyright (c) 2013 Schloss Lab. All rights reserved.
7 //
8
9 #include "sracommand.h"
10 #include "sffinfocommand.h"
11 #include "parsefastaqcommand.h"
12
13 //**********************************************************************************************************************
14 vector<string> SRACommand::setParameters(){
15         try {
16         CommandParameter psff("sff", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(psff);
17         CommandParameter poligos("oligos", "InputTypes", "", "", "oligos", "none", "none","",false,false,true); parameters.push_back(poligos);
18         CommandParameter pfile("file", "InputTypes", "", "", "sffFastQFile-oligos", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfile);
19                 CommandParameter pfastq("fastq", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfastq);
20         CommandParameter pcontact("project", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pcontact);
21         CommandParameter pmimark("mimark", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pmimark);
22         //choose only one multiple options
23         CommandParameter pplatform("platform", "Multiple", "_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT", "_LS454", "", "", "","",false,false); parameters.push_back(pplatform);
24         CommandParameter pinstrument("instrument", "Multiple", "454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified", "454_GS", "", "", "","",false,false); parameters.push_back(pinstrument);
25         CommandParameter plibstrategy("libstrategy", "String", "AMPLICON", "", "", "", "","",false,false); parameters.push_back(plibstrategy);
26         CommandParameter pdatatype("datatype", "String", "METAGENOME", "", "", "", "","",false,false); parameters.push_back(pdatatype);
27         CommandParameter plibsource("libsource", "String", "METAGENOMIC", "", "", "", "","",false,false); parameters.push_back(plibsource);
28         CommandParameter plibselection("libselection", "String", "PCR", "", "", "", "","",false,false); parameters.push_back(plibselection);
29         CommandParameter porientation("orientation", "Multiple", "forward-reverse", "forward", "", "", "","",false,false); parameters.push_back(porientation);
30         CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
31                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);
32         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
33                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
34         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
35         
36          //every command must have inputdir and outputdir.  This allows mothur users to redirect input and output files.
37                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
38                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
39                 
40                 vector<string> myArray;
41                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
42                 return myArray;
43         }
44         catch(exception& e) {
45                 m->errorOut(e, "SRACommand", "setParameters");
46                 exit(1);
47         }
48 }
49 //**********************************************************************************************************************
50 string SRACommand::getHelpString(){
51         try {
52                 string helpString = "";
53                 helpString += "The sra command creates the necessary files for a NCBI submission. The xml file and individual sff or fastq files parsed from the original sff or fastq file.\n";
54                 helpString += "The sra command parameters are: sff, fastq, file, oligos, project, mimarksfile, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, platform, orientation, libstrategy, datatype, libsource, libselection and instrument.\n";
55         helpString += "The sff parameter is used to provide the original sff file.\n";
56                 helpString += "The fastq parameter is used to provide the original fastq file.\n";
57         helpString += "The project parameter is used to provide your project file.\n";
58         helpString += "The oligos parameter is used to provide an oligos file to parse your sff or fastq file by. It is required and must contain barcodes and primers, or you must provide a file option. \n";
59         helpString += "The mimark parameter is used to provide your mimarks file.  You can create the template for this file using the get.mimarkspackage command.\n";
60                 helpString += "The file parameter is used to provide a file containing a list of individual fastq or sff files or paired fastq files with a group assignment. File lines can be 2 or 3 columns. The 2 column files are sff file then oligos or fastqfile then oligos. You may have multiple lines in the file.  The 3 column files are for paired read libraries. The format is groupName, forwardFastqFile reverseFastqFile.\n";
61         helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
62                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
63                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
64         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
65                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
66         helpString += "The platform parameter is used to specify platform you are using choices are: _LS454,ILLUMINA,ION_TORRENT,PACBIO_SMRT. Default=_LS454. This is a controlled vocabulary section in the XML file that will be generated.\n";
67         helpString += "The orientation parameter is used to specify sequence orientation. Choices are: forward and reverse. Default=forward. This is a controlled vocabulary section in the XML file that will be generated.\n";
68         helpString += "The instrument parameter is used to specify instrument. Choices are 454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified. Default=454_GS. This is a controlled vocabulary section in the XML file that will be generated. \n";
69         helpString += "The libstrategy parameter is used to specify library strategy. Default=AMPLICON. Choices are AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq,OTHER. This is a controlled vocabulary section in the XML file that will be generated.  \n";
70         helpString += "The libsource parameter is used to specify library source. Default=METAGENOMIC. Choices are METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA,OTHER. This is a controlled vocabulary section in the XML file that will be generated. \n";
71         helpString += "The libselection parameter is used to specify library selection. Default=PCR. Choices are PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other,unspecified. This is a controlled vocabulary section in the XML file that will be generated. \n";
72         helpString += "The datatype parameter is used to specify datatype. Default=METAGENOME. Choices are METAGENOME,GENOME_SEQUENCING,METAGENOMIC_ASSEMBLY,ASSEMBLY,TRANSCRIPTOME,PROTEOMIC,MAP,CLONE_ENDS,TARGETED_LOCI,RANDOM_SURVEY,EXOME,VARIATION,EPIGENOMICS,PHENOTYPE,GENOTYPE,OTHER. This is a controlled vocabulary section in the XML file that will be generated. \n";
73                 helpString += "sra(sff=sff=GHL4YHV01.sff, GHL4YHV01.oligos, project=test.project, mimark=MIMarksData.txt)\n";
74                 return helpString;
75         }
76         catch(exception& e) {
77                 m->errorOut(e, "SRACommand", "getHelpString");
78                 exit(1);
79         }
80 }
81 //**********************************************************************************************************************
82 string SRACommand::getOutputPattern(string type) {
83     try {
84         string pattern = "";
85         
86         if (type == "xml") {  pattern = "[filename],xml"; }
87         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
88         
89         return pattern;
90     }
91     catch(exception& e) {
92         m->errorOut(e, "SRACommand", "getOutputPattern");
93         exit(1);
94     }
95 }
96 //**********************************************************************************************************************
97 SRACommand::SRACommand(){
98         try {
99                 abort = true; calledHelp = true;
100                 setParameters();
101         vector<string> tempOutNames;
102                 outputTypes["xml"] = tempOutNames;
103         }
104         catch(exception& e) {
105                 m->errorOut(e, "SRACommand", "SRACommand");
106                 exit(1);
107         }
108 }
109 //**********************************************************************************************************************
110 SRACommand::SRACommand(string option)  {
111         try {
112                 abort = false; calledHelp = false;
113         libLayout = "single"; //controlled vocab
114                 
115                 //allow user to run help
116                 if(option == "help") { help(); abort = true; calledHelp = true; }
117                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
118                 
119                 else {
120                         //valid paramters for this command
121                         vector<string> myArray = setParameters();
122                         
123                         OptionParser parser(option);
124                         map<string,string> parameters = parser.getParameters();
125                         
126                         ValidParameters validParameter;
127                         map<string,string>::iterator it;
128                         //check to make sure all parameters are valid for command
129                         for (it = parameters.begin(); it != parameters.end(); it++) {
130                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
131                         }
132                         
133             vector<string> tempOutNames;
134             outputTypes["xml"] = tempOutNames;
135                         
136                         //if the user changes the input directory command factory will send this info to us in the output parameter
137                         string inputDir = validParameter.validFile(parameters, "inputdir", false);
138                         if (inputDir == "not found"){   inputDir = "";          }
139                         else {
140             
141                 string path;
142                                 it = parameters.find("sff");
143                                 //user has given a template file
144                                 if(it != parameters.end()){
145                                         path = m->hasPath(it->second);
146                                         //if the user has not given a path then, add inputdir. else leave path alone.
147                                         if (path == "") {       parameters["sff"] = inputDir + it->second;              }
148                                 }
149                                 
150                                 it = parameters.find("fastq");
151                                 //user has given a template file
152                                 if(it != parameters.end()){
153                                         path = m->hasPath(it->second);
154                                         //if the user has not given a path then, add inputdir. else leave path alone.
155                                         if (path == "") {       parameters["fastq"] = inputDir + it->second;            }
156                                 }
157                 
158                 it = parameters.find("file");
159                                 //user has given a template file
160                                 if(it != parameters.end()){
161                                         path = m->hasPath(it->second);
162                                         //if the user has not given a path then, add inputdir. else leave path alone.
163                                         if (path == "") {       parameters["file"] = inputDir + it->second;             }
164                                 }
165                 
166                 it = parameters.find("oligos");
167                                 //user has given a template file
168                                 if(it != parameters.end()){
169                                         path = m->hasPath(it->second);
170                                         //if the user has not given a path then, add inputdir. else leave path alone.
171                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
172                                 }
173                 
174                 it = parameters.find("project");
175                                 //user has given a template file
176                                 if(it != parameters.end()){
177                                         path = m->hasPath(it->second);
178                                         //if the user has not given a path then, add inputdir. else leave path alone.
179                                         if (path == "") {       parameters["project"] = inputDir + it->second;          }
180                                 }
181                 
182                 it = parameters.find("mimark");
183                                 //user has given a template file
184                                 if(it != parameters.end()){
185                                         path = m->hasPath(it->second);
186                                         //if the user has not given a path then, add inputdir. else leave path alone.
187                                         if (path == "") {       parameters["mimark"] = inputDir + it->second;           }
188                                 }
189             }
190             
191                         //check for parameters
192             fastqfile = validParameter.validFile(parameters, "fastq", true);
193                         if (fastqfile == "not open") { fastqfile = "";  abort = true; }
194                         else if (fastqfile == "not found") { fastqfile = ""; }
195                         
196                         sfffile = validParameter.validFile(parameters, "sff", true);
197                         if (sfffile == "not open") {  sfffile = "";  abort = true; }
198                         else if (sfffile == "not found") { sfffile = ""; }
199             
200             file = validParameter.validFile(parameters, "file", true);
201                         if (file == "not open") {  file = "";  abort = true; }
202                         else if (file == "not found") { file = ""; }
203             
204             oligosfile = validParameter.validFile(parameters, "oligos", true);
205                         if (oligosfile == "not found")      {  oligosfile = "";     }
206             else if(oligosfile == "not open")   {       abort = true;           }
207                         else {  m->setOligosFile(oligosfile); }
208             
209             contactfile = validParameter.validFile(parameters, "project", true);
210                         if (contactfile == "not found")      {  contactfile = ""; m->mothurOut("[ERROR]: You must provide a project file before you can use the sra command."); m->mothurOutEndLine(); abort = true;    }
211                         else if(contactfile == "not open")      {       abort = true;           }
212             
213             mimarksfile = validParameter.validFile(parameters, "mimark", true);
214                         if (mimarksfile == "not found")      {  mimarksfile = ""; m->mothurOut("[ERROR]: You must provide a mimark file before you can use the sra command. You can create a template for this file using the get.mimarkspackage command."); m->mothurOutEndLine(); abort = true;       }
215                         else if(mimarksfile == "not open")      {       abort = true;           }
216             
217             file = validParameter.validFile(parameters, "file", true);
218                         if (file == "not open") {  file = "";  abort = true; }
219                         else if (file == "not found") { file = ""; }
220                         
221             if ((file == "") && (oligosfile == "")) {
222                 m->mothurOut("[ERROR]: You must provide an oligos file or file with oligos files in them before you can use the sra command."); m->mothurOutEndLine(); abort = true;
223             }
224             
225                         if ((fastqfile == "") && (file == "") && (sfffile == "")) {
226                 m->mothurOut("[ERROR]: You must provide a file, sff file or fastq file before you can use the sra command."); m->mothurOutEndLine(); abort = true;
227             }
228             
229             //use only one Mutliple type _LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
230                         platform = validParameter.validFile(parameters, "platform", false);         if (platform == "not found") { platform = "_LS454"; }
231                         if (!checkCasesPlatforms(platform)) { abort = true; } //error message in function
232                                  
233             if (!abort) { //don't check instrument model is platform is bad
234                 //454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified
235                 instrumentModel = validParameter.validFile(parameters, "instrument", false);         if (instrumentModel == "not found") { instrumentModel = "454_GS"; }
236                 if (!checkCasesInstrumentModels(instrumentModel)) { abort = true; } //error message in function
237             }
238             //turn _ to spaces mothur's work around
239             for (int i = 0; i < instrumentModel.length(); i++) { if (instrumentModel[i] == '_') { instrumentModel[i] = ' '; } }
240             
241             libStrategy = validParameter.validFile(parameters, "libstrategy", false);         if (libStrategy == "not found") { libStrategy = "AMPLICON"; }
242             if (!checkCasesLibStrategy(libStrategy)) { abort = true; } //error message in function
243
244             //turn _ to spaces mothur's work around
245             for (int i = 0; i < libStrategy.length(); i++) { if (libStrategy[i] == '_') { libStrategy[i] = ' '; }  }
246             
247             libSource = validParameter.validFile(parameters, "libsource", false);         if (libSource == "not found") { libSource = "METAGENOMIC"; }
248             if (!checkCasesLibSource(libSource)) { abort = true; } //error message in function
249             
250             //turn _ to spaces mothur's work around
251             for (int i = 0; i < libSource.length(); i++) { if (libSource[i] == '_') { libSource[i] = ' '; }  }
252             
253             libSelection = validParameter.validFile(parameters, "libselection", false);         if (libSelection == "not found") { libSelection = "PCR"; }
254             if (!checkCasesLibSelection(libSelection)) { abort = true; } //error message in function
255             
256             //turn _ to spaces mothur's work around
257             for (int i = 0; i < libSelection.length(); i++) { if (libSelection[i] == '_') { libSelection[i] = ' '; }  }
258             
259             dataType = validParameter.validFile(parameters, "datatype", false);         if (dataType == "not found") { dataType = "METAGENOME"; }
260             if (!checkCasesDataType(dataType)) { abort = true; } //error message in function
261             
262             //turn _ to spaces mothur's work around
263             for (int i = 0; i < dataType.length(); i++) { if (dataType[i] == '_') { dataType[i] = ' '; }  }
264             
265             orientation = validParameter.validFile(parameters, "orientation", false);         if (orientation == "not found") { orientation = "forward"; }
266             
267             if ((orientation == "forward") || (orientation == "reverse")) {  }
268             else {  m->mothurOut("[ERROR]: " + orientation + " is not a valid orientation option. Choices are: forward and reverse.\n"); m->mothurOutEndLine(); abort = true; }
269
270             
271             string temp = validParameter.validFile(parameters, "bdiffs", false);                if (temp == "not found"){       temp = "0";             }
272                         m->mothurConvert(temp, bdiffs);
273                         
274                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found"){       temp = "0";             }
275                         m->mothurConvert(temp, pdiffs);
276                         
277             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
278                         m->mothurConvert(temp, ldiffs);
279             
280             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
281                         m->mothurConvert(temp, sdiffs);
282                         
283                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
284                         m->mothurConvert(temp, tdiffs);
285                         
286                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
287                                 
288                 }
289                 
290         }
291         catch(exception& e) {
292                 m->errorOut(e, "SRACommand", "SRACommand");
293                 exit(1);
294         }
295 }
296 //**********************************************************************************************************************
297 int SRACommand::execute(){
298         try {
299                 
300                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
301         
302         readContactFile();
303         readMIMarksFile();
304         if (oligosfile != "") { readOligos(); Groups.push_back("scrap"); }
305         
306         if (m->control_pressed) { return 0; }
307         
308         //parse files
309         map<string, vector<string> > filesBySample;
310         isSFF = false;
311         
312         if (file != "")             {       readFile(filesBySample);        }
313         else if (sfffile != "")     {       parseSffFile(filesBySample);    }
314         else if (fastqfile != "")   {       parseFastqFile(filesBySample);  }
315         
316         sanityCheckMiMarksGroups();
317         
318         //checks groups and files returned from parse - removes any groups that did not get reads assigned to them, orders files.
319         checkGroups(filesBySample);
320         
321         //create xml file
322         string thisOutputDir = outputDir;
323         if (outputDir == "") {  thisOutputDir += m->hasPath(inputfile);  }
324                 map<string, string> variables;
325         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(inputfile));
326         string outputFileName = getOutputFileName("xml", variables);
327         outputNames.push_back(outputFileName); outputTypes["xml"].push_back(outputFileName);
328         ofstream out;
329         m->openOutputFile(outputFileName, out);
330         
331         //contacts portion
332         ////////////////////////////////////////////////////////
333         out << "<Submission>\n";
334         out << "\t<Description>\n";
335         out << "\t\t<Comment> New Submission. Generated by mothur version " + m->getVersion() + " </Comment> \n";
336         out << "\t\t<Submitter user_name=\"" + submissionName + "\"/>\n";
337         out << "\t\t<Organization type=\"" + centerType + "\">\n";
338         out << "\t\t<Name>" + centerName + "</Name>\n";
339         out << "\t\t<Contact> email=\"" + email + "\">\n";
340         out << "\t\t\t<Name>\n";
341         out << "\t\t\t\t<First>" + firstName + "</First>\n";
342         out << "\t\t\t\t<Last>" + firstName + "</Last>\n";
343         out << "\t\t\t</Name>\n";
344         out << "\t\t</Contact>\n";
345         out << "\t\t</Organization>\n";
346         out << "\t</Description>\n";
347         ////////////////////////////////////////////////////////
348         
349         //bioproject
350         ////////////////////////////////////////////////////////
351         out << "\t<Action>\n";
352         out << "\t\t<AddData target_db=\"BioProject\">\n";
353         out << "\t\t\t<Data content_type=\"XML\">\n";
354         out << "\t\t\t\t<XmlContent>\n";
355         out << "\t\t\t\t\t<Project schema_version=\"2.0\">\n";
356         out << "\t\t\t\t\t\t<ProjectID>\n";
357         out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + projectName + " </SPUID> \n";
358         out << "\t\t\t\t\t\t</ProjectID>\n";
359         out << "\t\t\t\t\t\t<Descriptor>\n";
360         out << "\t\t\t\t\t\t\t<Title>" + projectTitle + " </Title> \n";
361         out << "\t\t\t\t\t\t\t<Description><p>" + description + "</p></Description> \n";
362         if (website != "") {
363             out << "\t\t\t\t\t\t\t<ExternalLink label=\"Website name\">\n";
364             out << "\t\t\t\t\t\t\t\t<URL>" + website + "</URL>\n";
365             out << "\t\t\t\t\t\t\t</ExternalLink>\n";
366         }
367         out << "\t\t\t\t\t\t</Descriptor>\n";
368         out << "\t\t\t\t\t\t<ProjectType>\n";
369         out << "\t\t\t\t\t\t\t<ProjectTypeSubmission sample_scope=\"eEnvironment\">\n";
370         out << "\t\t\t\t\t\t\t\t<IntendedDataTypeSet>\n";
371         out << "\t\t\t\t\t\t\t\t\t<DataType>" + dataType + " </DataType> \n";
372         out << "\t\t\t\t\t\t\t\t</IntendedDataTypeSet>\n";
373         out << "\t\t\t\t\t\t\t</ProjectTypeSubmission>\n";
374         out << "\t\t\t\t\t\t</ProjectType>\n";
375         out << "\t\t\t\t\t</Project>\n";
376         out << "\t\t\t\t</XmlContent>\n";
377         out << "\t\t\t</Data>\n";
378         out << "\t\t\t<Identifier>\n";
379         out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + projectName + " </SPUID> \n";
380         out << "\t\t\t</Identifier>\n";
381         out << "\t\t</AddData>\n";
382         out << "\t</Action>\n";
383         ////////////////////////////////////////////////////////
384         
385         //bioSample
386         ////////////////////////////////////////////////////////
387         for (int i = 0; i < Groups.size(); i++) {
388             
389             string barcodeForThisSample = Group2Barcode[Groups[i]];
390             
391             if (m->control_pressed) { break; }
392             out << "\t<Action>\n";
393             out << "\t\t<AddData target_db=\"BioSample\">\n";
394             out << "\t\t\t<Data content_type=\"XML\">\n";
395             out << "\t\t\t\t<XmlContent>\n";
396             out << "\t\t\t\t\t<BioSample schema_version=\"2.0\">\n";
397             out << "\t\t\t\t\t\t<SampleId>\n";
398             out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + Groups[i] + " </SPUID> \n";
399             out << "\t\t\t\t\t\t</SampleId>\n";
400             out << "\t\t\t\t\t\t<Organism>\n";
401             string organismName = "metagenome";
402             map<string, string>::iterator itOrganism = Group2Organism.find(Groups[i]);
403             if (itOrganism != Group2Organism.end()) { organismName = itOrganism->second; } //user supplied acceptable organism, so use it.
404             out << "\t\t\t\t\t\t\t<OrganismName>" + organismName + " </OrganismName> \n";
405             out << "\t\t\t\t\t\t</Organism>\n";
406             out << "\t\t\t\t\t\t<Package>" + packageType + "</Package>n";
407             out << "\t\t\t\t\t\t<Attributes>n";
408             //add biosample required attributes
409             map<string, map<string, string> >:: iterator it = mimarks.find(Groups[i]);
410             if (it != mimarks.end()) {
411                 map<string, string> categories = it->second;
412                 for (map<string, string>:: iterator it2 = categories.begin(); it2 != categories.end(); it2++) {
413                     if (m->control_pressed) { break; }
414                     out << "\t\t\t\t\t\t\t<Attribute attribute_name=\"" + it2->first + "\">\"" + it2->second + "\"</Attribute>\n";
415                 }
416             }
417             out << "\t\t\t\t\t\t</Attributes>n";
418             out << "\t\t\t\t\t</BioSample>\n";
419             out << "\t\t\t\t</XmlContent>\n";
420             out << "\t\t\t</Data>\n";
421             out << "\t\t\t<Identifier>\n";
422             out << "\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + Groups[i] + " </SPUID>\n";
423             out << "\t\t\t</Identifier>\n";
424             out << "\t\t</AddData>\n";
425             out << "\t</Action>\n";
426         }
427         
428         //File objects
429         ////////////////////////////////////////////////////////
430         for (int i = 0; i < Groups.size(); i++) {
431             
432             vector<string> thisGroupsFiles = filesBySample[Groups[i]];
433             string barcodeForThisSample = Group2Barcode[Groups[i]];
434             
435             for (int j = 0; j < thisGroupsFiles.size(); j++) {
436                 string libId = thisGroupsFiles[j] + "." + barcodeForThisSample;
437                 
438                 if (m->control_pressed) { break; }
439                 out << "\t<Action>\n";
440                 out << "\t\t<AddFiles target_db=\"SRA\">\n";
441                 if (libLayout == "paired") { //adjust the libID because the thisGroupsFiles[j] contains two filenames
442                     vector<string> pieces = m->splitWhiteSpace(thisGroupsFiles[j]);
443                     libId = pieces[0] + barcodeForThisSample;
444                     out << "\t\t\t<File file_path=\"" + pieces[0] + "\">\n";
445                     out << "\t\t\t\t<DataType>generic-data</DataType> \n";
446                     out << "\t\t\t</File>\n";
447                     vector<string> thisBarcodes; m->splitAtChar(Group2Barcode[Groups[i]], thisBarcodes, '.');
448                     string forwardBarcode = thisBarcodes[0];
449                     string reverseBarcode = thisBarcodes[1];
450                     vector<string> thisPrimers; m->splitAtChar(Group2Primer[Groups[i]], thisPrimers, '.');
451                     string forwardPrimer = thisPrimers[0];
452                     string reversePrimer = thisPrimers[1];
453                     //attributes
454                     out << "\t\t\t<Attribute name=\"title\">" + mimarks[Groups[i]]["title"] + "</Attribute>\n";
455                     out << "\t\t\t<Attribute name=\"BarCode\">" + forwardBarcode + "</Attribute>\n";
456                     out << "\t\t\t<Attribute name=\"primer\">" + forwardPrimer + "</Attribute>\n";
457                     out << "\t\t\t<Attribute name=\"read_type\">forward</Attribute>\n";
458                     out << "\t\t\t<Attribute name=\"library_name\">" + libId + "</Attribute>\n";
459                     out << "\t\t\t<Attribute name=\"library_strategy\">" + libStrategy + "</Attribute>\n";
460                     out << "\t\t\t<Attribute name=\"library_source\">" + libSource + "</Attribute>\n";
461                     out << "\t\t\t<Attribute name=\"library_selection\">" + libSelection + "</Attribute>\n";
462                     out << "\t\t\t<Attribute name=\"library_layout\">" + libLayout + "</Attribute>\n";
463                     out << "\t\t\t<Attribute name=\"instrument_model\">" + instrumentModel + "</Attribute>\n";
464                     out << "\t\t\t<Attribute name=\"library_construction_protocol\">" + mimarks[Groups[i]]["seq_methods"] + "</Attribute>\n";
465
466                     out << "\t\t\t<File file_path=\"" + pieces[1] + "\">\n";
467                     out << "\t\t\t\t<DataType>generic-data</DataType> \n";
468                     out << "\t\t\t</File>\n";
469                     out << "\t\t\t<Attribute name=\"title\">" + mimarks[Groups[i]]["title"] + "</Attribute>\n";
470                     out << "\t\t\t<Attribute name=\"BarCode\">" + reverseBarcode + "</Attribute>\n";
471                     out << "\t\t\t<Attribute name=\"primer\">" + reversePrimer + "</Attribute>\n";
472                     out << "\t\t\t<Attribute name=\"read_type\">reverse</Attribute>\n";
473                     out << "\t\t\t<Attribute name=\"library_name\">" + libId + "</Attribute>\n";
474                     out << "\t\t\t<Attribute name=\"library_strategy\">" + libStrategy + "</Attribute>\n";
475                     out << "\t\t\t<Attribute name=\"library_source\">" + libSource + "</Attribute>\n";
476                     out << "\t\t\t<Attribute name=\"library_selection\">" + libSelection + "</Attribute>\n";
477                     out << "\t\t\t<Attribute name=\"library_layout\">" + libLayout + "</Attribute>\n";
478                     out << "\t\t\t<Attribute name=\"instrument_model\">" + instrumentModel + "</Attribute>\n";
479                     out << "\t\t\t<Attribute name=\"library_construction_protocol\">" + mimarks[Groups[i]]["seq_methods"] + "</Attribute>\n";
480
481                 }else { //single
482                     out << "\t\t\t<File file_path=\"" + thisGroupsFiles[j] + "\">\n";
483                     out << "\t\t\t\t<DataType>generic-data</DataType> \n";
484                     out << "\t\t\t</File>\n";
485                     //attributes
486                     out << "\t\t\t<Attribute name=\"title\">" + mimarks[Groups[i]]["title"] + "</Attribute>\n";
487                     out << "\t\t\t<Attribute name=\"BarCode\">" + Group2Barcode[Groups[i]] + "</Attribute>\n";
488                     out << "\t\t\t<Attribute name=\"primer\">" + Group2Primer[Groups[i]] + "</Attribute>\n";
489                     out << "\t\t\t<Attribute name=\"read_type\">" + orientation + "</Attribute>\n";
490                     out << "\t\t\t<Attribute name=\"library_name\">" + libId + "</Attribute>\n";
491                     out << "\t\t\t<Attribute name=\"library_strategy\">" + libStrategy + "</Attribute>\n";
492                     out << "\t\t\t<Attribute name=\"library_source\">" + libSource + "</Attribute>\n";
493                     out << "\t\t\t<Attribute name=\"library_selection\">" + libSelection + "</Attribute>\n";
494                     out << "\t\t\t<Attribute name=\"library_layout\">" + libLayout + "</Attribute>\n";
495                     out << "\t\t\t<Attribute name=\"instrument_model\">" + instrumentModel + "</Attribute>\n";
496                     out << "\t\t\t<Attribute name=\"library_construction_protocol\">" + mimarks[Groups[i]]["seq_methods"] + "</Attribute>\n";
497
498                 }
499                 ///////////////////bioProject info
500                 out << "\t\t\t<AttributeRefId name=\"BioProject\">\n";
501                 out << "\t\t\t\t<RefId>\n";
502                 out << "\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + projectName + " </SPUID> \n";
503                 out << "\t\t\t\t</RefId>\n";
504                 out << "\t\t\t</AttributeRefId>\n";
505                 //////////////////bioSample info
506                 out << "\t\t\t<AttributeRefId name=\"BioSample\">\n";
507                 out << "\t\t\t\t<RefId>\n";
508                 out << "\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + Groups[i] + " </SPUID>\n";
509                 out << "\t\t\t\t</RefId>\n";
510                 out << "\t\t\t</AttributeRefId>\n";
511                 //libID
512                 out << "\t\t\t<Identifier>\n";
513                 if (libLayout == "paired") { //adjust the libID because the thisGroupsFiles[j] contains two filenames
514                     vector<string> pieces = m->splitWhiteSpace(thisGroupsFiles[j]);
515                     libId = pieces[0] + barcodeForThisSample;
516                 }
517                 out << "\t\t\t\t<LocalId>" + libId + " </LocalId>\n";
518                 out << "\t\t\t</Identifier>\n";
519                 out << "\t\t</AddFiles>\n";
520                 out << "\t</Action>\n";
521             }
522         }
523         out << "</Submission>\n";
524         out.close();
525         
526         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
527                 
528         //output files created by command
529                 m->mothurOutEndLine();
530                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
531                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
532                 m->mothurOutEndLine();
533         return 0;
534                 
535     }
536         catch(exception& e) {
537                 m->errorOut(e, "SRACommand", "SRACommand");
538                 exit(1);
539         }
540 }
541 //**********************************************************************************************************************
542 int SRACommand::readContactFile(){
543         try {
544         lastName = ""; firstName = ""; submissionName = ""; email = ""; centerName = ""; centerType = ""; description = ""; website = ""; projectName = "";
545         projectTitle = ""; grantAgency = ""; grantId = ""; grantTitle = "";
546         
547         ifstream in;
548         m->openInputFile(contactfile, in);
549         
550         while(!in.eof()) {
551             
552             if (m->control_pressed) { break; }
553             
554             string key, value;
555             in >> key; m->gobble(in);
556             value = m->getline(in); m->gobble(in);
557             
558             for (int i = 0; i < key.length(); i++) { key[i] = toupper(key[i]); }
559             
560             if (key == "USERNAME")          {   submissionName = value; }
561             else if (key == "LAST")         {   lastName = value;       }
562             else if (key == "FIRST")        {   firstName = value;      }
563             else if (key == "EMAIL")        {   email = value;          }
564             else if (key == "CENTER")       {   centerName = value;     }
565             else if (key == "TYPE")         {
566                 centerType = value;
567                 for (int i = 0; i < centerType.length(); i++) { centerType[i] = tolower(centerType[i]); }
568                 if ((centerType == "consortium") || (centerType == "center") ||  (centerType == "institute") ||  (centerType == "lab")) {}
569                 else { m->mothurOut("[ERROR]: " + centerType + " is not a center type option.  Valid center type options are consortium, center, institute and lab. This is a controlled vocabulary section in the XML file that will be generated."); m->mothurOutEndLine(); m->control_pressed = true; }
570             }else if (key == "DESCRIPTION")     {   description = value;    }
571             else if (key == "WEBSITE")          {   website = value;        }
572             else if (key == "PROJECTNAME")      {   projectName = value;    }
573             else if (key == "PROJECTTITLE")     {   projectTitle = value;   }
574             else if (key == "GRANTID")          {   grantId = value;        }
575             else if (key == "GRANTTITLE")       {   grantTitle = value;     }
576             else if (key == "GRANTAGENCY")      {   grantAgency = value;    }
577         }
578         in.close();
579         
580         if (lastName == "") { m->mothurOut("[ERROR]: missing last name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
581         if (firstName == "") { m->mothurOut("[ERROR]: missing first name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
582         if (submissionName == "") { m->mothurOut("[ERROR]: missing submission name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
583         if (email == "") { m->mothurOut("[ERROR]: missing email from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
584         if (centerName == "") { m->mothurOut("[ERROR]: missing center name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
585         if (centerType == "") { m->mothurOut("[ERROR]: missing center type from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
586         if (description == "") { m->mothurOut("[ERROR]: missing description from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
587         if (projectTitle == "") { m->mothurOut("[ERROR]: missing project title from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
588         if (projectName == "") { m->mothurOut("[ERROR]: missing project name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
589
590         return 0;
591     }
592         catch(exception& e) {
593                 m->errorOut(e, "SRACommand", "readContactFile");
594                 exit(1);
595         }
596 }
597 //**********************************************************************************************************************
598 //air, host_associated, human_associated, human_gut, human_oral, human_skin, human_vaginal, microbial, miscellaneous, plant_associated, sediment, soil, wastewater or water
599 //all packages require: *sample_name    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon
600 //air: *altitude
601 //host_associated, human_associated, human_gut, human_oral, human_skin, human_vaginal, plant_associated: *host
602 //microbial, sediment, soil: *depth     *elev
603 //water: *depth
604 int SRACommand::readMIMarksFile(){
605         try {
606         //acceptable organisms
607         vector<string> acceptableOrganisms;
608         bool organismError = false;
609         //ecological
610         acceptableOrganisms.push_back("activated carbon metagenome"); acceptableOrganisms.push_back("activated sludge metagenome"); acceptableOrganisms.push_back("air metagenome"); acceptableOrganisms.push_back("anaerobic digester metagenome"); acceptableOrganisms.push_back("ant fungus garden metagenome"); acceptableOrganisms.push_back("aquatic metagenome"); acceptableOrganisms.push_back("activated carbon metagenome"); acceptableOrganisms.push_back("activated sludge metagenome"); acceptableOrganisms.push_back("beach sand metagenome"); acceptableOrganisms.push_back("biofilm metagenome"); acceptableOrganisms.push_back("biofilter metagenome"); acceptableOrganisms.push_back("biogas fermenter metagenome"); acceptableOrganisms.push_back("bioreactor metagenome"); acceptableOrganisms.push_back("bioreactor sludge metagenome"); acceptableOrganisms.push_back("clinical metagenome"); acceptableOrganisms.push_back("coal metagenome"); acceptableOrganisms.push_back("compost metagenome"); acceptableOrganisms.push_back("dust metagenome"); acceptableOrganisms.push_back("fermentation metagenome"); acceptableOrganisms.push_back("food fermentation metagenome"); acceptableOrganisms.push_back("food metagenome"); acceptableOrganisms.push_back("freshwater metagenome"); acceptableOrganisms.push_back("freshwater sediment metagenome"); acceptableOrganisms.push_back("groundwater metagenome"); acceptableOrganisms.push_back("halite metagenome"); acceptableOrganisms.push_back("hot springs metagenome"); acceptableOrganisms.push_back("hydrocarbon metagenome"); acceptableOrganisms.push_back("hydrothermal vent metagenome"); acceptableOrganisms.push_back("hypersaline lake metagenome"); acceptableOrganisms.push_back("ice metagenome"); acceptableOrganisms.push_back("indoor metagenome"); acceptableOrganisms.push_back("industrial waste metagenome"); acceptableOrganisms.push_back("mangrove metagenome"); acceptableOrganisms.push_back("marine metagenome"); acceptableOrganisms.push_back("marine sediment 
metagenome"); acceptableOrganisms.push_back("microbial mat metagenome"); acceptableOrganisms.push_back("mine drainage metagenome"); acceptableOrganisms.push_back("mixed culture metagenome"); acceptableOrganisms.push_back("oil production facility metagenome"); acceptableOrganisms.push_back("paper pulp metagenome"); acceptableOrganisms.push_back("permafrost metagenome"); acceptableOrganisms.push_back("plastisphere metagenome"); acceptableOrganisms.push_back("power plant metagenome"); acceptableOrganisms.push_back("retting rhizosphere metagenome"); acceptableOrganisms.push_back("rock metagenome"); acceptableOrganisms.push_back("salt lake metagenome"); acceptableOrganisms.push_back("saltern metagenome"); acceptableOrganisms.push_back("sediment metagenome"); acceptableOrganisms.push_back("snow metagenome"); acceptableOrganisms.push_back("soil metagenome"); acceptableOrganisms.push_back("stromatolite metagenome"); acceptableOrganisms.push_back("terrestrial metagenome"); acceptableOrganisms.push_back("tomb wall metagenome"); acceptableOrganisms.push_back("wastewater metagenome"); acceptableOrganisms.push_back("wetland metagenome"); acceptableOrganisms.push_back("whale fall metagenome");
611         //oganismal
612         acceptableOrganisms.push_back("algae metagenome"); acceptableOrganisms.push_back("ant metagenome"); acceptableOrganisms.push_back("bat metagenome"); acceptableOrganisms.push_back("beetle metagenome"); acceptableOrganisms.push_back("bovine gut metagenome"); acceptableOrganisms.push_back("bovine metagenome"); acceptableOrganisms.push_back("chicken gut metagenome"); acceptableOrganisms.push_back("coral metagenome"); acceptableOrganisms.push_back("echinoderm metagenome"); acceptableOrganisms.push_back("endophyte metagenome"); acceptableOrganisms.push_back("epibiont metagenome"); acceptableOrganisms.push_back("fish metagenome"); acceptableOrganisms.push_back("fossil metagenome"); acceptableOrganisms.push_back("gill metagenome"); acceptableOrganisms.push_back("gut metagenome"); acceptableOrganisms.push_back("honeybee metagenome"); acceptableOrganisms.push_back("human gut metagenome"); acceptableOrganisms.push_back("human lung metagenome"); acceptableOrganisms.push_back("human metagenome"); acceptableOrganisms.push_back("human nasal/pharyngeal metagenome"); acceptableOrganisms.push_back("human oral metagenome"); acceptableOrganisms.push_back("human skin metagenome"); acceptableOrganisms.push_back("insect gut metagenome"); acceptableOrganisms.push_back("insect metagenome"); acceptableOrganisms.push_back("mollusc metagenome"); acceptableOrganisms.push_back("mosquito metagenome"); acceptableOrganisms.push_back("mouse gut metagenome"); acceptableOrganisms.push_back("mouse metagenome"); acceptableOrganisms.push_back("mouse skin metagenome"); acceptableOrganisms.push_back("nematode metagenome"); acceptableOrganisms.push_back("oral metagenome"); acceptableOrganisms.push_back("phyllosphere metagenome"); acceptableOrganisms.push_back("pig metagenome"); acceptableOrganisms.push_back("plant metagenome"); acceptableOrganisms.push_back("primate metagenome"); acceptableOrganisms.push_back("rat metagenome"); acceptableOrganisms.push_back("root metagenome"); 
acceptableOrganisms.push_back("sea squirt metagenome"); acceptableOrganisms.push_back("seed metagenome"); acceptableOrganisms.push_back("shoot metagenome"); acceptableOrganisms.push_back("skin metagenome"); acceptableOrganisms.push_back("snake metagenome"); acceptableOrganisms.push_back("sponge metagenome"); acceptableOrganisms.push_back("stomach metagenome"); acceptableOrganisms.push_back("symbiont metagenome"); acceptableOrganisms.push_back("termite gut metagenome"); acceptableOrganisms.push_back("termite metagenome"); acceptableOrganisms.push_back("upper respiratory tract metagenome"); acceptableOrganisms.push_back("urine metagenome"); acceptableOrganisms.push_back("viral metagenome"); acceptableOrganisms.push_back("wallaby gut metagenome"); acceptableOrganisms.push_back("wasp metagenome"); acceptableOrganisms.push_back("sythetic metagenome"); acceptableOrganisms.push_back("metagenome");
613         
614         vector<string> requiredFieldsForPackage;
615         requiredFieldsForPackage.push_back("sample_name"); requiredFieldsForPackage.push_back("organism");
616         requiredFieldsForPackage.push_back("collection_date"); requiredFieldsForPackage.push_back("biome");
617         requiredFieldsForPackage.push_back("feature"); requiredFieldsForPackage.push_back("material");
618         requiredFieldsForPackage.push_back("geo_loc_name"); requiredFieldsForPackage.push_back("lat_lon");
619         requiredFieldsForPackage.push_back("seq_methods"); requiredFieldsForPackage.push_back("title");
620         vector<string> chooseAtLeastOneForPackage;
621         
622         ifstream in;
623         m->openInputFile(mimarksfile, in);
624         
625         //read comments
626         string temp; packageType = "";
627         while(!in.eof()) {
628             
629             if (m->control_pressed) { break; }
630             temp = m->getline(in); m->gobble(in);
631             
632             if (m->debug) { m->mothurOut("[DEBUG]: " + temp + "\n"); }
633             
634             if (temp[0] == '#') {
635                 int pos = temp.find("Environmental");
636                 if (pos != string::npos) {
637                     for (int i = pos+14; i < temp.length(); i++) {
638                         if (!isspace(temp[i])) { packageType += temp[i]; }
639                         else { i+= temp.length(); }
640                     }
641                 }
642             }
643             else{ break; } //hit headers line
644          }
645         
646         vector<string> headers; m->splitAtChar(temp, headers, '\t');
647         m->removeBlanks(headers);
648         //remove * from required's
649         for (int i = 0; i < headers.size(); i++) {
650             if (headers[i][0] == '*') { headers[i] = headers[i].substr(1); }
651             if (headers[i][0] == '*') { headers[i] = headers[i].substr(1); chooseAtLeastOneForPackage.push_back(headers[i]); }  //secondary condition
652             if (m->debug) { m->mothurOut("[DEBUG]: " + headers[i] + "\n"); }
653         }
654         
655         if (m->debug) {  m->mothurOut("[DEBUG]: packageType = '" + packageType + "'\n");   }
656         
657         //check to make sure package has all its required parts
658         //MIMARKS.specimen.water.3.0
659         if (packageType == "MIMARKS.specimen.air.3.0") {   requiredFieldsForPackage.push_back("altitude");  }
660         else if ((packageType == "MIMARKS.specimen.host-associated.3.0") || (packageType == "MIMARKS.specimen.human-associated.3.0") || (packageType == "MIMARKS.specimen.human-gut.3.0") || (packageType == "MIMARKS.specimen.human-oral.3.0") || (packageType == "MIMARKS.specimen.human-skin.3.0") || (packageType == "MIMARKS.specimen.human-vaginal.3.0") || (packageType == "MIMARKS.specimen.plant-associated.3.0")) {  requiredFieldsForPackage.push_back("host");  }
661         else if ((packageType == "MIMARKS.specimen.microbial.3.0") || (packageType == "MIMARKS.specimen.sediment.3.0") || (packageType == "soil")) {   requiredFieldsForPackage.push_back("depth");  requiredFieldsForPackage.push_back("elev"); }
662         else if (packageType == "MIMARKS.specimen.water.3.0") {   requiredFieldsForPackage.push_back("depth");  }
663         else if ((packageType == "MIMARKS.specimen.miscellaneous.3.0") || (packageType == "wastewater")) { }
664         else {
665             m->mothurOut("[ERROR]: unknown package " + packageType + ", please correct.\n"); m->control_pressed = true; in.close(); return 0;
666         }
667         
668         if (!m->isSubset(headers, requiredFieldsForPackage)){
669             string requiredFields = "";
670             for (int i = 0; i < requiredFieldsForPackage.size()-1; i++) { requiredFields += requiredFieldsForPackage[i] + ", "; } requiredFields += requiredFieldsForPackage[requiredFieldsForPackage.size()-1];
671             m->mothurOut("[ERROR]: missing required fields for package, please correct. Required fields are " + requiredFields + ".\n"); m->control_pressed = true; in.close(); return 0;
672         }
673         
674         if (m->debug) {  m->mothurOut("[DEBUG]: chooseAtLeastOneForPackage.size() = " + toString(chooseAtLeastOneForPackage.size()) + "\n");   }
675         
676         if (!m->inUsersGroups(chooseAtLeastOneForPackage, headers)){ //returns true if any of the choose at least ones are in headers
677             string requiredFields = "";
678             for (int i = 0; i < chooseAtLeastOneForPackage.size()-1; i++) { requiredFields += chooseAtLeastOneForPackage[i] + ", "; cout << chooseAtLeastOneForPackage[i] << endl; }
679             if (chooseAtLeastOneForPackage.size() < 1) { requiredFields += chooseAtLeastOneForPackage[chooseAtLeastOneForPackage.size()-1]; }
680             m->mothurOut("[ERROR]: missing a choose at least one fields for the package, please correct. These are marked with '**'. Required fields are " + requiredFields + ".\n"); m->control_pressed = true; in.close(); return 0;
681         }
682         
683         map<string, bool> allNA;  for (int i = 1; i < headers.size(); i++) {  allNA[headers[i]] = true; }
684         while(!in.eof()) {
685             
686             if (m->control_pressed) { break; }
687             
688             temp = m->getline(in);  m->gobble(in);
689             
690             if (m->debug) { m->mothurOut("[DEBUG]: " + temp + "\n"); }
691             
692             string original = temp;
693             vector<string> linePieces; m->splitAtChar(temp, linePieces, '\t');
694             m->removeBlanks(linePieces);
695             
696             if (linePieces.size() != headers.size()) { m->mothurOut("[ERROR]: line: " + original + " contains " + toString(linePieces.size()) + " columns, but you have " + toString(headers.size()) + " column headers, please correct.\n"); m->control_pressed = true; }
697             else {
698                 map<string, map<string, string> >:: iterator it = mimarks.find(linePieces[0]);
699                 
700                 if (it == mimarks.end()) {
701                     map<string, string> categories;
702                     //start after *sample_name
703                     for (int i = 1; i < headers.size(); i++) {
704                         categories[headers[i]] = linePieces[i];
705                         //check the users inputs for appropriate organisms
706                         if (headers[i] == "organism") {
707                             if (!m->inUsersGroups(linePieces[i], acceptableOrganisms)) { //not an acceptable organism
708                                 organismError = true;
709                                 m->mothurOut("[WARNING]: " + linePieces[i]+ " is not an acceptable organism, changing to metagenome. You can correct the issue and rerun the command, or NCBI will allow you to modify the organism after submission.\n"); linePieces[i] = "metagenome"; categories[headers[i]] = linePieces[i];
710                             }
711                             Group2Organism[linePieces[0]] = linePieces[i];
712                         }
713                         if (linePieces[i] != "NA") {  allNA[headers[i]] = false;     }
714                     }
715                     
716                     //does this sample already match an existing sample?
717                     bool isOkaySample = true;
718                     for (map<string, map<string, string> >:: iterator it2 = mimarks.begin(); it2 != mimarks.end(); it2++) {
719                         if (m->control_pressed) { break; }
720                         bool allSame = true;
721                         for (int i = 1; i < headers.size(); i++) {
722                             if ((it2->second)[headers[i]] != categories[headers[i]]) { allSame = false; }
723                         }
724                         if (allSame) { m->mothurOut("[ERROR]: " + linePieces[0]+ " is a duplicate sample to " + it2->first + ". It has all the same attributes in the MIMarks file. Samples must have distinguishing features to be uploaded to the NCBI library, please correct.\n"); m->control_pressed = true; isOkaySample = false; }
725                     }
726                     if (isOkaySample) { mimarks[linePieces[0]] = categories; }
727                 }else {
728                     m->mothurOut("[ERROR]: " + linePieces[0]+ " is a duplicate sampleName. Sample names must be unique, please correct.\n"); m->control_pressed = true;
729                 }
730             }
731         }
732         in.close();
733         
734         //add in values for "scrap" group
735         map<string, string> categories;
736         //start after *sample_name
737         for (int i = 1; i < headers.size(); i++) {
738             categories[headers[i]] = "NA";
739             if (headers[i] == "organism")       { categories[headers[i]] = "metagenome"; }
740             if (headers[i] == "seq_methods")    { categories[headers[i]] = "these sequences were scrapped"; }
741             if (headers[i] == "title")          { categories[headers[i]] = "these sequences were scrapped"; }
742         }
743         mimarks["scrap"] = categories;
744         Group2Organism["scrap"] = "metagenome";
745         
746         if (organismError) {
747             string organismTypes = "";
748             for (int i = 0; i < acceptableOrganisms.size()-1; i++) { organismTypes += acceptableOrganisms[i] + ", "; }
749             organismTypes += acceptableOrganisms[acceptableOrganisms.size()-1];
750             m->mothurOut("[WARNING]: The acceptable organism choices are: " + organismTypes + ".\n");
751         }
752         
753         return 0;
754     }
755         catch(exception& e) {
756                 m->errorOut(e, "SRACommand", "readMIMarksFile");
757                 exit(1);
758         }
759 }
760
761 //**********************************************************************************************************************
762 // going to have to rework this to allow for other options --
763 /*
764  file option 1
765  
766  sfffile1   oligosfile1
767  sfffile2   oligosfile2
768  ...
769  
770  file option 2
771  
772  fastqfile1 oligosfile1
773  fastqfile2 oligosfile2
774  ...
775  
776  file option 3
777  
778  fastqfile  fastqfile   group
779  fastqfile  fastqfile   group
780  fastqfile  fastqfile   group
781  ...
782  
783 */
784
785 int SRACommand::readFile(map<string, vector<string> >& files){
786         try {
787         //vector<string> theseFiles;
788         inputfile = file;
789         files.clear();
790         
791         ifstream in;
792         m->openInputFile(file, in);
793         
794         while(!in.eof()) {
795             
796             if (m->control_pressed) { return 0; }
797             
798             string line = m->getline(in);  m->gobble(in);
799             vector<string> pieces = m->splitWhiteSpace(line);
800             
801             string group = "";
802             string thisFileName1, thisFileName2; thisFileName1 = ""; thisFileName2 = "";
803             if (pieces.size() == 2) {
804                 thisFileName1 = pieces[0];
805                 thisFileName2 = pieces[1];
806             }else if (pieces.size() == 3) {
807                 thisFileName1 = pieces[1];
808                 thisFileName2 = pieces[2];
809                 string group = pieces[0];
810                 libLayout = "paired";
811             }else {
812                 m->mothurOut("[ERROR]: file lines can be 2 or 3 columns. The 2 column files are sff file then oligos or fastqfile then oligos. You may have multiple lines in the file.  The 3 column files are for paired read libraries. The format is groupName, forwardFastqFile reverseFastqFile. \n"); m->control_pressed = true;
813             }
814             
815             if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", thisFileName1 = " + thisFileName1 + ", thisFileName2 = " + thisFileName2  + ".\n"); }
816             
817             //check to make sure both are able to be opened
818             ifstream in2;
819             int openForward = m->openInputFile(thisFileName1, in2, "noerror");
820             
821             //if you can't open it, try default location
822             if (openForward == 1) {
823                 if (m->getDefaultPath() != "") { //default path is set
824                     string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName1);
825                     m->mothurOut("Unable to open " + thisFileName1 + ". Trying default " + tryPath); m->mothurOutEndLine();
826                     ifstream in3;
827                     openForward = m->openInputFile(tryPath, in3, "noerror");
828                     in3.close();
829                     thisFileName1 = tryPath;
830                 }
831             }
832             
833             //if you can't open it, try output location
834             if (openForward == 1) {
835                 if (m->getOutputDir() != "") { //default path is set
836                     string tryPath = m->getOutputDir() + m->getSimpleName(thisFileName1);
837                     m->mothurOut("Unable to open " + thisFileName1 + ". Trying output directory " + tryPath); m->mothurOutEndLine();
838                     ifstream in4;
839                     openForward = m->openInputFile(tryPath, in4, "noerror");
840                     thisFileName1 = tryPath;
841                     in4.close();
842                 }
843             }
844             
845             if (openForward == 1) { //can't find it
846                 m->mothurOut("[WARNING]: can't find " + thisFileName1 + ", ignoring.\n");
847             }else{  in2.close();  }
848             
849             int openReverse = 1;
850             
851             ifstream in3;
852             openReverse = m->openInputFile(thisFileName2, in3, "noerror");
853             
854             //if you can't open it, try default location
855             if (openReverse == 1) {
856                 if (m->getDefaultPath() != "") { //default path is set
857                     string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName2);
858                     m->mothurOut("Unable to open " + thisFileName2 + ". Trying default " + tryPath); m->mothurOutEndLine();
859                     ifstream in3;
860                     openReverse = m->openInputFile(tryPath, in3, "noerror");
861                     in3.close();
862                     thisFileName2 = tryPath;
863                 }
864             }
865             
866             //if you can't open it, try output location
867             if (openReverse == 1) {
868                 if (m->getOutputDir() != "") { //default path is set
869                     string tryPath = m->getOutputDir() + m->getSimpleName(thisFileName2);
870                     m->mothurOut("Unable to open " + thisFileName2 + ". Trying output directory " + tryPath); m->mothurOutEndLine();
871                     ifstream in4;
872                     openReverse = m->openInputFile(tryPath, in4, "noerror");
873                     thisFileName2 = tryPath;
874                     in4.close();
875                 }
876             }
877             
878             if (openReverse == 1) { //can't find it
879                 m->mothurOut("[WARNING]: can't find " + thisFileName2 + ", ignoring pair.\n");
880             }else{  in3.close();  }
881            
882             
883             if ((pieces.size() == 2) && (openForward != 1) && (openReverse != 1)) { //good pair and sff or fastq and oligos
884                 //process pair
885                 int pos = thisFileName1.find(".sff");
886                 if (pos != string::npos) {//these files are sff files
887                     isSFF = true;
888                     sfffile = thisFileName1; oligosfile = thisFileName2;
889                     if (m->debug) { m->mothurOut("[DEBUG]: about to read oligos\n"); }
890                     readOligos();
891                     if (m->debug) { m->mothurOut("[DEBUG]: about to parse\n"); }
892                     parseSffFile(files);
893                     if (m->debug) { m->mothurOut("[DEBUG]: done parsing " + sfffile + "\n"); }
894                 }else{
895                     isSFF = false;
896                     fastqfile = thisFileName1; oligosfile = thisFileName2;
897                     if (m->debug) { m->mothurOut("[DEBUG]: about to read oligos\n"); }
898                     readOligos();
899                     if (m->debug) { m->mothurOut("[DEBUG]: about to parse\n"); }
900                     parseFastqFile(files);
901                     if (m->debug) { m->mothurOut("[DEBUG]: done parsing " + fastqfile + "\n"); }
902                 }
903                 
904             }else if((pieces.size() == 3) && (openForward != 1) && (openReverse != 1)) { //good pair and paired read
905                 map<string, vector<string> >::iterator it = files.find(group);
906                 if (it == files.end()) {
907                     vector<string> temp; temp.push_back(thisFileName1 + " " + thisFileName2); files[group] = temp;
908                 }else {
909                     files[group].push_back(thisFileName1 + " " + thisFileName2);
910                 }
911             }
912         }
913         in.close();
914     
915         inputfile = file;
916         
917         return 0;
918     }
919         catch(exception& e) {
920                 m->errorOut(e, "SRACommand", "readFile");
921                 exit(1);
922         }
923 }
924 //**********************************************************************************************************************
925 int SRACommand::parseSffFile(map<string, vector<string> >& files){
926         try {
927         vector<string> theseFiles;
928         inputfile = sfffile;
929         libLayout = "single"; //controlled vocab
930         
931         isSFF = true;
932         //run sffinfo to parse sff file into individual sampled sff files
933         string commandString = "sff=" + sfffile;
934         
935         commandString += ", oligos=" + oligosfile;
936         //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs
937         if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); }
938         if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); }
939         if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
940         if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
941         if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
942         
943         m->mothurOutEndLine();
944         m->mothurOut("/******************************************/"); m->mothurOutEndLine();
945         m->mothurOut("Running command: sffinfo(" + commandString + ")"); m->mothurOutEndLine();
946         m->mothurCalling = true;
947         
948         Command* sffinfoCommand = new SffInfoCommand(commandString);
949         sffinfoCommand->execute();
950         
951         map<string, vector<string> > filenames = sffinfoCommand->getOutputFiles();
952         map<string, vector<string> >::iterator it = filenames.find("sff");
953         if (it != filenames.end()) { theseFiles = it->second; }
954         else { m->control_pressed = true; } // error in sffinfo
955         
956         delete sffinfoCommand;
957         m->mothurCalling = false;
958         m->mothurOut("/******************************************/"); m->mothurOutEndLine();
959         
960         mapGroupToFile(files, theseFiles);
961         
962         return 0;
963     }
964         catch(exception& e) {
965                 m->errorOut(e, "SRACommand", "readFile");
966                 exit(1);
967         }
968 }
969
970 //**********************************************************************************************************************
971 int SRACommand::parseFastqFile(map<string, vector<string> >& files){
972         try {
973         vector<string> theseFiles;
974         inputfile = fastqfile;
975         libLayout = "single"; //controlled vocab
976         
977         //run sffinfo to parse sff file into individual sampled sff files
978         string commandString = "fastq=" + fastqfile;
979         
980         commandString += ", oligos=" + oligosfile;
981         //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs
982         if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); }
983         if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); }
984         if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
985         if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
986         if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
987        
988         m->mothurOutEndLine();
989         m->mothurOut("/******************************************/"); m->mothurOutEndLine();
990         m->mothurOut("Running command: fastq.info(" + commandString + ")"); m->mothurOutEndLine();
991         m->mothurCalling = true;
992         
993         Command* fastqinfoCommand = new ParseFastaQCommand(commandString);
994         fastqinfoCommand->execute();
995         
996         map<string, vector<string> > filenames = fastqinfoCommand->getOutputFiles();
997         map<string, vector<string> >::iterator it = filenames.find("fastq");
998         if (it != filenames.end()) { theseFiles = it->second; }
999         else { m->control_pressed = true; } // error in sffinfo
1000         
1001         delete fastqinfoCommand;
1002         m->mothurCalling = false;
1003         m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1004         
1005         mapGroupToFile(files, theseFiles);
1006         
1007         return 0;
1008     }
1009         catch(exception& e) {
1010                 m->errorOut(e, "SRACommand", "readFile");
1011                 exit(1);
1012         }
1013 }
1014 //***************************************************************************************************************
1015 //maps group to file
1016 int SRACommand::mapGroupToFile(map<string, vector<string> >& files, vector<string> theseFiles){
1017         try {
1018         
1019         for (int i = 0; i < Groups.size(); i++) {
1020             
1021             set<int> matches;
1022             for (int j = 0; j < theseFiles.size(); j++) {
1023                 int pos = theseFiles[j].find(Groups[i]);
1024                 if (pos != string::npos) { //you have a potential match, make sure you dont have a case of partial name
1025                     if (theseFiles[j][pos+Groups[i].length()] == '.') { //final.soil.sff vs final.soil2.sff both would match soil.
1026                         matches.insert(i);
1027                     }
1028                 }
1029             }
1030             
1031             if(matches.size() == 1) {
1032                 map<string, vector<string> >::iterator it = files.find(Groups[i]);
1033                 if (it == files.end()) {
1034                     vector<string> temp; temp.push_back(theseFiles[*matches.begin()]); files[Groups[i]] = temp;
1035                 }else {
1036                     files[Groups[i]].push_back(theseFiles[*matches.begin()]);
1037                 }
1038             }
1039         }
1040         return 0;
1041     }
1042         catch(exception& e) {
1043                 m->errorOut(e, "SRACommand", "checkGroups");
1044                 exit(1);
1045         }
1046 }
1047
1048 //***************************************************************************************************************
1049 //checks groups and files returned from parse - removes any groups that did not get reads assigned to them, orders files.
1050 int SRACommand::checkGroups(map<string, vector<string> >& files){
1051         try {
1052         vector<string> newGroups;
1053         for (int i = 0; i < Groups.size(); i++) {
1054             
1055             map<string, vector<string> >::iterator it = files.find(Groups[i]);
1056              //no files for this group, remove it
1057             if (it == files.end()) { }
1058             else { newGroups.push_back(Groups[i]); }
1059         }
1060         
1061         Groups = newGroups;
1062         
1063         return 0;
1064     }
1065         catch(exception& e) {
1066                 m->errorOut(e, "SRACommand", "checkGroups");
1067                 exit(1);
1068         }
1069 }
1070 //***************************************************************************************************************
1071 int SRACommand::readOligos(){ //reads oligosfile and registers one group per barcode/primer combination in Groups, Group2Barcode and Group2Primer
1072         try { //returns 0 (with m->control_pressed set) when paired and unpaired oligos are mixed, otherwise true
1073                 ifstream inOligos;
1074                 m->openInputFile(oligosfile, inOligos);
1075                 
1076                 string type, oligo, roligo, group;
1077         bool hasPrimer = false; bool hasPairedBarcodes = false; pairedOligos = false; //hasPrimer is only set by PRIMER (paired) lines
1078         map<int, oligosPair> pairedBarcodes;
1079         map<int, oligosPair> pairedPrimers;
1080         map<string, int> barcodes;
1081         map<string, int> primers;
1082         vector<string>  linker;
1083         vector<string>  spacer, revPrimer;
1084                 int indexPrimer = 0;
1085                 int indexBarcode = 0;
1086         int indexPairedPrimer = 0;
1087                 int indexPairedBarcode = 0;
1088         set<string> uniquePrimers;
1089         set<string> uniqueBarcodes;
1090                 //the index counters above are the positions of the matching names in primerNameVector / barcodeNameVector
1091                 while(!inOligos.eof()){
1092             
1093                         inOligos >> type;
1094             
1095                         if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
1096             
1097                         if(type[0] == '#'){
1098                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1099                                 m->gobble(inOligos);
1100                         }
1101                         else{
1102                                 m->gobble(inOligos);
1103                                 //make type case insensitive
1104                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1105                                 
1106                                 inOligos >> oligo;
1107                 
1108                 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
1109                                 //uppercase the oligo and treat U as T
1110                                 for(int i=0;i<oligo.length();i++){
1111                                         oligo[i] = toupper(oligo[i]);
1112                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1113                                 }
1114                                 
1115                                 if(type == "FORWARD"){
1116                                         group = "";
1117                                         
1118                                         // get rest of line in case there is a primer name
1119                                         while (!inOligos.eof()) {
1120                                                 char c = inOligos.get();
1121                                                 if (c == 10 || c == 13 || c == -1){     break;  }
1122                                                 else if (c == 32 || c == 9){;} //space or tab
1123                                                 else {  group += c;  }
1124                                         }
1125                                         
1126                                         //check for repeat primers (a repeat is only reported; the assignment below replaces the stored index)
1127                                         map<string, int>::iterator itPrime = primers.find(oligo);
1128                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1129                                         
1130                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); }  }
1131                     
1132                                         primers[oligo] = indexPrimer; indexPrimer++;
1133                                         primerNameVector.push_back(group);
1134                                 }
1135                 else if (type == "PRIMER"){ //paired primer line: PRIMER forward reverse [name]
1136                     m->gobble(inOligos);
1137                                         
1138                     inOligos >> roligo;
1139                     
1140                     for(int i=0;i<roligo.length();i++){
1141                         roligo[i] = toupper(roligo[i]);
1142                         if(roligo[i] == 'U')    {       roligo[i] = 'T';        }
1143                     }
1144                     roligo = reverseOligo(roligo); //the reverse primer is kept as its reverse complement
1145                     
1146                     group = "";
1147                     
1148                                         // get rest of line in case there is a primer name
1149                                         while (!inOligos.eof()) {
1150                                                 char c = inOligos.get();
1151                                                 if (c == 10 || c == 13 || c == -1){     break;  }
1152                                                 else if (c == 32 || c == 9){;} //space or tab
1153                                                 else {  group += c;  }
1154                                         }
1155                     
1156                     oligosPair newPrimer(oligo, roligo);
1157                     
1158                     if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
1159                                         
1160                                         //check for repeat primer pairs (a repeat is only reported; the pair is still stored below)
1161                     string tempPair = oligo+roligo;
1162                     if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine();  }
1163                     else { uniquePrimers.insert(tempPair); }
1164                                         
1165                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); }  }
1166                     
1167                                         pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
1168                                         primerNameVector.push_back(group);
1169                     hasPrimer = true;
1170                 }
1171                                 else if(type == "REVERSE"){
1172                                         //Sequence oligoRC("reverse", oligo);
1173                                         //oligoRC.reverseComplement();
1174                     string oligoRC = reverseOligo(oligo);
1175                                         revPrimer.push_back(oligoRC);
1176                                 }
1177                                 else if(type == "BARCODE"){
1178                                         inOligos >> group;
1179                     
1180                     //barcode lines can look like   BARCODE   atgcatgc   groupName  - for 454 seqs
1181                     //or                            BARCODE   atgcatgc   atgcatgc    groupName  - for illumina data that has forward and reverse info
1182                     //temp collects whatever follows the third column; non-empty means the 4 column form
1183                     string temp = "";
1184                     while (!inOligos.eof())     {
1185                                                 char c = inOligos.get();
1186                                                 if (c == 10 || c == 13 || c == -1){     break;  }
1187                                                 else if (c == 32 || c == 9){;} //space or tab
1188                                                 else {  temp += c;  }
1189                                         }
1190                                         
1191                     //then this is illumina data with 4 columns
1192                     if (temp != "") {
1193                         hasPairedBarcodes = true;
1194                         string reverseBarcode = group; //reverseOligo(group); //reverse barcode
1195                         group = temp;
1196                         
1197                         for(int i=0;i<reverseBarcode.length();i++){
1198                             reverseBarcode[i] = toupper(reverseBarcode[i]);
1199                             if(reverseBarcode[i] == 'U')        {       reverseBarcode[i] = 'T';        }
1200                         }
1201                         
1202                         reverseBarcode = reverseOligo(reverseBarcode);
1203                         oligosPair newPair(oligo, reverseBarcode);
1204                         
1205                         if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
1206                         //check for repeat barcode pairs. NOTE(review): the message says disregarding, but the pair is still stored below
1207                         string tempPair = oligo+reverseBarcode;
1208                         if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse +  " is in your oligos file already, disregarding."); m->mothurOutEndLine();  }
1209                         else { uniqueBarcodes.insert(tempPair); }
1210                         
1211                         pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
1212                         barcodeNameVector.push_back(group);
1213                     }else {
1214                         //check for repeat barcodes
1215                         map<string, int>::iterator itBar = barcodes.find(oligo);
1216                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1217                         
1218                         barcodes[oligo]=indexBarcode; indexBarcode++;
1219                         barcodeNameVector.push_back(group);
1220                     }
1221                                 }else if(type == "LINKER"){
1222                                         linker.push_back(oligo);
1223                                 }else if(type == "SPACER"){
1224                                         spacer.push_back(oligo);
1225                                 }
1226                                 else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); } //NOTE(review): message omits primer, linker and spacer, which are accepted above
1227                         }
1228                         m->gobble(inOligos);
1229                 }
1230                 inOligos.close();
1231                 //paired primers/barcodes may not be combined with unpaired oligos, reverse primers, linkers or spacers
1232         if (hasPairedBarcodes || hasPrimer) {
1233             pairedOligos = true;
1234             if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true;  m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine();  return 0; }
1235         }
1236                 
1237         
1238                 //add in potential combos
1239                 if(barcodeNameVector.size() == 0){
1240                         barcodeNameVector.push_back("");
1241                 }
1242                 
1243                 if(primerNameVector.size() == 0){
1244                         primerNameVector.push_back("");
1245                 }
1246         
1247         set<string> uniqueNames; //used to cleanup outputFileNames
1248         if (pairedOligos) { //group name is barcodeName.primerName, or whichever of the two is non-empty; names equal to ignore are skipped
1249             for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
1250                 for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
1251                     
1252                     string primerName = primerNameVector[itPrimer->first];
1253                     string barcodeName = barcodeNameVector[itBar->first];
1254                     
1255                     if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1256                     else {
1257                         string comboGroupName = "";
1258                         string fastqFileName = ""; //never read in this function
1259                         
1260                         if(primerName == ""){
1261                             comboGroupName = barcodeNameVector[itBar->first];
1262                         }
1263                         else{
1264                             if(barcodeName == ""){
1265                                 comboGroupName = primerNameVector[itPrimer->first];
1266                             }
1267                             else{
1268                                 comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
1269                             }
1270                         }
1271                         uniqueNames.insert(comboGroupName);
1272                         Group2Barcode[comboGroupName] = (itBar->second).forward+"."+(itBar->second).reverse;
1273                         Group2Primer[comboGroupName] = (itPrimer->second).forward+"."+(itPrimer->second).reverse;
1274                     }
1275                 }
1276             }
1277         }else { //same naming rule for unpaired oligos; here the maps are keyed by sequence and hold the name index
1278             for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1279                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1280                     
1281                     string primerName = primerNameVector[itPrimer->second];
1282                     string barcodeName = barcodeNameVector[itBar->second];
1283                     
1284                     if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1285                     else {
1286                         string comboGroupName = "";
1287                         string fastqFileName = ""; //never read in this function
1288                         
1289                         if(primerName == ""){
1290                             comboGroupName = barcodeNameVector[itBar->second];
1291                         }
1292                         else{
1293                             if(barcodeName == ""){
1294                                 comboGroupName = primerNameVector[itPrimer->second];
1295                             }
1296                             else{
1297                                 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1298                             }
1299                         }
1300                         uniqueNames.insert(comboGroupName);
1301                         Group2Barcode[comboGroupName] = itBar->first;
1302                         Group2Primer[comboGroupName] = itPrimer->first;
1303                     }
1304                 }
1305             }
1306         }
1307
1308                
1309         if (m->debug) { int count = 0; for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } }
1310         //append the unique group names found in this oligos file to Groups
1311         for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) {  Groups.push_back(*it); }
1312         
1313                 return true; //NOTE(review): int function; this path returns 1 while the mixed-oligos error path above returns 0
1314                 
1315         }
1316         catch(exception& e) {
1317                 m->errorOut(e, "SRACommand", "readOligos");
1318                 exit(1);
1319         }
1320 }
1321 //********************************************************************/
1322 string SRACommand::reverseOligo(string oligo){
1323         try {
1324         string reverse = "";
1325         
1326         for(int i=oligo.length()-1;i>=0;i--){
1327             
1328             if(oligo[i] == 'A')         {       reverse += 'T'; }
1329             else if(oligo[i] == 'T'){   reverse += 'A'; }
1330             else if(oligo[i] == 'U'){   reverse += 'A'; }
1331             
1332             else if(oligo[i] == 'G'){   reverse += 'C'; }
1333             else if(oligo[i] == 'C'){   reverse += 'G'; }
1334             
1335             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1336             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1337             
1338             else if(oligo[i] == 'M'){   reverse += 'K'; }
1339             else if(oligo[i] == 'K'){   reverse += 'M'; }
1340             
1341             else if(oligo[i] == 'W'){   reverse += 'W'; }
1342             else if(oligo[i] == 'S'){   reverse += 'S'; }
1343             
1344             else if(oligo[i] == 'B'){   reverse += 'V'; }
1345             else if(oligo[i] == 'V'){   reverse += 'B'; }
1346             
1347             else if(oligo[i] == 'D'){   reverse += 'H'; }
1348             else if(oligo[i] == 'H'){   reverse += 'D'; }
1349             
1350             else                                                {       reverse += 'N'; }
1351         }
1352         
1353         
1354         return reverse;
1355     }
1356         catch(exception& e) {
1357                 m->errorOut(e, "SRACommand", "reverseOligo");
1358                 exit(1);
1359         }
1360 }
1361 //********************************************************************/
1362 //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
1363 bool SRACommand::checkCasesPlatforms(string& platform){
1364         try {
1365         string original = platform;
1366         bool isOkay = true;
1367         
1368         //remove users possible case errors
1369         for (int i = 0; i < platform.size(); i++) { platform[i] = toupper(platform[i]); }
1370         
1371         //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
1372         
1373             if ((platform == "_LS454") || (platform == "ILLUMINA") || (platform == "ION_TORRENT") || (platform == "PACBIO_SMRT") || (platform == "454")) { }
1374             else { isOkay = false; }
1375         
1376             if (isOkay) {
1377                 if (platform == "454")   {  platform = "_LS454"; }
1378             }else {
1379                 m->mothurOut("[ERROR]: " + original + " is not a valid platform option.  Valid platform options are _LS454, ILLUMINA-ION, TORRENT or PACBIO_SMRT."); m->mothurOutEndLine(); abort = true;
1380             }
1381             
1382             return isOkay;
1383     }
1384         catch(exception& e) {
1385                 m->errorOut(e, "SRACommand", "checkCasesPlatforms");
1386                 exit(1);
1387         }
1388 }
1389 //********************************************************************/
1390 //454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified
1391 bool SRACommand::checkCasesInstrumentModels(string& instrumentModel){
1392         try {
1393         string original = instrumentModel;
1394         bool isOkay = true;
1395         
1396         //remove users possible case errors
1397         for (int i = 0; i < instrumentModel.size(); i++) { instrumentModel[i] = toupper(instrumentModel[i]); }
1398         
1399         //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
1400         if (platform == "_LS454") { //instrument model options are 454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-unspecified
1401             if ((instrumentModel == "454_GS") || (instrumentModel == "454_GS_20") || (instrumentModel == "454_GS_FLX") || (instrumentModel == "454_GS_FLX_TITANIUM") || (instrumentModel == "454_GS_JUNIOR") || (instrumentModel == "UNSPECIFIED")) { }
1402             else { isOkay = false; }
1403             if (isOkay) {
1404                 if (instrumentModel == "454_GS_FLX_TITANIUM")   {  instrumentModel = "454_GS_FLX_Titanium"; }
1405                 if (instrumentModel == "454_GS_JUNIOR")         {  instrumentModel = "454_GS_Junior";       }
1406                 if (instrumentModel == "UNSPECIFIED")           {  instrumentModel = "unspecified";         }
1407             }else {
1408                 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform.  Valid instrument options are 454_GS, 454_GS_20, 454_GS_FLX, 454_GS_FLX_Titanium, 454_GS_Junior or unspecified."); m->mothurOutEndLine(); abort = true;
1409             }
1410             
1411         }else if (platform == "ILLUMINA") { //instrument model options are Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-unspecified
1412             if ((instrumentModel == "ILLUMINA_GENOME_ANALYZER") || (instrumentModel == "ILLUMINA_GENOME_ANALYZER_II") || (instrumentModel == "ILLUMINA_GENOME_ANALYZER_IIX") || (instrumentModel == "ILLUMINA_HISEQ_2000") || (instrumentModel == "ILLUMINA_HISEQ_1000") || (instrumentModel == "ILLUMINA_MISEQ") || (instrumentModel == "UNSPECIFIED")) { }
1413             else { isOkay = false; }
1414             
1415             if (isOkay) {
1416                 if (instrumentModel == "ILLUMINA_GENOME_ANALYZER")          {  instrumentModel = "Illumina_Genome_Analyzer";        }
1417                 if (instrumentModel == "ILLUMINA_GENOME_ANALYZER_II")       {  instrumentModel = "Illumina_Genome_Analyzer_II";     }
1418                 if (instrumentModel == "ILLUMINA_GENOME_ANALYZER_IIX")      {  instrumentModel = "Illumina_Genome_Analyzer_IIx";    }
1419                 if (instrumentModel == "ILLUMINA_HISEQ_2000")               {  instrumentModel = "Illumina_HiSeq_2000";             }
1420                 if (instrumentModel == "ILLUMINA_HISEQ_1000")               {  instrumentModel = "Illumina_HiSeq_1000";             }
1421                 if (instrumentModel == "ILLUMINA_MISEQ")                    {  instrumentModel = "Illumina_MiSeq";                  }
1422                 if (instrumentModel == "UNSPECIFIED")                       {  instrumentModel = "unspecified";                     }
1423             }else {
1424                 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform.  Valid instrument options are Illumina_Genome_Analyzer, Illumina_Genome_Analyzer_II, Illumina_Genome_Analyzer_IIx, Illumina_HiSeq_2000, Illumina_HiSeq_1000, Illumina_MiSeq or unspecified."); m->mothurOutEndLine(); abort = true;
1425             }
1426             
1427         }else if (platform == "ION_TORRENT") { //instrument model options are Ion_Torrent_PGM-unspecified
1428             if ((instrumentModel == "ION_TORRENT_PGM")  || (instrumentModel == "UNSPECIFIED")) { }
1429             else { isOkay = false; }
1430             
1431             if (isOkay) {
1432                 if (instrumentModel == "ION_TORRENT_PGM")          {  instrumentModel = "Ion_Torrent_PGM";        }
1433                 if (instrumentModel == "UNSPECIFIED")              {  instrumentModel = "unspecified";            }
1434             }else {
1435                 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform.  Valid instrument options are Ion_Torrent_PGM or unspecified."); m->mothurOutEndLine(); abort = true;
1436             }
1437         }else if (platform == "PACBIO_SMRT") { //instrument model options are PacBio_RS-unspecified
1438             if ((instrumentModel == "PACBIO_RS")  || (instrumentModel == "UNSPECIFIED")) { }
1439             else { isOkay = false; }
1440             
1441             if (isOkay) {
1442                 if (instrumentModel == "PACBIO_RS")          {  instrumentModel = "PacBio_RS";        }
1443                 if (instrumentModel == "UNSPECIFIED")        {  instrumentModel = "unspecified";      }
1444             }else {
1445                 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform.  Valid instrument options are PacBio_RS or unspecified."); m->mothurOutEndLine(); abort = true;
1446             }
1447         }
1448         return isOkay;
1449     }
1450         catch(exception& e) {
1451                 m->errorOut(e, "SRACommand", "checkCasesInstrumentModels");
1452                 exit(1);
1453         }
1454 }
1455 //**********************************************************************************************************************
1456 //AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq,OTHER
1457 bool SRACommand::checkCasesLibStrategy(string& libStrategy){
1458         try {
1459         string original = libStrategy;
1460         bool isOkay = true;
1461         
1462         //remove users possible case errors
1463         for (int i = 0; i < libStrategy.size(); i++) { libStrategy[i] = toupper(libStrategy[i]); }
1464         
1465         if ((libStrategy == "AMPLICON") || (libStrategy == "WGA") || (libStrategy == "WGS") || (libStrategy == "WGX") || (libStrategy == "RNA-SEQ") || (libStrategy == "MIRNA-SEQ") || (libStrategy == "WCS") || (libStrategy == "CLONE") || (libStrategy == "POOLCLONE") || (libStrategy == "CLONEEND") || (libStrategy == "FINISHING") || (libStrategy == "CHIP-SEQ") || (libStrategy == "MNASE-SEQ") || (libStrategy == "DNASE-HYPERSENSITIVITY") || (libStrategy == "BISULFITE-SEQ") || (libStrategy == "TN-SEQ") || (libStrategy == "EST") || (libStrategy == "FL-CDNA") || (libStrategy == "CTS") || (libStrategy == "MRE-SEQ")|| (libStrategy == "MEDIP-SEQ") || (libStrategy == "MBD-SEQ") || (libStrategy == "OTHER")) { }
1466         else { isOkay = false; }
1467         
1468         if (isOkay) {
1469             if (libStrategy == "RNA-SEQ")                   {  libStrategy = "RNA-Seq";                 }
1470             if (libStrategy == "MIRNA-SEQ")                 {  libStrategy = "miRNA-Seq";               }
1471             if (libStrategy == "CHIP-SEQ")                  {  libStrategy = "ChIP-Seq";                }
1472             if (libStrategy == "MNASE-SEQ")                 {  libStrategy = "MNase-Seq";               }
1473             if (libStrategy == "DNASE-HYPERSENSITIVITY")    {  libStrategy = "DNase-Hypersensitivity";  }
1474             if (libStrategy == "BISULFITE-SEQ")             {  libStrategy = "Bisulfite-Seq";           }
1475             if (libStrategy == "TN-SEQ")                    {  libStrategy = "Tn-Seq";                  }
1476             if (libStrategy == "FL-CDNA")                   {  libStrategy = "FL-cDNA";                 }
1477             if (libStrategy == "MRE-SEQ")                   {  libStrategy = "MRE-Seq";                 }
1478             if (libStrategy == "MEDIP-SEQ")                 {  libStrategy = "MeDIP-Seq";               }
1479             }else {
1480             m->mothurOut("[ERROR]: " + original + " is not a valid libstrategy option.  Valid libstrategy options are AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq or OTHER."); m->mothurOutEndLine(); abort = true;
1481         }
1482         
1483         return isOkay;
1484     }
1485         catch(exception& e) {
1486                 m->errorOut(e, "SRACommand", "checkCasesLibStrategy");
1487                 exit(1);
1488         }
1489 }
1490
1491 //**********************************************************************************************************************
1492 //METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA,OTHER
1493 bool SRACommand::checkCasesLibSource(string& libSource){
1494         try {
1495         string original = libSource;
1496         bool isOkay = true;
1497         
1498         //remove users possible case errors
1499         for (int i = 0; i < libSource.size(); i++) { libSource[i] = toupper(libSource[i]); }
1500         
1501         if ((libSource == "METAGENOMIC") || (libSource == "GENOMIC") || (libSource == "TRANSCRIPTOMIC") || (libSource == "METATRANSCRIPTOMIC") || (libSource == "SYNTHETIC") || (libSource == "VIRAL_RNA") || (libSource == "OTHER")) { }
1502         else { isOkay = false; }
1503         
1504         if (isOkay) {
1505             
1506         }else {
1507             m->mothurOut("[ERROR]: " + original + " is not a valid libsource option.  Valid libsource options are METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA or OTHER."); m->mothurOutEndLine(); abort = true;
1508         }
1509         
1510         return isOkay;
1511     }
1512         catch(exception& e) {
1513                 m->errorOut(e, "SRACommand", "checkCasesLibStrategy");
1514                 exit(1);
1515         }
1516 }
1517
1518 //**********************************************************************************************************************
1519 //PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other,unspecified
1520 bool SRACommand::checkCasesLibSelection(string& libSelection){
1521         try {
1522         string original = libSelection;
1523         bool isOkay = true;
1524         
1525         //remove users possible case errors
1526         for (int i = 0; i < libSelection.size(); i++) { libSelection[i] = toupper(libSelection[i]); }
1527         
1528         if ((libSelection == "PCR") || (libSelection == "RANDOM") || (libSelection == "RANDOM_PCR") || (libSelection == "RT-PCR") || (libSelection == "HMPR") || (libSelection == "MF") || (libSelection == "CF-S") || (libSelection == "CF-H") || (libSelection == "CF-T") || (libSelection == "CF-M") || (libSelection == "MDA") || (libSelection == "MSLL") || (libSelection == "CDNA") || (libSelection == "CHIP") || (libSelection == "MNASE") || (libSelection == "DNASE") || (libSelection == "HYBRID_SELECTION") || (libSelection == "REDUCED_REPRESENTATION") || (libSelection == "RESTRICTION_DIGEST") || (libSelection == "5-METHYLCYTIDINE_ANTIBODY") || (libSelection == "MBD2_PROTEIN_METHYL-CPG_BINDING_DOMAIN") || (libSelection == "CAGE") || (libSelection == "RACE") || (libSelection == "SIZE_FRACTIONATION") || (libSelection == "PADLOCK_PROBES_CAPTURE_METHOD") || (libSelection == "OTHER") || (libSelection == "UNSPECIFIED")) { }
1529         else { isOkay = false; }
1530         
1531         if (isOkay) {
1532             if (libSelection == "CDNA")                                         {  libSelection = "cDNA";                                       }
1533             if (libSelection == "CHIP")                                         {  libSelection = "ChIP";                                       }
1534             if (libSelection == "MNASE")                                        {  libSelection = "MNase";                                      }
1535             if (libSelection == "DNASE")                                        {  libSelection = "DNAse";                                      }
1536             if (libSelection == "HYBRID_SELECTION")                             {  libSelection = "Hybrid_Selection";                           }
1537             if (libSelection == "REDUCED_REPRESENTATION")                       {  libSelection = "Reduced_Representation";                     }
1538             if (libSelection == "RESTRICTION_DIGEST")                           {  libSelection = "Restriction_Digest";                         }
1539             if (libSelection == "5-METHYLCYTIDINE_ANTIBODY")                    {  libSelection = "5-methylcytidine_antibody";                  }
1540             if (libSelection == "MBD2_PROTEIN_METHYL-CPG_BINDING_DOMAIN")       {  libSelection = "MBD2_protein_methyl-CpG_binding_domain";     }
1541             if (libSelection == "SIZE_FRACTIONATION")                           {  libSelection = "size_fractionation";                         }
1542             if (libSelection == "PADLOCK_PROBES_CAPTURE_METHOD")                {  libSelection = "Padlock_probes_capture_method";              }
1543             if (libSelection == "OTHER")                                        {  libSelection = "other";                                      }
1544             if (libSelection == "UNSPECIFIED")                                  {  libSelection = "unspecified";                                }
1545             
1546         }else {
1547             m->mothurOut("[ERROR]: " + original + " is not a valid libselection option.  Valid libselection options are PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other or unspecified."); m->mothurOutEndLine(); abort = true;
1548         }
1549         
1550         return isOkay;
1551     }
1552         catch(exception& e) {
1553                 m->errorOut(e, "SRACommand", "checkCasesLibSelection");
1554                 exit(1);
1555         }
1556 }
1557 //**********************************************************************************************************************
1558 //METAGENOME,GENOME_SEQUENCING,METAGENOMIC_ASSEMBLY,ASSEMBLY,TRANSCRIPTOME,PROTEOMIC,MAP,CLONE_ENDS,TARGETED_LOCI,RANDOM_SURVEY,EXOME,VARIATION,EPIGENOMICS,PHENOTYPE,GENOTYPE,OTHER
1559 bool SRACommand::checkCasesDataType(string& dataType){
1560         try {
1561         string original = dataType;
1562         bool isOkay = true;
1563         
1564         //remove users possible case errors
1565         for (int i = 0; i < dataType.size(); i++) { dataType[i] = toupper(dataType[i]); }
1566         
1567         if ((dataType == "METAGENOME") || (dataType == "GENOME_SEQUENCING") || (dataType == "METAGENOMIC_ASSEMBLY") || (dataType == "ASSEMBLY") || (dataType == "TRANSCRIPTOME") || (dataType == "PROTEOMIC") || (dataType == "MAP") || (dataType == "CLONE_ENDS") || (dataType == "TARGETED_LOCI") || (dataType == "RANDOM_SURVEY") || (dataType == "EXOME") || (dataType == "VARIATION") || (dataType == "EPIGENOMICS") || (dataType == "PHENOTYPE") || (dataType == "GENOTYPE") || (dataType == "OTHER")) { }
1568         else { isOkay = false; }
1569         
1570         if (isOkay) {
1571             
1572         }else {
1573             m->mothurOut("[ERROR]: " + original + " is not a valid datatype option.  Valid datatype options are METAGENOME,GENOME_SEQUENCING,METAGENOMIC_ASSEMBLY,ASSEMBLY,TRANSCRIPTOME,PROTEOMIC,MAP,CLONE_ENDS,TARGETED_LOCI,RANDOM_SURVEY,EXOME,VARIATION,EPIGENOMICS,PHENOTYPE,GENOTYPE,OTHER."); m->mothurOutEndLine(); abort = true;
1574         }
1575         
1576         return isOkay;
1577     }
1578         catch(exception& e) {
1579                 m->errorOut(e, "SRACommand", "checkCasesDataType");
1580                 exit(1);
1581         }
1582 }
1583 //**********************************************************************************************************************
1584 bool SRACommand::sanityCheckMiMarksGroups(){
1585         try {
1586         bool isOkay = true;
1587         
1588         for (int i = 0; i < Groups.size(); i++) {
1589             if (m->control_pressed) { break; }
1590             
1591             map<string, map<string, string> >::iterator it = mimarks.find(Groups[i]);
1592             if (it == mimarks.end()) {
1593                 isOkay = false;
1594                 m->mothurOut("[ERROR]: MIMarks file is missing group " + Groups[i] + ", please correct.\n");
1595             }
1596         }
1597         
1598         if (!isOkay) { m->control_pressed = true; }
1599         
1600         return isOkay;
1601     }
1602         catch(exception& e) {
1603                 m->errorOut(e, "SRACommand", "sanityCheckMiMarksGroups");
1604                 exit(1);
1605         }
1606 }
1607
1608 //**********************************************************************************************************************