]> git.donarmstrong.com Git - mothur.git/blob - sracommand.cpp
cde795585c49a71e758c7fe331f02f9aac3e3964
[mothur.git] / sracommand.cpp
1 //
2 //  sracommand.cpp
3 //  Mothur
4 //
5 //  Created by SarahsWork on 10/28/13.
6 //  Copyright (c) 2013 Schloss Lab. All rights reserved.
7 //
8
9 #include "sracommand.h"
10 #include "sffinfocommand.h"
11 #include "parsefastaqcommand.h"
12
13 //**********************************************************************************************************************
14 vector<string> SRACommand::setParameters(){
15         try {
16         CommandParameter psff("sff", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(psff);
17         CommandParameter poligos("oligos", "InputTypes", "", "", "oligos", "none", "none","",false,false,true); parameters.push_back(poligos);
18         CommandParameter pfile("file", "InputTypes", "", "", "sffFastQFile-oligos", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfile);
19                 CommandParameter pfastq("fastq", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfastq);
20         CommandParameter pcontact("project", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pcontact);
21         CommandParameter pmimark("mimark", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pmimark);
22         //choose only one multiple options
23         CommandParameter pplatform("platform", "Multiple", "_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT", "_LS454", "", "", "","",false,false); parameters.push_back(pplatform);
24         CommandParameter pinstrument("instrument", "Multiple", "454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified", "454_GS", "", "", "","",false,false); parameters.push_back(pinstrument);
25         CommandParameter plibstrategy("libstrategy", "String", "AMPLICON", "", "", "", "","",false,false); parameters.push_back(plibstrategy);
26         CommandParameter pdatatype("datatype", "String", "METAGENOME", "", "", "", "","",false,false); parameters.push_back(pdatatype);
27         CommandParameter plibsource("libsource", "String", "METAGENOMIC", "", "", "", "","",false,false); parameters.push_back(plibsource);
28         CommandParameter plibselection("libselection", "String", "PCR", "", "", "", "","",false,false); parameters.push_back(plibselection);
29         CommandParameter porientation("orientation", "Multiple", "forward-reverse", "forward", "", "", "","",false,false); parameters.push_back(porientation);
30         CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
31                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);
32         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
33                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
34         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
35         
36          //every command must have inputdir and outputdir.  This allows mothur users to redirect input and output files.
37                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
38                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
39                 
40                 vector<string> myArray;
41                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
42                 return myArray;
43         }
44         catch(exception& e) {
45                 m->errorOut(e, "SRACommand", "setParameters");
46                 exit(1);
47         }
48 }
49 //**********************************************************************************************************************
50 string SRACommand::getHelpString(){
51         try {
52                 string helpString = "";
53                 helpString += "The sra command creates the necessary files for a NCBI submission. The xml file and individual sff or fastq files parsed from the original sff or fastq file.\n";
54                 helpString += "The sra command parameters are: sff, fastq, file, oligos, project, mimarksfile, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, platform, orientation, libstrategy, datatype, libsource, libselection and instrument.\n";
55         helpString += "The sff parameter is used to provide the original sff file.\n";
56                 helpString += "The fastq parameter is used to provide the original fastq file.\n";
57         helpString += "The project parameter is used to provide your project file.\n";
58         helpString += "The oligos parameter is used to provide an oligos file to parse your sff or fastq file by. It is required and must contain barcodes and primers, or you must provide a file option. \n";
59         helpString += "The mimark parameter is used to provide your mimarks file.  You can create the template for this file using the get.mimarkspackage command.\n";
60                 helpString += "The file parameter is used to provide a file containing a list of individual fastq or sff files or paired fastq files with a group assignment. File lines can be 2 or 3 columns. The 2 column files are sff file then oligos or fastqfile then oligos. You may have multiple lines in the file.  The 3 column files are for paired read libraries. The format is groupName, forwardFastqFile reverseFastqFile.\n";
61         helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
62                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
63                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
64         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
65                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
66         helpString += "The platform parameter is used to specify platform you are using choices are: _LS454,ILLUMINA,ION_TORRENT,PACBIO_SMRT. Default=_LS454. This is a controlled vocabulary section in the XML file that will be generated.\n";
67         helpString += "The orientation parameter is used to specify sequence orientation. Choices are: forward and reverse. Default=forward. This is a controlled vocabulary section in the XML file that will be generated.\n";
68         helpString += "The instrument parameter is used to specify instrument. Choices are 454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified. Default=454_GS. This is a controlled vocabulary section in the XML file that will be generated. \n";
69         helpString += "The libstrategy parameter is used to specify library strategy. Default=AMPLICON. Choices are AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq,OTHER. This is a controlled vocabulary section in the XML file that will be generated.  \n";
70         helpString += "The libsource parameter is used to specify library source. Default=METAGENOMIC. Choices are METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA,OTHER. This is a controlled vocabulary section in the XML file that will be generated. \n";
71         helpString += "The libselection parameter is used to specify library selection. Default=PCR. Choices are PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other,unspecified. This is a controlled vocabulary section in the XML file that will be generated. \n";
72         helpString += "The datatype parameter is used to specify datatype. Default=METAGENOME. Choices are METAGENOME,GENOME_SEQUENCING,METAGENOMIC_ASSEMBLY,ASSEMBLY,TRANSCRIPTOME,PROTEOMIC,MAP,CLONE_ENDS,TARGETED_LOCI,RANDOM_SURVEY,EXOME,VARIATION,EPIGENOMICS,PHENOTYPE,GENOTYPE,OTHER. This is a controlled vocabulary section in the XML file that will be generated. \n";
73                 helpString += "sra(sff=sff=GHL4YHV01.sff, GHL4YHV01.oligos, project=test.project, mimark=MIMarksData.txt)\n";
74                 return helpString;
75         }
76         catch(exception& e) {
77                 m->errorOut(e, "SRACommand", "getHelpString");
78                 exit(1);
79         }
80 }
81 //**********************************************************************************************************************
82 string SRACommand::getOutputPattern(string type) {
83     try {
84         string pattern = "";
85         
86         if (type == "xml") {  pattern = "[filename],xml"; }
87         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
88         
89         return pattern;
90     }
91     catch(exception& e) {
92         m->errorOut(e, "SRACommand", "getOutputPattern");
93         exit(1);
94     }
95 }
96 //**********************************************************************************************************************
97 SRACommand::SRACommand(){
98         try {
99                 abort = true; calledHelp = true;
100                 setParameters();
101         vector<string> tempOutNames;
102                 outputTypes["xml"] = tempOutNames;
103         }
104         catch(exception& e) {
105                 m->errorOut(e, "SRACommand", "SRACommand");
106                 exit(1);
107         }
108 }
109 //**********************************************************************************************************************
110 SRACommand::SRACommand(string option)  {
111         try {
112                 abort = false; calledHelp = false;
113         libLayout = "single"; //controlled vocab
114                 
115                 //allow user to run help
116                 if(option == "help") { help(); abort = true; calledHelp = true; }
117                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
118                 
119                 else {
120                         //valid paramters for this command
121                         vector<string> myArray = setParameters();
122                         
123                         OptionParser parser(option);
124                         map<string,string> parameters = parser.getParameters();
125                         
126                         ValidParameters validParameter;
127                         map<string,string>::iterator it;
128                         //check to make sure all parameters are valid for command
129                         for (it = parameters.begin(); it != parameters.end(); it++) {
130                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
131                         }
132                         
133             vector<string> tempOutNames;
134             outputTypes["xml"] = tempOutNames;
135                         
136                         //if the user changes the input directory command factory will send this info to us in the output parameter
137                         string inputDir = validParameter.validFile(parameters, "inputdir", false);
138                         if (inputDir == "not found"){   inputDir = "";          }
139                         else {
140             
141                 string path;
142                                 it = parameters.find("sff");
143                                 //user has given a template file
144                                 if(it != parameters.end()){
145                                         path = m->hasPath(it->second);
146                                         //if the user has not given a path then, add inputdir. else leave path alone.
147                                         if (path == "") {       parameters["sff"] = inputDir + it->second;              }
148                                 }
149                                 
150                                 it = parameters.find("fastq");
151                                 //user has given a template file
152                                 if(it != parameters.end()){
153                                         path = m->hasPath(it->second);
154                                         //if the user has not given a path then, add inputdir. else leave path alone.
155                                         if (path == "") {       parameters["fastq"] = inputDir + it->second;            }
156                                 }
157                 
158                 it = parameters.find("file");
159                                 //user has given a template file
160                                 if(it != parameters.end()){
161                                         path = m->hasPath(it->second);
162                                         //if the user has not given a path then, add inputdir. else leave path alone.
163                                         if (path == "") {       parameters["file"] = inputDir + it->second;             }
164                                 }
165                 
166                 it = parameters.find("oligos");
167                                 //user has given a template file
168                                 if(it != parameters.end()){
169                                         path = m->hasPath(it->second);
170                                         //if the user has not given a path then, add inputdir. else leave path alone.
171                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
172                                 }
173                 
174                 it = parameters.find("project");
175                                 //user has given a template file
176                                 if(it != parameters.end()){
177                                         path = m->hasPath(it->second);
178                                         //if the user has not given a path then, add inputdir. else leave path alone.
179                                         if (path == "") {       parameters["project"] = inputDir + it->second;          }
180                                 }
181                 
182                 it = parameters.find("mimark");
183                                 //user has given a template file
184                                 if(it != parameters.end()){
185                                         path = m->hasPath(it->second);
186                                         //if the user has not given a path then, add inputdir. else leave path alone.
187                                         if (path == "") {       parameters["mimark"] = inputDir + it->second;           }
188                                 }
189             }
190             
191                         //check for parameters
192             fastqfile = validParameter.validFile(parameters, "fastq", true);
193                         if (fastqfile == "not open") { fastqfile = "";  abort = true; }
194                         else if (fastqfile == "not found") { fastqfile = ""; }
195                         
196                         sfffile = validParameter.validFile(parameters, "sff", true);
197                         if (sfffile == "not open") {  sfffile = "";  abort = true; }
198                         else if (sfffile == "not found") { sfffile = ""; }
199             
200             file = validParameter.validFile(parameters, "file", true);
201                         if (file == "not open") {  file = "";  abort = true; }
202                         else if (file == "not found") { file = ""; }
203             
204             oligosfile = validParameter.validFile(parameters, "oligos", true);
205                         if (oligosfile == "not found")      {  oligosfile = "";     }
206             else if(oligosfile == "not open")   {       abort = true;           }
207                         else {  m->setOligosFile(oligosfile); }
208             
209             contactfile = validParameter.validFile(parameters, "project", true);
210                         if (contactfile == "not found")      {  contactfile = ""; m->mothurOut("[ERROR]: You must provide a project file before you can use the sra command."); m->mothurOutEndLine(); abort = true;    }
211                         else if(contactfile == "not open")      {       abort = true;           }
212             
213             mimarksfile = validParameter.validFile(parameters, "mimark", true);
214                         if (mimarksfile == "not found")      {  mimarksfile = ""; m->mothurOut("[ERROR]: You must provide a mimark file before you can use the sra command. You can create a template for this file using the get.mimarkspackage command."); m->mothurOutEndLine(); abort = true;       }
215                         else if(mimarksfile == "not open")      {       abort = true;           }
216             
217             file = validParameter.validFile(parameters, "file", true);
218                         if (file == "not open") {  file = "";  abort = true; }
219                         else if (file == "not found") { file = ""; }
220                         
221             if ((file == "") && (oligosfile == "")) {
222                 m->mothurOut("[ERROR]: You must provide an oligos file or file with oligos files in them before you can use the sra command."); m->mothurOutEndLine(); abort = true;
223             }
224             
225                         if ((fastqfile == "") && (file == "") && (sfffile == "")) {
226                 m->mothurOut("[ERROR]: You must provide a file, sff file or fastq file before you can use the sra command."); m->mothurOutEndLine(); abort = true;
227             }
228             
229             //use only one Mutliple type _LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
230                         platform = validParameter.validFile(parameters, "platform", false);         if (platform == "not found") { platform = "_LS454"; }
231                         if (!checkCasesPlatforms(platform)) { abort = true; } //error message in function
232                                  
233             if (!abort) { //don't check instrument model is platform is bad
234                 //454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified
235                 instrumentModel = validParameter.validFile(parameters, "instrument", false);         if (instrumentModel == "not found") { instrumentModel = "454_GS"; }
236                 if (!checkCasesInstrumentModels(instrumentModel)) { abort = true; } //error message in function
237             }
238             //turn _ to spaces mothur's work around
239             for (int i = 0; i < instrumentModel.length(); i++) { if (instrumentModel[i] == '_') { instrumentModel[i] = ' '; } }
240             
241             libStrategy = validParameter.validFile(parameters, "libstrategy", false);         if (libStrategy == "not found") { libStrategy = "AMPLICON"; }
242             if (!checkCasesLibStrategy(libStrategy)) { abort = true; } //error message in function
243
244             //turn _ to spaces mothur's work around
245             for (int i = 0; i < libStrategy.length(); i++) { if (libStrategy[i] == '_') { libStrategy[i] = ' '; }  }
246             
247             libSource = validParameter.validFile(parameters, "libsource", false);         if (libSource == "not found") { libSource = "METAGENOMIC"; }
248             if (!checkCasesLibSource(libSource)) { abort = true; } //error message in function
249             
250             //turn _ to spaces mothur's work around
251             for (int i = 0; i < libSource.length(); i++) { if (libSource[i] == '_') { libSource[i] = ' '; }  }
252             
253             libSelection = validParameter.validFile(parameters, "libselection", false);         if (libSelection == "not found") { libSelection = "PCR"; }
254             if (!checkCasesLibSelection(libSelection)) { abort = true; } //error message in function
255             
256             //turn _ to spaces mothur's work around
257             for (int i = 0; i < libSelection.length(); i++) { if (libSelection[i] == '_') { libSelection[i] = ' '; }  }
258             
259             dataType = validParameter.validFile(parameters, "datatype", false);         if (dataType == "not found") { dataType = "METAGENOME"; }
260             if (!checkCasesDataType(dataType)) { abort = true; } //error message in function
261             
262             //turn _ to spaces mothur's work around
263             for (int i = 0; i < dataType.length(); i++) { if (dataType[i] == '_') { dataType[i] = ' '; }  }
264             
265             orientation = validParameter.validFile(parameters, "orientation", false);         if (orientation == "not found") { orientation = "forward"; }
266             
267             if ((orientation == "forward") || (orientation == "reverse")) {  }
268             else {  m->mothurOut("[ERROR]: " + orientation + " is not a valid orientation option. Choices are: forward and reverse.\n"); m->mothurOutEndLine(); abort = true; }
269
270             
271             string temp = validParameter.validFile(parameters, "bdiffs", false);                if (temp == "not found"){       temp = "0";             }
272                         m->mothurConvert(temp, bdiffs);
273                         
274                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found"){       temp = "0";             }
275                         m->mothurConvert(temp, pdiffs);
276                         
277             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
278                         m->mothurConvert(temp, ldiffs);
279             
280             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
281                         m->mothurConvert(temp, sdiffs);
282                         
283                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
284                         m->mothurConvert(temp, tdiffs);
285                         
286                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
287                                 
288                 }
289                 
290         }
291         catch(exception& e) {
292                 m->errorOut(e, "SRACommand", "SRACommand");
293                 exit(1);
294         }
295 }
296 //**********************************************************************************************************************
297 int SRACommand::execute(){
298         try {
299                 
300                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
301         
302         readContactFile();
303         readMIMarksFile();
304         if (oligosfile != "") { readOligos(); Groups.push_back("scrap"); }
305         
306         if (m->control_pressed) { return 0; }
307         
308         //parse files
309         map<string, vector<string> > filesBySample;
310         isSFF = false;
311         
312         if (file != "")             {       readFile(filesBySample);        }
313         else if (sfffile != "")     {       parseSffFile(filesBySample);    }
314         else if (fastqfile != "")   {       parseFastqFile(filesBySample);  }
315         
316         for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) {  Groups.push_back(*it); }
317         
318         sanityCheckMiMarksGroups();
319         
320         //checks groups and files returned from parse - removes any groups that did not get reads assigned to them, orders files.
321         checkGroups(filesBySample);
322         
323         //create xml file
324         string thisOutputDir = outputDir;
325         if (outputDir == "") {  thisOutputDir += m->hasPath(inputfile);  }
326                 map<string, string> variables;
327         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(inputfile));
328         string outputFileName = getOutputFileName("xml", variables);
329         outputNames.push_back(outputFileName); outputTypes["xml"].push_back(outputFileName);
330         ofstream out;
331         m->openOutputFile(outputFileName, out);
332         
333         //contacts portion
334         ////////////////////////////////////////////////////////
335         out << "<Submission>\n";
336         out << "\t<Description>\n";
337         out << "\t\t<Comment> New Submission. Generated by mothur version " + m->getVersion() + " </Comment> \n";
338         out << "\t\t<Submitter user_name=\"" + submissionName + "\"/>\n";
339         out << "\t\t<Organization type=\"" + centerType + "\">\n";
340         out << "\t\t<Name>" + centerName + "</Name>\n";
341         out << "\t\t<Contact> email=\"" + email + "\">\n";
342         out << "\t\t\t<Name>\n";
343         out << "\t\t\t\t<First>" + firstName + "</First>\n";
344         out << "\t\t\t\t<Last>" + firstName + "</Last>\n";
345         out << "\t\t\t</Name>\n";
346         out << "\t\t</Contact>\n";
347         out << "\t\t</Organization>\n";
348         out << "\t</Description>\n";
349         ////////////////////////////////////////////////////////
350         
351         //bioproject
352         ////////////////////////////////////////////////////////
353         out << "\t<Action>\n";
354         out << "\t\t<AddData target_db=\"BioProject\">\n";
355         out << "\t\t\t<Data content_type=\"XML\">\n";
356         out << "\t\t\t\t<XmlContent>\n";
357         out << "\t\t\t\t\t<Project schema_version=\"2.0\">\n";
358         out << "\t\t\t\t\t\t<ProjectID>\n";
359         out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + projectName + " </SPUID> \n";
360         out << "\t\t\t\t\t\t</ProjectID>\n";
361         out << "\t\t\t\t\t\t<Descriptor>\n";
362         out << "\t\t\t\t\t\t\t<Title>" + projectTitle + " </Title> \n";
363         out << "\t\t\t\t\t\t\t<Description><p>" + description + "</p></Description> \n";
364         if (website != "") {
365             out << "\t\t\t\t\t\t\t<ExternalLink label=\"Website name\">\n";
366             out << "\t\t\t\t\t\t\t\t<URL>" + website + "</URL>\n";
367             out << "\t\t\t\t\t\t\t</ExternalLink>\n";
368         }
369         out << "\t\t\t\t\t\t</Descriptor>\n";
370         out << "\t\t\t\t\t\t<ProjectType>\n";
371         out << "\t\t\t\t\t\t\t<ProjectTypeSubmission sample_scope=\"eEnvironment\">\n";
372         out << "\t\t\t\t\t\t\t\t<IntendedDataTypeSet>\n";
373         out << "\t\t\t\t\t\t\t\t\t<DataType>" + dataType + " </DataType> \n";
374         out << "\t\t\t\t\t\t\t\t</IntendedDataTypeSet>\n";
375         out << "\t\t\t\t\t\t\t</ProjectTypeSubmission>\n";
376         out << "\t\t\t\t\t\t</ProjectType>\n";
377         out << "\t\t\t\t\t</Project>\n";
378         out << "\t\t\t\t</XmlContent>\n";
379         out << "\t\t\t</Data>\n";
380         out << "\t\t\t<Identifier>\n";
381         out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + projectName + " </SPUID> \n";
382         out << "\t\t\t</Identifier>\n";
383         out << "\t\t</AddData>\n";
384         out << "\t</Action>\n";
385         ////////////////////////////////////////////////////////
386         
387         //bioSample
388         ////////////////////////////////////////////////////////
389         for (int i = 0; i < Groups.size(); i++) {
390             
391             string barcodeForThisSample = Group2Barcode[Groups[i]][0];
392             
393             if (m->control_pressed) { break; }
394             out << "\t<Action>\n";
395             out << "\t\t<AddData target_db=\"BioSample\">\n";
396             out << "\t\t\t<Data content_type=\"XML\">\n";
397             out << "\t\t\t\t<XmlContent>\n";
398             out << "\t\t\t\t\t<BioSample schema_version=\"2.0\">\n";
399             out << "\t\t\t\t\t\t<SampleId>\n";
400             out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + Groups[i] + " </SPUID> \n";
401             out << "\t\t\t\t\t\t</SampleId>\n";
402             out << "\t\t\t\t\t\t<Organism>\n";
403             string organismName = "metagenome";
404             map<string, string>::iterator itOrganism = Group2Organism.find(Groups[i]);
405             if (itOrganism != Group2Organism.end()) { organismName = itOrganism->second; } //user supplied acceptable organism, so use it.
406             out << "\t\t\t\t\t\t\t<OrganismName>" + organismName + " </OrganismName> \n";
407             out << "\t\t\t\t\t\t</Organism>\n";
408             out << "\t\t\t\t\t\t<Package>" + packageType + "</Package>n";
409             out << "\t\t\t\t\t\t<Attributes>n";
410             //add biosample required attributes
411             map<string, map<string, string> >:: iterator it = mimarks.find(Groups[i]);
412             if (it != mimarks.end()) {
413                 map<string, string> categories = it->second;
414                 for (map<string, string>:: iterator it2 = categories.begin(); it2 != categories.end(); it2++) {
415                     if (m->control_pressed) { break; }
416                     out << "\t\t\t\t\t\t\t<Attribute attribute_name=\"" + it2->first + "\">\"" + it2->second + "\"</Attribute>\n";
417                 }
418             }
419             out << "\t\t\t\t\t\t</Attributes>n";
420             out << "\t\t\t\t\t</BioSample>\n";
421             out << "\t\t\t\t</XmlContent>\n";
422             out << "\t\t\t</Data>\n";
423             out << "\t\t\t<Identifier>\n";
424             out << "\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + Groups[i] + " </SPUID>\n";
425             out << "\t\t\t</Identifier>\n";
426             out << "\t\t</AddData>\n";
427             out << "\t</Action>\n";
428         }
429         
430         //File objects
431         ////////////////////////////////////////////////////////
432         for (int i = 0; i < Groups.size(); i++) {
433             
434             vector<string> thisGroupsFiles = filesBySample[Groups[i]];
435             string barcodeForThisSample = Group2Barcode[Groups[i]][0];
436             
437             for (int j = 0; j < thisGroupsFiles.size(); j++) {
438                 string libId = thisGroupsFiles[j] + "." + barcodeForThisSample;
439                 
440                 if (m->control_pressed) { break; }
441                 out << "\t<Action>\n";
442                 out << "\t\t<AddFiles target_db=\"SRA\">\n";
443                 if (libLayout == "paired") { //adjust the libID because the thisGroupsFiles[j] contains two filenames
444                     vector<string> pieces = m->splitWhiteSpace(thisGroupsFiles[j]);
445                     libId = pieces[0] + barcodeForThisSample;
446                     out << "\t\t\t<File file_path=\"" + pieces[0] + "\">\n";
447                     out << "\t\t\t\t<DataType>generic-data</DataType> \n";
448                     out << "\t\t\t</File>\n";
449                     vector<string> thisBarcodes; m->splitAtChar(Group2Barcode[Groups[i]][0], thisBarcodes, '.');
450                     string forwardBarcode = thisBarcodes[0];
451                     string reverseBarcode = thisBarcodes[1];
452                     vector<string> thisPrimers; m->splitAtChar(Group2Primer[Groups[i]][0], thisPrimers, '.');
453                     string forwardPrimer = thisPrimers[0];
454                     string reversePrimer = thisPrimers[1];
455                     //attributes
456                     out << "\t\t\t<Attribute name=\"title\">" + mimarks[Groups[i]]["title"] + "</Attribute>\n";
457                     out << "\t\t\t<Attribute name=\"BarCode\">" + forwardBarcode + "</Attribute>\n";
458                     out << "\t\t\t<Attribute name=\"primer\">" + forwardPrimer + "</Attribute>\n";
459                     out << "\t\t\t<Attribute name=\"read_type\">forward</Attribute>\n";
460                     out << "\t\t\t<Attribute name=\"library_name\">" + libId + "</Attribute>\n";
461                     out << "\t\t\t<Attribute name=\"library_strategy\">" + libStrategy + "</Attribute>\n";
462                     out << "\t\t\t<Attribute name=\"library_source\">" + libSource + "</Attribute>\n";
463                     out << "\t\t\t<Attribute name=\"library_selection\">" + libSelection + "</Attribute>\n";
464                     out << "\t\t\t<Attribute name=\"library_layout\">" + libLayout + "</Attribute>\n";
465                     out << "\t\t\t<Attribute name=\"instrument_model\">" + instrumentModel + "</Attribute>\n";
466                     out << "\t\t\t<Attribute name=\"library_construction_protocol\">" + mimarks[Groups[i]]["seq_methods"] + "</Attribute>\n";
467
468                     out << "\t\t\t<File file_path=\"" + pieces[1] + "\">\n";
469                     out << "\t\t\t\t<DataType>generic-data</DataType> \n";
470                     out << "\t\t\t</File>\n";
471                     out << "\t\t\t<Attribute name=\"title\">" + mimarks[Groups[i]]["title"] + "</Attribute>\n";
472                     out << "\t\t\t<Attribute name=\"BarCode\">" + reverseBarcode + "</Attribute>\n";
473                     out << "\t\t\t<Attribute name=\"primer\">" + reversePrimer + "</Attribute>\n";
474                     out << "\t\t\t<Attribute name=\"read_type\">reverse</Attribute>\n";
475                     out << "\t\t\t<Attribute name=\"library_name\">" + libId + "</Attribute>\n";
476                     out << "\t\t\t<Attribute name=\"library_strategy\">" + libStrategy + "</Attribute>\n";
477                     out << "\t\t\t<Attribute name=\"library_source\">" + libSource + "</Attribute>\n";
478                     out << "\t\t\t<Attribute name=\"library_selection\">" + libSelection + "</Attribute>\n";
479                     out << "\t\t\t<Attribute name=\"library_layout\">" + libLayout + "</Attribute>\n";
480                     out << "\t\t\t<Attribute name=\"instrument_model\">" + instrumentModel + "</Attribute>\n";
481                     out << "\t\t\t<Attribute name=\"library_construction_protocol\">" + mimarks[Groups[i]]["seq_methods"] + "</Attribute>\n";
482
483                 }else { //single
484                     out << "\t\t\t<File file_path=\"" + thisGroupsFiles[j] + "\">\n";
485                     out << "\t\t\t\t<DataType>generic-data</DataType> \n";
486                     out << "\t\t\t</File>\n";
487                     //attributes
488                     out << "\t\t\t<Attribute name=\"title\">" + mimarks[Groups[i]]["title"] + "</Attribute>\n";
489                     out << "\t\t\t<Attribute name=\"BarCode\">" + Group2Barcode[Groups[i]][0] + "</Attribute>\n";
490                     out << "\t\t\t<Attribute name=\"primer\">" + Group2Primer[Groups[i]][0] + "</Attribute>\n";
491                     out << "\t\t\t<Attribute name=\"read_type\">" + orientation + "</Attribute>\n";
492                     out << "\t\t\t<Attribute name=\"library_name\">" + libId + "</Attribute>\n";
493                     out << "\t\t\t<Attribute name=\"library_strategy\">" + libStrategy + "</Attribute>\n";
494                     out << "\t\t\t<Attribute name=\"library_source\">" + libSource + "</Attribute>\n";
495                     out << "\t\t\t<Attribute name=\"library_selection\">" + libSelection + "</Attribute>\n";
496                     out << "\t\t\t<Attribute name=\"library_layout\">" + libLayout + "</Attribute>\n";
497                     out << "\t\t\t<Attribute name=\"instrument_model\">" + instrumentModel + "</Attribute>\n";
498                     out << "\t\t\t<Attribute name=\"library_construction_protocol\">" + mimarks[Groups[i]]["seq_methods"] + "</Attribute>\n";
499
500                 }
501                 ///////////////////bioProject info
502                 out << "\t\t\t<AttributeRefId name=\"BioProject\">\n";
503                 out << "\t\t\t\t<RefId>\n";
504                 out << "\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + projectName + " </SPUID> \n";
505                 out << "\t\t\t\t</RefId>\n";
506                 out << "\t\t\t</AttributeRefId>\n";
507                 //////////////////bioSample info
508                 out << "\t\t\t<AttributeRefId name=\"BioSample\">\n";
509                 out << "\t\t\t\t<RefId>\n";
510                 out << "\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + Groups[i] + " </SPUID>\n";
511                 out << "\t\t\t\t</RefId>\n";
512                 out << "\t\t\t</AttributeRefId>\n";
513                 //libID
514                 out << "\t\t\t<Identifier>\n";
515                 if (libLayout == "paired") { //adjust the libID because the thisGroupsFiles[j] contains two filenames
516                     vector<string> pieces = m->splitWhiteSpace(thisGroupsFiles[j]);
517                     libId = pieces[0] + barcodeForThisSample;
518                 }
519                 out << "\t\t\t\t<LocalId>" + libId + " </LocalId>\n";
520                 out << "\t\t\t</Identifier>\n";
521                 out << "\t\t</AddFiles>\n";
522                 out << "\t</Action>\n";
523             }
524         }
525         out << "</Submission>\n";
526         out.close();
527         
528         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
529                 
530         //output files created by command
531                 m->mothurOutEndLine();
532                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
533                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
534                 m->mothurOutEndLine();
535         return 0;
536                 
537     }
538         catch(exception& e) {
539                 m->errorOut(e, "SRACommand", "SRACommand");
540                 exit(1);
541         }
542 }
543 //**********************************************************************************************************************
544 int SRACommand::readContactFile(){
545         try {
546         lastName = ""; firstName = ""; submissionName = ""; email = ""; centerName = ""; centerType = ""; description = ""; website = ""; projectName = "";
547         projectTitle = ""; grantAgency = ""; grantId = ""; grantTitle = "";
548         
549         ifstream in;
550         m->openInputFile(contactfile, in);
551         
552         while(!in.eof()) {
553             
554             if (m->control_pressed) { break; }
555             
556             string key, value;
557             in >> key; m->gobble(in);
558             value = m->getline(in); m->gobble(in);
559             
560             for (int i = 0; i < key.length(); i++) { key[i] = toupper(key[i]); }
561             
562             if (key == "USERNAME")          {   submissionName = value; }
563             else if (key == "LAST")         {   lastName = value;       }
564             else if (key == "FIRST")        {   firstName = value;      }
565             else if (key == "EMAIL")        {   email = value;          }
566             else if (key == "CENTER")       {   centerName = value;     }
567             else if (key == "TYPE")         {
568                 centerType = value;
569                 for (int i = 0; i < centerType.length(); i++) { centerType[i] = tolower(centerType[i]); }
570                 if ((centerType == "consortium") || (centerType == "center") ||  (centerType == "institute") ||  (centerType == "lab")) {}
571                 else { m->mothurOut("[ERROR]: " + centerType + " is not a center type option.  Valid center type options are consortium, center, institute and lab. This is a controlled vocabulary section in the XML file that will be generated."); m->mothurOutEndLine(); m->control_pressed = true; }
572             }else if (key == "DESCRIPTION")     {   description = value;    }
573             else if (key == "WEBSITE")          {   website = value;        }
574             else if (key == "PROJECTNAME")      {   projectName = value;    }
575             else if (key == "PROJECTTITLE")     {   projectTitle = value;   }
576             else if (key == "GRANTID")          {   grantId = value;        }
577             else if (key == "GRANTTITLE")       {   grantTitle = value;     }
578             else if (key == "GRANTAGENCY")      {   grantAgency = value;    }
579         }
580         in.close();
581         
582         if (lastName == "") { m->mothurOut("[ERROR]: missing last name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
583         if (firstName == "") { m->mothurOut("[ERROR]: missing first name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
584         if (submissionName == "") { m->mothurOut("[ERROR]: missing submission name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
585         if (email == "") { m->mothurOut("[ERROR]: missing email from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
586         if (centerName == "") { m->mothurOut("[ERROR]: missing center name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
587         if (centerType == "") { m->mothurOut("[ERROR]: missing center type from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
588         if (description == "") { m->mothurOut("[ERROR]: missing description from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
589         if (projectTitle == "") { m->mothurOut("[ERROR]: missing project title from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
590         if (projectName == "") { m->mothurOut("[ERROR]: missing project name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
591
592         return 0;
593     }
594         catch(exception& e) {
595                 m->errorOut(e, "SRACommand", "readContactFile");
596                 exit(1);
597         }
598 }
599 //**********************************************************************************************************************
600 //air, host_associated, human_associated, human_gut, human_oral, human_skin, human_vaginal, microbial, miscellaneous, plant_associated, sediment, soil, wastewater or water
601 //all packages require: *sample_name    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon
602 //air: *altitude
603 //host_associated, human_associated, human_gut, human_oral, human_skin, human_vaginal, plant_associated: *host
604 //microbial, sediment, soil: *depth     *elev
605 //water: *depth
606 int SRACommand::readMIMarksFile(){
607         try {
608         //acceptable organisms
609         vector<string> acceptableOrganisms;
610         bool organismError = false;
611         //ecological
612         acceptableOrganisms.push_back("activated carbon metagenome"); acceptableOrganisms.push_back("activated sludge metagenome"); acceptableOrganisms.push_back("air metagenome"); acceptableOrganisms.push_back("anaerobic digester metagenome"); acceptableOrganisms.push_back("ant fungus garden metagenome"); acceptableOrganisms.push_back("aquatic metagenome"); acceptableOrganisms.push_back("activated carbon metagenome"); acceptableOrganisms.push_back("activated sludge metagenome"); acceptableOrganisms.push_back("beach sand metagenome"); acceptableOrganisms.push_back("biofilm metagenome"); acceptableOrganisms.push_back("biofilter metagenome"); acceptableOrganisms.push_back("biogas fermenter metagenome"); acceptableOrganisms.push_back("bioreactor metagenome"); acceptableOrganisms.push_back("bioreactor sludge metagenome"); acceptableOrganisms.push_back("clinical metagenome"); acceptableOrganisms.push_back("coal metagenome"); acceptableOrganisms.push_back("compost metagenome"); acceptableOrganisms.push_back("dust metagenome"); acceptableOrganisms.push_back("fermentation metagenome"); acceptableOrganisms.push_back("food fermentation metagenome"); acceptableOrganisms.push_back("food metagenome"); acceptableOrganisms.push_back("freshwater metagenome"); acceptableOrganisms.push_back("freshwater sediment metagenome"); acceptableOrganisms.push_back("groundwater metagenome"); acceptableOrganisms.push_back("halite metagenome"); acceptableOrganisms.push_back("hot springs metagenome"); acceptableOrganisms.push_back("hydrocarbon metagenome"); acceptableOrganisms.push_back("hydrothermal vent metagenome"); acceptableOrganisms.push_back("hypersaline lake metagenome"); acceptableOrganisms.push_back("ice metagenome"); acceptableOrganisms.push_back("indoor metagenome"); acceptableOrganisms.push_back("industrial waste metagenome"); acceptableOrganisms.push_back("mangrove metagenome"); acceptableOrganisms.push_back("marine metagenome"); acceptableOrganisms.push_back("marine sediment metagenome"); acceptableOrganisms.push_back("microbial mat metagenome"); acceptableOrganisms.push_back("mine drainage metagenome"); acceptableOrganisms.push_back("mixed culture metagenome"); acceptableOrganisms.push_back("oil production facility metagenome"); acceptableOrganisms.push_back("paper pulp metagenome"); acceptableOrganisms.push_back("permafrost metagenome"); acceptableOrganisms.push_back("plastisphere metagenome"); acceptableOrganisms.push_back("power plant metagenome"); acceptableOrganisms.push_back("retting rhizosphere metagenome"); acceptableOrganisms.push_back("rock metagenome"); acceptableOrganisms.push_back("salt lake metagenome"); acceptableOrganisms.push_back("saltern metagenome"); acceptableOrganisms.push_back("sediment metagenome"); acceptableOrganisms.push_back("snow metagenome"); acceptableOrganisms.push_back("soil metagenome"); acceptableOrganisms.push_back("stromatolite metagenome"); acceptableOrganisms.push_back("terrestrial metagenome"); acceptableOrganisms.push_back("tomb wall metagenome"); acceptableOrganisms.push_back("wastewater metagenome"); acceptableOrganisms.push_back("wetland metagenome"); acceptableOrganisms.push_back("whale fall metagenome");
613         //oganismal
614         acceptableOrganisms.push_back("algae metagenome"); acceptableOrganisms.push_back("ant metagenome"); acceptableOrganisms.push_back("bat metagenome"); acceptableOrganisms.push_back("beetle metagenome"); acceptableOrganisms.push_back("bovine gut metagenome"); acceptableOrganisms.push_back("bovine metagenome"); acceptableOrganisms.push_back("chicken gut metagenome"); acceptableOrganisms.push_back("coral metagenome"); acceptableOrganisms.push_back("echinoderm metagenome"); acceptableOrganisms.push_back("endophyte metagenome"); acceptableOrganisms.push_back("epibiont metagenome"); acceptableOrganisms.push_back("fish metagenome"); acceptableOrganisms.push_back("fossil metagenome"); acceptableOrganisms.push_back("gill metagenome"); acceptableOrganisms.push_back("gut metagenome"); acceptableOrganisms.push_back("honeybee metagenome"); acceptableOrganisms.push_back("human gut metagenome"); acceptableOrganisms.push_back("human lung metagenome"); acceptableOrganisms.push_back("human metagenome"); acceptableOrganisms.push_back("human nasal/pharyngeal metagenome"); acceptableOrganisms.push_back("human oral metagenome"); acceptableOrganisms.push_back("human skin metagenome"); acceptableOrganisms.push_back("insect gut metagenome"); acceptableOrganisms.push_back("insect metagenome"); acceptableOrganisms.push_back("mollusc metagenome"); acceptableOrganisms.push_back("mosquito metagenome"); acceptableOrganisms.push_back("mouse gut metagenome"); acceptableOrganisms.push_back("mouse metagenome"); acceptableOrganisms.push_back("mouse skin metagenome"); acceptableOrganisms.push_back("nematode metagenome"); acceptableOrganisms.push_back("oral metagenome"); acceptableOrganisms.push_back("phyllosphere metagenome"); acceptableOrganisms.push_back("pig metagenome"); acceptableOrganisms.push_back("plant metagenome"); acceptableOrganisms.push_back("primate metagenome"); acceptableOrganisms.push_back("rat metagenome"); acceptableOrganisms.push_back("root metagenome"); acceptableOrganisms.push_back("sea squirt metagenome"); acceptableOrganisms.push_back("seed metagenome"); acceptableOrganisms.push_back("shoot metagenome"); acceptableOrganisms.push_back("skin metagenome"); acceptableOrganisms.push_back("snake metagenome"); acceptableOrganisms.push_back("sponge metagenome"); acceptableOrganisms.push_back("stomach metagenome"); acceptableOrganisms.push_back("symbiont metagenome"); acceptableOrganisms.push_back("termite gut metagenome"); acceptableOrganisms.push_back("termite metagenome"); acceptableOrganisms.push_back("upper respiratory tract metagenome"); acceptableOrganisms.push_back("urine metagenome"); acceptableOrganisms.push_back("viral metagenome"); acceptableOrganisms.push_back("wallaby gut metagenome"); acceptableOrganisms.push_back("wasp metagenome"); acceptableOrganisms.push_back("sythetic metagenome"); acceptableOrganisms.push_back("metagenome");
615         
616         vector<string> requiredFieldsForPackage;
617         requiredFieldsForPackage.push_back("sample_name"); requiredFieldsForPackage.push_back("organism");
618         requiredFieldsForPackage.push_back("collection_date"); requiredFieldsForPackage.push_back("biome");
619         requiredFieldsForPackage.push_back("feature"); requiredFieldsForPackage.push_back("material");
620         requiredFieldsForPackage.push_back("geo_loc_name"); requiredFieldsForPackage.push_back("lat_lon");
621         requiredFieldsForPackage.push_back("seq_methods"); requiredFieldsForPackage.push_back("title");
622         vector<string> chooseAtLeastOneForPackage;
623         
624         ifstream in;
625         m->openInputFile(mimarksfile, in);
626         
627         //read comments
628         string temp; packageType = "";
629         while(!in.eof()) {
630             
631             if (m->control_pressed) { break; }
632             temp = m->getline(in); m->gobble(in);
633             
634             if (m->debug) { m->mothurOut("[DEBUG]: " + temp + "\n"); }
635             
636             if (temp[0] == '#') {
637                 int pos = temp.find("Environmental");
638                 if (pos != string::npos) {
639                     for (int i = pos+14; i < temp.length(); i++) {
640                         if (!isspace(temp[i])) { packageType += temp[i]; }
641                         else { i+= temp.length(); }
642                     }
643                 }
644             }
645             else{ break; } //hit headers line
646          }
647         
648         vector<string> headers; m->splitAtChar(temp, headers, '\t');
649         m->removeBlanks(headers);
650         //remove * from required's
651         for (int i = 0; i < headers.size(); i++) {
652             if (headers[i][0] == '*') { headers[i] = headers[i].substr(1); }
653             if (headers[i][0] == '*') { headers[i] = headers[i].substr(1); chooseAtLeastOneForPackage.push_back(headers[i]); }  //secondary condition
654             if (m->debug) { m->mothurOut("[DEBUG]: " + headers[i] + "\n"); }
655         }
656         
657         if (m->debug) {  m->mothurOut("[DEBUG]: packageType = '" + packageType + "'\n");   }
658         
659         //check to make sure package has all its required parts
660         //MIMARKS.specimen.water.3.0
661         if (packageType == "MIMARKS.specimen.air.3.0") {   requiredFieldsForPackage.push_back("altitude");  }
662         else if ((packageType == "MIMARKS.specimen.host-associated.3.0") || (packageType == "MIMARKS.specimen.human-associated.3.0") || (packageType == "MIMARKS.specimen.human-gut.3.0") || (packageType == "MIMARKS.specimen.human-oral.3.0") || (packageType == "MIMARKS.specimen.human-skin.3.0") || (packageType == "MIMARKS.specimen.human-vaginal.3.0") || (packageType == "MIMARKS.specimen.plant-associated.3.0")) {  requiredFieldsForPackage.push_back("host");  }
663         else if ((packageType == "MIMARKS.specimen.microbial.3.0") || (packageType == "MIMARKS.specimen.sediment.3.0") || (packageType == "soil")) {   requiredFieldsForPackage.push_back("depth");  requiredFieldsForPackage.push_back("elev"); }
664         else if (packageType == "MIMARKS.specimen.water.3.0") {   requiredFieldsForPackage.push_back("depth");  }
665         else if ((packageType == "MIMARKS.specimen.miscellaneous.3.0") || (packageType == "wastewater")) { }
666         else {
667             m->mothurOut("[ERROR]: unknown package " + packageType + ", please correct.\n"); m->control_pressed = true; in.close(); return 0;
668         }
669         
670         if (!m->isSubset(headers, requiredFieldsForPackage)){
671             string requiredFields = "";
672             for (int i = 0; i < requiredFieldsForPackage.size()-1; i++) { requiredFields += requiredFieldsForPackage[i] + ", "; } requiredFields += requiredFieldsForPackage[requiredFieldsForPackage.size()-1];
673             m->mothurOut("[ERROR]: missing required fields for package, please correct. Required fields are " + requiredFields + ".\n"); m->control_pressed = true; in.close(); return 0;
674         }
675         
676         if (m->debug) {  m->mothurOut("[DEBUG]: chooseAtLeastOneForPackage.size() = " + toString(chooseAtLeastOneForPackage.size()) + "\n");   }
677         
678         if (!m->inUsersGroups(chooseAtLeastOneForPackage, headers)){ //returns true if any of the choose at least ones are in headers
679             string requiredFields = "";
680             for (int i = 0; i < chooseAtLeastOneForPackage.size()-1; i++) { requiredFields += chooseAtLeastOneForPackage[i] + ", "; cout << chooseAtLeastOneForPackage[i] << endl; }
681             if (chooseAtLeastOneForPackage.size() < 1) { requiredFields += chooseAtLeastOneForPackage[chooseAtLeastOneForPackage.size()-1]; }
682             m->mothurOut("[ERROR]: missing a choose at least one fields for the package, please correct. These are marked with '**'. Required fields are " + requiredFields + ".\n"); m->control_pressed = true; in.close(); return 0;
683         }
684         
685         map<string, bool> allNA;  for (int i = 1; i < headers.size(); i++) {  allNA[headers[i]] = true; }
686         while(!in.eof()) {
687             
688             if (m->control_pressed) { break; }
689             
690             temp = m->getline(in);  m->gobble(in);
691             
692             if (m->debug) { m->mothurOut("[DEBUG]: " + temp + "\n"); }
693             
694             string original = temp;
695             vector<string> linePieces; m->splitAtChar(temp, linePieces, '\t');
696             m->removeBlanks(linePieces);
697             
698             if (linePieces.size() != headers.size()) { m->mothurOut("[ERROR]: line: " + original + " contains " + toString(linePieces.size()) + " columns, but you have " + toString(headers.size()) + " column headers, please correct.\n"); m->control_pressed = true; }
699             else {
700                 map<string, map<string, string> >:: iterator it = mimarks.find(linePieces[0]);
701                 
702                 if (it == mimarks.end()) {
703                     map<string, string> categories;
704                     //start after *sample_name
705                     for (int i = 1; i < headers.size(); i++) {
706                         categories[headers[i]] = linePieces[i];
707                         //check the users inputs for appropriate organisms
708                         if (headers[i] == "organism") {
709                             if (!m->inUsersGroups(linePieces[i], acceptableOrganisms)) { //not an acceptable organism
710                                 organismError = true;
711                                 m->mothurOut("[WARNING]: " + linePieces[i]+ " is not an acceptable organism, changing to acceptable 'metagenome'. NCBI will allow you to modify the organism after submission.\n"); linePieces[i] = "metagenome"; categories[headers[i]] = linePieces[i];
712                             }
713                             Group2Organism[linePieces[0]] = linePieces[i];
714                         }
715                         if (linePieces[i] != "NA") {  allNA[headers[i]] = false;     }
716                     }
717                     
718                     //does this sample already match an existing sample?
719                     bool isOkaySample = true;
720                     for (map<string, map<string, string> >:: iterator it2 = mimarks.begin(); it2 != mimarks.end(); it2++) {
721                         if (m->control_pressed) { break; }
722                         bool allSame = true;
723                         for (int i = 1; i < headers.size(); i++) {
724                             if ((it2->second)[headers[i]] != categories[headers[i]]) { allSame = false; }
725                         }
726                         if (allSame) { m->mothurOut("[ERROR]: " + linePieces[0]+ " is a duplicate sample to " + it2->first + ". It has all the same attributes in the MIMarks file. Samples must have distinguishing features to be uploaded to the NCBI library, please correct.\n"); m->control_pressed = true; isOkaySample = false; }
727                     }
728                     if (isOkaySample) { mimarks[linePieces[0]] = categories; }
729                 }else {
730                     m->mothurOut("[ERROR]: " + linePieces[0]+ " is a duplicate sampleName. Sample names must be unique, please correct.\n"); m->control_pressed = true;
731                 }
732             }
733         }
734         in.close();
735         
736         //add in values for "scrap" group
737         map<string, string> categories;
738         //start after *sample_name
739         for (int i = 1; i < headers.size(); i++) {
740             categories[headers[i]] = "NA";
741             if (headers[i] == "organism")       { categories[headers[i]] = "metagenome"; }
742             if (headers[i] == "seq_methods")    { categories[headers[i]] = "these sequences were scrapped"; }
743             if (headers[i] == "title")          { categories[headers[i]] = "these sequences were scrapped"; }
744         }
745         mimarks["scrap"] = categories;
746         Group2Organism["scrap"] = "metagenome";
747         
748         if (organismError) {
749             string organismTypes = "";
750             for (int i = 0; i < acceptableOrganisms.size()-1; i++) { organismTypes += acceptableOrganisms[i] + ", "; }
751             organismTypes += acceptableOrganisms[acceptableOrganisms.size()-1];
752             m->mothurOut("\n[WARNING]: The acceptable organism choices are: " + organismTypes + ".\n\n\n");
753         }
754         
755         return 0;
756     }
757         catch(exception& e) {
758                 m->errorOut(e, "SRACommand", "readMIMarksFile");
759                 exit(1);
760         }
761 }
762
763 //**********************************************************************************************************************
764 // going to have to rework this to allow for other options --
765 /*
766  file option 1
767  
768  sfffile1   oligosfile1
769  sfffile2   oligosfile2
770  ...
771  
772  file option 2
773  
774  fastqfile1 oligosfile1
775  fastqfile2 oligosfile2
776  ...
777  
778  file option 3
779  
780  fastqfile  fastqfile   group
781  fastqfile  fastqfile   group
782  fastqfile  fastqfile   group
783  ...
784  
785 */
786
787 int SRACommand::readFile(map<string, vector<string> >& files){
788         try {
789         //vector<string> theseFiles;
790         inputfile = file;
791         files.clear();
792         
793         ifstream in;
794         m->openInputFile(file, in);
795         
796         while(!in.eof()) {
797             
798             if (m->control_pressed) { return 0; }
799             
800             string line = m->getline(in);  m->gobble(in);
801             vector<string> pieces = m->splitWhiteSpace(line);
802             
803             string group = "";
804             string thisFileName1, thisFileName2; thisFileName1 = ""; thisFileName2 = "";
805             if (pieces.size() == 2) {
806                 thisFileName1 = pieces[0];
807                 thisFileName2 = pieces[1];
808             }else if (pieces.size() == 3) {
809                 thisFileName1 = pieces[1];
810                 thisFileName2 = pieces[2];
811                 string group = pieces[0];
812                 libLayout = "paired";
813             }else {
814                 m->mothurOut("[ERROR]: file lines can be 2 or 3 columns. The 2 column files are sff file then oligos or fastqfile then oligos. You may have multiple lines in the file.  The 3 column files are for paired read libraries. The format is groupName, forwardFastqFile reverseFastqFile. \n"); m->control_pressed = true;
815             }
816             
817             if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", thisFileName1 = " + thisFileName1 + ", thisFileName2 = " + thisFileName2  + ".\n"); }
818             
819             //check to make sure both are able to be opened
820             ifstream in2;
821             int openForward = m->openInputFile(thisFileName1, in2, "noerror");
822             
823             //if you can't open it, try default location
824             if (openForward == 1) {
825                 if (m->getDefaultPath() != "") { //default path is set
826                     string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName1);
827                     m->mothurOut("Unable to open " + thisFileName1 + ". Trying default " + tryPath); m->mothurOutEndLine();
828                     ifstream in3;
829                     openForward = m->openInputFile(tryPath, in3, "noerror");
830                     in3.close();
831                     thisFileName1 = tryPath;
832                 }
833             }
834             
835             //if you can't open it, try output location
836             if (openForward == 1) {
837                 if (m->getOutputDir() != "") { //default path is set
838                     string tryPath = m->getOutputDir() + m->getSimpleName(thisFileName1);
839                     m->mothurOut("Unable to open " + thisFileName1 + ". Trying output directory " + tryPath); m->mothurOutEndLine();
840                     ifstream in4;
841                     openForward = m->openInputFile(tryPath, in4, "noerror");
842                     thisFileName1 = tryPath;
843                     in4.close();
844                 }
845             }
846             
847             if (openForward == 1) { //can't find it
848                 m->mothurOut("[WARNING]: can't find " + thisFileName1 + ", ignoring.\n");
849             }else{  in2.close();  }
850             
851             int openReverse = 1;
852             
853             ifstream in3;
854             openReverse = m->openInputFile(thisFileName2, in3, "noerror");
855             
856             //if you can't open it, try default location
857             if (openReverse == 1) {
858                 if (m->getDefaultPath() != "") { //default path is set
859                     string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName2);
860                     m->mothurOut("Unable to open " + thisFileName2 + ". Trying default " + tryPath); m->mothurOutEndLine();
861                     ifstream in3;
862                     openReverse = m->openInputFile(tryPath, in3, "noerror");
863                     in3.close();
864                     thisFileName2 = tryPath;
865                 }
866             }
867             
868             //if you can't open it, try output location
869             if (openReverse == 1) {
870                 if (m->getOutputDir() != "") { //default path is set
871                     string tryPath = m->getOutputDir() + m->getSimpleName(thisFileName2);
872                     m->mothurOut("Unable to open " + thisFileName2 + ". Trying output directory " + tryPath); m->mothurOutEndLine();
873                     ifstream in4;
874                     openReverse = m->openInputFile(tryPath, in4, "noerror");
875                     thisFileName2 = tryPath;
876                     in4.close();
877                 }
878             }
879             
880             if (openReverse == 1) { //can't find it
881                 m->mothurOut("[WARNING]: can't find " + thisFileName2 + ", ignoring pair.\n");
882             }else{  in3.close();  }
883            
884             
885             if ((pieces.size() == 2) && (openForward != 1) && (openReverse != 1)) { //good pair and sff or fastq and oligos
886                 //process pair
887                 int pos = thisFileName1.find(".sff");
888                 if (pos != string::npos) {//these files are sff files
889                     isSFF = true;
890                     sfffile = thisFileName1; oligosfile = thisFileName2;
891                     if (m->debug) { m->mothurOut("[DEBUG]: about to read oligos\n"); }
892                     readOligos();
893                     if (m->debug) { m->mothurOut("[DEBUG]: about to parse\n"); }
894                     parseSffFile(files);
895                     if (m->debug) { m->mothurOut("[DEBUG]: done parsing " + sfffile + "\n"); }
896                 }else{
897                     isSFF = false;
898                     fastqfile = thisFileName1; oligosfile = thisFileName2;
899                     if (m->debug) { m->mothurOut("[DEBUG]: about to read oligos\n"); }
900                     readOligos();
901                     if (m->debug) { m->mothurOut("[DEBUG]: about to parse\n"); }
902                     parseFastqFile(files);
903                     if (m->debug) { m->mothurOut("[DEBUG]: done parsing " + fastqfile + "\n"); }
904                 }
905                 
906             }else if((pieces.size() == 3) && (openForward != 1) && (openReverse != 1)) { //good pair and paired read
907                 map<string, vector<string> >::iterator it = files.find(group);
908                 if (it == files.end()) {
909                     vector<string> temp; temp.push_back(thisFileName1 + " " + thisFileName2); files[group] = temp;
910                 }else {
911                     files[group].push_back(thisFileName1 + " " + thisFileName2);
912                 }
913             }
914         }
915         in.close();
916     
917         inputfile = file;
918         
919         return 0;
920     }
921         catch(exception& e) {
922                 m->errorOut(e, "SRACommand", "readFile");
923                 exit(1);
924         }
925 }
926 //**********************************************************************************************************************
927 int SRACommand::parseSffFile(map<string, vector<string> >& files){
928         try {
929         vector<string> theseFiles;
930         inputfile = sfffile;
931         libLayout = "single"; //controlled vocab
932         
933         isSFF = true;
934         //run sffinfo to parse sff file into individual sampled sff files
935         string commandString = "sff=" + sfffile;
936         
937         commandString += ", oligos=" + oligosfile;
938         //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs
939         if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); }
940         if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); }
941         if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
942         if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
943         if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
944         
945         m->mothurOutEndLine();
946         m->mothurOut("/******************************************/"); m->mothurOutEndLine();
947         m->mothurOut("Running command: sffinfo(" + commandString + ")"); m->mothurOutEndLine();
948         m->mothurCalling = true;
949         
950         Command* sffinfoCommand = new SffInfoCommand(commandString);
951         sffinfoCommand->execute();
952         
953         map<string, vector<string> > filenames = sffinfoCommand->getOutputFiles();
954         map<string, vector<string> >::iterator it = filenames.find("sff");
955         if (it != filenames.end()) { theseFiles = it->second; }
956         else { m->control_pressed = true; } // error in sffinfo
957         
958         delete sffinfoCommand;
959         m->mothurCalling = false;
960         m->mothurOut("/******************************************/"); m->mothurOutEndLine();
961         
962         mapGroupToFile(files, theseFiles);
963         
964         return 0;
965     }
966         catch(exception& e) {
967                 m->errorOut(e, "SRACommand", "readFile");
968                 exit(1);
969         }
970 }
971
972 //**********************************************************************************************************************
973 int SRACommand::parseFastqFile(map<string, vector<string> >& files){
974         try {
975         vector<string> theseFiles;
976         inputfile = fastqfile;
977         libLayout = "single"; //controlled vocab
978         
979         //run sffinfo to parse sff file into individual sampled sff files
980         string commandString = "fastq=" + fastqfile;
981         
982         commandString += ", oligos=" + oligosfile;
983         //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs
984         if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); }
985         if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); }
986         if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
987         if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
988         if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
989        
990         m->mothurOutEndLine();
991         m->mothurOut("/******************************************/"); m->mothurOutEndLine();
992         m->mothurOut("Running command: fastq.info(" + commandString + ")"); m->mothurOutEndLine();
993         m->mothurCalling = true;
994         
995         Command* fastqinfoCommand = new ParseFastaQCommand(commandString);
996         fastqinfoCommand->execute();
997         
998         map<string, vector<string> > filenames = fastqinfoCommand->getOutputFiles();
999         map<string, vector<string> >::iterator it = filenames.find("fastq");
1000         if (it != filenames.end()) { theseFiles = it->second; }
1001         else { m->control_pressed = true; } // error in sffinfo
1002         
1003         delete fastqinfoCommand;
1004         m->mothurCalling = false;
1005         m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1006         
1007         mapGroupToFile(files, theseFiles);
1008         
1009         return 0;
1010     }
1011         catch(exception& e) {
1012                 m->errorOut(e, "SRACommand", "readFile");
1013                 exit(1);
1014         }
1015 }
1016 //***************************************************************************************************************
1017 //maps group to file
1018 int SRACommand::mapGroupToFile(map<string, vector<string> >& files, vector<string> theseFiles){
1019         try {
1020         
1021         for (int i = 0; i < Groups.size(); i++) {
1022             
1023             set<int> matches;
1024             for (int j = 0; j < theseFiles.size(); j++) {
1025                 int pos = theseFiles[j].find(Groups[i]);
1026                 if (pos != string::npos) { //you have a potential match, make sure you dont have a case of partial name
1027                     if (theseFiles[j][pos+Groups[i].length()] == '.') { //final.soil.sff vs final.soil2.sff both would match soil.
1028                         matches.insert(i);
1029                     }
1030                 }
1031             }
1032             
1033             if(matches.size() == 1) {
1034                 map<string, vector<string> >::iterator it = files.find(Groups[i]);
1035                 if (it == files.end()) {
1036                     vector<string> temp; temp.push_back(theseFiles[*matches.begin()]); files[Groups[i]] = temp;
1037                 }else {
1038                     files[Groups[i]].push_back(theseFiles[*matches.begin()]);
1039                 }
1040             }
1041         }
1042         return 0;
1043     }
1044         catch(exception& e) {
1045                 m->errorOut(e, "SRACommand", "checkGroups");
1046                 exit(1);
1047         }
1048 }
1049
1050 //***************************************************************************************************************
1051 //checks groups and files returned from parse - removes any groups that did not get reads assigned to them, orders files.
1052 int SRACommand::checkGroups(map<string, vector<string> >& files){
1053         try {
1054         vector<string> newGroups;
1055         for (int i = 0; i < Groups.size(); i++) {
1056             
1057             map<string, vector<string> >::iterator it = files.find(Groups[i]);
1058              //no files for this group, remove it
1059             if (it == files.end()) { }
1060             else { newGroups.push_back(Groups[i]); }
1061         }
1062         
1063         Groups = newGroups;
1064         
1065         return 0;
1066     }
1067         catch(exception& e) {
1068                 m->errorOut(e, "SRACommand", "checkGroups");
1069                 exit(1);
1070         }
1071 }
1072 //***************************************************************************************************************
1073 int SRACommand::readOligos(){
1074         try {
1075                 ifstream inOligos;
1076                 m->openInputFile(oligosfile, inOligos);
1077                 
1078                 string type, oligo, roligo, group;
1079         bool hasPrimer = false; bool hasPairedBarcodes = false; pairedOligos = false;
1080         map<int, oligosPair> pairedBarcodes;
1081         map<int, oligosPair> pairedPrimers;
1082         map<string, int> barcodes;
1083         map<string, int> primers;
1084         vector<string>  linker;
1085         vector<string>  spacer, revPrimer;
1086                 int indexPrimer = 0;
1087                 int indexBarcode = 0;
1088         int indexPairedPrimer = 0;
1089                 int indexPairedBarcode = 0;
1090         set<string> uniquePrimers;
1091         set<string> uniqueBarcodes;
1092                 
1093                 while(!inOligos.eof()){
1094             
1095                         inOligos >> type;
1096             
1097                         if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
1098             
1099                         if(type[0] == '#'){
1100                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1101                                 m->gobble(inOligos);
1102                         }
1103                         else{
1104                                 m->gobble(inOligos);
1105                                 //make type case insensitive
1106                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1107                                 
1108                                 inOligos >> oligo;
1109                 
1110                 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
1111                                 
1112                                 for(int i=0;i<oligo.length();i++){
1113                                         oligo[i] = toupper(oligo[i]);
1114                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1115                                 }
1116                                 
1117                                 if(type == "FORWARD"){
1118                                         group = "";
1119                                         
1120                                         // get rest of line in case there is a primer name
1121                                         while (!inOligos.eof()) {
1122                                                 char c = inOligos.get();
1123                                                 if (c == 10 || c == 13 || c == -1){     break;  }
1124                                                 else if (c == 32 || c == 9){;} //space or tab
1125                                                 else {  group += c;  }
1126                                         }
1127                                         
1128                                         //check for repeat barcodes
1129                                         map<string, int>::iterator itPrime = primers.find(oligo);
1130                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1131                                         
1132                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); }  }
1133                     
1134                                         primers[oligo] = indexPrimer; indexPrimer++;
1135                                         primerNameVector.push_back(group);
1136                                 }
1137                 else if (type == "PRIMER"){
1138                     m->gobble(inOligos);
1139                                         
1140                     inOligos >> roligo;
1141                     
1142                     for(int i=0;i<roligo.length();i++){
1143                         roligo[i] = toupper(roligo[i]);
1144                         if(roligo[i] == 'U')    {       roligo[i] = 'T';        }
1145                     }
1146                     roligo = reverseOligo(roligo);
1147                     
1148                     group = "";
1149                     
1150                                         // get rest of line in case there is a primer name
1151                                         while (!inOligos.eof()) {
1152                                                 char c = inOligos.get();
1153                                                 if (c == 10 || c == 13 || c == -1){     break;  }
1154                                                 else if (c == 32 || c == 9){;} //space or tab
1155                                                 else {  group += c;  }
1156                                         }
1157                     
1158                     oligosPair newPrimer(oligo, roligo);
1159                     
1160                     if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
1161                                         
1162                                         //check for repeat barcodes
1163                     string tempPair = oligo+roligo;
1164                     if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine();  }
1165                     else { uniquePrimers.insert(tempPair); }
1166                                         
1167                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); }  }
1168                     
1169                                         pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
1170                                         primerNameVector.push_back(group);
1171                     hasPrimer = true;
1172                 }
1173                                 else if(type == "REVERSE"){
1174                                         //Sequence oligoRC("reverse", oligo);
1175                                         //oligoRC.reverseComplement();
1176                     string oligoRC = reverseOligo(oligo);
1177                                         revPrimer.push_back(oligoRC);
1178                                 }
1179                                 else if(type == "BARCODE"){
1180                                         inOligos >> group;
1181                     
1182                     //barcode lines can look like   BARCODE   atgcatgc   groupName  - for 454 seqs
1183                     //or                            BARCODE   atgcatgc   atgcatgc    groupName  - for illumina data that has forward and reverse info
1184                     
1185                     string temp = "";
1186                     while (!inOligos.eof())     {
1187                                                 char c = inOligos.get();
1188                                                 if (c == 10 || c == 13 || c == -1){     break;  }
1189                                                 else if (c == 32 || c == 9){;} //space or tab
1190                                                 else {  temp += c;  }
1191                                         }
1192                                         
1193                     //then this is illumina data with 4 columns
1194                     if (temp != "") {
1195                         hasPairedBarcodes = true;
1196                         string reverseBarcode = group; //reverseOligo(group); //reverse barcode
1197                         group = temp;
1198                         
1199                         for(int i=0;i<reverseBarcode.length();i++){
1200                             reverseBarcode[i] = toupper(reverseBarcode[i]);
1201                             if(reverseBarcode[i] == 'U')        {       reverseBarcode[i] = 'T';        }
1202                         }
1203                         
1204                         reverseBarcode = reverseOligo(reverseBarcode);
1205                         oligosPair newPair(oligo, reverseBarcode);
1206                         
1207                         if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
1208                         //check for repeat barcodes
1209                         string tempPair = oligo+reverseBarcode;
1210                         if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse +  " is in your oligos file already, disregarding."); m->mothurOutEndLine();  }
1211                         else { uniqueBarcodes.insert(tempPair); }
1212                         
1213                         pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
1214                         barcodeNameVector.push_back(group);
1215                     }else {
1216                         //check for repeat barcodes
1217                         map<string, int>::iterator itBar = barcodes.find(oligo);
1218                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1219                         
1220                         barcodes[oligo]=indexBarcode; indexBarcode++;
1221                         barcodeNameVector.push_back(group);
1222                     }
1223                                 }else if(type == "LINKER"){
1224                                         linker.push_back(oligo);
1225                                 }else if(type == "SPACER"){
1226                                         spacer.push_back(oligo);
1227                                 }
1228                                 else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1229                         }
1230                         m->gobble(inOligos);
1231                 }
1232                 inOligos.close();
1233                 
1234         if (hasPairedBarcodes || hasPrimer) {
1235             pairedOligos = true;
1236             if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true;  m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine();  return 0; }
1237         }
1238                 
1239         
1240                 //add in potential combos
1241                 if(barcodeNameVector.size() == 0){
1242                         barcodeNameVector.push_back("");
1243                 }
1244                 
1245                 if(primerNameVector.size() == 0){
1246                         primerNameVector.push_back("");
1247                 }
1248         
1249         if (pairedOligos) {
1250             for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
1251                 for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
1252                     
1253                     string primerName = primerNameVector[itPrimer->first];
1254                     string barcodeName = barcodeNameVector[itBar->first];
1255                     
1256                     if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1257                     else {
1258                         string comboGroupName = "";
1259                         string fastqFileName = "";
1260                         
1261                         if(primerName == ""){
1262                             comboGroupName = barcodeNameVector[itBar->first];
1263                         }
1264                         else{
1265                             if(barcodeName == ""){
1266                                 comboGroupName = primerNameVector[itPrimer->first];
1267                             }
1268                             else{
1269                                 comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
1270                             }
1271                         }
1272                         uniqueNames.insert(comboGroupName);
1273                         
1274                         map<string, vector<string> >::iterator itGroup2Barcode = Group2Barcode.find(comboGroupName);
1275                         if (itGroup2Barcode == Group2Barcode.end()) {
1276                             vector<string> tempBarcodes; tempBarcodes.push_back((itBar->second).forward+"."+(itBar->second).reverse);
1277                             Group2Barcode[comboGroupName] = tempBarcodes;
1278                         }else {
1279                             Group2Barcode[comboGroupName].push_back((itBar->second).forward+"."+(itBar->second).reverse);
1280                         }
1281                         
1282                         itGroup2Barcode = Group2Primer.find(comboGroupName);
1283                         if (itGroup2Barcode == Group2Primer.end()) {
1284                             vector<string> tempPrimers; tempPrimers.push_back((itPrimer->second).forward+"."+(itPrimer->second).reverse);
1285                             Group2Primer[comboGroupName] = tempPrimers;
1286                         }else {
1287                             Group2Primer[comboGroupName].push_back((itPrimer->second).forward+"."+(itPrimer->second).reverse);
1288                         }
1289                     }
1290                 }
1291             }
1292         }else {
1293             for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1294                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1295                     
1296                     string primerName = primerNameVector[itPrimer->second];
1297                     string barcodeName = barcodeNameVector[itBar->second];
1298                     
1299                     if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1300                     else {
1301                         string comboGroupName = "";
1302                         string fastqFileName = "";
1303                         
1304                         if(primerName == ""){
1305                             comboGroupName = barcodeNameVector[itBar->second];
1306                         }
1307                         else{
1308                             if(barcodeName == ""){
1309                                 comboGroupName = primerNameVector[itPrimer->second];
1310                             }
1311                             else{
1312                                 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1313                             }
1314                         }
1315                         uniqueNames.insert(comboGroupName);
1316                         
1317                         map<string, vector<string> >::iterator itGroup2Barcode = Group2Barcode.find(comboGroupName);
1318                         if (itGroup2Barcode == Group2Barcode.end()) {
1319                             vector<string> tempBarcodes; tempBarcodes.push_back(itBar->first);
1320                             Group2Barcode[comboGroupName] = tempBarcodes;
1321                         }else {
1322                             Group2Barcode[comboGroupName].push_back(itBar->first);
1323                         }
1324                         
1325                         itGroup2Barcode = Group2Primer.find(comboGroupName);
1326                         if (itGroup2Barcode == Group2Primer.end()) {
1327                             vector<string> tempPrimers; tempPrimers.push_back(itPrimer->first);
1328                             Group2Primer[comboGroupName] = tempPrimers;
1329                         }else {
1330                             Group2Primer[comboGroupName].push_back(itPrimer->first);
1331                         }
1332                     }
1333                 }
1334             }
1335         }
1336
1337                
1338         if (m->debug) { int count = 0; for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } }
1339         
1340         
1341                 return true;
1342                 
1343         }
1344         catch(exception& e) {
1345                 m->errorOut(e, "SRACommand", "readOligos");
1346                 exit(1);
1347         }
1348 }
1349 //********************************************************************/
1350 string SRACommand::reverseOligo(string oligo){
1351         try {
1352         string reverse = "";
1353         
1354         for(int i=oligo.length()-1;i>=0;i--){
1355             
1356             if(oligo[i] == 'A')         {       reverse += 'T'; }
1357             else if(oligo[i] == 'T'){   reverse += 'A'; }
1358             else if(oligo[i] == 'U'){   reverse += 'A'; }
1359             
1360             else if(oligo[i] == 'G'){   reverse += 'C'; }
1361             else if(oligo[i] == 'C'){   reverse += 'G'; }
1362             
1363             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1364             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1365             
1366             else if(oligo[i] == 'M'){   reverse += 'K'; }
1367             else if(oligo[i] == 'K'){   reverse += 'M'; }
1368             
1369             else if(oligo[i] == 'W'){   reverse += 'W'; }
1370             else if(oligo[i] == 'S'){   reverse += 'S'; }
1371             
1372             else if(oligo[i] == 'B'){   reverse += 'V'; }
1373             else if(oligo[i] == 'V'){   reverse += 'B'; }
1374             
1375             else if(oligo[i] == 'D'){   reverse += 'H'; }
1376             else if(oligo[i] == 'H'){   reverse += 'D'; }
1377             
1378             else                                                {       reverse += 'N'; }
1379         }
1380         
1381         
1382         return reverse;
1383     }
1384         catch(exception& e) {
1385                 m->errorOut(e, "SRACommand", "reverseOligo");
1386                 exit(1);
1387         }
1388 }
1389 //********************************************************************/
1390 //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
1391 bool SRACommand::checkCasesPlatforms(string& platform){
1392         try {
1393         string original = platform;
1394         bool isOkay = true;
1395         
1396         //remove users possible case errors
1397         for (int i = 0; i < platform.size(); i++) { platform[i] = toupper(platform[i]); }
1398         
1399         //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
1400         
1401             if ((platform == "_LS454") || (platform == "ILLUMINA") || (platform == "ION_TORRENT") || (platform == "PACBIO_SMRT") || (platform == "454")) { }
1402             else { isOkay = false; }
1403         
1404             if (isOkay) {
1405                 if (platform == "454")   {  platform = "_LS454"; }
1406             }else {
1407                 m->mothurOut("[ERROR]: " + original + " is not a valid platform option.  Valid platform options are _LS454, ILLUMINA-ION, TORRENT or PACBIO_SMRT."); m->mothurOutEndLine(); abort = true;
1408             }
1409             
1410             return isOkay;
1411     }
1412         catch(exception& e) {
1413                 m->errorOut(e, "SRACommand", "checkCasesPlatforms");
1414                 exit(1);
1415         }
1416 }
1417 //********************************************************************/
1418 //454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified
1419 bool SRACommand::checkCasesInstrumentModels(string& instrumentModel){
1420         try {
1421         string original = instrumentModel;
1422         bool isOkay = true;
1423         
1424         //remove users possible case errors
1425         for (int i = 0; i < instrumentModel.size(); i++) { instrumentModel[i] = toupper(instrumentModel[i]); }
1426         
1427         //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
1428         if (platform == "_LS454") { //instrument model options are 454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-unspecified
1429             if ((instrumentModel == "454_GS") || (instrumentModel == "454_GS_20") || (instrumentModel == "454_GS_FLX") || (instrumentModel == "454_GS_FLX_TITANIUM") || (instrumentModel == "454_GS_JUNIOR") || (instrumentModel == "UNSPECIFIED")) { }
1430             else { isOkay = false; }
1431             if (isOkay) {
1432                 if (instrumentModel == "454_GS_FLX_TITANIUM")   {  instrumentModel = "454_GS_FLX_Titanium"; }
1433                 if (instrumentModel == "454_GS_JUNIOR")         {  instrumentModel = "454_GS_Junior";       }
1434                 if (instrumentModel == "UNSPECIFIED")           {  instrumentModel = "unspecified";         }
1435             }else {
1436                 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform.  Valid instrument options are 454_GS, 454_GS_20, 454_GS_FLX, 454_GS_FLX_Titanium, 454_GS_Junior or unspecified."); m->mothurOutEndLine(); abort = true;
1437             }
1438             
1439         }else if (platform == "ILLUMINA") { //instrument model options are Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-unspecified
1440             if ((instrumentModel == "ILLUMINA_GENOME_ANALYZER") || (instrumentModel == "ILLUMINA_GENOME_ANALYZER_II") || (instrumentModel == "ILLUMINA_GENOME_ANALYZER_IIX") || (instrumentModel == "ILLUMINA_HISEQ_2000") || (instrumentModel == "ILLUMINA_HISEQ_1000") || (instrumentModel == "ILLUMINA_MISEQ") || (instrumentModel == "UNSPECIFIED")) { }
1441             else { isOkay = false; }
1442             
1443             if (isOkay) {
1444                 if (instrumentModel == "ILLUMINA_GENOME_ANALYZER")          {  instrumentModel = "Illumina_Genome_Analyzer";        }
1445                 if (instrumentModel == "ILLUMINA_GENOME_ANALYZER_II")       {  instrumentModel = "Illumina_Genome_Analyzer_II";     }
1446                 if (instrumentModel == "ILLUMINA_GENOME_ANALYZER_IIX")      {  instrumentModel = "Illumina_Genome_Analyzer_IIx";    }
1447                 if (instrumentModel == "ILLUMINA_HISEQ_2000")               {  instrumentModel = "Illumina_HiSeq_2000";             }
1448                 if (instrumentModel == "ILLUMINA_HISEQ_1000")               {  instrumentModel = "Illumina_HiSeq_1000";             }
1449                 if (instrumentModel == "ILLUMINA_MISEQ")                    {  instrumentModel = "Illumina_MiSeq";                  }
1450                 if (instrumentModel == "UNSPECIFIED")                       {  instrumentModel = "unspecified";                     }
1451             }else {
1452                 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform.  Valid instrument options are Illumina_Genome_Analyzer, Illumina_Genome_Analyzer_II, Illumina_Genome_Analyzer_IIx, Illumina_HiSeq_2000, Illumina_HiSeq_1000, Illumina_MiSeq or unspecified."); m->mothurOutEndLine(); abort = true;
1453             }
1454             
1455         }else if (platform == "ION_TORRENT") { //instrument model options are Ion_Torrent_PGM-unspecified
1456             if ((instrumentModel == "ION_TORRENT_PGM")  || (instrumentModel == "UNSPECIFIED")) { }
1457             else { isOkay = false; }
1458             
1459             if (isOkay) {
1460                 if (instrumentModel == "ION_TORRENT_PGM")          {  instrumentModel = "Ion_Torrent_PGM";        }
1461                 if (instrumentModel == "UNSPECIFIED")              {  instrumentModel = "unspecified";            }
1462             }else {
1463                 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform.  Valid instrument options are Ion_Torrent_PGM or unspecified."); m->mothurOutEndLine(); abort = true;
1464             }
1465         }else if (platform == "PACBIO_SMRT") { //instrument model options are PacBio_RS-unspecified
1466             if ((instrumentModel == "PACBIO_RS")  || (instrumentModel == "UNSPECIFIED")) { }
1467             else { isOkay = false; }
1468             
1469             if (isOkay) {
1470                 if (instrumentModel == "PACBIO_RS")          {  instrumentModel = "PacBio_RS";        }
1471                 if (instrumentModel == "UNSPECIFIED")        {  instrumentModel = "unspecified";      }
1472             }else {
1473                 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform.  Valid instrument options are PacBio_RS or unspecified."); m->mothurOutEndLine(); abort = true;
1474             }
1475         }
1476         return isOkay;
1477     }
1478         catch(exception& e) {
1479                 m->errorOut(e, "SRACommand", "checkCasesInstrumentModels");
1480                 exit(1);
1481         }
1482 }
1483 //**********************************************************************************************************************
1484 //AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq,OTHER
1485 bool SRACommand::checkCasesLibStrategy(string& libStrategy){
1486         try {
1487         string original = libStrategy;
1488         bool isOkay = true;
1489         
1490         //remove users possible case errors
1491         for (int i = 0; i < libStrategy.size(); i++) { libStrategy[i] = toupper(libStrategy[i]); }
1492         
1493         if ((libStrategy == "AMPLICON") || (libStrategy == "WGA") || (libStrategy == "WGS") || (libStrategy == "WGX") || (libStrategy == "RNA-SEQ") || (libStrategy == "MIRNA-SEQ") || (libStrategy == "WCS") || (libStrategy == "CLONE") || (libStrategy == "POOLCLONE") || (libStrategy == "CLONEEND") || (libStrategy == "FINISHING") || (libStrategy == "CHIP-SEQ") || (libStrategy == "MNASE-SEQ") || (libStrategy == "DNASE-HYPERSENSITIVITY") || (libStrategy == "BISULFITE-SEQ") || (libStrategy == "TN-SEQ") || (libStrategy == "EST") || (libStrategy == "FL-CDNA") || (libStrategy == "CTS") || (libStrategy == "MRE-SEQ")|| (libStrategy == "MEDIP-SEQ") || (libStrategy == "MBD-SEQ") || (libStrategy == "OTHER")) { }
1494         else { isOkay = false; }
1495         
1496         if (isOkay) {
1497             if (libStrategy == "RNA-SEQ")                   {  libStrategy = "RNA-Seq";                 }
1498             if (libStrategy == "MIRNA-SEQ")                 {  libStrategy = "miRNA-Seq";               }
1499             if (libStrategy == "CHIP-SEQ")                  {  libStrategy = "ChIP-Seq";                }
1500             if (libStrategy == "MNASE-SEQ")                 {  libStrategy = "MNase-Seq";               }
1501             if (libStrategy == "DNASE-HYPERSENSITIVITY")    {  libStrategy = "DNase-Hypersensitivity";  }
1502             if (libStrategy == "BISULFITE-SEQ")             {  libStrategy = "Bisulfite-Seq";           }
1503             if (libStrategy == "TN-SEQ")                    {  libStrategy = "Tn-Seq";                  }
1504             if (libStrategy == "FL-CDNA")                   {  libStrategy = "FL-cDNA";                 }
1505             if (libStrategy == "MRE-SEQ")                   {  libStrategy = "MRE-Seq";                 }
1506             if (libStrategy == "MEDIP-SEQ")                 {  libStrategy = "MeDIP-Seq";               }
1507             }else {
1508             m->mothurOut("[ERROR]: " + original + " is not a valid libstrategy option.  Valid libstrategy options are AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq or OTHER."); m->mothurOutEndLine(); abort = true;
1509         }
1510         
1511         return isOkay;
1512     }
1513         catch(exception& e) {
1514                 m->errorOut(e, "SRACommand", "checkCasesLibStrategy");
1515                 exit(1);
1516         }
1517 }
1518
1519 //**********************************************************************************************************************
1520 //METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA,OTHER
1521 bool SRACommand::checkCasesLibSource(string& libSource){
1522         try {
1523         string original = libSource;
1524         bool isOkay = true;
1525         
1526         //remove users possible case errors
1527         for (int i = 0; i < libSource.size(); i++) { libSource[i] = toupper(libSource[i]); }
1528         
1529         if ((libSource == "METAGENOMIC") || (libSource == "GENOMIC") || (libSource == "TRANSCRIPTOMIC") || (libSource == "METATRANSCRIPTOMIC") || (libSource == "SYNTHETIC") || (libSource == "VIRAL_RNA") || (libSource == "OTHER")) { }
1530         else { isOkay = false; }
1531         
1532         if (isOkay) {
1533             
1534         }else {
1535             m->mothurOut("[ERROR]: " + original + " is not a valid libsource option.  Valid libsource options are METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA or OTHER."); m->mothurOutEndLine(); abort = true;
1536         }
1537         
1538         return isOkay;
1539     }
1540         catch(exception& e) {
1541                 m->errorOut(e, "SRACommand", "checkCasesLibStrategy");
1542                 exit(1);
1543         }
1544 }
1545
1546 //**********************************************************************************************************************
1547 //PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other,unspecified
1548 bool SRACommand::checkCasesLibSelection(string& libSelection){
1549         try {
1550         string original = libSelection;
1551         bool isOkay = true;
1552         
1553         //remove users possible case errors
1554         for (int i = 0; i < libSelection.size(); i++) { libSelection[i] = toupper(libSelection[i]); }
1555         
1556         if ((libSelection == "PCR") || (libSelection == "RANDOM") || (libSelection == "RANDOM_PCR") || (libSelection == "RT-PCR") || (libSelection == "HMPR") || (libSelection == "MF") || (libSelection == "CF-S") || (libSelection == "CF-H") || (libSelection == "CF-T") || (libSelection == "CF-M") || (libSelection == "MDA") || (libSelection == "MSLL") || (libSelection == "CDNA") || (libSelection == "CHIP") || (libSelection == "MNASE") || (libSelection == "DNASE") || (libSelection == "HYBRID_SELECTION") || (libSelection == "REDUCED_REPRESENTATION") || (libSelection == "RESTRICTION_DIGEST") || (libSelection == "5-METHYLCYTIDINE_ANTIBODY") || (libSelection == "MBD2_PROTEIN_METHYL-CPG_BINDING_DOMAIN") || (libSelection == "CAGE") || (libSelection == "RACE") || (libSelection == "SIZE_FRACTIONATION") || (libSelection == "PADLOCK_PROBES_CAPTURE_METHOD") || (libSelection == "OTHER") || (libSelection == "UNSPECIFIED")) { }
1557         else { isOkay = false; }
1558         
1559         if (isOkay) {
1560             if (libSelection == "CDNA")                                         {  libSelection = "cDNA";                                       }
1561             if (libSelection == "CHIP")                                         {  libSelection = "ChIP";                                       }
1562             if (libSelection == "MNASE")                                        {  libSelection = "MNase";                                      }
1563             if (libSelection == "DNASE")                                        {  libSelection = "DNAse";                                      }
1564             if (libSelection == "HYBRID_SELECTION")                             {  libSelection = "Hybrid_Selection";                           }
1565             if (libSelection == "REDUCED_REPRESENTATION")                       {  libSelection = "Reduced_Representation";                     }
1566             if (libSelection == "RESTRICTION_DIGEST")                           {  libSelection = "Restriction_Digest";                         }
1567             if (libSelection == "5-METHYLCYTIDINE_ANTIBODY")                    {  libSelection = "5-methylcytidine_antibody";                  }
1568             if (libSelection == "MBD2_PROTEIN_METHYL-CPG_BINDING_DOMAIN")       {  libSelection = "MBD2_protein_methyl-CpG_binding_domain";     }
1569             if (libSelection == "SIZE_FRACTIONATION")                           {  libSelection = "size_fractionation";                         }
1570             if (libSelection == "PADLOCK_PROBES_CAPTURE_METHOD")                {  libSelection = "Padlock_probes_capture_method";              }
1571             if (libSelection == "OTHER")                                        {  libSelection = "other";                                      }
1572             if (libSelection == "UNSPECIFIED")                                  {  libSelection = "unspecified";                                }
1573             
1574         }else {
1575             m->mothurOut("[ERROR]: " + original + " is not a valid libselection option.  Valid libselection options are PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other or unspecified."); m->mothurOutEndLine(); abort = true;
1576         }
1577         
1578         return isOkay;
1579     }
1580         catch(exception& e) {
1581                 m->errorOut(e, "SRACommand", "checkCasesLibSelection");
1582                 exit(1);
1583         }
1584 }
1585 //**********************************************************************************************************************
1586 //METAGENOME,GENOME_SEQUENCING,METAGENOMIC_ASSEMBLY,ASSEMBLY,TRANSCRIPTOME,PROTEOMIC,MAP,CLONE_ENDS,TARGETED_LOCI,RANDOM_SURVEY,EXOME,VARIATION,EPIGENOMICS,PHENOTYPE,GENOTYPE,OTHER
1587 bool SRACommand::checkCasesDataType(string& dataType){
1588         try {
1589         string original = dataType;
1590         bool isOkay = true;
1591         
1592         //remove users possible case errors
1593         for (int i = 0; i < dataType.size(); i++) { dataType[i] = toupper(dataType[i]); }
1594         
1595         if ((dataType == "METAGENOME") || (dataType == "GENOME_SEQUENCING") || (dataType == "METAGENOMIC_ASSEMBLY") || (dataType == "ASSEMBLY") || (dataType == "TRANSCRIPTOME") || (dataType == "PROTEOMIC") || (dataType == "MAP") || (dataType == "CLONE_ENDS") || (dataType == "TARGETED_LOCI") || (dataType == "RANDOM_SURVEY") || (dataType == "EXOME") || (dataType == "VARIATION") || (dataType == "EPIGENOMICS") || (dataType == "PHENOTYPE") || (dataType == "GENOTYPE") || (dataType == "OTHER")) { }
1596         else { isOkay = false; }
1597         
1598         if (isOkay) {
1599             
1600         }else {
1601             m->mothurOut("[ERROR]: " + original + " is not a valid datatype option.  Valid datatype options are METAGENOME,GENOME_SEQUENCING,METAGENOMIC_ASSEMBLY,ASSEMBLY,TRANSCRIPTOME,PROTEOMIC,MAP,CLONE_ENDS,TARGETED_LOCI,RANDOM_SURVEY,EXOME,VARIATION,EPIGENOMICS,PHENOTYPE,GENOTYPE,OTHER."); m->mothurOutEndLine(); abort = true;
1602         }
1603         
1604         return isOkay;
1605     }
1606         catch(exception& e) {
1607                 m->errorOut(e, "SRACommand", "checkCasesDataType");
1608                 exit(1);
1609         }
1610 }
1611 //**********************************************************************************************************************
1612 bool SRACommand::sanityCheckMiMarksGroups(){
1613         try {
1614         bool isOkay = true;
1615         
1616         for (int i = 0; i < Groups.size(); i++) {
1617             if (m->control_pressed) { break; }
1618             
1619             map<string, map<string, string> >::iterator it = mimarks.find(Groups[i]);
1620             if (it == mimarks.end()) {
1621                 isOkay = false;
1622                 m->mothurOut("[ERROR]: MIMarks file is missing group " + Groups[i] + ", please correct.\n");
1623             }
1624         }
1625         
1626         if (!isOkay) { m->control_pressed = true; }
1627         
1628         return isOkay;
1629     }
1630         catch(exception& e) {
1631                 m->errorOut(e, "SRACommand", "sanityCheckMiMarksGroups");
1632                 exit(1);
1633         }
1634 }
1635
1636 //**********************************************************************************************************************