]> git.donarmstrong.com Git - mothur.git/blob - sracommand.cpp
added oligos class. added check orient parameter to trim.flows, sffinfo, fastq.info...
[mothur.git] / sracommand.cpp
1 //
2 //  sracommand.cpp
3 //  Mothur
4 //
5 //  Created by SarahsWork on 10/28/13.
6 //  Copyright (c) 2013 Schloss Lab. All rights reserved.
7 //
8
9 #include "sracommand.h"
10 #include "sffinfocommand.h"
11 #include "parsefastaqcommand.h"
12
13 //**********************************************************************************************************************
14 vector<string> SRACommand::setParameters(){
15         try {
16         CommandParameter psff("sff", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(psff);
17         CommandParameter poligos("oligos", "InputTypes", "", "", "oligos", "none", "none","",false,false,true); parameters.push_back(poligos);
18         CommandParameter pfile("file", "InputTypes", "", "", "sffFastQFile-oligos", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfile);
19                 CommandParameter pfastq("fastq", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfastq);
20         CommandParameter pcontact("project", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pcontact);
21         CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
22         CommandParameter pmimark("mimark", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pmimark);
23         //choose only one multiple options
24         CommandParameter pplatform("platform", "Multiple", "_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT", "_LS454", "", "", "","",false,false); parameters.push_back(pplatform);
25         CommandParameter pinstrument("instrument", "Multiple", "454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified", "454_GS", "", "", "","",false,false); parameters.push_back(pinstrument);
26         CommandParameter plibstrategy("libstrategy", "String", "AMPLICON", "", "", "", "","",false,false); parameters.push_back(plibstrategy);
27         CommandParameter pdatatype("datatype", "String", "METAGENOME", "", "", "", "","",false,false); parameters.push_back(pdatatype);
28         CommandParameter plibsource("libsource", "String", "METAGENOMIC", "", "", "", "","",false,false); parameters.push_back(plibsource);
29         CommandParameter plibselection("libselection", "String", "PCR", "", "", "", "","",false,false); parameters.push_back(plibselection);
30         CommandParameter porientation("orientation", "Multiple", "forward-reverse", "forward", "", "", "","",false,false); parameters.push_back(porientation);
31         CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
32                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);
33         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
34                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
35         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
36         
37          //every command must have inputdir and outputdir.  This allows mothur users to redirect input and output files.
38                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
39                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
40                 
41                 vector<string> myArray;
42                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
43                 return myArray;
44         }
45         catch(exception& e) {
46                 m->errorOut(e, "SRACommand", "setParameters");
47                 exit(1);
48         }
49 }
50 //**********************************************************************************************************************
51 string SRACommand::getHelpString(){
52         try {
53                 string helpString = "";
54                 helpString += "The sra command creates the necessary files for a NCBI submission. The xml file and individual sff or fastq files parsed from the original sff or fastq file.\n";
55                 helpString += "The sra command parameters are: sff, fastq, file, oligos, project, mimarksfile, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, checkorient, platform, orientation, libstrategy, datatype, libsource, libselection and instrument.\n";
56         helpString += "The sff parameter is used to provide the original sff file.\n";
57                 helpString += "The fastq parameter is used to provide the original fastq file.\n";
58         helpString += "The project parameter is used to provide your project file.\n";
59         helpString += "The oligos parameter is used to provide an oligos file to parse your sff or fastq file by. It is required and must contain barcodes and primers, or you must provide a file option. \n";
60         helpString += "The mimark parameter is used to provide your mimarks file.  You can create the template for this file using the get.mimarkspackage command.\n";
61                 helpString += "The file parameter is used to provide a file containing a list of individual fastq or sff files or paired fastq files with a group assignment. File lines can be 2 or 3 columns. The 2 column files are sff file then oligos or fastqfile then oligos. You may have multiple lines in the file.  The 3 column files are for paired read libraries. The format is groupName, forwardFastqFile reverseFastqFile.\n";
62         helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
63                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
64                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
65         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
66                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
67         helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. The default is false.\n";
68         helpString += "The platform parameter is used to specify platform you are using choices are: _LS454,ILLUMINA,ION_TORRENT,PACBIO_SMRT. Default=_LS454. This is a controlled vocabulary section in the XML file that will be generated.\n";
69         helpString += "The orientation parameter is used to specify sequence orientation. Choices are: forward and reverse. Default=forward. This is a controlled vocabulary section in the XML file that will be generated.\n";
70         helpString += "The instrument parameter is used to specify instrument. Choices are 454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified. Default=454_GS. This is a controlled vocabulary section in the XML file that will be generated. \n";
71         helpString += "The libstrategy parameter is used to specify library strategy. Default=AMPLICON. Choices are AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq,OTHER. This is a controlled vocabulary section in the XML file that will be generated.  \n";
72         helpString += "The libsource parameter is used to specify library source. Default=METAGENOMIC. Choices are METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA,OTHER. This is a controlled vocabulary section in the XML file that will be generated. \n";
73         helpString += "The libselection parameter is used to specify library selection. Default=PCR. Choices are PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other,unspecified. This is a controlled vocabulary section in the XML file that will be generated. \n";
74         helpString += "The datatype parameter is used to specify datatype. Default=METAGENOME. Choices are METAGENOME,GENOME_SEQUENCING,METAGENOMIC_ASSEMBLY,ASSEMBLY,TRANSCRIPTOME,PROTEOMIC,MAP,CLONE_ENDS,TARGETED_LOCI,RANDOM_SURVEY,EXOME,VARIATION,EPIGENOMICS,PHENOTYPE,GENOTYPE,OTHER. This is a controlled vocabulary section in the XML file that will be generated. \n";
75                 helpString += "sra(sff=sff=GHL4YHV01.sff, GHL4YHV01.oligos, project=test.project, mimark=MIMarksData.txt)\n";
76                 return helpString;
77         }
78         catch(exception& e) {
79                 m->errorOut(e, "SRACommand", "getHelpString");
80                 exit(1);
81         }
82 }
83 //**********************************************************************************************************************
84 string SRACommand::getOutputPattern(string type) {
85     try {
86         string pattern = "";
87         
88         if (type == "xml") {  pattern = "[filename],xml"; }
89         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
90         
91         return pattern;
92     }
93     catch(exception& e) {
94         m->errorOut(e, "SRACommand", "getOutputPattern");
95         exit(1);
96     }
97 }
98 //**********************************************************************************************************************
99 SRACommand::SRACommand(){
100         try {
101                 abort = true; calledHelp = true;
102                 setParameters();
103         vector<string> tempOutNames;
104                 outputTypes["xml"] = tempOutNames;
105         }
106         catch(exception& e) {
107                 m->errorOut(e, "SRACommand", "SRACommand");
108                 exit(1);
109         }
110 }
111 //**********************************************************************************************************************
112 SRACommand::SRACommand(string option)  {
113         try {
114                 abort = false; calledHelp = false;
115         libLayout = "single"; //controlled vocab
116                 
117                 //allow user to run help
118                 if(option == "help") { help(); abort = true; calledHelp = true; }
119                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
120                 
121                 else {
122                         //valid paramters for this command
123                         vector<string> myArray = setParameters();
124                         
125                         OptionParser parser(option);
126                         map<string,string> parameters = parser.getParameters();
127                         
128                         ValidParameters validParameter;
129                         map<string,string>::iterator it;
130                         //check to make sure all parameters are valid for command
131                         for (it = parameters.begin(); it != parameters.end(); it++) {
132                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
133                         }
134                         
135             vector<string> tempOutNames;
136             outputTypes["xml"] = tempOutNames;
137                         
138                         //if the user changes the input directory command factory will send this info to us in the output parameter
139                         inputDir = validParameter.validFile(parameters, "inputdir", false);
140                         if (inputDir == "not found"){   inputDir = "";          }
141                         else {
142             
143                 string path;
144                                 it = parameters.find("sff");
145                                 //user has given a template file
146                                 if(it != parameters.end()){
147                                         path = m->hasPath(it->second);
148                                         //if the user has not given a path then, add inputdir. else leave path alone.
149                                         if (path == "") {       parameters["sff"] = inputDir + it->second;              }
150                                 }
151                                 
152                                 it = parameters.find("fastq");
153                                 //user has given a template file
154                                 if(it != parameters.end()){
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["fastq"] = inputDir + it->second;            }
158                                 }
159                 
160                 it = parameters.find("file");
161                                 //user has given a template file
162                                 if(it != parameters.end()){
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["file"] = inputDir + it->second;             }
166                                 }
167                 
168                 it = parameters.find("oligos");
169                                 //user has given a template file
170                                 if(it != parameters.end()){
171                                         path = m->hasPath(it->second);
172                                         //if the user has not given a path then, add inputdir. else leave path alone.
173                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
174                                 }
175                 
176                 it = parameters.find("project");
177                                 //user has given a template file
178                                 if(it != parameters.end()){
179                                         path = m->hasPath(it->second);
180                                         //if the user has not given a path then, add inputdir. else leave path alone.
181                                         if (path == "") {       parameters["project"] = inputDir + it->second;          }
182                                 }
183                 
184                 it = parameters.find("mimark");
185                                 //user has given a template file
186                                 if(it != parameters.end()){
187                                         path = m->hasPath(it->second);
188                                         //if the user has not given a path then, add inputdir. else leave path alone.
189                                         if (path == "") {       parameters["mimark"] = inputDir + it->second;           }
190                                 }
191             }
192             
193                         //check for parameters
194             fastqfile = validParameter.validFile(parameters, "fastq", true);
195                         if (fastqfile == "not open") { fastqfile = "";  abort = true; }
196                         else if (fastqfile == "not found") { fastqfile = ""; }
197                         
198                         sfffile = validParameter.validFile(parameters, "sff", true);
199                         if (sfffile == "not open") {  sfffile = "";  abort = true; }
200                         else if (sfffile == "not found") { sfffile = ""; }
201             
202             file = validParameter.validFile(parameters, "file", true);
203                         if (file == "not open") {  file = "";  abort = true; }
204                         else if (file == "not found") { file = ""; }
205             
206             oligosfile = validParameter.validFile(parameters, "oligos", true);
207                         if (oligosfile == "not found")      {  oligosfile = "";     }
208             else if(oligosfile == "not open")   {       abort = true;           }
209                         else {  m->setOligosFile(oligosfile); }
210             
211             contactfile = validParameter.validFile(parameters, "project", true);
212                         if (contactfile == "not found")      {  contactfile = ""; m->mothurOut("[ERROR]: You must provide a project file before you can use the sra command."); m->mothurOutEndLine(); abort = true;    }
213                         else if(contactfile == "not open")      {       abort = true;           }
214             
215             mimarksfile = validParameter.validFile(parameters, "mimark", true);
216                         if (mimarksfile == "not found")      {  mimarksfile = ""; m->mothurOut("[ERROR]: You must provide a mimark file before you can use the sra command. You can create a template for this file using the get.mimarkspackage command."); m->mothurOutEndLine(); abort = true;       }
217                         else if(mimarksfile == "not open")      {       abort = true;           }
218             
219             file = validParameter.validFile(parameters, "file", true);
220                         if (file == "not open") {  file = "";  abort = true; }
221                         else if (file == "not found") { file = ""; }
222                         
223             if ((file == "") && (oligosfile == "")) {
224                 m->mothurOut("[ERROR]: You must provide an oligos file or file with oligos files in them before you can use the sra command."); m->mothurOutEndLine(); abort = true;
225             }
226             
227                         if ((fastqfile == "") && (file == "") && (sfffile == "")) {
228                 m->mothurOut("[ERROR]: You must provide a file, sff file or fastq file before you can use the sra command."); m->mothurOutEndLine(); abort = true;
229             }
230             
231             //use only one Mutliple type _LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
232                         platform = validParameter.validFile(parameters, "platform", false);         if (platform == "not found") { platform = "_LS454"; }
233                         if (!checkCasesPlatforms(platform)) { abort = true; } //error message in function
234                                  
235             if (!abort) { //don't check instrument model is platform is bad
236                 //454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified
237                 instrumentModel = validParameter.validFile(parameters, "instrument", false);         if (instrumentModel == "not found") { instrumentModel = "454_GS"; }
238                 if (!checkCasesInstrumentModels(instrumentModel)) { abort = true; } //error message in function
239             }
240             //turn _ to spaces mothur's work around
241             for (int i = 0; i < instrumentModel.length(); i++) { if (instrumentModel[i] == '_') { instrumentModel[i] = ' '; } }
242             
243             libStrategy = validParameter.validFile(parameters, "libstrategy", false);         if (libStrategy == "not found") { libStrategy = "AMPLICON"; }
244             if (!checkCasesLibStrategy(libStrategy)) { abort = true; } //error message in function
245
246             //turn _ to spaces mothur's work around
247             for (int i = 0; i < libStrategy.length(); i++) { if (libStrategy[i] == '_') { libStrategy[i] = ' '; }  }
248             
249             libSource = validParameter.validFile(parameters, "libsource", false);         if (libSource == "not found") { libSource = "METAGENOMIC"; }
250             if (!checkCasesLibSource(libSource)) { abort = true; } //error message in function
251             
252             //turn _ to spaces mothur's work around
253             for (int i = 0; i < libSource.length(); i++) { if (libSource[i] == '_') { libSource[i] = ' '; }  }
254             
255             libSelection = validParameter.validFile(parameters, "libselection", false);         if (libSelection == "not found") { libSelection = "PCR"; }
256             if (!checkCasesLibSelection(libSelection)) { abort = true; } //error message in function
257             
258             //turn _ to spaces mothur's work around
259             for (int i = 0; i < libSelection.length(); i++) { if (libSelection[i] == '_') { libSelection[i] = ' '; }  }
260             
261             dataType = validParameter.validFile(parameters, "datatype", false);         if (dataType == "not found") { dataType = "METAGENOME"; }
262             if (!checkCasesDataType(dataType)) { abort = true; } //error message in function
263             
264             //turn _ to spaces mothur's work around
265             for (int i = 0; i < dataType.length(); i++) { if (dataType[i] == '_') { dataType[i] = ' '; }  }
266             
267             orientation = validParameter.validFile(parameters, "orientation", false);         if (orientation == "not found") { orientation = "forward"; }
268             
269             if ((orientation == "forward") || (orientation == "reverse")) {  }
270             else {  m->mothurOut("[ERROR]: " + orientation + " is not a valid orientation option. Choices are: forward and reverse.\n"); m->mothurOutEndLine(); abort = true; }
271
272             
273             string temp = validParameter.validFile(parameters, "bdiffs", false);                if (temp == "not found"){       temp = "0";             }
274                         m->mothurConvert(temp, bdiffs);
275                         
276                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found"){       temp = "0";             }
277                         m->mothurConvert(temp, pdiffs);
278                         
279             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
280                         m->mothurConvert(temp, ldiffs);
281             
282             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
283                         m->mothurConvert(temp, sdiffs);
284                         
285                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
286                         m->mothurConvert(temp, tdiffs);
287                         
288                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
289             
290             checkorient = validParameter.validFile(parameters, "checkorient", false);           if (temp == "not found") { temp = "F"; }
291                                 
292                 }
293                 
294         }
295         catch(exception& e) {
296                 m->errorOut(e, "SRACommand", "SRACommand");
297                 exit(1);
298         }
299 }
300 //**********************************************************************************************************************
301 int SRACommand::execute(){
302         try {
303                 
304                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
305         
306         readContactFile();
307         readMIMarksFile();
308         if (oligosfile != "") { readOligos(); Groups.push_back("scrap"); }
309         
310         if (m->control_pressed) { return 0; }
311         
312         //parse files
313         map<string, vector<string> > filesBySample;
314         isSFF = false;
315         
316         if (file != "")             {       readFile(filesBySample);        }
317         else if (sfffile != "")     {       parseSffFile(filesBySample);    }
318         else if (fastqfile != "")   {       parseFastqFile(filesBySample);  }
319         
320         for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) {  Groups.push_back(*it); }
321         
322         sanityCheckMiMarksGroups();
323         
324         //checks groups and files returned from parse - removes any groups that did not get reads assigned to them, orders files.
325         checkGroups(filesBySample);
326         
327         //create xml file
328         string thisOutputDir = outputDir;
329         if (outputDir == "") {  thisOutputDir += m->hasPath(inputfile);  }
330                 map<string, string> variables;
331         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(inputfile));
332         string outputFileName = getOutputFileName("xml", variables);
333         outputNames.push_back(outputFileName); outputTypes["xml"].push_back(outputFileName);
334         ofstream out;
335         m->openOutputFile(outputFileName, out);
336         
337         //contacts portion
338         ////////////////////////////////////////////////////////
339         out << "<Submission>\n";
340         out << "\t<Description>\n";
341         out << "\t\t<Comment> New Submission. Generated by mothur version " + m->getVersion() + " </Comment> \n";
342         out << "\t\t<Submitter user_name=\"" + submissionName + "\"/>\n";
343         out << "\t\t<Organization type=\"" + centerType + "\">\n";
344         out << "\t\t<Name>" + centerName + "</Name>\n";
345         out << "\t\t<Contact> email=\"" + email + "\">\n";
346         out << "\t\t\t<Name>\n";
347         out << "\t\t\t\t<First>" + firstName + "</First>\n";
348         out << "\t\t\t\t<Last>" + firstName + "</Last>\n";
349         out << "\t\t\t</Name>\n";
350         out << "\t\t</Contact>\n";
351         out << "\t\t</Organization>\n";
352         out << "\t</Description>\n";
353         ////////////////////////////////////////////////////////
354         
355         //bioproject
356         ////////////////////////////////////////////////////////
357         out << "\t<Action>\n";
358         out << "\t\t<AddData target_db=\"BioProject\">\n";
359         out << "\t\t\t<Data content_type=\"XML\">\n";
360         out << "\t\t\t\t<XmlContent>\n";
361         out << "\t\t\t\t\t<Project schema_version=\"2.0\">\n";
362         out << "\t\t\t\t\t\t<ProjectID>\n";
363         out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + projectName + " </SPUID> \n";
364         out << "\t\t\t\t\t\t</ProjectID>\n";
365         out << "\t\t\t\t\t\t<Descriptor>\n";
366         out << "\t\t\t\t\t\t\t<Title>" + projectTitle + " </Title> \n";
367         out << "\t\t\t\t\t\t\t<Description><p>" + description + "</p></Description> \n";
368         if (website != "") {
369             out << "\t\t\t\t\t\t\t<ExternalLink label=\"Website name\">\n";
370             out << "\t\t\t\t\t\t\t\t<URL>" + website + "</URL>\n";
371             out << "\t\t\t\t\t\t\t</ExternalLink>\n";
372         }
373         out << "\t\t\t\t\t\t</Descriptor>\n";
374         out << "\t\t\t\t\t\t<ProjectType>\n";
375         out << "\t\t\t\t\t\t\t<ProjectTypeSubmission sample_scope=\"eEnvironment\">\n";
376         out << "\t\t\t\t\t\t\t\t<IntendedDataTypeSet>\n";
377         out << "\t\t\t\t\t\t\t\t\t<DataType>" + dataType + " </DataType> \n";
378         out << "\t\t\t\t\t\t\t\t</IntendedDataTypeSet>\n";
379         out << "\t\t\t\t\t\t\t</ProjectTypeSubmission>\n";
380         out << "\t\t\t\t\t\t</ProjectType>\n";
381         out << "\t\t\t\t\t</Project>\n";
382         out << "\t\t\t\t</XmlContent>\n";
383         out << "\t\t\t</Data>\n";
384         out << "\t\t\t<Identifier>\n";
385         out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + projectName + " </SPUID> \n";
386         out << "\t\t\t</Identifier>\n";
387         out << "\t\t</AddData>\n";
388         out << "\t</Action>\n";
389         ////////////////////////////////////////////////////////
390         
391         //bioSample
392         ////////////////////////////////////////////////////////
393         for (int i = 0; i < Groups.size(); i++) {
394             
395             string barcodeForThisSample = Group2Barcode[Groups[i]][0];
396             
397             if (m->control_pressed) { break; }
398             out << "\t<Action>\n";
399             out << "\t\t<AddData target_db=\"BioSample\">\n";
400             out << "\t\t\t<Data content_type=\"XML\">\n";
401             out << "\t\t\t\t<XmlContent>\n";
402             out << "\t\t\t\t\t<BioSample schema_version=\"2.0\">\n";
403             out << "\t\t\t\t\t\t<SampleId>\n";
404             out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + Groups[i] + " </SPUID> \n";
405             out << "\t\t\t\t\t\t</SampleId>\n";
406             out << "\t\t\t\t\t\t<Organism>\n";
407             string organismName = "metagenome";
408             map<string, string>::iterator itOrganism = Group2Organism.find(Groups[i]);
409             if (itOrganism != Group2Organism.end()) { organismName = itOrganism->second; } //user supplied acceptable organism, so use it.
410             out << "\t\t\t\t\t\t\t<OrganismName>" + organismName + " </OrganismName> \n";
411             out << "\t\t\t\t\t\t</Organism>\n";
412             out << "\t\t\t\t\t\t<Package>" + packageType + "</Package>n";
413             out << "\t\t\t\t\t\t<Attributes>n";
414             //add biosample required attributes
415             map<string, map<string, string> >:: iterator it = mimarks.find(Groups[i]);
416             if (it != mimarks.end()) {
417                 map<string, string> categories = it->second;
418                 for (map<string, string>:: iterator it2 = categories.begin(); it2 != categories.end(); it2++) {
419                     if (m->control_pressed) { break; }
420                     out << "\t\t\t\t\t\t\t<Attribute attribute_name=\"" + it2->first + "\">\"" + it2->second + "\"</Attribute>\n";
421                 }
422             }
423             out << "\t\t\t\t\t\t</Attributes>n";
424             out << "\t\t\t\t\t</BioSample>\n";
425             out << "\t\t\t\t</XmlContent>\n";
426             out << "\t\t\t</Data>\n";
427             out << "\t\t\t<Identifier>\n";
428             out << "\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + Groups[i] + " </SPUID>\n";
429             out << "\t\t\t</Identifier>\n";
430             out << "\t\t</AddData>\n";
431             out << "\t</Action>\n";
432         }
433         
434         //File objects
435         ////////////////////////////////////////////////////////
436         for (int i = 0; i < Groups.size(); i++) {
437             
438             vector<string> thisGroupsFiles = filesBySample[Groups[i]];
439             string barcodeForThisSample = Group2Barcode[Groups[i]][0];
440             
441             for (int j = 0; j < thisGroupsFiles.size(); j++) {
442                 string libId = thisGroupsFiles[j] + "." + barcodeForThisSample;
443                 
444                 if (m->control_pressed) { break; }
445                 out << "\t<Action>\n";
446                 out << "\t\t<AddFiles target_db=\"SRA\">\n";
447                 if (libLayout == "paired") { //adjust the libID because the thisGroupsFiles[j] contains two filenames
448                     vector<string> pieces = m->splitWhiteSpace(thisGroupsFiles[j]);
449                     libId = pieces[0] + barcodeForThisSample;
450                     out << "\t\t\t<File file_path=\"" + pieces[0] + "\">\n";
451                     out << "\t\t\t\t<DataType>generic-data</DataType> \n";
452                     out << "\t\t\t</File>\n";
453                     vector<string> thisBarcodes; m->splitAtChar(Group2Barcode[Groups[i]][0], thisBarcodes, '.');
454                     string forwardBarcode = thisBarcodes[0];
455                     string reverseBarcode = thisBarcodes[1];
456                     vector<string> thisPrimers; m->splitAtChar(Group2Primer[Groups[i]][0], thisPrimers, '.');
457                     string forwardPrimer = thisPrimers[0];
458                     string reversePrimer = thisPrimers[1];
459                     //attributes
460                     out << "\t\t\t<Attribute name=\"title\">" + mimarks[Groups[i]]["title"] + "</Attribute>\n";
461                     out << "\t\t\t<Attribute name=\"BarCode\">" + forwardBarcode + "</Attribute>\n";
462                     out << "\t\t\t<Attribute name=\"primer\">" + forwardPrimer + "</Attribute>\n";
463                     out << "\t\t\t<Attribute name=\"read_type\">forward</Attribute>\n";
464                     out << "\t\t\t<Attribute name=\"library_name\">" + libId + "</Attribute>\n";
465                     out << "\t\t\t<Attribute name=\"library_strategy\">" + libStrategy + "</Attribute>\n";
466                     out << "\t\t\t<Attribute name=\"library_source\">" + libSource + "</Attribute>\n";
467                     out << "\t\t\t<Attribute name=\"library_selection\">" + libSelection + "</Attribute>\n";
468                     out << "\t\t\t<Attribute name=\"library_layout\">" + libLayout + "</Attribute>\n";
469                     out << "\t\t\t<Attribute name=\"instrument_model\">" + instrumentModel + "</Attribute>\n";
470                     out << "\t\t\t<Attribute name=\"library_construction_protocol\">" + mimarks[Groups[i]]["seq_methods"] + "</Attribute>\n";
471
472                     out << "\t\t\t<File file_path=\"" + pieces[1] + "\">\n";
473                     out << "\t\t\t\t<DataType>generic-data</DataType> \n";
474                     out << "\t\t\t</File>\n";
475                     out << "\t\t\t<Attribute name=\"title\">" + mimarks[Groups[i]]["title"] + "</Attribute>\n";
476                     out << "\t\t\t<Attribute name=\"BarCode\">" + reverseBarcode + "</Attribute>\n";
477                     out << "\t\t\t<Attribute name=\"primer\">" + reversePrimer + "</Attribute>\n";
478                     out << "\t\t\t<Attribute name=\"read_type\">reverse</Attribute>\n";
479                     out << "\t\t\t<Attribute name=\"library_name\">" + libId + "</Attribute>\n";
480                     out << "\t\t\t<Attribute name=\"library_strategy\">" + libStrategy + "</Attribute>\n";
481                     out << "\t\t\t<Attribute name=\"library_source\">" + libSource + "</Attribute>\n";
482                     out << "\t\t\t<Attribute name=\"library_selection\">" + libSelection + "</Attribute>\n";
483                     out << "\t\t\t<Attribute name=\"library_layout\">" + libLayout + "</Attribute>\n";
484                     out << "\t\t\t<Attribute name=\"instrument_model\">" + instrumentModel + "</Attribute>\n";
485                     out << "\t\t\t<Attribute name=\"library_construction_protocol\">" + mimarks[Groups[i]]["seq_methods"] + "</Attribute>\n";
486
487                 }else { //single
488                     out << "\t\t\t<File file_path=\"" + thisGroupsFiles[j] + "\">\n";
489                     out << "\t\t\t\t<DataType>generic-data</DataType> \n";
490                     out << "\t\t\t</File>\n";
491                     //attributes
492                     out << "\t\t\t<Attribute name=\"title\">" + mimarks[Groups[i]]["title"] + "</Attribute>\n";
493                     out << "\t\t\t<Attribute name=\"BarCode\">" + Group2Barcode[Groups[i]][0] + "</Attribute>\n";
494                     out << "\t\t\t<Attribute name=\"primer\">" + Group2Primer[Groups[i]][0] + "</Attribute>\n";
495                     out << "\t\t\t<Attribute name=\"read_type\">" + orientation + "</Attribute>\n";
496                     out << "\t\t\t<Attribute name=\"library_name\">" + libId + "</Attribute>\n";
497                     out << "\t\t\t<Attribute name=\"library_strategy\">" + libStrategy + "</Attribute>\n";
498                     out << "\t\t\t<Attribute name=\"library_source\">" + libSource + "</Attribute>\n";
499                     out << "\t\t\t<Attribute name=\"library_selection\">" + libSelection + "</Attribute>\n";
500                     out << "\t\t\t<Attribute name=\"library_layout\">" + libLayout + "</Attribute>\n";
501                     out << "\t\t\t<Attribute name=\"instrument_model\">" + instrumentModel + "</Attribute>\n";
502                     out << "\t\t\t<Attribute name=\"library_construction_protocol\">" + mimarks[Groups[i]]["seq_methods"] + "</Attribute>\n";
503
504                 }
505                 ///////////////////bioProject info
506                 out << "\t\t\t<AttributeRefId name=\"BioProject\">\n";
507                 out << "\t\t\t\t<RefId>\n";
508                 out << "\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + projectName + " </SPUID> \n";
509                 out << "\t\t\t\t</RefId>\n";
510                 out << "\t\t\t</AttributeRefId>\n";
511                 //////////////////bioSample info
512                 out << "\t\t\t<AttributeRefId name=\"BioSample\">\n";
513                 out << "\t\t\t\t<RefId>\n";
514                 out << "\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + Groups[i] + " </SPUID>\n";
515                 out << "\t\t\t\t</RefId>\n";
516                 out << "\t\t\t</AttributeRefId>\n";
517                 //libID
518                 out << "\t\t\t<Identifier>\n";
519                 if (libLayout == "paired") { //adjust the libID because the thisGroupsFiles[j] contains two filenames
520                     vector<string> pieces = m->splitWhiteSpace(thisGroupsFiles[j]);
521                     libId = pieces[0] + barcodeForThisSample;
522                 }
523                 out << "\t\t\t\t<LocalId>" + libId + " </LocalId>\n";
524                 out << "\t\t\t</Identifier>\n";
525                 out << "\t\t</AddFiles>\n";
526                 out << "\t</Action>\n";
527             }
528         }
529         out << "</Submission>\n";
530         out.close();
531         
532         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
533                 
534         //output files created by command
535                 m->mothurOutEndLine();
536                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
537                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
538                 m->mothurOutEndLine();
539         return 0;
540                 
541     }
542         catch(exception& e) {
543                 m->errorOut(e, "SRACommand", "SRACommand");
544                 exit(1);
545         }
546 }
547 //**********************************************************************************************************************
548 int SRACommand::readContactFile(){
549         try {
550         lastName = ""; firstName = ""; submissionName = ""; email = ""; centerName = ""; centerType = ""; description = ""; website = ""; projectName = "";
551         projectTitle = ""; grantAgency = ""; grantId = ""; grantTitle = "";
552         
553         ifstream in;
554         m->openInputFile(contactfile, in);
555         
556         while(!in.eof()) {
557             
558             if (m->control_pressed) { break; }
559             
560             string key, value;
561             in >> key; m->gobble(in);
562             value = m->getline(in); m->gobble(in);
563             
564             for (int i = 0; i < key.length(); i++) { key[i] = toupper(key[i]); }
565             
566             if (key == "USERNAME")          {   submissionName = value; }
567             else if (key == "LAST")         {   lastName = value;       }
568             else if (key == "FIRST")        {   firstName = value;      }
569             else if (key == "EMAIL")        {   email = value;          }
570             else if (key == "CENTER")       {   centerName = value;     }
571             else if (key == "TYPE")         {
572                 centerType = value;
573                 for (int i = 0; i < centerType.length(); i++) { centerType[i] = tolower(centerType[i]); }
574                 if ((centerType == "consortium") || (centerType == "center") ||  (centerType == "institute") ||  (centerType == "lab")) {}
575                 else { m->mothurOut("[ERROR]: " + centerType + " is not a center type option.  Valid center type options are consortium, center, institute and lab. This is a controlled vocabulary section in the XML file that will be generated."); m->mothurOutEndLine(); m->control_pressed = true; }
576             }else if (key == "DESCRIPTION")     {   description = value;    }
577             else if (key == "WEBSITE")          {   website = value;        }
578             else if (key == "PROJECTNAME")      {   projectName = value;    }
579             else if (key == "PROJECTTITLE")     {   projectTitle = value;   }
580             else if (key == "GRANTID")          {   grantId = value;        }
581             else if (key == "GRANTTITLE")       {   grantTitle = value;     }
582             else if (key == "GRANTAGENCY")      {   grantAgency = value;    }
583         }
584         in.close();
585         
586         if (lastName == "") { m->mothurOut("[ERROR]: missing last name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
587         if (firstName == "") { m->mothurOut("[ERROR]: missing first name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
588         if (submissionName == "") { m->mothurOut("[ERROR]: missing submission name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
589         if (email == "") { m->mothurOut("[ERROR]: missing email from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
590         if (centerName == "") { m->mothurOut("[ERROR]: missing center name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
591         if (centerType == "") { m->mothurOut("[ERROR]: missing center type from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
592         if (description == "") { m->mothurOut("[ERROR]: missing description from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
593         if (projectTitle == "") { m->mothurOut("[ERROR]: missing project title from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
594         if (projectName == "") { m->mothurOut("[ERROR]: missing project name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
595
596         return 0;
597     }
598         catch(exception& e) {
599                 m->errorOut(e, "SRACommand", "readContactFile");
600                 exit(1);
601         }
602 }
603 //**********************************************************************************************************************
604 //air, host_associated, human_associated, human_gut, human_oral, human_skin, human_vaginal, microbial, miscellaneous, plant_associated, sediment, soil, wastewater or water
605 //all packages require: *sample_name    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon
606 //air: *altitude
607 //host_associated, human_associated, human_gut, human_oral, human_skin, human_vaginal, plant_associated: *host
608 //microbial, sediment, soil: *depth     *elev
609 //water: *depth
610 int SRACommand::readMIMarksFile(){
611         try {
612         //acceptable organisms
613         vector<string> acceptableOrganisms;
614         bool organismError = false;
615         //ecological
616         acceptableOrganisms.push_back("activated carbon metagenome"); acceptableOrganisms.push_back("activated sludge metagenome"); acceptableOrganisms.push_back("air metagenome"); acceptableOrganisms.push_back("anaerobic digester metagenome"); acceptableOrganisms.push_back("ant fungus garden metagenome"); acceptableOrganisms.push_back("aquatic metagenome"); acceptableOrganisms.push_back("activated carbon metagenome"); acceptableOrganisms.push_back("activated sludge metagenome"); acceptableOrganisms.push_back("beach sand metagenome"); acceptableOrganisms.push_back("biofilm metagenome"); acceptableOrganisms.push_back("biofilter metagenome"); acceptableOrganisms.push_back("biogas fermenter metagenome"); acceptableOrganisms.push_back("bioreactor metagenome"); acceptableOrganisms.push_back("bioreactor sludge metagenome"); acceptableOrganisms.push_back("clinical metagenome"); acceptableOrganisms.push_back("coal metagenome"); acceptableOrganisms.push_back("compost metagenome"); acceptableOrganisms.push_back("dust metagenome"); acceptableOrganisms.push_back("fermentation metagenome"); acceptableOrganisms.push_back("food fermentation metagenome"); acceptableOrganisms.push_back("food metagenome"); acceptableOrganisms.push_back("freshwater metagenome"); acceptableOrganisms.push_back("freshwater sediment metagenome"); acceptableOrganisms.push_back("groundwater metagenome"); acceptableOrganisms.push_back("halite metagenome"); acceptableOrganisms.push_back("hot springs metagenome"); acceptableOrganisms.push_back("hydrocarbon metagenome"); acceptableOrganisms.push_back("hydrothermal vent metagenome"); acceptableOrganisms.push_back("hypersaline lake metagenome"); acceptableOrganisms.push_back("ice metagenome"); acceptableOrganisms.push_back("indoor metagenome"); acceptableOrganisms.push_back("industrial waste metagenome"); acceptableOrganisms.push_back("mangrove metagenome"); acceptableOrganisms.push_back("marine metagenome"); acceptableOrganisms.push_back("marine sediment 
metagenome"); acceptableOrganisms.push_back("microbial mat metagenome"); acceptableOrganisms.push_back("mine drainage metagenome"); acceptableOrganisms.push_back("mixed culture metagenome"); acceptableOrganisms.push_back("oil production facility metagenome"); acceptableOrganisms.push_back("paper pulp metagenome"); acceptableOrganisms.push_back("permafrost metagenome"); acceptableOrganisms.push_back("plastisphere metagenome"); acceptableOrganisms.push_back("power plant metagenome"); acceptableOrganisms.push_back("retting rhizosphere metagenome"); acceptableOrganisms.push_back("rock metagenome"); acceptableOrganisms.push_back("salt lake metagenome"); acceptableOrganisms.push_back("saltern metagenome"); acceptableOrganisms.push_back("sediment metagenome"); acceptableOrganisms.push_back("snow metagenome"); acceptableOrganisms.push_back("soil metagenome"); acceptableOrganisms.push_back("stromatolite metagenome"); acceptableOrganisms.push_back("terrestrial metagenome"); acceptableOrganisms.push_back("tomb wall metagenome"); acceptableOrganisms.push_back("wastewater metagenome"); acceptableOrganisms.push_back("wetland metagenome"); acceptableOrganisms.push_back("whale fall metagenome");
617         //oganismal
618         acceptableOrganisms.push_back("algae metagenome"); acceptableOrganisms.push_back("ant metagenome"); acceptableOrganisms.push_back("bat metagenome"); acceptableOrganisms.push_back("beetle metagenome"); acceptableOrganisms.push_back("bovine gut metagenome"); acceptableOrganisms.push_back("bovine metagenome"); acceptableOrganisms.push_back("chicken gut metagenome"); acceptableOrganisms.push_back("coral metagenome"); acceptableOrganisms.push_back("echinoderm metagenome"); acceptableOrganisms.push_back("endophyte metagenome"); acceptableOrganisms.push_back("epibiont metagenome"); acceptableOrganisms.push_back("fish metagenome"); acceptableOrganisms.push_back("fossil metagenome"); acceptableOrganisms.push_back("gill metagenome"); acceptableOrganisms.push_back("gut metagenome"); acceptableOrganisms.push_back("honeybee metagenome"); acceptableOrganisms.push_back("human gut metagenome"); acceptableOrganisms.push_back("human lung metagenome"); acceptableOrganisms.push_back("human metagenome"); acceptableOrganisms.push_back("human nasal/pharyngeal metagenome"); acceptableOrganisms.push_back("human oral metagenome"); acceptableOrganisms.push_back("human skin metagenome"); acceptableOrganisms.push_back("insect gut metagenome"); acceptableOrganisms.push_back("insect metagenome"); acceptableOrganisms.push_back("mollusc metagenome"); acceptableOrganisms.push_back("mosquito metagenome"); acceptableOrganisms.push_back("mouse gut metagenome"); acceptableOrganisms.push_back("mouse metagenome"); acceptableOrganisms.push_back("mouse skin metagenome"); acceptableOrganisms.push_back("nematode metagenome"); acceptableOrganisms.push_back("oral metagenome"); acceptableOrganisms.push_back("phyllosphere metagenome"); acceptableOrganisms.push_back("pig metagenome"); acceptableOrganisms.push_back("plant metagenome"); acceptableOrganisms.push_back("primate metagenome"); acceptableOrganisms.push_back("rat metagenome"); acceptableOrganisms.push_back("root metagenome"); 
acceptableOrganisms.push_back("sea squirt metagenome"); acceptableOrganisms.push_back("seed metagenome"); acceptableOrganisms.push_back("shoot metagenome"); acceptableOrganisms.push_back("skin metagenome"); acceptableOrganisms.push_back("snake metagenome"); acceptableOrganisms.push_back("sponge metagenome"); acceptableOrganisms.push_back("stomach metagenome"); acceptableOrganisms.push_back("symbiont metagenome"); acceptableOrganisms.push_back("termite gut metagenome"); acceptableOrganisms.push_back("termite metagenome"); acceptableOrganisms.push_back("upper respiratory tract metagenome"); acceptableOrganisms.push_back("urine metagenome"); acceptableOrganisms.push_back("viral metagenome"); acceptableOrganisms.push_back("wallaby gut metagenome"); acceptableOrganisms.push_back("wasp metagenome"); acceptableOrganisms.push_back("sythetic metagenome"); acceptableOrganisms.push_back("metagenome");
619         
620         vector<string> requiredFieldsForPackage;
621         requiredFieldsForPackage.push_back("sample_name"); requiredFieldsForPackage.push_back("organism");
622         requiredFieldsForPackage.push_back("collection_date"); requiredFieldsForPackage.push_back("biome");
623         requiredFieldsForPackage.push_back("feature"); requiredFieldsForPackage.push_back("material");
624         requiredFieldsForPackage.push_back("geo_loc_name"); requiredFieldsForPackage.push_back("lat_lon");
625         requiredFieldsForPackage.push_back("seq_methods"); requiredFieldsForPackage.push_back("title");
626         vector<string> chooseAtLeastOneForPackage;
627         
628         ifstream in;
629         m->openInputFile(mimarksfile, in);
630         
631         //read comments
632         string temp; packageType = "";
633         while(!in.eof()) {
634             
635             if (m->control_pressed) { break; }
636             temp = m->getline(in); m->gobble(in);
637             
638             if (m->debug) { m->mothurOut("[DEBUG]: " + temp + "\n"); }
639             
640             if (temp[0] == '#') {
641                 int pos = temp.find("Environmental");
642                 if (pos != string::npos) {
643                     for (int i = pos+14; i < temp.length(); i++) {
644                         if (!isspace(temp[i])) { packageType += temp[i]; }
645                         else { i+= temp.length(); }
646                     }
647                 }
648             }
649             else{ break; } //hit headers line
650          }
651         
652         vector<string> headers; m->splitAtChar(temp, headers, '\t');
653         m->removeBlanks(headers);
654         //remove * from required's
655         for (int i = 0; i < headers.size(); i++) {
656             if (headers[i][0] == '*') { headers[i] = headers[i].substr(1); }
657             if (headers[i][0] == '*') { headers[i] = headers[i].substr(1); chooseAtLeastOneForPackage.push_back(headers[i]); }  //secondary condition
658             if (m->debug) { m->mothurOut("[DEBUG]: " + headers[i] + "\n"); }
659         }
660         
661         if (m->debug) {  m->mothurOut("[DEBUG]: packageType = '" + packageType + "'\n");   }
662         
663         //check to make sure package has all its required parts
664         //MIMARKS.specimen.water.3.0
665         if (packageType == "MIMARKS.specimen.air.3.0") {   requiredFieldsForPackage.push_back("altitude");  }
666         else if ((packageType == "MIMARKS.specimen.host-associated.3.0") || (packageType == "MIMARKS.specimen.human-associated.3.0") || (packageType == "MIMARKS.specimen.human-gut.3.0") || (packageType == "MIMARKS.specimen.human-oral.3.0") || (packageType == "MIMARKS.specimen.human-skin.3.0") || (packageType == "MIMARKS.specimen.human-vaginal.3.0") || (packageType == "MIMARKS.specimen.plant-associated.3.0")) {  requiredFieldsForPackage.push_back("host");  }
667         else if ((packageType == "MIMARKS.specimen.microbial.3.0") || (packageType == "MIMARKS.specimen.sediment.3.0") || (packageType == "soil")) {   requiredFieldsForPackage.push_back("depth");  requiredFieldsForPackage.push_back("elev"); }
668         else if (packageType == "MIMARKS.specimen.water.3.0") {   requiredFieldsForPackage.push_back("depth");  }
669         else if ((packageType == "MIMARKS.specimen.miscellaneous.3.0") || (packageType == "wastewater")) { }
670         else {
671             m->mothurOut("[ERROR]: unknown package " + packageType + ", please correct.\n"); m->control_pressed = true; in.close(); return 0;
672         }
673         
674         if (!m->isSubset(headers, requiredFieldsForPackage)){
675             string requiredFields = "";
676             for (int i = 0; i < requiredFieldsForPackage.size()-1; i++) { requiredFields += requiredFieldsForPackage[i] + ", "; } requiredFields += requiredFieldsForPackage[requiredFieldsForPackage.size()-1];
677             m->mothurOut("[ERROR]: missing required fields for package, please correct. Required fields are " + requiredFields + ".\n"); m->control_pressed = true; in.close(); return 0;
678         }
679         
680         if (m->debug) {  m->mothurOut("[DEBUG]: chooseAtLeastOneForPackage.size() = " + toString(chooseAtLeastOneForPackage.size()) + "\n");   }
681         
682         if (!m->inUsersGroups(chooseAtLeastOneForPackage, headers)){ //returns true if any of the choose at least ones are in headers
683             string requiredFields = "";
684             for (int i = 0; i < chooseAtLeastOneForPackage.size()-1; i++) { requiredFields += chooseAtLeastOneForPackage[i] + ", "; cout << chooseAtLeastOneForPackage[i] << endl; }
685             if (chooseAtLeastOneForPackage.size() < 1) { requiredFields += chooseAtLeastOneForPackage[chooseAtLeastOneForPackage.size()-1]; }
686             m->mothurOut("[ERROR]: missing a choose at least one fields for the package, please correct. These are marked with '**'. Required fields are " + requiredFields + ".\n"); m->control_pressed = true; in.close(); return 0;
687         }
688         
689         map<string, bool> allNA;  for (int i = 1; i < headers.size(); i++) {  allNA[headers[i]] = true; }
690         while(!in.eof()) {
691             
692             if (m->control_pressed) { break; }
693             
694             temp = m->getline(in);  m->gobble(in);
695             
696             if (m->debug) { m->mothurOut("[DEBUG]: " + temp + "\n"); }
697             
698             string original = temp;
699             vector<string> linePieces; m->splitAtChar(temp, linePieces, '\t');
700             m->removeBlanks(linePieces);
701             
702             if (linePieces.size() != headers.size()) { m->mothurOut("[ERROR]: line: " + original + " contains " + toString(linePieces.size()) + " columns, but you have " + toString(headers.size()) + " column headers, please correct.\n"); m->control_pressed = true; }
703             else {
704                 map<string, map<string, string> >:: iterator it = mimarks.find(linePieces[0]);
705                 
706                 if (it == mimarks.end()) {
707                     map<string, string> categories;
708                     //start after *sample_name
709                     for (int i = 1; i < headers.size(); i++) {
710                         categories[headers[i]] = linePieces[i];
711                         //check the users inputs for appropriate organisms
712                         if (headers[i] == "organism") {
713                             if (!m->inUsersGroups(linePieces[i], acceptableOrganisms)) { //not an acceptable organism
714                                 organismError = true;
715                                 m->mothurOut("[WARNING]: " + linePieces[i]+ " is not an acceptable organism, changing to acceptable 'metagenome'. NCBI will allow you to modify the organism after submission.\n"); linePieces[i] = "metagenome"; categories[headers[i]] = linePieces[i];
716                             }
717                             Group2Organism[linePieces[0]] = linePieces[i];
718                         }
719                         if (linePieces[i] != "NA") {  allNA[headers[i]] = false;     }
720                     }
721                     
722                     //does this sample already match an existing sample?
723                     bool isOkaySample = true;
724                     for (map<string, map<string, string> >:: iterator it2 = mimarks.begin(); it2 != mimarks.end(); it2++) {
725                         if (m->control_pressed) { break; }
726                         bool allSame = true;
727                         for (int i = 1; i < headers.size(); i++) {
728                             if ((it2->second)[headers[i]] != categories[headers[i]]) { allSame = false; }
729                         }
730                         if (allSame) { m->mothurOut("[ERROR]: " + linePieces[0]+ " is a duplicate sample to " + it2->first + ". It has all the same attributes in the MIMarks file. Samples must have distinguishing features to be uploaded to the NCBI library, please correct.\n"); m->control_pressed = true; isOkaySample = false; }
731                     }
732                     if (isOkaySample) { mimarks[linePieces[0]] = categories; }
733                 }else {
734                     m->mothurOut("[ERROR]: " + linePieces[0]+ " is a duplicate sampleName. Sample names must be unique, please correct.\n"); m->control_pressed = true;
735                 }
736             }
737         }
738         in.close();
739         
740         //add in values for "scrap" group
741         map<string, string> categories;
742         //start after *sample_name
743         for (int i = 1; i < headers.size(); i++) {
744             categories[headers[i]] = "NA";
745             if (headers[i] == "organism")       { categories[headers[i]] = "metagenome"; }
746             if (headers[i] == "seq_methods")    { categories[headers[i]] = "these sequences were scrapped"; }
747             if (headers[i] == "title")          { categories[headers[i]] = "these sequences were scrapped"; }
748         }
749         mimarks["scrap"] = categories;
750         Group2Organism["scrap"] = "metagenome";
751         
752         if (organismError) {
753             string organismTypes = "";
754             for (int i = 0; i < acceptableOrganisms.size()-1; i++) { organismTypes += acceptableOrganisms[i] + ", "; }
755             organismTypes += acceptableOrganisms[acceptableOrganisms.size()-1];
756             m->mothurOut("\n[WARNING]: The acceptable organism choices are: " + organismTypes + ".\n\n\n");
757         }
758         
759         return 0;
760     }
761         catch(exception& e) {
762                 m->errorOut(e, "SRACommand", "readMIMarksFile");
763                 exit(1);
764         }
765 }
766
767 //**********************************************************************************************************************
768 // going to have to rework this to allow for other options --
769 /*
770  file option 1
771  
772  sfffile1   oligosfile1
773  sfffile2   oligosfile2
774  ...
775  
776  file option 2
777  
778  fastqfile1 oligosfile1
779  fastqfile2 oligosfile2
780  ...
781  
782  file option 3
783  
 group  fastqfile  fastqfile
 group  fastqfile  fastqfile
 group  fastqfile  fastqfile
787  ...
788  
789 */
790
791 int SRACommand::readFile(map<string, vector<string> >& files){
792         try {
793         //vector<string> theseFiles;
794         inputfile = file;
795         files.clear();
796         
797         ifstream in;
798         m->openInputFile(file, in);
799         
800         while(!in.eof()) {
801             
802             if (m->control_pressed) { return 0; }
803             
804             string line = m->getline(in);  m->gobble(in);
805             vector<string> pieces = m->splitWhiteSpace(line);
806             
807             string group = "";
808             string thisFileName1, thisFileName2; thisFileName1 = ""; thisFileName2 = "";
809             if (pieces.size() == 2) {
810                 thisFileName1 = pieces[0];
811                 thisFileName2 = pieces[1];
812             }else if (pieces.size() == 3) {
813                 thisFileName1 = pieces[1];
814                 thisFileName2 = pieces[2];
815                 string group = pieces[0];
816                 libLayout = "paired";
817             }else {
818                 m->mothurOut("[ERROR]: file lines can be 2 or 3 columns. The 2 column files are sff file then oligos or fastqfile then oligos. You may have multiple lines in the file.  The 3 column files are for paired read libraries. The format is groupName, forwardFastqFile reverseFastqFile. \n"); m->control_pressed = true;
819             }
820             
821             if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", thisFileName1 = " + thisFileName1 + ", thisFileName2 = " + thisFileName2  + ".\n"); }
822             
823             if (inputDir != "") {
824                 string path = m->hasPath(thisFileName1);
825                 if (path == "") {  thisFileName1 = inputDir + thisFileName1;  }
826                 
827                 path = m->hasPath(thisFileName2);
828                 if (path == "") {  thisFileName2 = inputDir + thisFileName2;  }
829             }
830             
831             //check to make sure both are able to be opened
832             ifstream in2;
833             int openForward = m->openInputFile(thisFileName1, in2, "noerror");
834             
835             //if you can't open it, try default location
836             if (openForward == 1) {
837                 
838                 if (m->getDefaultPath() != "") { //default path is set
839                     string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName1);
840                     m->mothurOut("Unable to open " + thisFileName1 + ". Trying default " + tryPath); m->mothurOutEndLine();
841                     ifstream in3;
842                     openForward = m->openInputFile(tryPath, in3, "noerror");
843                     in3.close();
844                     thisFileName1 = tryPath;
845                 }
846             }
847             
848             //if you can't open it, try output location
849             if (openForward == 1) {
850                 if (m->getOutputDir() != "") { //default path is set
851                     string tryPath = m->getOutputDir() + m->getSimpleName(thisFileName1);
852                     m->mothurOut("Unable to open " + thisFileName1 + ". Trying output directory " + tryPath); m->mothurOutEndLine();
853                     ifstream in4;
854                     openForward = m->openInputFile(tryPath, in4, "noerror");
855                     thisFileName1 = tryPath;
856                     in4.close();
857                 }
858             }
859             
860             if (openForward == 1) { //can't find it
861                 m->mothurOut("[WARNING]: can't find " + thisFileName1 + ", ignoring.\n");
862             }else{  in2.close();  }
863             
864             int openReverse = 1;
865             
866             ifstream in3;
867             openReverse = m->openInputFile(thisFileName2, in3, "noerror");
868             
869             //if you can't open it, try default location
870             if (openReverse == 1) {
871                 if (m->getDefaultPath() != "") { //default path is set
872                     string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName2);
873                     m->mothurOut("Unable to open " + thisFileName2 + ". Trying default " + tryPath); m->mothurOutEndLine();
874                     ifstream in3;
875                     openReverse = m->openInputFile(tryPath, in3, "noerror");
876                     in3.close();
877                     thisFileName2 = tryPath;
878                 }
879             }
880             
881             //if you can't open it, try output location
882             if (openReverse == 1) {
883                 if (m->getOutputDir() != "") { //default path is set
884                     string tryPath = m->getOutputDir() + m->getSimpleName(thisFileName2);
885                     m->mothurOut("Unable to open " + thisFileName2 + ". Trying output directory " + tryPath); m->mothurOutEndLine();
886                     ifstream in4;
887                     openReverse = m->openInputFile(tryPath, in4, "noerror");
888                     thisFileName2 = tryPath;
889                     in4.close();
890                 }
891             }
892             
893             if (openReverse == 1) { //can't find it
894                 m->mothurOut("[WARNING]: can't find " + thisFileName2 + ", ignoring pair.\n");
895             }else{  in3.close();  }
896            
897             
898             if ((pieces.size() == 2) && (openForward != 1) && (openReverse != 1)) { //good pair and sff or fastq and oligos
899                 //process pair
900                 int pos = thisFileName1.find(".sff");
901                 if (pos != string::npos) {//these files are sff files
902                     isSFF = true;
903                     sfffile = thisFileName1; oligosfile = thisFileName2;
904                     if (m->debug) { m->mothurOut("[DEBUG]: about to read oligos\n"); }
905                     readOligos();
906                     if (m->debug) { m->mothurOut("[DEBUG]: about to parse\n"); }
907                     parseSffFile(files);
908                     if (m->debug) { m->mothurOut("[DEBUG]: done parsing " + sfffile + "\n"); }
909                 }else{
910                     isSFF = false;
911                     fastqfile = thisFileName1; oligosfile = thisFileName2;
912                     if (m->debug) { m->mothurOut("[DEBUG]: about to read oligos\n"); }
913                     readOligos();
914                     if (m->debug) { m->mothurOut("[DEBUG]: about to parse\n"); }
915                     parseFastqFile(files);
916                     if (m->debug) { m->mothurOut("[DEBUG]: done parsing " + fastqfile + "\n"); }
917                 }
918                 
919             }else if((pieces.size() == 3) && (openForward != 1) && (openReverse != 1)) { //good pair and paired read
920                 map<string, vector<string> >::iterator it = files.find(group);
921                 if (it == files.end()) {
922                     vector<string> temp; temp.push_back(thisFileName1 + " " + thisFileName2); files[group] = temp;
923                 }else {
924                     files[group].push_back(thisFileName1 + " " + thisFileName2);
925                 }
926             }
927         }
928         in.close();
929     
930         inputfile = file;
931         
932         return 0;
933     }
934         catch(exception& e) {
935                 m->errorOut(e, "SRACommand", "readFile");
936                 exit(1);
937         }
938 }
939 //**********************************************************************************************************************
940 int SRACommand::parseSffFile(map<string, vector<string> >& files){
941         try {
942         vector<string> theseFiles;
943         inputfile = sfffile;
944         libLayout = "single"; //controlled vocab
945         
946         isSFF = true;
947         //run sffinfo to parse sff file into individual sampled sff files
948         string commandString = "sff=" + sfffile;
949         
950         commandString += ", oligos=" + oligosfile;
951         //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs
952         if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); }
953         if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); }
954         if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
955         if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
956         if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
957         if (m->isTrue(checkorient)) { commandString += ", checkorient=" + checkorient; }
958         
959         m->mothurOutEndLine();
960         m->mothurOut("/******************************************/"); m->mothurOutEndLine();
961         m->mothurOut("Running command: sffinfo(" + commandString + ")"); m->mothurOutEndLine();
962         m->mothurCalling = true;
963         
964         Command* sffinfoCommand = new SffInfoCommand(commandString);
965         sffinfoCommand->execute();
966         
967         map<string, vector<string> > filenames = sffinfoCommand->getOutputFiles();
968         map<string, vector<string> >::iterator it = filenames.find("sff");
969         if (it != filenames.end()) { theseFiles = it->second; }
970         else { m->control_pressed = true; } // error in sffinfo
971         
972         delete sffinfoCommand;
973         m->mothurCalling = false;
974         m->mothurOut("/******************************************/"); m->mothurOutEndLine();
975         
976         mapGroupToFile(files, theseFiles);
977         
978         return 0;
979     }
980         catch(exception& e) {
981                 m->errorOut(e, "SRACommand", "readFile");
982                 exit(1);
983         }
984 }
985
986 //**********************************************************************************************************************
987 int SRACommand::parseFastqFile(map<string, vector<string> >& files){
988         try {
989         vector<string> theseFiles;
990         inputfile = fastqfile;
991         libLayout = "single"; //controlled vocab
992         
993         //run sffinfo to parse sff file into individual sampled sff files
994         string commandString = "fastq=" + fastqfile;
995         
996         commandString += ", oligos=" + oligosfile;
997         //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs
998         if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); }
999         if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); }
1000         if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
1001         if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
1002         if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
1003         if (m->isTrue(checkorient)) { commandString += ", checkorient=" + checkorient; }
1004        
1005         m->mothurOutEndLine();
1006         m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1007         m->mothurOut("Running command: fastq.info(" + commandString + ")"); m->mothurOutEndLine();
1008         m->mothurCalling = true;
1009         
1010         Command* fastqinfoCommand = new ParseFastaQCommand(commandString);
1011         fastqinfoCommand->execute();
1012         
1013         map<string, vector<string> > filenames = fastqinfoCommand->getOutputFiles();
1014         map<string, vector<string> >::iterator it = filenames.find("fastq");
1015         if (it != filenames.end()) { theseFiles = it->second; }
1016         else { m->control_pressed = true; } // error in sffinfo
1017         
1018         delete fastqinfoCommand;
1019         m->mothurCalling = false;
1020         m->mothurOut("/******************************************/"); m->mothurOutEndLine();
1021         
1022         mapGroupToFile(files, theseFiles);
1023         
1024         return 0;
1025     }
1026         catch(exception& e) {
1027                 m->errorOut(e, "SRACommand", "readFile");
1028                 exit(1);
1029         }
1030 }
1031 //***************************************************************************************************************
1032 //maps group to file
1033 int SRACommand::mapGroupToFile(map<string, vector<string> >& files, vector<string> theseFiles){
1034         try {
1035         
1036         for (int i = 0; i < Groups.size(); i++) {
1037             
1038             set<int> matches;
1039             for (int j = 0; j < theseFiles.size(); j++) {
1040                 int pos = theseFiles[j].find(Groups[i]);
1041                 if (pos != string::npos) { //you have a potential match, make sure you dont have a case of partial name
1042                     if (theseFiles[j][pos+Groups[i].length()] == '.') { //final.soil.sff vs final.soil2.sff both would match soil.
1043                         matches.insert(i);
1044                     }
1045                 }
1046             }
1047             
1048             if(matches.size() == 1) {
1049                 map<string, vector<string> >::iterator it = files.find(Groups[i]);
1050                 if (it == files.end()) {
1051                     vector<string> temp; temp.push_back(theseFiles[*matches.begin()]); files[Groups[i]] = temp;
1052                 }else {
1053                     files[Groups[i]].push_back(theseFiles[*matches.begin()]);
1054                 }
1055             }
1056         }
1057         return 0;
1058     }
1059         catch(exception& e) {
1060                 m->errorOut(e, "SRACommand", "checkGroups");
1061                 exit(1);
1062         }
1063 }
1064
1065 //***************************************************************************************************************
1066 //checks groups and files returned from parse - removes any groups that did not get reads assigned to them, orders files.
1067 int SRACommand::checkGroups(map<string, vector<string> >& files){
1068         try {
1069         vector<string> newGroups;
1070         for (int i = 0; i < Groups.size(); i++) {
1071             
1072             map<string, vector<string> >::iterator it = files.find(Groups[i]);
1073              //no files for this group, remove it
1074             if (it == files.end()) { }
1075             else { newGroups.push_back(Groups[i]); }
1076         }
1077         
1078         Groups = newGroups;
1079         
1080         return 0;
1081     }
1082         catch(exception& e) {
1083                 m->errorOut(e, "SRACommand", "checkGroups");
1084                 exit(1);
1085         }
1086 }
1087 //***************************************************************************************************************
1088 int SRACommand::readOligos(){
1089         try {
1090                 Oligos oligos(oligosfile);
1091         
1092         if (m->control_pressed) { return false; } //error in reading oligos
1093         
1094         if (oligos.hasPairedBarcodes())     {   pairedOligos = true;    }
1095         else                                {  pairedOligos = false;    }
1096         
1097         set<string> uniqueNames; //used to cleanup outputFileNames
1098         if (pairedOligos) {
1099             map<int, oligosPair> barcodes = oligos.getPairedBarcodes();
1100             map<int, oligosPair> primers = oligos.getPairedPrimers();
1101             for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1102                 for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1103                     
1104                     string primerName = oligos.getPrimerName(itPrimer->first);
1105                     string barcodeName = oligos.getBarcodeName(itBar->first);
1106                     
1107                     if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1108                     else if ((primerName == "") && (barcodeName == "")) { } //do nothing
1109                     else {
1110                         string comboGroupName = "";
1111                         string fastaFileName = "";
1112                         string qualFileName = "";
1113                         string nameFileName = "";
1114                         string countFileName = "";
1115                         
1116                         if(primerName == ""){
1117                             comboGroupName = barcodeName;
1118                         }else{
1119                             if(barcodeName == ""){
1120                                 comboGroupName = primerName;
1121                             }
1122                             else{
1123                                 comboGroupName = barcodeName + "." + primerName;
1124                             }
1125                         }
1126                         uniqueNames.insert(comboGroupName);
1127                         
1128                         map<string, vector<string> >::iterator itGroup2Barcode = Group2Barcode.find(comboGroupName);
1129                         if (itGroup2Barcode == Group2Barcode.end()) {
1130                             vector<string> tempBarcodes; tempBarcodes.push_back((itBar->second).forward+"."+(itBar->second).reverse);
1131                             Group2Barcode[comboGroupName] = tempBarcodes;
1132                         }else {
1133                             Group2Barcode[comboGroupName].push_back((itBar->second).forward+"."+(itBar->second).reverse);
1134                         }
1135                         
1136                         itGroup2Barcode = Group2Primer.find(comboGroupName);
1137                         if (itGroup2Barcode == Group2Primer.end()) {
1138                             vector<string> tempPrimers; tempPrimers.push_back((itPrimer->second).forward+"."+(itPrimer->second).reverse);
1139                             Group2Primer[comboGroupName] = tempPrimers;
1140                         }else {
1141                             Group2Primer[comboGroupName].push_back((itPrimer->second).forward+"."+(itPrimer->second).reverse);
1142                         }
1143                     }
1144                 }
1145             }
1146         }else {
1147             map<string, int> barcodes = oligos.getBarcodes() ;
1148             map<string, int> primers = oligos.getPrimers();
1149             for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1150                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1151                     
1152                     string primerName = oligos.getPrimerName(itPrimer->second);
1153                     string barcodeName = oligos.getBarcodeName(itBar->second);
1154                     
1155                     if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1156                     else if ((primerName == "") && (barcodeName == "")) { } //do nothing
1157                     else {
1158                         string comboGroupName = "";
1159                         string fastaFileName = "";
1160                         string qualFileName = "";
1161                         string nameFileName = "";
1162                         string countFileName = "";
1163                         
1164                         if(primerName == ""){
1165                             comboGroupName = barcodeName;
1166                         }else{
1167                             if(barcodeName == ""){
1168                                 comboGroupName = primerName;
1169                             }
1170                             else{
1171                                 comboGroupName = barcodeName + "." + primerName;
1172                             }
1173                         }
1174                         uniqueNames.insert(comboGroupName);
1175                         
1176                         map<string, vector<string> >::iterator itGroup2Barcode = Group2Barcode.find(comboGroupName);
1177                         if (itGroup2Barcode == Group2Barcode.end()) {
1178                             vector<string> tempBarcodes; tempBarcodes.push_back(itBar->first);
1179                             Group2Barcode[comboGroupName] = tempBarcodes;
1180                         }else {
1181                             Group2Barcode[comboGroupName].push_back(itBar->first);
1182                         }
1183                         
1184                         itGroup2Barcode = Group2Primer.find(comboGroupName);
1185                         if (itGroup2Barcode == Group2Primer.end()) {
1186                             vector<string> tempPrimers; tempPrimers.push_back(itPrimer->first);
1187                             Group2Primer[comboGroupName] = tempPrimers;
1188                         }else {
1189                             Group2Primer[comboGroupName].push_back(itPrimer->first);
1190                         }
1191                     }
1192                 }
1193             }
1194         }
1195         
1196         if (m->debug) { int count = 0; for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } }
1197         
1198                 return true;
1199                 
1200         }
1201         catch(exception& e) {
1202                 m->errorOut(e, "SRACommand", "readOligos");
1203                 exit(1);
1204         }
1205 }
1206 //********************************************************************/
1207 string SRACommand::reverseOligo(string oligo){
1208         try {
1209         string reverse = "";
1210         
1211         for(int i=oligo.length()-1;i>=0;i--){
1212             
1213             if(oligo[i] == 'A')         {       reverse += 'T'; }
1214             else if(oligo[i] == 'T'){   reverse += 'A'; }
1215             else if(oligo[i] == 'U'){   reverse += 'A'; }
1216             
1217             else if(oligo[i] == 'G'){   reverse += 'C'; }
1218             else if(oligo[i] == 'C'){   reverse += 'G'; }
1219             
1220             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1221             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1222             
1223             else if(oligo[i] == 'M'){   reverse += 'K'; }
1224             else if(oligo[i] == 'K'){   reverse += 'M'; }
1225             
1226             else if(oligo[i] == 'W'){   reverse += 'W'; }
1227             else if(oligo[i] == 'S'){   reverse += 'S'; }
1228             
1229             else if(oligo[i] == 'B'){   reverse += 'V'; }
1230             else if(oligo[i] == 'V'){   reverse += 'B'; }
1231             
1232             else if(oligo[i] == 'D'){   reverse += 'H'; }
1233             else if(oligo[i] == 'H'){   reverse += 'D'; }
1234             
1235             else                                                {       reverse += 'N'; }
1236         }
1237         
1238         
1239         return reverse;
1240     }
1241         catch(exception& e) {
1242                 m->errorOut(e, "SRACommand", "reverseOligo");
1243                 exit(1);
1244         }
1245 }
1246 //********************************************************************/
1247 //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
1248 bool SRACommand::checkCasesPlatforms(string& platform){
1249         try {
1250         string original = platform;
1251         bool isOkay = true;
1252         
1253         //remove users possible case errors
1254         for (int i = 0; i < platform.size(); i++) { platform[i] = toupper(platform[i]); }
1255         
1256         //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
1257         
1258             if ((platform == "_LS454") || (platform == "ILLUMINA") || (platform == "ION_TORRENT") || (platform == "PACBIO_SMRT") || (platform == "454")) { }
1259             else { isOkay = false; }
1260         
1261             if (isOkay) {
1262                 if (platform == "454")   {  platform = "_LS454"; }
1263             }else {
1264                 m->mothurOut("[ERROR]: " + original + " is not a valid platform option.  Valid platform options are _LS454, ILLUMINA-ION, TORRENT or PACBIO_SMRT."); m->mothurOutEndLine(); abort = true;
1265             }
1266             
1267             return isOkay;
1268     }
1269         catch(exception& e) {
1270                 m->errorOut(e, "SRACommand", "checkCasesPlatforms");
1271                 exit(1);
1272         }
1273 }
1274 //********************************************************************/
1275 //454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified
1276 bool SRACommand::checkCasesInstrumentModels(string& instrumentModel){
1277         try {
1278         string original = instrumentModel;
1279         bool isOkay = true;
1280         
1281         //remove users possible case errors
1282         for (int i = 0; i < instrumentModel.size(); i++) { instrumentModel[i] = toupper(instrumentModel[i]); }
1283         
1284         //_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
1285         if (platform == "_LS454") { //instrument model options are 454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-unspecified
1286             if ((instrumentModel == "454_GS") || (instrumentModel == "454_GS_20") || (instrumentModel == "454_GS_FLX") || (instrumentModel == "454_GS_FLX_TITANIUM") || (instrumentModel == "454_GS_JUNIOR") || (instrumentModel == "UNSPECIFIED")) { }
1287             else { isOkay = false; }
1288             if (isOkay) {
1289                 if (instrumentModel == "454_GS_FLX_TITANIUM")   {  instrumentModel = "454_GS_FLX_Titanium"; }
1290                 if (instrumentModel == "454_GS_JUNIOR")         {  instrumentModel = "454_GS_Junior";       }
1291                 if (instrumentModel == "UNSPECIFIED")           {  instrumentModel = "unspecified";         }
1292             }else {
1293                 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform.  Valid instrument options are 454_GS, 454_GS_20, 454_GS_FLX, 454_GS_FLX_Titanium, 454_GS_Junior or unspecified."); m->mothurOutEndLine(); abort = true;
1294             }
1295             
1296         }else if (platform == "ILLUMINA") { //instrument model options are Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-unspecified
1297             if ((instrumentModel == "ILLUMINA_GENOME_ANALYZER") || (instrumentModel == "ILLUMINA_GENOME_ANALYZER_II") || (instrumentModel == "ILLUMINA_GENOME_ANALYZER_IIX") || (instrumentModel == "ILLUMINA_HISEQ_2000") || (instrumentModel == "ILLUMINA_HISEQ_1000") || (instrumentModel == "ILLUMINA_MISEQ") || (instrumentModel == "UNSPECIFIED")) { }
1298             else { isOkay = false; }
1299             
1300             if (isOkay) {
1301                 if (instrumentModel == "ILLUMINA_GENOME_ANALYZER")          {  instrumentModel = "Illumina_Genome_Analyzer";        }
1302                 if (instrumentModel == "ILLUMINA_GENOME_ANALYZER_II")       {  instrumentModel = "Illumina_Genome_Analyzer_II";     }
1303                 if (instrumentModel == "ILLUMINA_GENOME_ANALYZER_IIX")      {  instrumentModel = "Illumina_Genome_Analyzer_IIx";    }
1304                 if (instrumentModel == "ILLUMINA_HISEQ_2000")               {  instrumentModel = "Illumina_HiSeq_2000";             }
1305                 if (instrumentModel == "ILLUMINA_HISEQ_1000")               {  instrumentModel = "Illumina_HiSeq_1000";             }
1306                 if (instrumentModel == "ILLUMINA_MISEQ")                    {  instrumentModel = "Illumina_MiSeq";                  }
1307                 if (instrumentModel == "UNSPECIFIED")                       {  instrumentModel = "unspecified";                     }
1308             }else {
1309                 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform.  Valid instrument options are Illumina_Genome_Analyzer, Illumina_Genome_Analyzer_II, Illumina_Genome_Analyzer_IIx, Illumina_HiSeq_2000, Illumina_HiSeq_1000, Illumina_MiSeq or unspecified."); m->mothurOutEndLine(); abort = true;
1310             }
1311             
1312         }else if (platform == "ION_TORRENT") { //instrument model options are Ion_Torrent_PGM-unspecified
1313             if ((instrumentModel == "ION_TORRENT_PGM")  || (instrumentModel == "UNSPECIFIED")) { }
1314             else { isOkay = false; }
1315             
1316             if (isOkay) {
1317                 if (instrumentModel == "ION_TORRENT_PGM")          {  instrumentModel = "Ion_Torrent_PGM";        }
1318                 if (instrumentModel == "UNSPECIFIED")              {  instrumentModel = "unspecified";            }
1319             }else {
1320                 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform.  Valid instrument options are Ion_Torrent_PGM or unspecified."); m->mothurOutEndLine(); abort = true;
1321             }
1322         }else if (platform == "PACBIO_SMRT") { //instrument model options are PacBio_RS-unspecified
1323             if ((instrumentModel == "PACBIO_RS")  || (instrumentModel == "UNSPECIFIED")) { }
1324             else { isOkay = false; }
1325             
1326             if (isOkay) {
1327                 if (instrumentModel == "PACBIO_RS")          {  instrumentModel = "PacBio_RS";        }
1328                 if (instrumentModel == "UNSPECIFIED")        {  instrumentModel = "unspecified";      }
1329             }else {
1330                 m->mothurOut("[ERROR]: " + original + " is not a valid instrument option for the " + platform + " platform.  Valid instrument options are PacBio_RS or unspecified."); m->mothurOutEndLine(); abort = true;
1331             }
1332         }
1333         return isOkay;
1334     }
1335         catch(exception& e) {
1336                 m->errorOut(e, "SRACommand", "checkCasesInstrumentModels");
1337                 exit(1);
1338         }
1339 }
1340 //**********************************************************************************************************************
1341 //AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq,OTHER
1342 bool SRACommand::checkCasesLibStrategy(string& libStrategy){
1343         try {
1344         string original = libStrategy;
1345         bool isOkay = true;
1346         
1347         //remove users possible case errors
1348         for (int i = 0; i < libStrategy.size(); i++) { libStrategy[i] = toupper(libStrategy[i]); }
1349         
1350         if ((libStrategy == "AMPLICON") || (libStrategy == "WGA") || (libStrategy == "WGS") || (libStrategy == "WGX") || (libStrategy == "RNA-SEQ") || (libStrategy == "MIRNA-SEQ") || (libStrategy == "WCS") || (libStrategy == "CLONE") || (libStrategy == "POOLCLONE") || (libStrategy == "CLONEEND") || (libStrategy == "FINISHING") || (libStrategy == "CHIP-SEQ") || (libStrategy == "MNASE-SEQ") || (libStrategy == "DNASE-HYPERSENSITIVITY") || (libStrategy == "BISULFITE-SEQ") || (libStrategy == "TN-SEQ") || (libStrategy == "EST") || (libStrategy == "FL-CDNA") || (libStrategy == "CTS") || (libStrategy == "MRE-SEQ")|| (libStrategy == "MEDIP-SEQ") || (libStrategy == "MBD-SEQ") || (libStrategy == "OTHER")) { }
1351         else { isOkay = false; }
1352         
1353         if (isOkay) {
1354             if (libStrategy == "RNA-SEQ")                   {  libStrategy = "RNA-Seq";                 }
1355             if (libStrategy == "MIRNA-SEQ")                 {  libStrategy = "miRNA-Seq";               }
1356             if (libStrategy == "CHIP-SEQ")                  {  libStrategy = "ChIP-Seq";                }
1357             if (libStrategy == "MNASE-SEQ")                 {  libStrategy = "MNase-Seq";               }
1358             if (libStrategy == "DNASE-HYPERSENSITIVITY")    {  libStrategy = "DNase-Hypersensitivity";  }
1359             if (libStrategy == "BISULFITE-SEQ")             {  libStrategy = "Bisulfite-Seq";           }
1360             if (libStrategy == "TN-SEQ")                    {  libStrategy = "Tn-Seq";                  }
1361             if (libStrategy == "FL-CDNA")                   {  libStrategy = "FL-cDNA";                 }
1362             if (libStrategy == "MRE-SEQ")                   {  libStrategy = "MRE-Seq";                 }
1363             if (libStrategy == "MEDIP-SEQ")                 {  libStrategy = "MeDIP-Seq";               }
1364             }else {
1365             m->mothurOut("[ERROR]: " + original + " is not a valid libstrategy option.  Valid libstrategy options are AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq or OTHER."); m->mothurOutEndLine(); abort = true;
1366         }
1367         
1368         return isOkay;
1369     }
1370         catch(exception& e) {
1371                 m->errorOut(e, "SRACommand", "checkCasesLibStrategy");
1372                 exit(1);
1373         }
1374 }
1375
1376 //**********************************************************************************************************************
1377 //METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA,OTHER
1378 bool SRACommand::checkCasesLibSource(string& libSource){
1379         try {
1380         string original = libSource;
1381         bool isOkay = true;
1382         
1383         //remove users possible case errors
1384         for (int i = 0; i < libSource.size(); i++) { libSource[i] = toupper(libSource[i]); }
1385         
1386         if ((libSource == "METAGENOMIC") || (libSource == "GENOMIC") || (libSource == "TRANSCRIPTOMIC") || (libSource == "METATRANSCRIPTOMIC") || (libSource == "SYNTHETIC") || (libSource == "VIRAL_RNA") || (libSource == "OTHER")) { }
1387         else { isOkay = false; }
1388         
1389         if (isOkay) {
1390             
1391         }else {
1392             m->mothurOut("[ERROR]: " + original + " is not a valid libsource option.  Valid libsource options are METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA or OTHER."); m->mothurOutEndLine(); abort = true;
1393         }
1394         
1395         return isOkay;
1396     }
1397         catch(exception& e) {
1398                 m->errorOut(e, "SRACommand", "checkCasesLibStrategy");
1399                 exit(1);
1400         }
1401 }
1402
1403 //**********************************************************************************************************************
1404 //PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other,unspecified
1405 bool SRACommand::checkCasesLibSelection(string& libSelection){
1406         try {
1407         string original = libSelection;
1408         bool isOkay = true;
1409         
1410         //remove users possible case errors
1411         for (int i = 0; i < libSelection.size(); i++) { libSelection[i] = toupper(libSelection[i]); }
1412         
1413         if ((libSelection == "PCR") || (libSelection == "RANDOM") || (libSelection == "RANDOM_PCR") || (libSelection == "RT-PCR") || (libSelection == "HMPR") || (libSelection == "MF") || (libSelection == "CF-S") || (libSelection == "CF-H") || (libSelection == "CF-T") || (libSelection == "CF-M") || (libSelection == "MDA") || (libSelection == "MSLL") || (libSelection == "CDNA") || (libSelection == "CHIP") || (libSelection == "MNASE") || (libSelection == "DNASE") || (libSelection == "HYBRID_SELECTION") || (libSelection == "REDUCED_REPRESENTATION") || (libSelection == "RESTRICTION_DIGEST") || (libSelection == "5-METHYLCYTIDINE_ANTIBODY") || (libSelection == "MBD2_PROTEIN_METHYL-CPG_BINDING_DOMAIN") || (libSelection == "CAGE") || (libSelection == "RACE") || (libSelection == "SIZE_FRACTIONATION") || (libSelection == "PADLOCK_PROBES_CAPTURE_METHOD") || (libSelection == "OTHER") || (libSelection == "UNSPECIFIED")) { }
1414         else { isOkay = false; }
1415         
1416         if (isOkay) {
1417             if (libSelection == "CDNA")                                         {  libSelection = "cDNA";                                       }
1418             if (libSelection == "CHIP")                                         {  libSelection = "ChIP";                                       }
1419             if (libSelection == "MNASE")                                        {  libSelection = "MNase";                                      }
1420             if (libSelection == "DNASE")                                        {  libSelection = "DNAse";                                      }
1421             if (libSelection == "HYBRID_SELECTION")                             {  libSelection = "Hybrid_Selection";                           }
1422             if (libSelection == "REDUCED_REPRESENTATION")                       {  libSelection = "Reduced_Representation";                     }
1423             if (libSelection == "RESTRICTION_DIGEST")                           {  libSelection = "Restriction_Digest";                         }
1424             if (libSelection == "5-METHYLCYTIDINE_ANTIBODY")                    {  libSelection = "5-methylcytidine_antibody";                  }
1425             if (libSelection == "MBD2_PROTEIN_METHYL-CPG_BINDING_DOMAIN")       {  libSelection = "MBD2_protein_methyl-CpG_binding_domain";     }
1426             if (libSelection == "SIZE_FRACTIONATION")                           {  libSelection = "size_fractionation";                         }
1427             if (libSelection == "PADLOCK_PROBES_CAPTURE_METHOD")                {  libSelection = "Padlock_probes_capture_method";              }
1428             if (libSelection == "OTHER")                                        {  libSelection = "other";                                      }
1429             if (libSelection == "UNSPECIFIED")                                  {  libSelection = "unspecified";                                }
1430             
1431         }else {
1432             m->mothurOut("[ERROR]: " + original + " is not a valid libselection option.  Valid libselection options are PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other or unspecified."); m->mothurOutEndLine(); abort = true;
1433         }
1434         
1435         return isOkay;
1436     }
1437         catch(exception& e) {
1438                 m->errorOut(e, "SRACommand", "checkCasesLibSelection");
1439                 exit(1);
1440         }
1441 }
1442 //**********************************************************************************************************************
1443 //METAGENOME,GENOME_SEQUENCING,METAGENOMIC_ASSEMBLY,ASSEMBLY,TRANSCRIPTOME,PROTEOMIC,MAP,CLONE_ENDS,TARGETED_LOCI,RANDOM_SURVEY,EXOME,VARIATION,EPIGENOMICS,PHENOTYPE,GENOTYPE,OTHER
1444 bool SRACommand::checkCasesDataType(string& dataType){
1445         try {
1446         string original = dataType;
1447         bool isOkay = true;
1448         
1449         //remove users possible case errors
1450         for (int i = 0; i < dataType.size(); i++) { dataType[i] = toupper(dataType[i]); }
1451         
1452         if ((dataType == "METAGENOME") || (dataType == "GENOME_SEQUENCING") || (dataType == "METAGENOMIC_ASSEMBLY") || (dataType == "ASSEMBLY") || (dataType == "TRANSCRIPTOME") || (dataType == "PROTEOMIC") || (dataType == "MAP") || (dataType == "CLONE_ENDS") || (dataType == "TARGETED_LOCI") || (dataType == "RANDOM_SURVEY") || (dataType == "EXOME") || (dataType == "VARIATION") || (dataType == "EPIGENOMICS") || (dataType == "PHENOTYPE") || (dataType == "GENOTYPE") || (dataType == "OTHER")) { }
1453         else { isOkay = false; }
1454         
1455         if (isOkay) {
1456             
1457         }else {
1458             m->mothurOut("[ERROR]: " + original + " is not a valid datatype option.  Valid datatype options are METAGENOME,GENOME_SEQUENCING,METAGENOMIC_ASSEMBLY,ASSEMBLY,TRANSCRIPTOME,PROTEOMIC,MAP,CLONE_ENDS,TARGETED_LOCI,RANDOM_SURVEY,EXOME,VARIATION,EPIGENOMICS,PHENOTYPE,GENOTYPE,OTHER."); m->mothurOutEndLine(); abort = true;
1459         }
1460         
1461         return isOkay;
1462     }
1463         catch(exception& e) {
1464                 m->errorOut(e, "SRACommand", "checkCasesDataType");
1465                 exit(1);
1466         }
1467 }
1468 //**********************************************************************************************************************
1469 bool SRACommand::sanityCheckMiMarksGroups(){
1470         try {
1471         bool isOkay = true;
1472         
1473         for (int i = 0; i < Groups.size(); i++) {
1474             if (m->control_pressed) { break; }
1475             
1476             map<string, map<string, string> >::iterator it = mimarks.find(Groups[i]);
1477             if (it == mimarks.end()) {
1478                 isOkay = false;
1479                 m->mothurOut("[ERROR]: MIMarks file is missing group " + Groups[i] + ", please correct.\n");
1480             }
1481         }
1482         
1483         if (!isOkay) { m->control_pressed = true; }
1484         
1485         return isOkay;
1486     }
1487         catch(exception& e) {
1488                 m->errorOut(e, "SRACommand", "sanityCheckMiMarksGroups");
1489                 exit(1);
1490         }
1491 }
1492
1493 //**********************************************************************************************************************