git.donarmstrong.com Git - mothur.git/commitdiff
working of get.mimarkspackage and sra command
author Sarah Westcott <mothur.westcott@gmail.com>
Tue, 1 Apr 2014 18:13:11 +0000 (14:13 -0400)
committer Sarah Westcott <mothur.westcott@gmail.com>
Tue, 1 Apr 2014 18:13:11 +0000 (14:13 -0400)
commandfactory.cpp
getmimarkspackagecommand.cpp
getmimarkspackagecommand.h
mothurout.cpp
mothurout.h
sracommand.cpp
sracommand.h

index e8e9c2754677ee639fce944fbc533d3bd9eb98db..8c5c0faef60a480a189f98994afc250e4c36a8c6 100644 (file)
 #include "kruskalwalliscommand.h"
 #include "sracommand.h"
 #include "mergesfffilecommand.h"
+#include "getmimarkspackagecommand.h"
 
 /*******************************************************/
 
@@ -323,6 +324,7 @@ CommandFactory::CommandFactory(){
     commands["kruskal.wallis"]      = "kruskal.wallis";
     commands["sra"]                 = "sra";
     commands["merge.sfffiles"]      = "merge.sfffiles";
+    commands["get.mimarkspackage"]  = "get.mimarkspackage";
     
 
 }
@@ -552,6 +554,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
         else if(commandName == "kruskal.wallis")        {      command = new KruskalWallisCommand(optionString);           }
         else if(commandName == "sra")                   {      command = new SRACommand(optionString);                     }
         else if(commandName == "merge.sfffiles")        {      command = new MergeSfffilesCommand(optionString);           }
+        else if(commandName == "get.mimarkspackage")    {      command = new GetMIMarksPackageCommand(optionString);       }
                else                                                                                    {       command = new NoCommand(optionString);                                          }
 
                return command;
@@ -722,6 +725,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
         else if(commandName == "kruskal.wallis")        {      pipecommand = new KruskalWallisCommand(optionString);           }
         else if(commandName == "sra")                   {      pipecommand = new SRACommand(optionString);                     }
         else if(commandName == "merge.sfffiles")        {      pipecommand = new MergeSfffilesCommand(optionString);           }
+        else if(commandName == "get.mimarkspackage")    {      pipecommand = new GetMIMarksPackageCommand(optionString);       }
                else                                                                                    {       pipecommand = new NoCommand(optionString);                                              }
 
                return pipecommand;
@@ -878,6 +882,7 @@ Command* CommandFactory::getCommand(string commandName){
         else if(commandName == "kruskal.wallis")        {      shellcommand = new KruskalWallisCommand();          }
         else if(commandName == "sra")                   {      shellcommand = new SRACommand();                    }
         else if(commandName == "merge.sfffiles")        {      shellcommand = new MergeSfffilesCommand();          }
+        else if(commandName == "get.mimarkspackage")    {      shellcommand = new GetMIMarksPackageCommand();      }
                else                                                                                    {       shellcommand = new NoCommand();                                         }
 
                return shellcommand;
index 43138e1cefd75544e9cd699985fad484fc21a862..2f37405a509125f25d77d7baf3363ca0cb030e4c 100644 (file)
@@ -7,13 +7,17 @@
 //
 
 #include "getmimarkspackagecommand.h"
+#include "groupmap.h"
 
 //**********************************************************************************************************************
 vector<string> GetMIMarksPackageCommand::setParameters(){
        try {
         //files that have dependencies
         CommandParameter pgroup("group", "InputTypes", "", "", "groupOligos", "none", "none","",false,false); parameters.push_back(pgroup);
+        CommandParameter pfile("file", "InputTypes", "", "", "groupOligos", "none", "none","",false,false); parameters.push_back(pfile);
         CommandParameter poligos("oligos", "InputTypes", "", "", "groupOligos", "none", "none","",false,false); parameters.push_back(poligos);
+        CommandParameter ppackage("package", "Multiple", "air-host_associated-human_associated-human_gut-human_oral-human_skin-human_vaginal-microbial-miscellaneous-plant_associated-sediment-soil-wastewater-water", "miscellaneous", "", "", "","",false,false,true); parameters.push_back(ppackage);
+        CommandParameter prequiredonly("requiredonly", "Boolean", "", "F", "", "", "","",false,false, true); parameters.push_back(prequiredonly);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                
@@ -30,14 +34,15 @@ vector<string> GetMIMarksPackageCommand::setParameters(){
 string GetMIMarksPackageCommand::getHelpString(){
        try {
                string helpString = "";
-               helpString += "The get.mimarkspackage command creates a mimarks package form with your groups. The required fields are flagged with * characters.\n";
+               helpString += "The get.mimarkspackage command creates a mimarks package form with your groups. The required fields are flagged with * characters. Fields marked with '**' indicated they are in a group where at least one of the fields is required.\n";
         helpString += "Further documentation on the different packages and required formats can be found here, http://www.mothur.org/wiki/MIMarks_Data_Packages.\n";
-               helpString += "The get.mimarkspackage command parameters are: oligos, group and package. oligos or group is required.\n";
+               helpString += "The get.mimarkspackage command parameters are: oligos, group, package and requiredonly. oligos or group is required.\n";
                helpString += "The oligos parameter is used to provide your oligos file so mothur can extract your group names.\n";
         helpString += "The group parameter is used to provide your group file so mothur can extract your group names.\n";
-        helpString += "The package parameter is used to select the mimarks package you would like to use. Default=???\n";
+        helpString += "The package parameter is used to select the mimarks package you would like to use. The choices are: air, host_associated, human_associated, human_gut, human_oral, human_skin, human_vaginal, microbial, miscellaneous, plant_associated, sediment, soil, wastewater or waterc. Default=miscellaneous.\n";
+        helpString += "The requiredonly parameter is used to indicate you only want the required mimarks feilds printed. Default=F.\n";
                helpString += "The get.mimarkspackage command should be in the following format: get.mimarkspackage(oligos=yourOligosFile, package=yourPackage)\n";
-               helpString += "get.mimarkspackage(oligos=GQY1XT001.oligos, package=???)\n";
+               helpString += "get.mimarkspackage(oligos=GQY1XT001.oligos, package=human_gut)\n";
                return helpString;
        }
        catch(exception& e) {
@@ -121,42 +126,57 @@ GetMIMarksPackageCommand::GetMIMarksPackageCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["group"] = inputDir + it->second;            }
                                }
+                
+                it = parameters.find("file");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                }
                                
             }
             
                        groupfile = validParameter.validFile(parameters, "group", true);
                        if (groupfile == "not open") {  groupfile = "";  abort = true; }
                        else if (groupfile == "not found") { groupfile = ""; }
-            else {  m->setGroupFile(groupfile); }
+            else {  m->setGroupFile(groupfile); inputfile = groupfile; }
+            
+            file = validParameter.validFile(parameters, "file", true);
+                       if (file == "not open") {  file = "";  abort = true; }
+                       else if (file == "not found") { file = ""; }
+            else {  inputfile = file; }
             
             oligosfile = validParameter.validFile(parameters, "oligos", true);
                        if (oligosfile == "not found")      {   oligosfile = "";        }
                        else if(oligosfile == "not open")       {       abort = true;           }
-                       else {  m->setOligosFile(oligosfile); }
+                       else {  m->setOligosFile(oligosfile); inputfile = oligosfile; }
 
-            if ((groupfile != "") && (oligosfile != "")) {
-                m->mothurOut("[ERROR]: You may not use a group file and an oligos file, only one."); m->mothurOutEndLine(); abort = true;
+            if ((groupfile != "") && (oligosfile != "") && (file != "")) {
+                m->mothurOut("[ERROR]: You may not use a group file, file and an oligos file, only one."); m->mothurOutEndLine(); abort = true;
             }
 
-            if ((groupfile == "") && (oligosfile == "")) {
+            if ((groupfile == "") && (oligosfile == "") && (file == "")) {
                 oligosfile = m->getOligosFile();
-                if (oligosfile != "") {  m->mothurOut("Using " + oligosfile + " as input file for the oligos parameter."); m->mothurOutEndLine(); }
+                if (oligosfile != "") { inputfile = oligosfile;  m->mothurOut("Using " + oligosfile + " as input file for the oligos parameter."); m->mothurOutEndLine(); }
                 else {
                     groupfile = m->getGroupFile();
-                    if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
+                    if (groupfile != "") { inputfile = groupfile;  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
                     else {
-                        m->mothurOut("[ERROR]: You must provide groupfile or oligos file for the get.mimarkspackage command."); m->mothurOutEndLine(); abort = true;
+                        m->mothurOut("[ERROR]: You must provide file, groupfile or oligos file for the get.mimarkspackage command."); m->mothurOutEndLine(); abort = true;
                     }
                 }
             }
             
-            package = validParameter.validFile(parameters, "package", false);         if (package == "not found") { package = "package"; }
-            //if (!checkCasesPackage(package)) { abort = true; } //error message in function
+            package = validParameter.validFile(parameters, "package", false);         if (package == "not found") { package = "miscellaneous"; }
             
-            //turn _ to spaces mothur's work around
-            for (int i = 0; i < package.length(); i++) { if (package[i] == '_') { package[i] = ' '; }  }
-
-
+            if ((package == "air") || (package == "host_associated") || (package == "human_associated") || (package == "human_gut") || (package == "human_oral") || (package == "human_skin") || (package == "human_vaginal") || (package == "microbial") || (package == "miscellaneous") || (package == "plant_associated") || (package == "sediment") || (package == "soil") || (package == "wastewater") || (package == "water")) {}
+            else {
+                m->mothurOut("[ERROR]: " + package + " is not a valid package selection. Choices are: air, host_associated, human_associated, human_gut, human_oral, human_skin, human_vaginal, microbial, miscellaneous, plant_associated, sediment, soil, wastewater or water. Aborting.\n."); abort = true;
+            }
+            
+            string temp;
+                       temp = validParameter.validFile(parameters, "requiredonly", false);     if(temp == "not found"){        temp = "F";     }
+                       requiredonly = m->isTrue(temp);
                }
                
        }
@@ -172,10 +192,131 @@ int GetMIMarksPackageCommand::execute(){
                
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
         
-
-        
+        if (oligosfile != "") { readOligos();   }
+        else if (file != "")  { readFile();     }
+        else {  GroupMap groupmap(groupfile); groupmap.readMap(); Groups = groupmap.getNamesOfGroups(); }
         
+        if (outputDir == "") { outputDir += m->hasPath(inputfile); }
+        map<string, string> variables;
+               variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile));
+               string outputFileName = getOutputFileName("tsv", variables);
                
+        ofstream out;
+               m->openOutputFile(outputFileName, out);
+               outputNames.push_back(outputFileName); outputTypes["tsv"].push_back(outputFileName);
+        
+        out << "#This is a tab-delimited file. Additional Documentation can be found at http://www.mothur.org/wiki/MIMarks_Data_Packages." << endl;
+        out << "#Please fill all the required fields indicated with '*'" << endl;
+        out << "#Fields marked with '**' indicated they are in a group where at least one of the fields is required." << endl;
+        out << "#Unknown or inapplicable fields can be assigned NA value." << endl;
+        out << "#You may add extra custom fields to this template. Make sure all the fields are separated by tabs." << endl;
+        out << "#You may remove any fields not required (marked with '*'). Make sure all the fields are separated by tabs." << endl;
+        out << "#You can edit this template using Microsoft Excel or any other editor. But while saving the file please make sure to save them as 'TAB-DELIMITED' TEXT FILE." << endl;
+        
+        if (package == "air") {
+            out << "#Environmental:MIMARKS.specimen.air.3.0" << endl;
+            if (requiredonly) {
+                out << "*sample_name   *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods *altitude" << endl;
+            }else {
+                out << "*sample_name   description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods rel_to_oxygen   samp_collect_device     samp_mat_process        *altitude       barometric_press        carb_dioxide    carb_monoxide   chem_administration     elev    humidity        methane misc_param      organism_count  oxygen  oxy_stat_samp   perturbation    pollutants      resp_part_matter        samp_size       samp_salinity   samp_store_dur  samp_store_loc  samp_store_temp solar_irradiance        temp    ventilation_rate        ventilation_type        volatile_org_comp       wind_direction  wind_speed" << endl;
+            }
+        }else if (package == "host_associated") {
+            out << "#Environmental:MIMARKS.specimen.host-associated.3.0" << endl;
+            if (requiredonly) {
+                out << "*sample_name   *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods *host   **clone **isolate   **strain" << endl;
+            }else {
+                out << "*sample_name   description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods **clone **isolate   **strain    rel_to_oxygen   samp_collect_device     samp_mat_process        *host   age     altitude        blood_press_diast       blood_press_syst        body_habitat    body_product    tissue  chem_administration     depth   diet    disease_stat    dry_mass        elev    family_relationship     genotype        gravidity       height_or_length        host_body_temp  host_color      host_growth_cond        host_shape      host_subject_id host_taxid      infra_specific_name     infra_specific_rank     last_meal       life_stage      misc_param      organism_count  oxy_stat_samp   perturbation    phenotype       samp_size       samp_salinity   samp_store_dur  samp_store_loc  samp_store_temp sex     substrate       temp    tot_mass" << endl;
+            }
+        }else if (package == "human_associated") {
+            out << "#Environmental:MIMARKS.specimen.human-associated.3.0" << endl;
+            if (requiredonly) {
+                out << "*sample_name   *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods *host" << endl;
+            }else {
+                out << "*sample_name   description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods rel_to_oxygen   samp_collect_device     samp_mat_process        *host   hiv_stat        ihmc_ethnicity  ihmc_medication_code    age     amniotic_fluid_color    foetal_health_stat      gestation_state maternal_health_stat    blood_blood_disord      body_product    tissue  body_mass_index chem_administration     diet    disease_stat    drug_usage      family_relationship     genotype        height  host_body_temp  host_subject_id last_meal       nose_throat_disord      pulmonary_disord        diet_last_six_month     medic_hist_perform      misc_param      occupation      organism_count  oxy_stat_samp   perturbation    phenotype       pet_farm_animal pulse   samp_size       samp_salinity   samp_store_dur  samp_store_loc  samp_store_temp sex     smoker  study_complt_stat       temp    tot_mass        travel_out_six_month    twin_sibling    urine_collect_meth      kidney_disord   urogenit_tract_disor    weight_loss_3_month" << endl;
+            }
+        }else if (package == "human_gut") {
+            out << "#Environmental:MIMARKS.specimen.human-gut.3.0" << endl;
+            if (requiredonly) {
+                out << "*sample_name   *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods *host" << endl;
+            }else {
+                out << "*sample_name   description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods rel_to_oxygen   samp_collect_device     samp_mat_process        *host   ihmc_ethnicity  ihmc_medication_code    age     body_product    tissue  body_mass_index chem_administration     diet    disease_stat    family_relationship     gastrointest_disord     genotype        height  host_body_temp  host_subject_id last_meal       liver_disord    medic_hist_perform      misc_param      occupation      organism_count  oxy_stat_samp   perturbation    phenotype       pulse   samp_size       samp_salinity   samp_store_dur  samp_store_loc  samp_store_temp sex     special_diet    temp    tot_mass" << endl;
+            }
+        }else if (package == "human_oral") {
+            out << "#Environmental:MIMARKS.specimen.human-oral.3.0" << endl;
+            if (requiredonly) {
+                out << "*sample_name   *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods *host" << endl;
+            }else {
+                out << "*sample_name   description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods rel_to_oxygen   samp_collect_device     samp_mat_process        *host   ihmc_ethnicity  ihmc_medication_code    age     body_product    tissue  body_mass_index chem_administration     diet    disease_stat    family_relationship     genotype        height  host_body_temp  host_subject_id last_meal       medic_hist_perform      misc_param      nose_mouth_teeth_throat_disord  occupation      organism_count  oxy_stat_samp   perturbation    phenotype       pulse   samp_size       samp_salinity   samp_store_dur  samp_store_loc  samp_store_temp sex     temp    time_last_toothbrush    tot_mass" << endl;
+            }
+        }else if (package == "human_skin") {
+            out << "#Environmental:MIMARKS.specimen.human-skin.3.0" << endl;
+            if (requiredonly) {
+                out << "*sample_name   *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods *host" << endl;
+            }else {
+                out << "*sample_name   description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods rel_to_oxygen   samp_collect_device     samp_mat_process        *host   ihmc_ethnicity  ihmc_medication_code    age     body_product    tissue  body_mass_index chem_administration     dermatology_disord      diet    disease_stat    dominant_hand   family_relationship     genotype        height  host_body_temp  host_subject_id last_meal       medic_hist_perform      misc_param      occupation      organism_count  oxy_stat_samp   perturbation    phenotype       pulse   samp_size       samp_salinity   samp_store_dur  samp_store_loc  samp_store_temp sex     temp    time_since_last_wash    tot_mass" << endl;
+            }
+        }else if (package == "human_vaginal") {
+            out << "#Environmental:MIMARKS.specimen.human-vaginal.3.0" << endl;
+            if (requiredonly) {
+                out << "*sample_name   *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods *host" << endl;
+            }else {
+                out << "*sample_name   description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods rel_to_oxygen   samp_collect_device     samp_mat_process        *host   hrt     ihmc_ethnicity  ihmc_medication_code    age     birth_control   body_product    tissue  body_mass_index chem_administration     diet    disease_stat    douche  family_relationship     genotype        gynecologic_disord      height  host_body_temp  host_subject_id hysterectomy    last_meal       medic_hist_perform      menarche        menopause       misc_param      occupation      organism_count  oxy_stat_samp   perturbation    phenotype       pregnancy       pulse   samp_size       samp_salinity   samp_store_dur  samp_store_loc  samp_store_temp sex     sexual_act      temp    tot_mass        urogenit_disord" << endl;
+            }
+        }else if (package == "microbial") {
+            out << "#Environmental:MIMARKS.specimen.microbial.3.0" << endl;
+            if (requiredonly) {
+                out << "*sample_name   *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods *depth  *elev" << endl;
+            }else {
+                out << "*sample_name   description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods rel_to_oxygen   samp_collect_device     samp_mat_process        *depth  *elev   alkalinity      alkyl_diethers  altitude        aminopept_act   ammonium        bacteria_carb_prod      biomass bishomohopanol  bromide calcium carb_nitro_ratio        chem_administration     chloride        chlorophyll     diether_lipids  diss_carb_dioxide       diss_hydrogen   diss_inorg_carb diss_org_carb   diss_org_nitro  diss_oxygen     glucosidase_act magnesium       mean_frict_vel  mean_peak_frict_vel     methane misc_param      n_alkanes       nitrate nitrite nitro   org_carb        org_matter      org_nitro       organism_count  oxy_stat_samp   ph      part_org_carb   perturbation    petroleum_hydrocarb     phaeopigments   phosphate       phosplipid_fatt_acid    potassium       pressure        redox_potential salinity        samp_size       samp_store_dur  samp_store_loc  samp_store_temp silicate        sodium  sulfate sulfide temp    tot_carb        tot_nitro       tot_org_carb    turbidity       water_content" << endl;
+            }
+        }else if (package == "miscellaneous") {
+            out << "#Environmental:MIMARKS.specimen.miscellaneous.3.0" << endl;
+            if (requiredonly) {
+                out << "*sample_name   *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *title  *seq_methods    *lat_lon" << endl;
+            }else {
+                out << "*sample_name   description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods rel_to_oxygen   samp_collect_device     samp_mat_process        alkalinity      altitude        ammonium        biomass bromide calcium chem_administration     chloride        chlorophyll     current density depth   diether_lipids  diss_carb_dioxide       diss_hydrogen   diss_inorg_carb diss_org_nitro  diss_oxygen     elev    misc_param      nitrate nitrite nitro   org_carb        org_matter      org_nitro       organism_count  oxy_stat_samp   ph      perturbation    phosphate       phosplipid_fatt_acid    potassium       pressure        salinity        samp_size       samp_store_dur  samp_store_loc  samp_store_temp silicate        sodium  sulfate sulfide temp" << endl;
+            }
+        }else if (package == "plant_associated") {
+            out << "#Environmental:MIMARKS.specimen.plant-associated.3.0" << endl;
+            if (requiredonly) {
+                out << "*sample_name   *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods *host" << endl;
+            }else {
+                out << "*sample_name   description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods rel_to_oxygen   samp_collect_device     samp_mat_process        *host   age     air_temp_regm   altitude        antibiotic_regm body_product    chem_administration     chem_mutagen    climate_environment     depth   disease_stat    dry_mass        elev    fertilizer_regm fungicide_regm  gaseous_environment     genotype        gravity growth_hormone_regm     growth_med      height_or_length        herbicide_regm  host_taxid      humidity_regm   infra_specific_name     infra_specific_rank     life_stage      mechanical_damage       mineral_nutr_regm       misc_param      non_mineral_nutr_regm   organism_count  oxy_stat_samp   ph_regm perturbation    pesticide_regm  phenotype       tissue  plant_product   radiation_regm  rainfall_regm   salt_regm       samp_size       samp_salinity   samp_store_dur  samp_store_loc  samp_store_temp season_environment      standing_water_regm     temp    tiss_cult_growth_med    tot_mass        water_temp_regm watering_regm   wet_mass" << endl;
+            }
+        }else if (package == "sediment") {
+            out << "#Environmental:MIMARKS.specimen.sediment.3.0" << endl;
+            if (requiredonly) {
+                out << "*sample_name   *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods *depth  *elev" << endl;
+            }else {
+                out << "*sample_name   description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods rel_to_oxygen   samp_collect_device     samp_mat_process        *depth  *elev   alkalinity      alkyl_diethers  aminopept_act   ammonium        bacteria_carb_prod      biomass bishomohopanol  bromide calcium carb_nitro_ratio        chem_administration     chloride        chlorophyll     density diether_lipids  diss_carb_dioxide       diss_hydrogen   diss_inorg_carb diss_org_carb   diss_org_nitro  diss_oxygen     glucosidase_act magnesium       mean_frict_vel  mean_peak_frict_vel     methane misc_param      n_alkanes       nitrate nitrite nitro   org_carb        org_matter      org_nitro       organism_count  oxy_stat_samp   ph      particle_class  part_org_carb   perturbation    petroleum_hydrocarb     phaeopigments   phosphate       phosplipid_fatt_acid    porosity        potassium       pressure        redox_potential salinity        samp_size       samp_store_dur  samp_store_loc  samp_store_temp sediment_type   silicate        sodium  sulfate sulfide temp    tidal_stage     tot_carb        tot_nitro       tot_org_carb    turbidity       water_content" << endl;
+            }
+        }else if (package == "soil") {
+            out << "#Environmental:MIMARKS.specimen.soil.3.0" << endl;
+            if (requiredonly) {
+                out << "*sample_name   *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods *depth  *elev" << endl;
+            }else {
+                out << "*sample_name   description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods rel_to_oxygen   samp_collect_device     samp_mat_process        *depth  *elev   altitude        sieving cur_land_use    cur_vegetation_meth     cur_vegetation  drainage_class  al_sat  al_sat_meth     heavy_metals_meth       heavy_metals    salinity_meth   extreme_salinity        fao_class       agrochem_addition       crop_rotation   extreme_event   fire    flooding        previous_land_use_meth  previous_land_use       tillage horizon_meth    horizon link_class_info link_climate_info       link_addit_analys       annual_season_precpt    annual_season_temp      microbial_biomass_meth  microbial_biomass       misc_param      other   ph_meth ph      pool_dna_extracts       profile_position        samp_size       samp_weight_dna_ext     slope_aspect    slope_gradient  soil_type_meth  soil_type       local_class_meth        local_class     store_cond      texture_meth    texture tot_n_meth      tot_n   tot_org_c_meth  tot_org_carb    water_content_soil_meth water_content_soil" << endl;
+            }
+        }else if (package == "wastewater") {
+            out << "#Environmental:MIMARKS.specimen.wastewater.3.0" << endl;
+            if (requiredonly) {
+                out << "*sample_name   *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods" << endl;
+            }else {
+                out << "*sample_name   description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods rel_to_oxygen   samp_collect_device     samp_mat_process        alkalinity      biochem_oxygen_dem      chem_administration     chem_oxygen_dem depth   efficiency_percent      emulsions       gaseous_substances      indust_eff_percent      inorg_particles misc_param      nitrate org_particles   organism_count  oxy_stat_samp   ph      perturbation    phosphate       pre_treatment   primary_treatment       reactor_type    samp_size       samp_salinity   samp_store_dur  samp_store_loc  samp_store_temp secondary_treatment     sewage_type     sludge_retent_time      sodium  soluble_inorg_mat       soluble_org_mat suspend_solids  temp    tertiary_treatment      tot_nitro       tot_phosphate   wastewater_type" << endl;
+            }
+        }else if (package == "water") {
+            out << "#Environmental:MIMARKS.specimen.water.3.0" << endl;
+            if (requiredonly) {
+                out << "*sample_name   *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods *depth" << endl;
+            }else {
+                out << "*sample_name   description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods rel_to_oxygen   samp_collect_device     samp_mat_process        *depth  alkalinity      alkyl_diethers  aminopept_act   ammonium        atmospheric_data        bacteria_carb_prod      biomass bishomohopanol  bromide calcium carb_nitro_ratio        chem_administration     chloride        chlorophyll     current density diether_lipids  diss_carb_dioxide       diss_hydrogen   diss_inorg_carb diss_inorg_nitro        diss_inorg_phosp        diss_org_carb   diss_org_nitro  diss_oxygen     elev    glucosidase_act light_intensity magnesium       mean_frict_vel  mean_peak_frict_vel     misc_param      n_alkanes       nitrate nitrite nitro   org_carb        org_matter      org_nitro       organism_count  oxy_stat_samp   ph      part_org_carb   part_org_nitro  perturbation    petroleum_hydrocarb     phaeopigments   phosphate       phosplipid_fatt_acid    photon_flux     potassium       pressure        primary_prod    redox_potential salinity        samp_size       samp_store_dur  samp_store_loc  samp_store_temp silicate        sodium  soluble_react_phosp     sulfate sulfide suspend_part_matter     temp    tidal_stage     tot_depth_water_col     tot_diss_nitro  tot_inorg_nitro tot_nitro       tot_part_carb   tot_phosp" << endl;
+            }
+        }
+        
+        for (int i = 0; i < Groups.size(); i++) {  out << Groups[i] << '\t' << endl; }
+        
+        out.close();
+        
         //output files created by command
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
@@ -185,7 +326,296 @@ int GetMIMarksPackageCommand::execute(){
                
     }
        catch(exception& e) {
-               m->errorOut(e, "GetMIMarksPackageCommand", "GetMIMarksPackageCommand");
+               m->errorOut(e, "GetMIMarksPackageCommand", "execute");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+//file-local helper: consumes the remainder of the current line (up to, and including, the first
+//line feed or carriage return) and returns it with spaces and tabs removed.
+//The character is held in the stream's int_type so that end-of-file is recognized reliably:
+//storing get()'s result in a plain char and comparing it with -1 misses EOF where char is unsigned,
+//and mistakes a 0xFF data byte for EOF where char is signed.
+static string restOfLineWithoutBlanks(ifstream& in) {
+    string rest = "";
+    while (!in.eof()) {
+        ifstream::int_type c = in.get();
+        if (c == 10 || c == 13 || c == ifstream::traits_type::eof()) { break; }
+        else if (c == 32 || c == 9) { ; } //space or tab
+        else { rest += ifstream::traits_type::to_char_type(c); }
+    }
+    return rest;
+}
+//***************************************************************************************************************
+//Reads the file named by oligosfile and appends the group names it implies to Groups.
+//  - lines whose first token starts with '#' are skipped as comments
+//  - FORWARD lines contribute an optional primer name (rest of the line after the oligo)
+//  - PRIMER lines contribute an optional primer name (rest of the line after the forward and reverse oligos)
+//  - BARCODE lines contribute a barcode name: 3 column lines name the group in column 3,
+//    4 column lines (forward and reverse barcodes) name the group in column 4
+//Every barcode name is combined with every primer name ("barcode.primer", or just the one that is
+//non-empty); combinations involving the name "ignore" are dropped. The unique combinations are
+//appended to Groups in sorted order.
+//Returns 1 (the historical "return true"), kept as-is so existing callers see no change.
+int GetMIMarksPackageCommand::readOligos(){
+    try {
+        ifstream inOligos;
+        m->openInputFile(oligosfile, inOligos);
+
+        string type, oligo, roligo, group;
+        vector<string> primerNameVector, barcodeNameVector;
+
+        while(!inOligos.eof()){
+
+            inOligos >> type;
+
+            if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
+
+            if(type[0] == '#'){
+                //comment line - throw away whatever else is on it
+                while (!inOligos.eof()) { char c = inOligos.get();  if (c == 10 || c == 13){ break; } }
+                m->gobble(inOligos);
+            }
+            else{
+                m->gobble(inOligos);
+                //make type case insensitive
+                for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
+
+                inOligos >> oligo;
+
+                if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
+
+                //normalize the oligo: upper case, RNA 'U' becomes DNA 'T'
+                for(int i=0;i<oligo.length();i++){
+                    oligo[i] = toupper(oligo[i]);
+                    if(oligo[i] == 'U') { oligo[i] = 'T'; }
+                }
+
+                if(type == "FORWARD"){
+                    //rest of the line, if any, is the primer name
+                    group = restOfLineWithoutBlanks(inOligos);
+                    primerNameVector.push_back(group);
+                }
+                else if (type == "PRIMER"){
+                    m->gobble(inOligos);
+
+                    inOligos >> roligo;
+
+                    for(int i=0;i<roligo.length();i++){
+                        roligo[i] = toupper(roligo[i]);
+                        if(roligo[i] == 'U') { roligo[i] = 'T'; }
+                    }
+
+                    //rest of the line, if any, is the primer name
+                    group = restOfLineWithoutBlanks(inOligos);
+                    primerNameVector.push_back(group);
+                }
+                else if(type == "BARCODE"){
+                    inOligos >> group;
+
+                    //barcode lines can look like   BARCODE   atgcatgc   groupName  - for 454 seqs
+                    //or                            BARCODE   atgcatgc   atgcatgc    groupName  - for illumina data that has forward and reverse info
+                    string temp = restOfLineWithoutBlanks(inOligos);
+
+                    //a 4th column means what we read into group was the reverse barcode, and the 4th column is the group name
+                    if (temp != "") { group = temp; }
+
+                    barcodeNameVector.push_back(group);
+                }
+            }
+            m->gobble(inOligos);
+        }
+        inOligos.close();
+
+        //add in potential combos - an empty name stands for "no barcodes" / "no primers"
+        if(barcodeNameVector.size() == 0){ barcodeNameVector.push_back(""); }
+        if(primerNameVector.size() == 0){ primerNameVector.push_back(""); }
+
+        set<string> uniqueNames;
+        for(int i = 0; i < barcodeNameVector.size(); i++){
+            for(int j = 0; j < primerNameVector.size(); j++){
+
+                string primerName = primerNameVector[j];
+                string barcodeName = barcodeNameVector[i];
+
+                if ((primerName == "ignore") || (barcodeName == "ignore")) { continue; } //do nothing
+                if ((primerName == "") && (barcodeName == "")) { continue; }             //nothing to name
+
+                string comboGroupName = "";
+                if (primerName == "")       { comboGroupName = barcodeName; }
+                else if (barcodeName == "") { comboGroupName = primerName; }
+                else                        { comboGroupName = barcodeName + "." + primerName; }
+
+                uniqueNames.insert(comboGroupName);
+            }
+        }
+
+        if (m->debug) { int count = 0; for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } }
+
+        for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { Groups.push_back(*it); }
+
+        return true;
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "GetMIMarksPackageCommand", "readOligos");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+// going to have to rework this to allow for other options --
+/*
+ file option 1
+ sfffile1   oligosfile1
+ sfffile2   oligosfile2
+ ...
+ file option 2
+ fastqfile1 oligosfile1
+ fastqfile2 oligosfile2
+ ...
+ file option 3
+ group  forwardFastqfile  reverseFastqfile
+ group  forwardFastqfile  reverseFastqfile
+ group  forwardFastqfile  reverseFastqfile
+ ...
+ */
+
+//Reads the file named by the file parameter, one entry per line, and fills Groups.
+//  - 2 column lines: sff or fastq file, then oligos file. The oligos file is parsed with readOligos(),
+//    which appends the group names it defines to Groups.
+//  - 3 column lines: group name, forward fastq file, reverse fastq file. The group name is appended to Groups.
+//Each file name is tried as given, then in mothur's default path, then in the output directory.
+//Lines whose files cannot be found are skipped with a warning.
+//A line with any other number of columns reports an error, sets m->control_pressed and stops reading.
+//Always returns 0.
+int GetMIMarksPackageCommand::readFile(){
+    try {
+        inputfile = file;
+
+        ifstream in;
+        m->openInputFile(file, in);
+
+        while(!in.eof()) {
+
+            if (m->control_pressed) { return 0; }
+
+            string line = m->getline(in);  m->gobble(in);
+            vector<string> pieces = m->splitWhiteSpace(line);
+
+            string group = "";
+            string thisFileName1 = "";
+            string thisFileName2 = "";
+            if (pieces.size() == 2) {
+                thisFileName1 = pieces[0];
+                thisFileName2 = pieces[1];
+            }else if (pieces.size() == 3) {
+                thisFileName1 = pieces[1];
+                thisFileName2 = pieces[2];
+                group = pieces[0]; //assign the outer variable - redeclaring it here left the pushed group name empty
+            }else {
+                m->mothurOut("[ERROR]: file lines can be 2 or 3 columns. The 2 column files are sff file then oligos or fastqfile then oligos. You may have multiple lines in the file.  The 3 column files are for paired read libraries. The format is groupName, forwardFastqFile reverseFastqFile. \n"); m->control_pressed = true;
+                break; //no file names to check on this line, and the command is aborting anyway
+            }
+
+            if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", thisFileName1 = " + thisFileName1 + ", thisFileName2 = " + thisFileName2  + ".\n"); }
+
+            //check to make sure both are able to be opened
+            ifstream inForward;
+            int openForward = m->openInputFile(thisFileName1, inForward, "noerror");
+
+            //if you can't open it, try default location
+            if (openForward == 1) {
+                if (m->getDefaultPath() != "") { //default path is set
+                    string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName1);
+                    m->mothurOut("Unable to open " + thisFileName1 + ". Trying default " + tryPath); m->mothurOutEndLine();
+                    ifstream probe;
+                    openForward = m->openInputFile(tryPath, probe, "noerror");
+                    probe.close();
+                    thisFileName1 = tryPath;
+                }
+            }
+
+            //if you can't open it, try output location
+            if (openForward == 1) {
+                if (m->getOutputDir() != "") { //output directory is set
+                    string tryPath = m->getOutputDir() + m->getSimpleName(thisFileName1);
+                    m->mothurOut("Unable to open " + thisFileName1 + ". Trying output directory " + tryPath); m->mothurOutEndLine();
+                    ifstream probe;
+                    openForward = m->openInputFile(tryPath, probe, "noerror");
+                    thisFileName1 = tryPath;
+                    probe.close();
+                }
+            }
+
+            if (openForward == 1) { //can't find it
+                m->mothurOut("[WARNING]: can't find " + thisFileName1 + ", ignoring.\n");
+            }else{  inForward.close();  }
+
+            ifstream inReverse;
+            int openReverse = m->openInputFile(thisFileName2, inReverse, "noerror");
+
+            //if you can't open it, try default location
+            if (openReverse == 1) {
+                if (m->getDefaultPath() != "") { //default path is set
+                    string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName2);
+                    m->mothurOut("Unable to open " + thisFileName2 + ". Trying default " + tryPath); m->mothurOutEndLine();
+                    ifstream probe;
+                    openReverse = m->openInputFile(tryPath, probe, "noerror");
+                    probe.close();
+                    thisFileName2 = tryPath;
+                }
+            }
+
+            //if you can't open it, try output location
+            if (openReverse == 1) {
+                if (m->getOutputDir() != "") { //output directory is set
+                    string tryPath = m->getOutputDir() + m->getSimpleName(thisFileName2);
+                    m->mothurOut("Unable to open " + thisFileName2 + ". Trying output directory " + tryPath); m->mothurOutEndLine();
+                    ifstream probe;
+                    openReverse = m->openInputFile(tryPath, probe, "noerror");
+                    thisFileName2 = tryPath;
+                    probe.close();
+                }
+            }
+
+            if (openReverse == 1) { //can't find it
+                m->mothurOut("[WARNING]: can't find " + thisFileName2 + ", ignoring pair.\n");
+            }else{  inReverse.close();  }
+
+
+            if ((pieces.size() == 2) && (openForward != 1) && (openReverse != 1)) { //good pair and sff or fastq and oligos
+                oligosfile = thisFileName2;
+                if (m->debug) { m->mothurOut("[DEBUG]: about to read oligos\n"); }
+                readOligos();
+            }else if((pieces.size() == 3) && (openForward != 1) && (openReverse != 1)) { //good pair and paired read
+                Groups.push_back(group);
+            }
+        }
+        in.close();
+
+        inputfile = file;
+
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "GetMIMarksPackageCommand", "readFile");
                 exit(1);
         }
 }
index e45dfe738af3c435a15619d39a8feb75b6613568..ccef8326ac779ff1976d14bb62a4e8dd2a76d67b 100644 (file)
@@ -32,10 +32,13 @@ public:
     void help() { m->mothurOut(getHelpString()); }
     
 private:
-    bool abort;
-    string oligosfile, groupfile, package;
+    bool abort, requiredonly;
+    string oligosfile, groupfile, package, inputfile, file;
     string outputDir;
-    vector<string> outputNames;
+    vector<string> outputNames, Groups;
+    
+    int readOligos();
+    int readFile();
 };
 
 /**************************************************************************************************/
index 313deef5bc913fdb2efa77e1b55d203f69e50277..baa7710eddc63901f570725af70b5d847918cb11 100644 (file)
@@ -2823,7 +2823,7 @@ bool MothurOut::isSubset(vector<string> bigset, vector<string> subset) {
         
                if (subset.size() > bigset.size()) { return false;  }
                
-               //check if each guy in suset is also in bigset
+               //check if each guy in subset is also in bigset
                for (int i = 0; i < subset.size(); i++) {
                        bool match = false;
                        for (int j = 0; j < bigset.size(); j++) {
@@ -3599,6 +3599,26 @@ bool MothurOut::inUsersGroups(vector<string> groupnames, vector<string> Groups)
                exit(1);
        }       
 }
+/**************************************************************************************************/
+//removes entries that are empty or contain only white space; the remaining entries keep their order.
+//tempVector is modified in place. Always returns 0.
+int MothurOut::removeBlanks(vector<string>& tempVector) {
+    try {
+        vector<string> kept;
+        for (int i = 0; i < tempVector.size(); i++) {
+            bool isBlank = true;
+            for (int j = 0; j < tempVector[i].length(); j++) {
+                //cast to unsigned char: handing isspace a negative char value is undefined behavior
+                if (!isspace((unsigned char)tempVector[i][j])) { isBlank = false; break; } //contains non space chars, save it
+            }
+            if (!isBlank) { kept.push_back(tempVector[i]); }
+        }
+        tempVector = kept;
+        return 0;
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "removeBlanks");
+        exit(1);
+    }
+}
 /***********************************************************************/
 //this function determines if the user has given us labels that are smaller than the given label.
 //if so then it returns true so that the calling function can run the previous valid distance.
index d5b7e5f806b54bdd2b9c8283cd6d781d5fe2abe1..a57fb136ba3d41b210e671e2fe85848a4d40f359 100644 (file)
@@ -132,7 +132,7 @@ class MothurOut {
                //searchs and checks
                bool checkReleaseVersion(ifstream&, string);
                bool anyLabelsToProcess(string, set<string>&, string);
-               bool inUsersGroups(vector<string>, vector<string>);
+               bool inUsersGroups(vector<string>, vector<string>); //returns true if any of the strings in first vector are in second vector
         bool inUsersGroups(vector<int>, vector< vector<int> >);
                bool inUsersGroups(string, vector<string>);
         bool inUsersGroups(int, vector<int>);
@@ -158,6 +158,7 @@ class MothurOut {
                void splitAtDash(string&, vector<string>&);
                void splitAtChar(string&, vector<string>&, char);
         void splitAtChar(string&, string&, char);
+        int removeBlanks(vector<string>&);
         vector<string> splitWhiteSpaceWithQuotes(string);
                int removeConfidences(string&);
         string removeQuotes(string);
index aa35365fe1d85a58a0c71b48fb7bb5206dadb751..02e98991c892e922bc7c5a8f8234739e8b42c40a 100644 (file)
 vector<string> SRACommand::setParameters(){
        try {
         CommandParameter psff("sff", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(psff);
-        CommandParameter pgroup("group", "InputTypes", "", "", "groupOligos", "none", "none","",false,false); parameters.push_back(pgroup);
-        CommandParameter poligos("oligos", "InputTypes", "", "", "groupOligos", "none", "none","",false,false); parameters.push_back(poligos);
-        CommandParameter pfile("file", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfile);
+        CommandParameter poligos("oligos", "InputTypes", "", "", "oligos", "none", "none","",false,false,true); parameters.push_back(poligos);
+        CommandParameter pfile("file", "InputTypes", "", "", "sffFastQFile-oligos", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfile);
                CommandParameter pfastq("fastq", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfastq);
-        CommandParameter pcontact("contact", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pcontact);
+        CommandParameter pcontact("project", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pcontact);
+        CommandParameter pmimark("mimark", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pmimark);
         //choose only one multiple options
         CommandParameter pplatform("platform", "Multiple", "_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT", "_LS454", "", "", "","",false,false); parameters.push_back(pplatform);
         CommandParameter pinstrument("instrument", "Multiple", "454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified", "454_GS", "", "", "","",false,false); parameters.push_back(pinstrument);
         CommandParameter plibstrategy("libstrategy", "String", "AMPLICON", "", "", "", "","",false,false); parameters.push_back(plibstrategy);
+        CommandParameter pdatatype("datatype", "String", "METAGENOME", "", "", "", "","",false,false); parameters.push_back(pdatatype);
         CommandParameter plibsource("libsource", "String", "METAGENOMIC", "", "", "", "","",false,false); parameters.push_back(plibsource);
         CommandParameter plibselection("libselection", "String", "PCR", "", "", "", "","",false,false); parameters.push_back(plibselection);
-        
+        CommandParameter porientation("orientation", "Multiple", "forward-reverse", "forward", "", "", "","",false,false); parameters.push_back(porientation);
         CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
                CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);
         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
@@ -50,26 +51,26 @@ string SRACommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The sra command creates the necessary files for a NCBI submission. The xml file and individual sff or fastq files parsed from the original sff or fastq file.\n";
-               helpString += "The sra command parameters are: sff, fastq, file, oligos, contact, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, group, platform, libstrategy, libsource, libselection and instrument.\n";
+               helpString += "The sra command parameters are: sff, fastq, file, oligos, project, mimarksfile, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, platform, orientation, libstrategy, datatype, libsource, libselection and instrument.\n";
         helpString += "The sff parameter is used to provide the original sff file.\n";
                helpString += "The fastq parameter is used to provide the original fastq file.\n";
-        helpString += "The contact parameter is used to provide your contact file.\n";
-        helpString += "The oligos parameter is used to provide an oligos file to parse your sff or fastq file by.\n";
-        helpString += "The group parameter is used to provide the group file to parse your sff or fastq file by.\n";
+        helpString += "The project parameter is used to provide your project file.\n";
+        helpString += "The oligos parameter is used to provide an oligos file to parse your sff or fastq file by. It is required and must contain barcodes and primers, or you must provide a file option. \n";
+        helpString += "The mimark parameter is used to provide your mimarks file.  You can create the template for this file using the get.mimarkspackage command.\n";
                helpString += "The file parameter is used to provide a file containing a list of individual fastq or sff files or paired fastq files with a group assignment. File lines can be 2 or 3 columns. The 2 column files are sff file then oligos or fastqfile then oligos. You may have multiple lines in the file.  The 3 column files are for paired read libraries. The format is groupName, forwardFastqFile reverseFastqFile.\n";
         helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
                helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
                helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
                helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
-        helpString += "The platform parameter is used to specify platfrom you are using choices are: _LS454,ILLUMINA,ION_TORRENT,PACBIO_SMRT. Default=_LS454. This is a controlled vocabulary section in the XML file that will be generated.\n";
+        helpString += "The platform parameter is used to specify platform you are using choices are: _LS454,ILLUMINA,ION_TORRENT,PACBIO_SMRT. Default=_LS454. This is a controlled vocabulary section in the XML file that will be generated.\n";
+        helpString += "The orientation parameter is used to specify sequence orientation. Choices are: forward and reverse. Default=forward. This is a controlled vocabulary section in the XML file that will be generated.\n";
         helpString += "The instrument parameter is used to specify instrument. Choices are 454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified. Default=454_GS. This is a controlled vocabulary section in the XML file that will be generated. \n";
         helpString += "The libstrategy parameter is used to specify library strategy. Default=AMPLICON. Choices are AMPLICON,WGA,WGS,WGX,RNA-Seq,miRNA-Seq,WCS,CLONE,POOLCLONE,CLONEEND,FINISHING,ChIP-Seq,MNase-Seq,DNase-Hypersensitivity,Bisulfite-Seq,Tn-Seq,EST,FL-cDNA,CTS,MRE-Seq,MeDIP-Seq,MBD-Seq,OTHER. This is a controlled vocabulary section in the XML file that will be generated.  \n";
         helpString += "The libsource parameter is used to specify library source. Default=METAGENOMIC. Choices are METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA,OTHER. This is a controlled vocabulary section in the XML file that will be generated. \n";
         helpString += "The libselection parameter is used to specify library selection. Default=PCR. Choices are PCR,RANDOM,RANDOM_PCR,RT-PCR,HMPR,MF,CF-S,CF-H,CF-T,CF-M,MDA,MSLL,cDNA,ChIP,MNase,DNAse,Hybrid_Selection,Reduced_Representation,Restriction_Digest,5-methylcytidine_antibody,MBD2_protein_methyl-CpG_binding_domain,CAGE,RACE,size_fractionation,Padlock_probes_capture_method,other,unspecified. This is a controlled vocabulary section in the XML file that will be generated. \n";
-        
-               helpString += "The sra should be in the following format: \n";
-               helpString += "sra(...)\n";
+        helpString += "The datatype parameter is used to specify datatype. Default=METAGENOME. Choices are METAGENOME,GENOME_SEQUENCING,METAGENOMIC_ASSEMBLY,ASSEMBLY,TRANSCRIPTOME,PROTEOMIC,MAP,CLONE_ENDS,TARGETED_LOCI,RANDOM_SURVEY,EXOME,VARIATION,EPIGENOMICS,PHENOTYPE,GENOTYPE,OTHER. This is a controlled vocabulary section in the XML file that will be generated. \n";
+               helpString += "sra(sff=sff=GHL4YHV01.sff, GHL4YHV01.oligos, project=test.project, mimark=MIMarksData.txt)\n";
                return helpString;
        }
        catch(exception& e) {
@@ -162,28 +163,28 @@ SRACommand::SRACommand(string option)  {
                                        if (path == "") {       parameters["file"] = inputDir + it->second;             }
                                }
                 
-                it = parameters.find("group");
+                it = parameters.find("oligos");
                                //user has given a template file
                                if(it != parameters.end()){
                                        path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
-                                       if (path == "") {       parameters["group"] = inputDir + it->second;            }
+                                       if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
                                }
                 
-                it = parameters.find("oligos");
+                it = parameters.find("project");
                                //user has given a template file
                                if(it != parameters.end()){
                                        path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
-                                       if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
+                                       if (path == "") {       parameters["project"] = inputDir + it->second;          }
                                }
                 
-                it = parameters.find("contact");
+                it = parameters.find("mimark");
                                //user has given a template file
                                if(it != parameters.end()){
                                        path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
-                                       if (path == "") {       parameters["contact"] = inputDir + it->second;          }
+                                       if (path == "") {       parameters["mimark"] = inputDir + it->second;           }
                                }
             }
             
@@ -200,46 +201,31 @@ SRACommand::SRACommand(string option)  {
                        if (file == "not open") {  file = "";  abort = true; }
                        else if (file == "not found") { file = ""; }
             
-            groupfile = validParameter.validFile(parameters, "group", true);
-                       if (groupfile == "not open") {  groupfile = "";  abort = true; }
-                       else if (groupfile == "not found") { groupfile = ""; }
-            else {  m->setGroupFile(groupfile); }
-            
             oligosfile = validParameter.validFile(parameters, "oligos", true);
-                       if (oligosfile == "not found")      {   oligosfile = "";        }
-                       else if(oligosfile == "not open")       {       abort = true;           }
+                       if (oligosfile == "not found")      {  oligosfile = "";     }
+            else if(oligosfile == "not open")  {       abort = true;           }
                        else {  m->setOligosFile(oligosfile); }
             
-            contactfile = validParameter.validFile(parameters, "contact", true);
-                       if (contactfile == "not found")      {  contactfile = ""; m->mothurOut("[ERROR]: You must provide a contact file before you can use the sra command."); m->mothurOutEndLine(); abort = true;    }
+            contactfile = validParameter.validFile(parameters, "project", true);
+                       if (contactfile == "not found")      {  contactfile = ""; m->mothurOut("[ERROR]: You must provide a project file before you can use the sra command."); m->mothurOutEndLine(); abort = true;    }
                        else if(contactfile == "not open")      {       abort = true;           }
             
+            mimarksfile = validParameter.validFile(parameters, "mimark", true);
+                       if (mimarksfile == "not found")      {  mimarksfile = ""; m->mothurOut("[ERROR]: You must provide a mimark file before you can use the sra command. You can create a template for this file using the get.mimarkspackage command."); m->mothurOutEndLine(); abort = true;       }
+                       else if(mimarksfile == "not open")      {       abort = true;           }
+            
             file = validParameter.validFile(parameters, "file", true);
                        if (file == "not open") {  file = "";  abort = true; }
                        else if (file == "not found") { file = ""; }
                        
-                       if ((fastqfile == "") && (sfffile == "") && (sfffile == "")) {
-                m->mothurOut("[ERROR]: You must provide a file, sff file or fastq file before you can use the sra command."); m->mothurOutEndLine(); abort = true;
+            if ((file == "") && (oligosfile == "")) {
+                m->mothurOut("[ERROR]: You must provide an oligos file or file with oligos files in them before you can use the sra command."); m->mothurOutEndLine(); abort = true;
             }
             
-            if ((groupfile != "") && (oligosfile != "")) {
-                m->mothurOut("[ERROR]: You may not use a group file and an oligos file, only one."); m->mothurOutEndLine(); abort = true;
+                       if ((fastqfile == "") && (file == "") && (sfffile == "")) {
+                m->mothurOut("[ERROR]: You must provide a file, sff file or fastq file before you can use the sra command."); m->mothurOutEndLine(); abort = true;
             }
             
-            if ((fastqfile != "") || (sfffile != "")) {
-                if ((groupfile == "") && (oligosfile == "")) {
-                    oligosfile = m->getOligosFile();
-                                       if (oligosfile != "") {  m->mothurOut("Using " + oligosfile + " as input file for the oligos parameter."); m->mothurOutEndLine(); }
-                                       else {
-                                               groupfile = m->getGroupFile();
-                        if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
-                        else {
-                            m->mothurOut("[ERROR]: You must provide groupfile or oligos file if splitting a fastq or sff file."); m->mothurOutEndLine(); abort = true;
-                        }
-                                       }
-                }
-            }
-                                   
             //use only one Mutliple type _LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT
                        platform = validParameter.validFile(parameters, "platform", false);         if (platform == "not found") { platform = "_LS454"; }
                        if (!checkCasesPlatforms(platform)) { abort = true; } //error message in function
@@ -269,6 +255,17 @@ SRACommand::SRACommand(string option)  {
             
             //turn _ to spaces mothur's work around
             for (int i = 0; i < libSelection.length(); i++) { if (libSelection[i] == '_') { libSelection[i] = ' '; }  }
+            
+            dataType = validParameter.validFile(parameters, "datatype", false);         if (dataType == "not found") { dataType = "METAGENOME"; }
+            if (!checkCasesDataType(dataType)) { abort = true; } //error message in function
+            
+            //turn _ to spaces mothur's work around
+            for (int i = 0; i < dataType.length(); i++) { if (dataType[i] == '_') { dataType[i] = ' '; }  }
+            
+            orientation = validParameter.validFile(parameters, "orientation", false);         if (orientation == "not found") { orientation = "forward"; }
+            
+            if ((orientation == "forward") || (orientation == "reverse")) {  }
+            else {  m->mothurOut("[ERROR]: " + orientation + " is not a valid orientation option. Choices are: forward and reverse.\n"); m->mothurOutEndLine(); abort = true; }
 
             
             string temp = validParameter.validFile(parameters, "bdiffs", false);               if (temp == "not found"){       temp = "0";             }
@@ -303,8 +300,8 @@ int SRACommand::execute(){
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
         
         readContactFile();
-        if (oligosfile != "") {  readOligos(); Groups.push_back("scrap"); }
-        if (groupfile != "")  {  GroupMap groupmap(groupfile); groupmap.readMap(); Groups = groupmap.getNamesOfGroups(); Groups.push_back("scrap"); }
+        readMIMarksFile();
+        if (oligosfile != "") { readOligos(); Groups.push_back("scrap"); }
         
         if (m->control_pressed) { return 0; }
         
@@ -316,6 +313,8 @@ int SRACommand::execute(){
         else if (sfffile != "")     {       parseSffFile(filesBySample);    }
         else if (fastqfile != "")   {       parseFastqFile(filesBySample);  }
         
+        sanityCheckMiMarksGroups();
+        
         //checks groups and files returned from parse - removes any groups that did not get reads assigned to them, orders files.
         checkGroups(filesBySample);
         
@@ -355,25 +354,21 @@ int SRACommand::execute(){
         out << "\t\t\t\t<XmlContent>\n";
         out << "\t\t\t\t\t<Project schema_version=\"2.0\">\n";
         out << "\t\t\t\t\t\t<ProjectID>\n";
-        ///////////////////////out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"Institute name\">" + ProjectID + " </SPUID> \n";
+        out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + projectName + " </SPUID> \n";
         out << "\t\t\t\t\t\t</ProjectID>\n";
         out << "\t\t\t\t\t\t<Descriptor>\n";
-        ////////////////////out << "\t\t\t\t\t\t\t<Title>" + title + " </Title> \n";
+        out << "\t\t\t\t\t\t\t<Title>" + projectTitle + " </Title> \n";
         out << "\t\t\t\t\t\t\t<Description><p>" + description + "</p></Description> \n";
-        out << "\t\t\t\t\t\t\t<ExternalLink label=\"Website name\">\n";
-        /////////////////////////out << "\t\t\t\t\t\t\t\t<URL>" + website + "</URL>\n";
-        out << "\t\t\t\t\t\t\t</ExternalLink>\n";
-        out << "\t\t\t\t\t\t\t<Relevance>\n";
-        //////////////////////out << "\t\t\t\t\t\t\t\t<Medical>" + medicalRelevance + "</Medical>\n";
-        out << "\t\t\t\t\t\t\t</Relevance>\n";
+        if (website != "") {
+            out << "\t\t\t\t\t\t\t<ExternalLink label=\"Website name\">\n";
+            out << "\t\t\t\t\t\t\t\t<URL>" + website + "</URL>\n";
+            out << "\t\t\t\t\t\t\t</ExternalLink>\n";
+        }
         out << "\t\t\t\t\t\t</Descriptor>\n";
         out << "\t\t\t\t\t\t<ProjectType>\n";
-        /////////////////////////out << "\t\t\t\t\t\t\t<ProjectTypeSubmission sample_scope=\"eMultiisolate\">\n"; //<!-- controlled vocabulary? -->
-        out << "\t\t\t\t\t\t\t\t<Organism>\n";
-        ////////////////////out << "\t\t\t\t\t\t\t\t\t<OrganismName>" + scientificName + " </OrganismName> \n";
-        out << "\t\t\t\t\t\t\t\t</Organism>\n";
+        out << "\t\t\t\t\t\t\t<ProjectTypeSubmission sample_scope=\"eEnvironment\">\n";
         out << "\t\t\t\t\t\t\t\t<IntendedDataTypeSet>\n";
-        ////////////////////out << "\t\t\t\t\t\t\t\t\t<DataType>" + dataType + " </DataType> \n"; <!-- controlled vocabulary? -->
+        out << "\t\t\t\t\t\t\t\t\t<DataType>" + dataType + " </DataType> \n";
         out << "\t\t\t\t\t\t\t\t</IntendedDataTypeSet>\n";
         out << "\t\t\t\t\t\t\t</ProjectTypeSubmission>\n";
         out << "\t\t\t\t\t\t</ProjectType>\n";
@@ -381,7 +376,7 @@ int SRACommand::execute(){
         out << "\t\t\t\t</XmlContent>\n";
         out << "\t\t\t</Data>\n";
         out << "\t\t\t<Identifier>\n";
-        ////////////////////////////out << "\t\t\t\t<SPUID spuid_namespace=\"Institute name\">" + ProjectID + " </SPUID>\n";
+        out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + projectName + " </SPUID> \n";
         out << "\t\t\t</Identifier>\n";
         out << "\t\t</AddData>\n";
         out << "\t</Action>\n";
@@ -391,102 +386,140 @@ int SRACommand::execute(){
         ////////////////////////////////////////////////////////
         for (int i = 0; i < Groups.size(); i++) {
             
-            vector<string> thisGroupsFiles = filesBySample[Groups[i]];
             string barcodeForThisSample = Group2Barcode[Groups[i]];
             
-            for (int j = 0; j < thisGroupsFiles.size(); j++) {
-                if (m->control_pressed) { break; }
-                out << "\t<Action>\n";
-                out << "\t\t<AddData target_db=\"BioSample\">\n";
-                out << "\t\t\t<Data content_type=\"XML\">\n";
-                out << "\t\t\t\t<XmlContent>\n";
-                out << "\t\t\t\t\t<BioSample schema_version=\"2.0\">\n";
-                out << "\t\t\t\t\t\t<SampleId>\n";
-                out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"Institute name\">" + Groups[i] + " </SPUID> \n";
-                out << "\t\t\t\t\t\t</SampleId>\n";
-                out << "\t\t\t\t\t\t<Descriptor>\n";
-                ////////////////////out << "\t\t\t\t\t\t\t<Title>" + title + " </Title> \n";
-                out << "\t\t\t\t\t\t</Descriptor>\n";
-                out << "\t\t\t\t\t\t<Organism>\n";
-                ////////////////////out << "\t\t\t\t\t\t\t<OrganismName>" + scientificName + " </OrganismName> \n";
-                out << "\t\t\t\t\t\t</Organism>\n";
-                out << "\t\t\t\t\t\t<BioProject>\n";
-                ///////////////////////out << "\t\t\t\t\t\t\t<SPUID spuid_namespace=\"Institute name\">" + BioProject + " </SPUID> \n";
-                out << "\t\t\t\t\t\t</BioProject>\n";
-                out << "\t\t\t\t\t\t<Package>MIMARKS.specimen</Package>n";
-                out << "\t\t\t\t\t\t<Attributes>n";
-                //add biosample required attributes
-                ///////////////////////////////////////////////////////////////////////
-                
-                out << "\t\t\t\t\t\t</Attributes>n";
-                out << "\t\t\t\t\t</BioSample>\n";
-                out << "\t\t\t\t</XmlContent>\n";
-                out << "\t\t\t</Data>\n";
-                
-                //libID
-                out << "\t\t\t<Identifier>\n";
-                string libId = thisGroupsFiles[j] + barcodeForThisSample;
-                if (libLayout == "paired") { //adjust the libID because the thisGroupsFiles[j] contains two filenames
-                    vector<string> pieces = m->splitWhiteSpace(thisGroupsFiles[j]);
-                    libId = pieces[0] + barcodeForThisSample;
+            if (m->control_pressed) { break; }
+            out << "\t<Action>\n";
+            out << "\t\t<AddData target_db=\"BioSample\">\n";
+            out << "\t\t\t<Data content_type=\"XML\">\n";
+            out << "\t\t\t\t<XmlContent>\n";
+            out << "\t\t\t\t\t<BioSample schema_version=\"2.0\">\n";
+            out << "\t\t\t\t\t\t<SampleId>\n";
+            out << "\t\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + Groups[i] + " </SPUID> \n";
+            out << "\t\t\t\t\t\t</SampleId>\n";
+            out << "\t\t\t\t\t\t<Organism>\n";
+            string organismName = "metagenome";
+            map<string, string>::iterator itOrganism = Group2Organism.find(Groups[i]);
+            if (itOrganism != Group2Organism.end()) { organismName = itOrganism->second; } //user supplied acceptable organism, so use it.
+            out << "\t\t\t\t\t\t\t<OrganismName>" + organismName + " </OrganismName> \n";
+            out << "\t\t\t\t\t\t</Organism>\n";
+            out << "\t\t\t\t\t\t<Package>" + packageType + "</Package>n";
+            out << "\t\t\t\t\t\t<Attributes>n";
+            //add biosample required attributes
+            map<string, map<string, string> >:: iterator it = mimarks.find(Groups[i]);
+            if (it != mimarks.end()) {
+                map<string, string> categories = it->second;
+                for (map<string, string>:: iterator it2 = categories.begin(); it2 != categories.end(); it2++) {
+                    if (m->control_pressed) { break; }
+                    out << "\t\t\t\t\t\t\t<Attribute attribute_name=\"" + it2->first + "\">\"" + it2->second + "\"</Attribute>\n";
                 }
-                out << "\t\t\t\t<SPUID spuid_namespace=\"Institute name\">" + libId + " </SPUID>\n";
-                out << "\t\t\t</Identifier>\n";
-                
-                out << "\t\t</AddData>\n";
-                out << "\t</Action>\n";
             }
+            out << "\t\t\t\t\t\t</Attributes>n";
+            out << "\t\t\t\t\t</BioSample>\n";
+            out << "\t\t\t\t</XmlContent>\n";
+            out << "\t\t\t</Data>\n";
+            out << "\t\t\t<Identifier>\n";
+            out << "\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + Groups[i] + " </SPUID>\n";
+            out << "\t\t\t</Identifier>\n";
+            out << "\t\t</AddData>\n";
+            out << "\t</Action>\n";
         }
         
+        //File objects
+        ////////////////////////////////////////////////////////
         for (int i = 0; i < Groups.size(); i++) {
             
             vector<string> thisGroupsFiles = filesBySample[Groups[i]];
             string barcodeForThisSample = Group2Barcode[Groups[i]];
             
             for (int j = 0; j < thisGroupsFiles.size(); j++) {
-            if (m->control_pressed) { break; }
+                string libId = thisGroupsFiles[j] + "." + barcodeForThisSample;
+                
+                if (m->control_pressed) { break; }
                 out << "\t<Action>\n";
                 out << "\t\t<AddFiles target_db=\"SRA\">\n";
                 if (libLayout == "paired") { //adjust the libID because the thisGroupsFiles[j] contains two filenames
                     vector<string> pieces = m->splitWhiteSpace(thisGroupsFiles[j]);
+                    libId = pieces[0] + barcodeForThisSample;
                     out << "\t\t\t<File file_path=\"" + pieces[0] + "\">\n";
-                    ////////////////////out << "\t\t\t\t<DataType>fastq</DataType> \n";  //since its paired we know its fastq, is the dataType the fileType???
+                    out << "\t\t\t\t<DataType>generic-data</DataType> \n";
                     out << "\t\t\t</File>\n";
+                    vector<string> thisBarcodes; m->splitAtChar(Group2Barcode[Groups[i]], thisBarcodes, '.');
+                    string forwardBarcode = thisBarcodes[0];
+                    string reverseBarcode = thisBarcodes[1];
+                    vector<string> thisPrimers; m->splitAtChar(Group2Primer[Groups[i]], thisPrimers, '.');
+                    string forwardPrimer = thisPrimers[0];
+                    string reversePrimer = thisPrimers[1];
+                    //attributes
+                    out << "\t\t\t<Attribute name=\"title\">" + mimarks[Groups[i]]["title"] + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"BarCode\">" + forwardBarcode + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"primer\">" + forwardPrimer + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"read_type\">forward</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"library_name\">" + libId + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"library_strategy\">" + libStrategy + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"library_source\">" + libSource + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"library_selection\">" + libSelection + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"library_layout\">" + libLayout + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"instrument_model\">" + instrumentModel + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"library_construction_protocol\">" + mimarks[Groups[i]]["seq_methods"] + "</Attribute>\n";
+
                     out << "\t\t\t<File file_path=\"" + pieces[1] + "\">\n";
-                    ////////////////////out << "\t\t\t\t<DataType>fastq</DataType> \n";  //since its paired we know its fastq, is the dataType the fileType???
+                    out << "\t\t\t\t<DataType>generic-data</DataType> \n";
                     out << "\t\t\t</File>\n";
+                    out << "\t\t\t<Attribute name=\"title\">" + mimarks[Groups[i]]["title"] + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"BarCode\">" + reverseBarcode + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"primer\">" + reversePrimer + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"read_type\">reverse</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"library_name\">" + libId + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"library_strategy\">" + libStrategy + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"library_source\">" + libSource + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"library_selection\">" + libSelection + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"library_layout\">" + libLayout + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"instrument_model\">" + instrumentModel + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"library_construction_protocol\">" + mimarks[Groups[i]]["seq_methods"] + "</Attribute>\n";
+
                 }else { //single
                     out << "\t\t\t<File file_path=\"" + thisGroupsFiles[j] + "\">\n";
-                    string dataType = "fastq";
-                    if (isSFF) { dataType = "sff"; }
-                    ////////////////////out << "\t\t\t\t<DataType>" + dataType + " </DataType> \n";  //is the dataType the fileType???
+                    out << "\t\t\t\t<DataType>generic-data</DataType> \n";
                     out << "\t\t\t</File>\n";
+                    //attributes
+                    out << "\t\t\t<Attribute name=\"title\">" + mimarks[Groups[i]]["title"] + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"BarCode\">" + Group2Barcode[Groups[i]] + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"primer\">" + Group2Primer[Groups[i]] + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"read_type\">" + orientation + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"library_name\">" + libId + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"library_strategy\">" + libStrategy + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"library_source\">" + libSource + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"library_selection\">" + libSelection + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"library_layout\">" + libLayout + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"instrument_model\">" + instrumentModel + "</Attribute>\n";
+                    out << "\t\t\t<Attribute name=\"library_construction_protocol\">" + mimarks[Groups[i]]["seq_methods"] + "</Attribute>\n";
+
                 }
-                //attributes
-                out << "\t\t\t<Attribute name=\"instrument_model\">" + instrumentModel + "</Attribute>\n";
-                out << "\t\t\t<Attribute name=\"library_strategy\">" + libStrategy + "</Attribute>\n";
-                out << "\t\t\t<Attribute name=\"library_source\">" + libSource + "</Attribute>\n";
-                out << "\t\t\t<Attribute name=\"library_selection\">" + libSelection + "</Attribute>\n";
-                out << "\t\t\t<Attribute name=\"library_layout\">" + libLayout + "</Attribute>\n";
-                
-                //////////////////bioSample info
                 ///////////////////bioProject info
-                
+                out << "\t\t\t<AttributeRefId name=\"BioProject\">\n";
+                out << "\t\t\t\t<RefId>\n";
+                out << "\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + projectName + " </SPUID> \n";
+                out << "\t\t\t\t</RefId>\n";
+                out << "\t\t\t</AttributeRefId>\n";
+                //////////////////bioSample info
+                out << "\t\t\t<AttributeRefId name=\"BioSample\">\n";
+                out << "\t\t\t\t<RefId>\n";
+                out << "\t\t\t\t\t<SPUID spuid_namespace=\"" + centerName + "\">" + Groups[i] + " </SPUID>\n";
+                out << "\t\t\t\t</RefId>\n";
+                out << "\t\t\t</AttributeRefId>\n";
                 //libID
                 out << "\t\t\t<Identifier>\n";
-                string libId = thisGroupsFiles[j] + barcodeForThisSample;
                 if (libLayout == "paired") { //adjust the libID because the thisGroupsFiles[j] contains two filenames
                     vector<string> pieces = m->splitWhiteSpace(thisGroupsFiles[j]);
                     libId = pieces[0] + barcodeForThisSample;
                 }
-                out << "\t\t\t\t<SPUID spuid_namespace=\"Institute name\">" + libId + " </SPUID>\n";
+                out << "\t\t\t\t<LocalId>" + libId + " </LocalId>\n";
                 out << "\t\t\t</Identifier>\n";
                 out << "\t\t</AddFiles>\n";
                 out << "\t</Action>\n";
             }
         }
-        
-        ////////////////////////////////////////////////////////
         out << "</Submission>\n";
         out.close();
         
@@ -508,7 +541,8 @@ int SRACommand::execute(){
 //**********************************************************************************************************************
 int SRACommand::readContactFile(){ //loads the project (contact) file into the submitter/project members; problems are flagged by setting m->control_pressed and the path shown here returns 0
        try {
-        lastName = ""; firstName = ""; submissionName = ""; email = ""; centerName = ""; centerType = ""; description = "";
+        lastName = ""; firstName = ""; submissionName = ""; email = ""; centerName = ""; centerType = ""; description = ""; website = ""; projectName = "";
+        projectTitle = ""; grantAgency = ""; grantId = ""; grantTitle = ""; //every field starts empty: the required-field checks at the end test for ""
         
         ifstream in;
         m->openInputFile(contactfile, in);
@@ -523,28 +557,36 @@ int SRACommand::readContactFile(){
             
             for (int i = 0; i < key.length(); i++) { key[i] = toupper(key[i]); } //upper-case the key so the tag comparisons below ignore case
             
-            if (key == "USERNAME")       {   submissionName = value; }
-            else if (key == "LAST")        {   lastName = value;       }
-            else if (key == "FIRST")       {   firstName = value;      }
-            else if (key == "EMAIL")            {   email = value;          }
-            else if (key == "CENTER")      {   centerName = value;     }
-            else if (key == "TYPE")      {
+            if (key == "USERNAME")          {   submissionName = value; }
+            else if (key == "LAST")         {   lastName = value;       }
+            else if (key == "FIRST")        {   firstName = value;      }
+            else if (key == "EMAIL")        {   email = value;          }
+            else if (key == "CENTER")       {   centerName = value;     }
+            else if (key == "TYPE")         {
                 centerType = value;
                 for (int i = 0; i < centerType.length(); i++) { centerType[i] = tolower(centerType[i]); } //the center type is compared in lower case
                 if ((centerType == "consortium") || (centerType == "center") ||  (centerType == "institute") ||  (centerType == "lab")) {}
                 else { m->mothurOut("[ERROR]: " + centerType + " is not a center type option.  Valid center type options are consortium, center, institute and lab. This is a controlled vocabulary section in the XML file that will be generated."); m->mothurOutEndLine(); m->control_pressed = true; }
             }else if (key == "DESCRIPTION")     {   description = value;    }
+            else if (key == "WEBSITE")          {   website = value;        } //website and the three grant fields have no empty-check below, so leaving them out is not an error
+            else if (key == "PROJECTNAME")      {   projectName = value;    }
+            else if (key == "PROJECTTITLE")     {   projectTitle = value;   }
+            else if (key == "GRANTID")          {   grantId = value;        }
+            else if (key == "GRANTTITLE")       {   grantTitle = value;     }
+            else if (key == "GRANTAGENCY")      {   grantAgency = value;    }
         }
         in.close();
         
-        if (lastName == "") { m->mothurOut("[ERROR]: missing last name from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
-        if (firstName == "") { m->mothurOut("[ERROR]: missing first name from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
-        if (submissionName == "") { m->mothurOut("[ERROR]: missing submission name from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
-        if (email == "") { m->mothurOut("[ERROR]: missing email from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
-        if (centerName == "") { m->mothurOut("[ERROR]: missing center name from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
-        if (centerType == "") { m->mothurOut("[ERROR]: missing center type from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
-        if (description == "") { m->mothurOut("[ERROR]: missing description from contacts file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
-        
+        if (lastName == "") { m->mothurOut("[ERROR]: missing last name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; } //required fields: each one still empty is reported and sets control_pressed
+        if (firstName == "") { m->mothurOut("[ERROR]: missing first name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
+        if (submissionName == "") { m->mothurOut("[ERROR]: missing submission name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
+        if (email == "") { m->mothurOut("[ERROR]: missing email from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
+        if (centerName == "") { m->mothurOut("[ERROR]: missing center name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
+        if (centerType == "") { m->mothurOut("[ERROR]: missing center type from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
+        if (description == "") { m->mothurOut("[ERROR]: missing description from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
+        if (projectTitle == "") { m->mothurOut("[ERROR]: missing project title from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
+        if (projectName == "") { m->mothurOut("[ERROR]: missing project name from project file, quitting."); m->mothurOutEndLine(); m->control_pressed = true; }
+
         return 0;
     }
        catch(exception& e) {
@@ -552,6 +594,169 @@ int SRACommand::readContactFile(){
                exit(1);
        }
 }
+//**********************************************************************************************************************
+//air, host_associated, human_associated, human_gut, human_oral, human_skin, human_vaginal, microbial, miscellaneous, plant_associated, sediment, soil, wastewater or water
+//all packages require: *sample_name   *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon
+//air: *altitude
+//host_associated, human_associated, human_gut, human_oral, human_skin, human_vaginal, plant_associated: *host
+//microbial, sediment, soil: *depth    *elev
+//water: *depth
+int SRACommand::readMIMarksFile(){
+       try {
+        //acceptable organisms
+        vector<string> acceptableOrganisms;
+        bool organismError = false;
+        //ecological
+        acceptableOrganisms.push_back("activated carbon metagenome"); acceptableOrganisms.push_back("activated sludge metagenome"); acceptableOrganisms.push_back("air metagenome"); acceptableOrganisms.push_back("anaerobic digester metagenome"); acceptableOrganisms.push_back("ant fungus garden metagenome"); acceptableOrganisms.push_back("aquatic metagenome"); acceptableOrganisms.push_back("activated carbon metagenome"); acceptableOrganisms.push_back("activated sludge metagenome"); acceptableOrganisms.push_back("beach sand metagenome"); acceptableOrganisms.push_back("biofilm metagenome"); acceptableOrganisms.push_back("biofilter metagenome"); acceptableOrganisms.push_back("biogas fermenter metagenome"); acceptableOrganisms.push_back("bioreactor metagenome"); acceptableOrganisms.push_back("bioreactor sludge metagenome"); acceptableOrganisms.push_back("clinical metagenome"); acceptableOrganisms.push_back("coal metagenome"); acceptableOrganisms.push_back("compost metagenome"); acceptableOrganisms.push_back("dust metagenome"); acceptableOrganisms.push_back("fermentation metagenome"); acceptableOrganisms.push_back("food fermentation metagenome"); acceptableOrganisms.push_back("food metagenome"); acceptableOrganisms.push_back("freshwater metagenome"); acceptableOrganisms.push_back("freshwater sediment metagenome"); acceptableOrganisms.push_back("groundwater metagenome"); acceptableOrganisms.push_back("halite metagenome"); acceptableOrganisms.push_back("hot springs metagenome"); acceptableOrganisms.push_back("hydrocarbon metagenome"); acceptableOrganisms.push_back("hydrothermal vent metagenome"); acceptableOrganisms.push_back("hypersaline lake metagenome"); acceptableOrganisms.push_back("ice metagenome"); acceptableOrganisms.push_back("indoor metagenome"); acceptableOrganisms.push_back("industrial waste metagenome"); acceptableOrganisms.push_back("mangrove metagenome"); acceptableOrganisms.push_back("marine metagenome"); acceptableOrganisms.push_back("marine sediment 
metagenome"); acceptableOrganisms.push_back("microbial mat metagenome"); acceptableOrganisms.push_back("mine drainage metagenome"); acceptableOrganisms.push_back("mixed culture metagenome"); acceptableOrganisms.push_back("oil production facility metagenome"); acceptableOrganisms.push_back("paper pulp metagenome"); acceptableOrganisms.push_back("permafrost metagenome"); acceptableOrganisms.push_back("plastisphere metagenome"); acceptableOrganisms.push_back("power plant metagenome"); acceptableOrganisms.push_back("retting rhizosphere metagenome"); acceptableOrganisms.push_back("rock metagenome"); acceptableOrganisms.push_back("salt lake metagenome"); acceptableOrganisms.push_back("saltern metagenome"); acceptableOrganisms.push_back("sediment metagenome"); acceptableOrganisms.push_back("snow metagenome"); acceptableOrganisms.push_back("soil metagenome"); acceptableOrganisms.push_back("stromatolite metagenome"); acceptableOrganisms.push_back("terrestrial metagenome"); acceptableOrganisms.push_back("tomb wall metagenome"); acceptableOrganisms.push_back("wastewater metagenome"); acceptableOrganisms.push_back("wetland metagenome"); acceptableOrganisms.push_back("whale fall metagenome");
+        //oganismal
+        acceptableOrganisms.push_back("algae metagenome"); acceptableOrganisms.push_back("ant metagenome"); acceptableOrganisms.push_back("bat metagenome"); acceptableOrganisms.push_back("beetle metagenome"); acceptableOrganisms.push_back("bovine gut metagenome"); acceptableOrganisms.push_back("bovine metagenome"); acceptableOrganisms.push_back("chicken gut metagenome"); acceptableOrganisms.push_back("coral metagenome"); acceptableOrganisms.push_back("echinoderm metagenome"); acceptableOrganisms.push_back("endophyte metagenome"); acceptableOrganisms.push_back("epibiont metagenome"); acceptableOrganisms.push_back("fish metagenome"); acceptableOrganisms.push_back("fossil metagenome"); acceptableOrganisms.push_back("gill metagenome"); acceptableOrganisms.push_back("gut metagenome"); acceptableOrganisms.push_back("honeybee metagenome"); acceptableOrganisms.push_back("human gut metagenome"); acceptableOrganisms.push_back("human lung metagenome"); acceptableOrganisms.push_back("human metagenome"); acceptableOrganisms.push_back("human nasal/pharyngeal metagenome"); acceptableOrganisms.push_back("human oral metagenome"); acceptableOrganisms.push_back("human skin metagenome"); acceptableOrganisms.push_back("insect gut metagenome"); acceptableOrganisms.push_back("insect metagenome"); acceptableOrganisms.push_back("mollusc metagenome"); acceptableOrganisms.push_back("mosquito metagenome"); acceptableOrganisms.push_back("mouse gut metagenome"); acceptableOrganisms.push_back("mouse metagenome"); acceptableOrganisms.push_back("mouse skin metagenome"); acceptableOrganisms.push_back("nematode metagenome"); acceptableOrganisms.push_back("oral metagenome"); acceptableOrganisms.push_back("phyllosphere metagenome"); acceptableOrganisms.push_back("pig metagenome"); acceptableOrganisms.push_back("plant metagenome"); acceptableOrganisms.push_back("primate metagenome"); acceptableOrganisms.push_back("rat metagenome"); acceptableOrganisms.push_back("root metagenome"); 
acceptableOrganisms.push_back("sea squirt metagenome"); acceptableOrganisms.push_back("seed metagenome"); acceptableOrganisms.push_back("shoot metagenome"); acceptableOrganisms.push_back("skin metagenome"); acceptableOrganisms.push_back("snake metagenome"); acceptableOrganisms.push_back("sponge metagenome"); acceptableOrganisms.push_back("stomach metagenome"); acceptableOrganisms.push_back("symbiont metagenome"); acceptableOrganisms.push_back("termite gut metagenome"); acceptableOrganisms.push_back("termite metagenome"); acceptableOrganisms.push_back("upper respiratory tract metagenome"); acceptableOrganisms.push_back("urine metagenome"); acceptableOrganisms.push_back("viral metagenome"); acceptableOrganisms.push_back("wallaby gut metagenome"); acceptableOrganisms.push_back("wasp metagenome"); acceptableOrganisms.push_back("sythetic metagenome"); acceptableOrganisms.push_back("metagenome");
+        
+        vector<string> requiredFieldsForPackage;
+        requiredFieldsForPackage.push_back("sample_name"); requiredFieldsForPackage.push_back("organism");
+        requiredFieldsForPackage.push_back("collection_date"); requiredFieldsForPackage.push_back("biome");
+        requiredFieldsForPackage.push_back("feature"); requiredFieldsForPackage.push_back("material");
+        requiredFieldsForPackage.push_back("geo_loc_name"); requiredFieldsForPackage.push_back("lat_lon");
+        requiredFieldsForPackage.push_back("seq_methods"); requiredFieldsForPackage.push_back("title");
+        vector<string> chooseAtLeastOneForPackage;
+        
+        ifstream in;
+        m->openInputFile(mimarksfile, in);
+        
+        //read comments
+        string temp; packageType = "";
+        while(!in.eof()) {
+            
+            if (m->control_pressed) { break; }
+            temp = m->getline(in); m->gobble(in);
+            
+            if (m->debug) { m->mothurOut("[DEBUG]: " + temp + "\n"); }
+            
+            if (temp[0] == '#') {
+                int pos = temp.find("Environmental");
+                if (pos != string::npos) {
+                    for (int i = pos+14; i < temp.length(); i++) {
+                        if (!isspace(temp[i])) { packageType += temp[i]; }
+                        else { i+= temp.length(); }
+                    }
+                }
+            }
+            else{ break; } //hit headers line
+         }
+        
+        vector<string> headers; m->splitAtChar(temp, headers, '\t');
+        m->removeBlanks(headers);
+        //remove * from required's
+        for (int i = 0; i < headers.size(); i++) {
+            if (headers[i][0] == '*') { headers[i] = headers[i].substr(1); }
+            if (headers[i][0] == '*') { headers[i] = headers[i].substr(1); chooseAtLeastOneForPackage.push_back(headers[i]); }  //secondary condition
+            if (m->debug) { m->mothurOut("[DEBUG]: " + headers[i] + "\n"); }
+        }
+        
+        if (m->debug) {  m->mothurOut("[DEBUG]: packageType = '" + packageType + "'\n");   }
+        
+        //check to make sure package has all its required parts
+        //MIMARKS.specimen.water.3.0
+        if (packageType == "MIMARKS.specimen.air.3.0") {   requiredFieldsForPackage.push_back("altitude");  }
+        else if ((packageType == "MIMARKS.specimen.host-associated.3.0") || (packageType == "MIMARKS.specimen.human-associated.3.0") || (packageType == "MIMARKS.specimen.human-gut.3.0") || (packageType == "MIMARKS.specimen.human-oral.3.0") || (packageType == "MIMARKS.specimen.human-skin.3.0") || (packageType == "MIMARKS.specimen.human-vaginal.3.0") || (packageType == "MIMARKS.specimen.plant-associated.3.0")) {  requiredFieldsForPackage.push_back("host");  }
+        else if ((packageType == "MIMARKS.specimen.microbial.3.0") || (packageType == "MIMARKS.specimen.sediment.3.0") || (packageType == "soil")) {   requiredFieldsForPackage.push_back("depth");  requiredFieldsForPackage.push_back("elev"); }
+        else if (packageType == "MIMARKS.specimen.water.3.0") {   requiredFieldsForPackage.push_back("depth");  }
+        else if ((packageType == "MIMARKS.specimen.miscellaneous.3.0") || (packageType == "wastewater")) { }
+        else {
+            m->mothurOut("[ERROR]: unknown package " + packageType + ", please correct.\n"); m->control_pressed = true; in.close(); return 0;
+        }
+        
+        if (!m->isSubset(headers, requiredFieldsForPackage)){
+            string requiredFields = "";
+            for (int i = 0; i < requiredFieldsForPackage.size()-1; i++) { requiredFields += requiredFieldsForPackage[i] + ", "; } requiredFields += requiredFieldsForPackage[requiredFieldsForPackage.size()-1];
+            m->mothurOut("[ERROR]: missing required fields for package, please correct. Required fields are " + requiredFields + ".\n"); m->control_pressed = true; in.close(); return 0;
+        }
+        
+        if (m->debug) {  m->mothurOut("[DEBUG]: chooseAtLeastOneForPackage.size() = " + toString(chooseAtLeastOneForPackage.size()) + "\n");   }
+        
+        if (!m->inUsersGroups(chooseAtLeastOneForPackage, headers)){ //returns true if any of the choose at least ones are in headers
+            string requiredFields = "";
+            for (int i = 0; i < chooseAtLeastOneForPackage.size()-1; i++) { requiredFields += chooseAtLeastOneForPackage[i] + ", "; cout << chooseAtLeastOneForPackage[i] << endl; }
+            if (chooseAtLeastOneForPackage.size() < 1) { requiredFields += chooseAtLeastOneForPackage[chooseAtLeastOneForPackage.size()-1]; }
+            m->mothurOut("[ERROR]: missing a choose at least one fields for the package, please correct. These are marked with '**'. Required fields are " + requiredFields + ".\n"); m->control_pressed = true; in.close(); return 0;
+        }
+        
+        map<string, bool> allNA;  for (int i = 1; i < headers.size(); i++) {  allNA[headers[i]] = true; }
+        while(!in.eof()) {
+            
+            if (m->control_pressed) { break; }
+            
+            temp = m->getline(in);  m->gobble(in);
+            
+            if (m->debug) { m->mothurOut("[DEBUG]: " + temp + "\n"); }
+            
+            string original = temp;
+            vector<string> linePieces; m->splitAtChar(temp, linePieces, '\t');
+            m->removeBlanks(linePieces);
+            
+            if (linePieces.size() != headers.size()) { m->mothurOut("[ERROR]: line: " + original + " contains " + toString(linePieces.size()) + " columns, but you have " + toString(headers.size()) + " column headers, please correct.\n"); m->control_pressed = true; }
+            else {
+                map<string, map<string, string> >:: iterator it = mimarks.find(linePieces[0]);
+                
+                if (it == mimarks.end()) {
+                    map<string, string> categories;
+                    //start after *sample_name
+                    for (int i = 1; i < headers.size(); i++) {
+                        categories[headers[i]] = linePieces[i];
+                        //check the users inputs for appropriate organisms
+                        if (headers[i] == "organism") {
+                            if (!m->inUsersGroups(linePieces[i], acceptableOrganisms)) { //not an acceptable organism
+                                organismError = true;
+                                m->mothurOut("[WARNING]: " + linePieces[i]+ " is not an acceptable organism, changing to metagenome. You can correct the issue and rerun the command, or NCBI will allow you to modify the organism after submission.\n"); linePieces[i] = "metagenome"; categories[headers[i]] = linePieces[i];
+                            }
+                            Group2Organism[linePieces[0]] = linePieces[i];
+                        }
+                        if (linePieces[i] != "NA") {  allNA[headers[i]] = false;     }
+                    }
+                    
+                    //does this sample already match an existing sample?
+                    bool isOkaySample = true;
+                    for (map<string, map<string, string> >:: iterator it2 = mimarks.begin(); it2 != mimarks.end(); it2++) {
+                        if (m->control_pressed) { break; }
+                        bool allSame = true;
+                        for (int i = 1; i < headers.size(); i++) {
+                            if ((it2->second)[headers[i]] != categories[headers[i]]) { allSame = false; }
+                        }
+                        if (allSame) { m->mothurOut("[ERROR]: " + linePieces[0]+ " is a duplicate sample to " + it2->first + ". It has all the same attributes in the MIMarks file. Samples must have distinguishing features to be uploaded to the NCBI library, please correct.\n"); m->control_pressed = true; isOkaySample = false; }
+                    }
+                    if (isOkaySample) { mimarks[linePieces[0]] = categories; }
+                }else {
+                    m->mothurOut("[ERROR]: " + linePieces[0]+ " is a duplicate sampleName. Sample names must be unique, please correct.\n"); m->control_pressed = true;
+                }
+            }
+        }
+        in.close();
+        
+        //add in values for "scrap" group
+        map<string, string> categories;
+        //start after *sample_name
+        for (int i = 1; i < headers.size(); i++) {
+            categories[headers[i]] = "NA";
+            if (headers[i] == "organism")       { categories[headers[i]] = "metagenome"; }
+            if (headers[i] == "seq_methods")    { categories[headers[i]] = "these sequences were scrapped"; }
+            if (headers[i] == "title")          { categories[headers[i]] = "these sequences were scrapped"; }
+        }
+        mimarks["scrap"] = categories;
+        Group2Organism["scrap"] = "metagenome";
+        
+        if (organismError) {
+            string organismTypes = "";
+            for (int i = 0; i < acceptableOrganisms.size()-1; i++) { organismTypes += acceptableOrganisms[i] + ", "; }
+            organismTypes += acceptableOrganisms[acceptableOrganisms.size()-1];
+            m->mothurOut("[WARNING]: The acceptable organism choices are: " + organismTypes + ".\n");
+        }
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SRACommand", "readMIMarksFile");
+               exit(1);
+       }
+}
 
 //**********************************************************************************************************************
 // going to have to rework this to allow for other options --
@@ -579,7 +784,7 @@ int SRACommand::readContactFile(){
 
 int SRACommand::readFile(map<string, vector<string> >& files){
        try {
-        vector<string> theseFiles;
+        //vector<string> theseFiles;
         inputfile = file;
         files.clear();
         
@@ -641,8 +846,10 @@ int SRACommand::readFile(map<string, vector<string> >& files){
                 m->mothurOut("[WARNING]: can't find " + thisFileName1 + ", ignoring.\n");
             }else{  in2.close();  }
             
+            int openReverse = 1;
+            
             ifstream in3;
-            int openReverse = m->openInputFile(thisFileName2, in3, "noerror");
+            openReverse = m->openInputFile(thisFileName2, in3, "noerror");
             
             //if you can't open it, try default location
             if (openReverse == 1) {
@@ -671,22 +878,27 @@ int SRACommand::readFile(map<string, vector<string> >& files){
             if (openReverse == 1) { //can't find it
                 m->mothurOut("[WARNING]: can't find " + thisFileName2 + ", ignoring pair.\n");
             }else{  in3.close();  }
-            
-            
+           
             
             if ((pieces.size() == 2) && (openForward != 1) && (openReverse != 1)) { //good pair and sff or fastq and oligos
                 //process pair
-                int pos = theseFiles[0].find(".sff");
+                int pos = thisFileName1.find(".sff");
                 if (pos != string::npos) {//these files are sff files
                     isSFF = true;
                     sfffile = thisFileName1; oligosfile = thisFileName2;
+                    if (m->debug) { m->mothurOut("[DEBUG]: about to read oligos\n"); }
                     readOligos();
+                    if (m->debug) { m->mothurOut("[DEBUG]: about to parse\n"); }
                     parseSffFile(files);
+                    if (m->debug) { m->mothurOut("[DEBUG]: done parsing " + sfffile + "\n"); }
                 }else{
                     isSFF = false;
                     fastqfile = thisFileName1; oligosfile = thisFileName2;
+                    if (m->debug) { m->mothurOut("[DEBUG]: about to read oligos\n"); }
                     readOligos();
+                    if (m->debug) { m->mothurOut("[DEBUG]: about to parse\n"); }
                     parseFastqFile(files);
+                    if (m->debug) { m->mothurOut("[DEBUG]: done parsing " + fastqfile + "\n"); }
                 }
                 
             }else if((pieces.size() == 3) && (openForward != 1) && (openReverse != 1)) { //good pair and paired read
@@ -719,16 +931,15 @@ int SRACommand::parseSffFile(map<string, vector<string> >& files){
         isSFF = true;
         //run sffinfo to parse sff file into individual sampled sff files
         string commandString = "sff=" + sfffile;
-        if (groupfile != "") { commandString += ", group=" + groupfile; }
-        else if (oligosfile != "") {
-            commandString += ", oligos=" + oligosfile;
-            //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs
-            if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); }
-            if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); }
-            if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
-            if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
-            if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
-        }
+        
+        commandString += ", oligos=" + oligosfile;
+        //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs
+        if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); }
+        if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); }
+        if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
+        if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
+        if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
+        
         m->mothurOutEndLine();
         m->mothurOut("/******************************************/"); m->mothurOutEndLine();
         m->mothurOut("Running command: sffinfo(" + commandString + ")"); m->mothurOutEndLine();
@@ -765,16 +976,15 @@ int SRACommand::parseFastqFile(map<string, vector<string> >& files){
         
         //run sffinfo to parse sff file into individual sampled sff files
         string commandString = "fastq=" + fastqfile;
-        if (groupfile != "") { commandString += ", group=" + groupfile; }
-        else if (oligosfile != "") {
-            commandString += ", oligos=" + oligosfile;
-            //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs
-            if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); }
-            if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); }
-            if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
-            if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
-            if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
-        }
+        
+        commandString += ", oligos=" + oligosfile;
+        //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs
+        if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); }
+        if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); }
+        if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
+        if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
+        if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
+       
         m->mothurOutEndLine();
         m->mothurOut("/******************************************/"); m->mothurOutEndLine();
         m->mothurOut("Running command: fastq.info(" + commandString + ")"); m->mothurOutEndLine();
@@ -865,7 +1075,12 @@ int SRACommand::readOligos(){
                
                string type, oligo, roligo, group;
         bool hasPrimer = false; bool hasPairedBarcodes = false; pairedOligos = false;
-        
+        map<int, oligosPair> pairedBarcodes;
+        map<int, oligosPair> pairedPrimers;
+        map<string, int> barcodes;
+        map<string, int> primers;
+        vector<string>  linker;
+        vector<string>  spacer, revPrimer;
                int indexPrimer = 0;
                int indexBarcode = 0;
         int indexPairedPrimer = 0;
@@ -1055,6 +1270,7 @@ int SRACommand::readOligos(){
                         }
                         uniqueNames.insert(comboGroupName);
                         Group2Barcode[comboGroupName] = (itBar->second).forward+"."+(itBar->second).reverse;
+                        Group2Primer[comboGroupName] = (itPrimer->second).forward+"."+(itPrimer->second).reverse;
                     }
                 }
             }
@@ -1083,6 +1299,7 @@ int SRACommand::readOligos(){
                         }
                         uniqueNames.insert(comboGroupName);
                         Group2Barcode[comboGroupName] = itBar->first;
+                        Group2Primer[comboGroupName] = itPrimer->first;
                     }
                 }
             }
@@ -1270,6 +1487,7 @@ bool SRACommand::checkCasesLibStrategy(string& libStrategy){
                exit(1);
        }
 }
+
 //**********************************************************************************************************************
 //METAGENOMIC,GENOMIC,TRANSCRIPTOMIC,METATRANSCRIPTOMIC,SYNTHETIC,VIRAL_RNA,OTHER
 bool SRACommand::checkCasesLibSource(string& libSource){
@@ -1336,5 +1554,55 @@ bool SRACommand::checkCasesLibSelection(string& libSelection){
                exit(1);
        }
 }
+//**********************************************************************************************************************
+//METAGENOME,GENOME_SEQUENCING,METAGENOMIC_ASSEMBLY,ASSEMBLY,TRANSCRIPTOME,PROTEOMIC,MAP,CLONE_ENDS,TARGETED_LOCI,RANDOM_SURVEY,EXOME,VARIATION,EPIGENOMICS,PHENOTYPE,GENOTYPE,OTHER
+bool SRACommand::checkCasesDataType(string& dataType){
+       try {
+        string original = dataType;
+        bool isOkay = true;
+        
+        //remove users possible case errors
+        for (int i = 0; i < dataType.size(); i++) { dataType[i] = toupper(dataType[i]); }
+        
+        if ((dataType == "METAGENOME") || (dataType == "GENOME_SEQUENCING") || (dataType == "METAGENOMIC_ASSEMBLY") || (dataType == "ASSEMBLY") || (dataType == "TRANSCRIPTOME") || (dataType == "PROTEOMIC") || (dataType == "MAP") || (dataType == "CLONE_ENDS") || (dataType == "TARGETED_LOCI") || (dataType == "RANDOM_SURVEY") || (dataType == "EXOME") || (dataType == "VARIATION") || (dataType == "EPIGENOMICS") || (dataType == "PHENOTYPE") || (dataType == "GENOTYPE") || (dataType == "OTHER")) { }
+        else { isOkay = false; }
+        
+        if (isOkay) {
+            
+        }else {
+            m->mothurOut("[ERROR]: " + original + " is not a valid datatype option.  Valid datatype options are METAGENOME,GENOME_SEQUENCING,METAGENOMIC_ASSEMBLY,ASSEMBLY,TRANSCRIPTOME,PROTEOMIC,MAP,CLONE_ENDS,TARGETED_LOCI,RANDOM_SURVEY,EXOME,VARIATION,EPIGENOMICS,PHENOTYPE,GENOTYPE,OTHER."); m->mothurOutEndLine(); abort = true;
+        }
+        
+        return isOkay;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SRACommand", "checkCasesDataType");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+bool SRACommand::sanityCheckMiMarksGroups(){
+       try {
+        bool isOkay = true;
+        
+        for (int i = 0; i < Groups.size(); i++) {
+            if (m->control_pressed) { break; }
+            
+            map<string, map<string, string> >::iterator it = mimarks.find(Groups[i]);
+            if (it == mimarks.end()) {
+                isOkay = false;
+                m->mothurOut("[ERROR]: MIMarks file is missing group " + Groups[i] + ", please correct.\n");
+            }
+        }
+        
+        if (!isOkay) { m->control_pressed = true; }
+        
+        return isOkay;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SRACommand", "sanityCheckMiMarksGroups");
+               exit(1);
+       }
+}
 
 //**********************************************************************************************************************
index 5cc371447ecb0b47605c642eb523afe7d2d7bac6..c53a17ddc038361c1dab613723eeef17b37e736b 100644 (file)
@@ -36,27 +36,28 @@ public:
 private:
     bool abort, isSFF, pairedOligos;
     int tdiffs, bdiffs, pdiffs, sdiffs, ldiffs;
-    string sfffile, fastqfile, outputDir, groupfile, file, oligosfile, contactfile, inputfile;
-    string libStrategy, libSource, libSelection, libLayout, platform, instrumentModel, fileType;
-    string submissionName, lastName, firstName, email, centerName, centerType, description;
-    vector<string> outputNames, Groups, revPrimer;
+    string sfffile, fastqfile, outputDir, file, oligosfile, contactfile, inputfile, mimarksfile;
+    string libStrategy, libSource, libSelection, libLayout, platform, instrumentModel, fileType, dataType;
+    string submissionName, lastName, firstName, email, centerName, centerType, description, website, orientation, packageType;
+    string projectName, grantId, grantTitle, grantAgency, projectTitle;
+    vector<string> outputNames, Groups;
     vector<string> primerNameVector;
     vector<string> barcodeNameVector;
     map<string, string> Group2Barcode;
-    map<int, oligosPair> pairedBarcodes;
-    map<int, oligosPair> pairedPrimers;
-       map<string, int> barcodes;
-       map<string, int> primers;
-    vector<string>  linker;
-    vector<string>  spacer;
+    map<string, string> Group2Primer;
+    map<string, string> Group2Organism;
+    map<string, map<string, string> > mimarks;  //group -> <field -> valueForGroup> ex.  F003D001 -> <lat_lon -> 42.282026 -83.733850>
 
     bool checkCasesInstrumentModels(string&);
     bool checkCasesPlatforms(string&);
     bool checkCasesLibStrategy(string&);
     bool checkCasesLibSource(string&);
     bool checkCasesLibSelection(string&);
+    bool checkCasesDataType(string&);
+    bool sanityCheckMiMarksGroups();
     int readFile(map<string, vector<string> >&);
     int readContactFile();
+    int readMIMarksFile();
     int readOligos();
     int parseSffFile(map<string, vector<string> >&);
     int parseFastqFile(map<string, vector<string> >&);