]> git.donarmstrong.com Git - mothur.git/blob - getmimarkspackagecommand.cpp
added oligos class. added check orient parameter to trim.flows, sffinfo, fastq.info...
[mothur.git] / getmimarkspackagecommand.cpp
1 //
2 //  getmimarkspackagecommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 3/25/14.
6 //  Copyright (c) 2014 Schloss Lab. All rights reserved.
7 //
8
9 #include "getmimarkspackagecommand.h"
10 #include "groupmap.h"
11
12
13 //**********************************************************************************************************************
14 vector<string> GetMIMarksPackageCommand::setParameters(){
15         try {
16         //files that have dependancies
17         CommandParameter pgroup("group", "InputTypes", "", "", "groupOligos", "none", "none","",false,false); parameters.push_back(pgroup);
18         CommandParameter pfile("file", "InputTypes", "", "", "groupOligos", "none", "none","",false,false); parameters.push_back(pfile);
19         CommandParameter poligos("oligos", "InputTypes", "", "", "groupOligos", "none", "none","",false,false); parameters.push_back(poligos);
20         CommandParameter ppackage("package", "Multiple", "air-host_associated-human_associated-human_gut-human_oral-human_skin-human_vaginal-microbial-miscellaneous-plant_associated-sediment-soil-wastewater-water", "miscellaneous", "", "", "","",false,false,true); parameters.push_back(ppackage);
21         CommandParameter prequiredonly("requiredonly", "Boolean", "", "F", "", "", "","",false,false, true); parameters.push_back(prequiredonly);
22                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
23                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
24                 
25                 vector<string> myArray;
26                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
27                 return myArray;
28         }
29         catch(exception& e) {
30                 m->errorOut(e, "GetMIMarksPackageCommand", "setParameters");
31                 exit(1);
32         }
33 }
34 //**********************************************************************************************************************
35 string GetMIMarksPackageCommand::getHelpString(){
36         try {
37                 string helpString = "";
38                 helpString += "The get.mimarkspackage command creates a mimarks package form with your groups. The required fields are flagged with * characters. \n";
39         helpString += "Further documentation on the different packages and required formats can be found here, http://www.mothur.org/wiki/MIMarks_Data_Packages.\n";
40                 helpString += "The get.mimarkspackage command parameters are: oligos, group, package and requiredonly. oligos or group is required.\n";
41                 helpString += "The oligos parameter is used to provide your oligos file so mothur can extract your group names.\n";
42         helpString += "The group parameter is used to provide your group file so mothur can extract your group names.\n";
43         helpString += "The package parameter is used to select the mimarks package you would like to use. The choices are: air, host_associated, human_associated, human_gut, human_oral, human_skin, human_vaginal, microbial, miscellaneous, plant_associated, sediment, soil, wastewater or waterc. Default=miscellaneous.\n";
44         helpString += "The requiredonly parameter is used to indicate you only want the required mimarks feilds printed. Default=F.\n";
45                 helpString += "The get.mimarkspackage command should be in the following format: get.mimarkspackage(oligos=yourOligosFile, package=yourPackage)\n";
46                 helpString += "get.mimarkspackage(oligos=GQY1XT001.oligos, package=human_gut)\n";
47                 return helpString;
48         }
49         catch(exception& e) {
50                 m->errorOut(e, "GetMIMarksPackageCommand", "getHelpString");
51                 exit(1);
52         }
53 }
54 //**********************************************************************************************************************
55 string GetMIMarksPackageCommand::getOutputPattern(string type) {
56     try {
57         string pattern = "";
58         
59         if (type == "tsv") {  pattern = "[filename],tsv"; }
60         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
61         
62         return pattern;
63     }
64     catch(exception& e) {
65         m->errorOut(e, "GetMIMarksPackageCommand", "getOutputPattern");
66         exit(1);
67     }
68 }
69 //**********************************************************************************************************************
70 GetMIMarksPackageCommand::GetMIMarksPackageCommand(){
71         try {
72                 abort = true; calledHelp = true;
73                 setParameters();
74         vector<string> tempOutNames;
75                 outputTypes["tsv"] = tempOutNames;
76         }
77         catch(exception& e) {
78                 m->errorOut(e, "GetMIMarksPackageCommand", "GetMIMarksPackageCommand");
79                 exit(1);
80         }
81 }
82 //**********************************************************************************************************************
83 GetMIMarksPackageCommand::GetMIMarksPackageCommand(string option)  {
84         try {
85         
86                 abort = false; calledHelp = false;
87                 
88                 //allow user to run help
89                 if(option == "help") { help(); abort = true; calledHelp = true; }
90                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
91                 
92                 else {
93                         //valid paramters for this command
94                         vector<string> myArray = setParameters();
95                         
96                         OptionParser parser(option);
97                         map<string,string> parameters = parser.getParameters();
98                         
99                         ValidParameters validParameter;
100                         map<string,string>::iterator it;
101                         //check to make sure all parameters are valid for command
102                         for (it = parameters.begin(); it != parameters.end(); it++) {
103                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
104                         }
105                         
106             vector<string> tempOutNames;
107                         outputTypes["tsv"] = tempOutNames;
108             
109                         //if the user changes the input directory command factory will send this info to us in the output parameter
110                         inputDir = validParameter.validFile(parameters, "inputdir", false);
111                         if (inputDir == "not found"){   inputDir = "";          }
112                         else {
113                 
114                                 string path;
115                                 it = parameters.find("oligos");
116                                 //user has given a template file
117                                 if(it != parameters.end()){
118                                         path = m->hasPath(it->second);
119                                         //if the user has not given a path then, add inputdir. else leave path alone.
120                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
121                                 }
122                                 
123                                 it = parameters.find("group");
124                                 //user has given a template file
125                                 if(it != parameters.end()){
126                                         path = m->hasPath(it->second);
127                                         //if the user has not given a path then, add inputdir. else leave path alone.
128                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
129                                 }
130                 
131                 it = parameters.find("file");
132                                 //user has given a template file
133                                 if(it != parameters.end()){
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                 }
137                                 
138             }
139             
140                         groupfile = validParameter.validFile(parameters, "group", true);
141                         if (groupfile == "not open") {  groupfile = "";  abort = true; }
142                         else if (groupfile == "not found") { groupfile = ""; }
143             else {  m->setGroupFile(groupfile); inputfile = groupfile; }
144             
145             file = validParameter.validFile(parameters, "file", true);
146                         if (file == "not open") {  file = "";  abort = true; }
147                         else if (file == "not found") { file = ""; }
148             else {  inputfile = file; }
149             
150             oligosfile = validParameter.validFile(parameters, "oligos", true);
151                         if (oligosfile == "not found")      {   oligosfile = "";        }
152                         else if(oligosfile == "not open")       {       abort = true;           }
153                         else {  m->setOligosFile(oligosfile); inputfile = oligosfile; }
154
155             if ((groupfile != "") && (oligosfile != "") && (file != "")) {
156                 m->mothurOut("[ERROR]: You may not use a group file, file and an oligos file, only one."); m->mothurOutEndLine(); abort = true;
157             }
158
159             if ((groupfile == "") && (oligosfile == "") && (file == "")) {
160                 oligosfile = m->getOligosFile();
161                 if (oligosfile != "") { inputfile = oligosfile;  m->mothurOut("Using " + oligosfile + " as input file for the oligos parameter."); m->mothurOutEndLine(); }
162                 else {
163                     groupfile = m->getGroupFile();
164                     if (groupfile != "") { inputfile = groupfile;  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
165                     else {
166                         m->mothurOut("[ERROR]: You must provide file, groupfile or oligos file for the get.mimarkspackage command."); m->mothurOutEndLine(); abort = true;
167                     }
168                 }
169             }
170             
171             package = validParameter.validFile(parameters, "package", false);         if (package == "not found") { package = "miscellaneous"; }
172             
173             if ((package == "air") || (package == "host_associated") || (package == "human_associated") || (package == "human_gut") || (package == "human_oral") || (package == "human_skin") || (package == "human_vaginal") || (package == "microbial") || (package == "miscellaneous") || (package == "plant_associated") || (package == "sediment") || (package == "soil") || (package == "wastewater") || (package == "water")) {}
174             else {
175                 m->mothurOut("[ERROR]: " + package + " is not a valid package selection. Choices are: air, host_associated, human_associated, human_gut, human_oral, human_skin, human_vaginal, microbial, miscellaneous, plant_associated, sediment, soil, wastewater or water. Aborting.\n."); abort = true;
176             }
177             
178             string temp;
179                         temp = validParameter.validFile(parameters, "requiredonly", false);     if(temp == "not found"){        temp = "F";     }
180                         requiredonly = m->isTrue(temp);
181                 }
182                 
183         }
184         catch(exception& e) {
185                 m->errorOut(e, "GetMIMarksPackageCommand", "GetMIMarksPackageCommand");
186                 exit(1);
187         }
188 }
189 //**********************************************************************************************************************
190
191 int GetMIMarksPackageCommand::execute(){
192         try {
193                 
194                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
195         
196         if (oligosfile != "") { Oligos oligos(oligosfile); Groups = oligos.getGroupNames();  }
197         else if (file != "")  { readFile();     }
198         else {  GroupMap groupmap(groupfile); groupmap.readMap(); Groups = groupmap.getNamesOfGroups(); }
199         
200         if (outputDir == "") { outputDir += m->hasPath(inputfile); }
201         map<string, string> variables;
202                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile));
203                 string outputFileName = getOutputFileName("tsv", variables);
204                 
205         ofstream out;
206                 m->openOutputFile(outputFileName, out);
207                 outputNames.push_back(outputFileName); outputTypes["tsv"].push_back(outputFileName);
208         
209         out << "#This is a tab-delimited file. Additional Documentation can be found at http://www.mothur.org/wiki/MIMarks_Data_Packages." << endl;
210         out << "#Please fill all the required fields indicated with '*'" << endl;
211         out << "#Unknown or inapplicable fields can be assigned NA value." << endl;
212         out << "#You may add extra custom fields to this template. Make sure all the fields are separated by tabs." << endl;
213         out << "#You may remove any fields not required (marked with '*'). Make sure all the fields are separated by tabs." << endl;
214         out << "#You can edit this template using Microsoft Excel or any other editor. But while saving the file please make sure to save them as 'TAB-DELIMITED' TEXT FILE." << endl;
215         
216         if (package == "air") {
217             out << "#Environmental:MIMARKS.specimen.air.3.0" << endl;
218             if (requiredonly) {
219                 out << "*sample_name    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods *altitude" << endl;
220             }else {
221                 out << "*sample_name    description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods rel_to_oxygen   samp_collect_device     samp_mat_process        *altitude       barometric_press        carb_dioxide    carb_monoxide   chem_administration     elev    humidity        methane misc_param      organism_count  oxygen  oxy_stat_samp   perturbation    pollutants      resp_part_matter        samp_size       samp_salinity   samp_store_dur  samp_store_loc  samp_store_temp solar_irradiance        temp    ventilation_rate        ventilation_type        volatile_org_comp       wind_direction  wind_speed" << endl;
222             }
223         }else if (package == "host_associated") {
224             out << "#Environmental:MIMARKS.specimen.host-associated.3.0" << endl;
225             if (requiredonly) {
226                 out << "*sample_name    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods *host   " << endl;
227             }else {
228                 out << "*sample_name    description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods    rel_to_oxygen        samp_collect_device     samp_mat_process        *host   age     altitude        blood_press_diast       blood_press_syst        body_habitat    body_product    tissue  chem_administration     depth   diet    disease_stat    dry_mass        elev    family_relationship     genotype        gravidity       height_or_length        host_body_temp  host_color      host_growth_cond        host_shape      host_subject_id host_taxid      infra_specific_name     infra_specific_rank     last_meal       life_stage      misc_param      organism_count  oxy_stat_samp   perturbation    phenotype       samp_size       samp_salinity   samp_store_dur  samp_store_loc  samp_store_temp sex     substrate       temp    tot_mass" << endl;
229             }
230         }else if (package == "human_associated") {
231             out << "#Environmental:MIMARKS.specimen.human-associated.3.0" << endl;
232             if (requiredonly) {
233                 out << "*sample_name    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods *host" << endl;
234             }else {
235                 out << "*sample_name    description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods rel_to_oxygen   samp_collect_device     samp_mat_process        *host   hiv_stat        ihmc_ethnicity  ihmc_medication_code    age     amniotic_fluid_color    foetal_health_stat      gestation_state maternal_health_stat    blood_blood_disord      body_product    tissue  body_mass_index chem_administration     diet    disease_stat    drug_usage      family_relationship     genotype        height  host_body_temp  host_subject_id last_meal       nose_throat_disord      pulmonary_disord        diet_last_six_month     medic_hist_perform      misc_param      occupation      organism_count  oxy_stat_samp   perturbation    phenotype       pet_farm_animal pulse   samp_size       samp_salinity   samp_store_dur  samp_store_loc  samp_store_temp sex     smoker  study_complt_stat       temp    tot_mass        travel_out_six_month    twin_sibling    urine_collect_meth      kidney_disord   urogenit_tract_disor    weight_loss_3_month" << endl;
236             }
237         }else if (package == "human_gut") {
238             out << "#Environmental:MIMARKS.specimen.human-gut.3.0" << endl;
239             if (requiredonly) {
240                 out << "*sample_name    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods *host" << endl;
241             }else {
242                 out << "*sample_name    description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods rel_to_oxygen   samp_collect_device     samp_mat_process        *host   ihmc_ethnicity  ihmc_medication_code    age     body_product    tissue  body_mass_index chem_administration     diet    disease_stat    family_relationship     gastrointest_disord     genotype        height  host_body_temp  host_subject_id last_meal       liver_disord    medic_hist_perform      misc_param      occupation      organism_count  oxy_stat_samp   perturbation    phenotype       pulse   samp_size       samp_salinity   samp_store_dur  samp_store_loc  samp_store_temp sex     special_diet    temp    tot_mass" << endl;
243             }
244         }else if (package == "human_oral") {
245             out << "#Environmental:MIMARKS.specimen.human-oral.3.0" << endl;
246             if (requiredonly) {
247                 out << "*sample_name    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods *host" << endl;
248             }else {
249                 out << "*sample_name    description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods rel_to_oxygen   samp_collect_device     samp_mat_process        *host   ihmc_ethnicity  ihmc_medication_code    age     body_product    tissue  body_mass_index chem_administration     diet    disease_stat    family_relationship     genotype        height  host_body_temp  host_subject_id last_meal       medic_hist_perform      misc_param      nose_mouth_teeth_throat_disord  occupation      organism_count  oxy_stat_samp   perturbation    phenotype       pulse   samp_size       samp_salinity   samp_store_dur  samp_store_loc  samp_store_temp sex     temp    time_last_toothbrush    tot_mass" << endl;
250             }
251         }else if (package == "human_skin") {
252             out << "#Environmental:MIMARKS.specimen.human-skin.3.0" << endl;
253             if (requiredonly) {
254                 out << "*sample_name    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods *host" << endl;
255             }else {
256                 out << "*sample_name    description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods rel_to_oxygen   samp_collect_device     samp_mat_process        *host   ihmc_ethnicity  ihmc_medication_code    age     body_product    tissue  body_mass_index chem_administration     dermatology_disord      diet    disease_stat    dominant_hand   family_relationship     genotype        height  host_body_temp  host_subject_id last_meal       medic_hist_perform      misc_param      occupation      organism_count  oxy_stat_samp   perturbation    phenotype       pulse   samp_size       samp_salinity   samp_store_dur  samp_store_loc  samp_store_temp sex     temp    time_since_last_wash    tot_mass" << endl;
257             }
258         }else if (package == "human_vaginal") {
259             out << "#Environmental:MIMARKS.specimen.human-vaginal.3.0" << endl;
260             if (requiredonly) {
261                 out << "*sample_name    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods *host" << endl;
262             }else {
263                 out << "*sample_name    description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods rel_to_oxygen   samp_collect_device     samp_mat_process        *host   hrt     ihmc_ethnicity  ihmc_medication_code    age     birth_control   body_product    tissue  body_mass_index chem_administration     diet    disease_stat    douche  family_relationship     genotype        gynecologic_disord      height  host_body_temp  host_subject_id hysterectomy    last_meal       medic_hist_perform      menarche        menopause       misc_param      occupation      organism_count  oxy_stat_samp   perturbation    phenotype       pregnancy       pulse   samp_size       samp_salinity   samp_store_dur  samp_store_loc  samp_store_temp sex     sexual_act      temp    tot_mass        urogenit_disord" << endl;
264             }
265         }else if (package == "microbial") {
266             out << "#Environmental:MIMARKS.specimen.microbial.3.0" << endl;
267             if (requiredonly) {
268                 out << "*sample_name    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods *depth  *elev" << endl;
269             }else {
270                 out << "*sample_name    description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods rel_to_oxygen   samp_collect_device     samp_mat_process        *depth  *elev   alkalinity      alkyl_diethers  altitude        aminopept_act   ammonium        bacteria_carb_prod      biomass bishomohopanol  bromide calcium carb_nitro_ratio        chem_administration     chloride        chlorophyll     diether_lipids  diss_carb_dioxide       diss_hydrogen   diss_inorg_carb diss_org_carb   diss_org_nitro  diss_oxygen     glucosidase_act magnesium       mean_frict_vel  mean_peak_frict_vel     methane misc_param      n_alkanes       nitrate nitrite nitro   org_carb        org_matter      org_nitro       organism_count  oxy_stat_samp   ph      part_org_carb   perturbation    petroleum_hydrocarb     phaeopigments   phosphate       phosplipid_fatt_acid    potassium       pressure        redox_potential salinity        samp_size       samp_store_dur  samp_store_loc  samp_store_temp silicate        sodium  sulfate sulfide temp    tot_carb        tot_nitro       tot_org_carb    turbidity       water_content" << endl;
271             }
272         }else if (package == "miscellaneous") {
273             out << "#Environmental:MIMARKS.specimen.miscellaneous.3.0" << endl;
274             if (requiredonly) {
275                 out << "*sample_name    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *title  *seq_methods    *lat_lon" << endl;
276             }else {
277                 out << "*sample_name    description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods rel_to_oxygen   samp_collect_device     samp_mat_process        alkalinity      altitude        ammonium        biomass bromide calcium chem_administration     chloride        chlorophyll     current density depth   diether_lipids  diss_carb_dioxide       diss_hydrogen   diss_inorg_carb diss_org_nitro  diss_oxygen     elev    misc_param      nitrate nitrite nitro   org_carb        org_matter      org_nitro       organism_count  oxy_stat_samp   ph      perturbation    phosphate       phosplipid_fatt_acid    potassium       pressure        salinity        samp_size       samp_store_dur  samp_store_loc  samp_store_temp silicate        sodium  sulfate sulfide temp" << endl;
278             }
279         }else if (package == "plant_associated") {
280             out << "#Environmental:MIMARKS.specimen.plant-associated.3.0" << endl;
281             if (requiredonly) {
282                 out << "*sample_name    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods *host" << endl;
283             }else {
284                 out << "*sample_name    description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods rel_to_oxygen   samp_collect_device     samp_mat_process        *host   age     air_temp_regm   altitude        antibiotic_regm body_product    chem_administration     chem_mutagen    climate_environment     depth   disease_stat    dry_mass        elev    fertilizer_regm fungicide_regm  gaseous_environment     genotype        gravity growth_hormone_regm     growth_med      height_or_length        herbicide_regm  host_taxid      humidity_regm   infra_specific_name     infra_specific_rank     life_stage      mechanical_damage       mineral_nutr_regm       misc_param      non_mineral_nutr_regm   organism_count  oxy_stat_samp   ph_regm perturbation    pesticide_regm  phenotype       tissue  plant_product   radiation_regm  rainfall_regm   salt_regm       samp_size       samp_salinity   samp_store_dur  samp_store_loc  samp_store_temp season_environment      standing_water_regm     temp    tiss_cult_growth_med    tot_mass        water_temp_regm watering_regm   wet_mass" << endl;
285             }
286         }else if (package == "sediment") {
287             out << "#Environmental:MIMARKS.specimen.sediment.3.0" << endl;
288             if (requiredonly) {
289                 out << "*sample_name    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods *depth  *elev" << endl;
290             }else {
291                 out << "*sample_name    description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods rel_to_oxygen   samp_collect_device     samp_mat_process        *depth  *elev   alkalinity      alkyl_diethers  aminopept_act   ammonium        bacteria_carb_prod      biomass bishomohopanol  bromide calcium carb_nitro_ratio        chem_administration     chloride        chlorophyll     density diether_lipids  diss_carb_dioxide       diss_hydrogen   diss_inorg_carb diss_org_carb   diss_org_nitro  diss_oxygen     glucosidase_act magnesium       mean_frict_vel  mean_peak_frict_vel     methane misc_param      n_alkanes       nitrate nitrite nitro   org_carb        org_matter      org_nitro       organism_count  oxy_stat_samp   ph      particle_class  part_org_carb   perturbation    petroleum_hydrocarb     phaeopigments   phosphate       phosplipid_fatt_acid    porosity        potassium       pressure        redox_potential salinity        samp_size       samp_store_dur  samp_store_loc  samp_store_temp sediment_type   silicate        sodium  sulfate sulfide temp    tidal_stage     tot_carb        tot_nitro       tot_org_carb    turbidity       water_content" << endl;
292             }
293         }else if (package == "soil") {
294             out << "#Environmental:MIMARKS.specimen.soil.3.0" << endl;
295             if (requiredonly) {
296                 out << "*sample_name    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods *depth  *elev" << endl;
297             }else {
298                 out << "*sample_name    description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods rel_to_oxygen   samp_collect_device     samp_mat_process        *depth  *elev   altitude        sieving cur_land_use    cur_vegetation_meth     cur_vegetation  drainage_class  al_sat  al_sat_meth     heavy_metals_meth       heavy_metals    salinity_meth   extreme_salinity        fao_class       agrochem_addition       crop_rotation   extreme_event   fire    flooding        previous_land_use_meth  previous_land_use       tillage horizon_meth    horizon link_class_info link_climate_info       link_addit_analys       annual_season_precpt    annual_season_temp      microbial_biomass_meth  microbial_biomass       misc_param      other   ph_meth ph      pool_dna_extracts       profile_position        samp_size       samp_weight_dna_ext     slope_aspect    slope_gradient  soil_type_meth  soil_type       local_class_meth        local_class     store_cond      texture_meth    texture tot_n_meth      tot_n   tot_org_c_meth  tot_org_carb    water_content_soil_meth water_content_soil" << endl;
299             }
300         }else if (package == "wastewater") {
301             out << "#Environmental:MIMARKS.specimen.wastewater.3.0" << endl;
302             if (requiredonly) {
303                 out << "*sample_name    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods" << endl;
304             }else {
305                 out << "*sample_name    description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods rel_to_oxygen   samp_collect_device     samp_mat_process        alkalinity      biochem_oxygen_dem      chem_administration     chem_oxygen_dem depth   efficiency_percent      emulsions       gaseous_substances      indust_eff_percent      inorg_particles misc_param      nitrate org_particles   organism_count  oxy_stat_samp   ph      perturbation    phosphate       pre_treatment   primary_treatment       reactor_type    samp_size       samp_salinity   samp_store_dur  samp_store_loc  samp_store_temp secondary_treatment     sewage_type     sludge_retent_time      sodium  soluble_inorg_mat       soluble_org_mat suspend_solids  temp    tertiary_treatment      tot_nitro       tot_phosphate   wastewater_type" << endl;
306             }
307         }else if (package == "water") {
308             out << "#Environmental:MIMARKS.specimen.water.3.0" << endl;
309             if (requiredonly) {
310                 out << "*sample_name    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods *depth" << endl;
311             }else {
312                 out << "*sample_name    description     bioproject_id   sample_title    *organism       *collection_date        *biome  *feature        *material       *geo_loc_name   *lat_lon   *title  *seq_methods rel_to_oxygen   samp_collect_device     samp_mat_process        *depth  alkalinity      alkyl_diethers  aminopept_act   ammonium        atmospheric_data        bacteria_carb_prod      biomass bishomohopanol  bromide calcium carb_nitro_ratio        chem_administration     chloride        chlorophyll     current density diether_lipids  diss_carb_dioxide       diss_hydrogen   diss_inorg_carb diss_inorg_nitro        diss_inorg_phosp        diss_org_carb   diss_org_nitro  diss_oxygen     elev    glucosidase_act light_intensity magnesium       mean_frict_vel  mean_peak_frict_vel     misc_param      n_alkanes       nitrate nitrite nitro   org_carb        org_matter      org_nitro       organism_count  oxy_stat_samp   ph      part_org_carb   part_org_nitro  perturbation    petroleum_hydrocarb     phaeopigments   phosphate       phosplipid_fatt_acid    photon_flux     potassium       pressure        primary_prod    redox_potential salinity        samp_size       samp_store_dur  samp_store_loc  samp_store_temp silicate        sodium  soluble_react_phosp     sulfate sulfide suspend_part_matter     temp    tidal_stage     tot_depth_water_col     tot_diss_nitro  tot_inorg_nitro tot_nitro       tot_part_carb   tot_phosp" << endl;
313             }
314         }
315         
316         for (int i = 0; i < Groups.size(); i++) {  out << Groups[i] << '\t' << endl; }
317         
318         out.close();
319         
320         //output files created by command
321                 m->mothurOutEndLine();
322                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
323                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
324                 m->mothurOutEndLine();
325         return 0;
326                 
327     }
328         catch(exception& e) {
329                 m->errorOut(e, "GetMIMarksPackageCommand", "execute");
330                 exit(1);
331         }
332 }
333 //***************************************************************************************************************
334
335 // going to have to rework this to allow for other options --
336 /*
337  file option 1
338  
339  sfffile1   oligosfile1
340  sfffile2   oligosfile2
341  ...
342  
343  file option 2
344  
345  fastqfile1 oligosfile1
346  fastqfile2 oligosfile2
347  ...
348  
349  file option 3
350  
 group  forwardfastqfile  reversefastqfile
 group  forwardfastqfile  reversefastqfile
 group  forwardfastqfile  reversefastqfile
354  ...
355  
356  */
357
358 int GetMIMarksPackageCommand::readFile(){
359         try {
360         Oligos oligos;
361         inputfile = file;
362         
363         ifstream in;
364         m->openInputFile(file, in);
365         
366         while(!in.eof()) {
367             
368             if (m->control_pressed) { return 0; }
369             
370             string line = m->getline(in);  m->gobble(in);
371             vector<string> pieces = m->splitWhiteSpace(line);
372             
373             string group = "";
374             string thisFileName1, thisFileName2; thisFileName1 = ""; thisFileName2 = "";
375             if (pieces.size() == 2) {
376                 thisFileName1 = pieces[0];
377                 thisFileName2 = pieces[1];
378             }else if (pieces.size() == 3) {
379                 thisFileName1 = pieces[1];
380                 thisFileName2 = pieces[2];
381                 string group = pieces[0];
382             }else {
383                 m->mothurOut("[ERROR]: file lines can be 2 or 3 columns. The 2 column files are sff file then oligos or fastqfile then oligos. You may have multiple lines in the file.  The 3 column files are for paired read libraries. The format is groupName, forwardFastqFile reverseFastqFile. \n"); m->control_pressed = true;
384             }
385             
386             if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", thisFileName1 = " + thisFileName1 + ", thisFileName2 = " + thisFileName2  + ".\n"); }
387             
388             if (inputDir != "") {
389                 string path = m->hasPath(thisFileName2);
390                 if (path == "") {  thisFileName2 = inputDir + thisFileName2;  }
391                 
392                 path = m->hasPath(thisFileName1);
393                 if (path == "") {  thisFileName1 = inputDir + thisFileName1;  }
394             }
395             
396             //check to make sure both are able to be opened
397             ifstream in2;
398             int openForward = m->openInputFile(thisFileName1, in2, "noerror");
399             
400             //if you can't open it, try default location
401             if (openForward == 1) {
402                 if (m->getDefaultPath() != "") { //default path is set
403                     string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName1);
404                     m->mothurOut("Unable to open " + thisFileName1 + ". Trying default " + tryPath); m->mothurOutEndLine();
405                     ifstream in3;
406                     openForward = m->openInputFile(tryPath, in3, "noerror");
407                     in3.close();
408                     thisFileName1 = tryPath;
409                 }
410             }
411             
412             //if you can't open it, try output location
413             if (openForward == 1) {
414                 if (m->getOutputDir() != "") { //default path is set
415                     string tryPath = m->getOutputDir() + m->getSimpleName(thisFileName1);
416                     m->mothurOut("Unable to open " + thisFileName1 + ". Trying output directory " + tryPath); m->mothurOutEndLine();
417                     ifstream in4;
418                     openForward = m->openInputFile(tryPath, in4, "noerror");
419                     thisFileName1 = tryPath;
420                     in4.close();
421                 }
422             }
423             
424             if (openForward == 1) { //can't find it
425                 m->mothurOut("[WARNING]: can't find " + thisFileName1 + ", ignoring.\n");
426             }else{  in2.close();  }
427             
428             int openReverse = 1;
429             
430             ifstream in3;
431             openReverse = m->openInputFile(thisFileName2, in3, "noerror");
432             
433             //if you can't open it, try default location
434             if (openReverse == 1) {
435                 if (m->getDefaultPath() != "") { //default path is set
436                     string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName2);
437                     m->mothurOut("Unable to open " + thisFileName2 + ". Trying default " + tryPath); m->mothurOutEndLine();
438                     ifstream in3;
439                     openReverse = m->openInputFile(tryPath, in3, "noerror");
440                     in3.close();
441                     thisFileName2 = tryPath;
442                 }
443             }
444             
445             //if you can't open it, try output location
446             if (openReverse == 1) {
447                 if (m->getOutputDir() != "") { //default path is set
448                     string tryPath = m->getOutputDir() + m->getSimpleName(thisFileName2);
449                     m->mothurOut("Unable to open " + thisFileName2 + ". Trying output directory " + tryPath); m->mothurOutEndLine();
450                     ifstream in4;
451                     openReverse = m->openInputFile(tryPath, in4, "noerror");
452                     thisFileName2 = tryPath;
453                     in4.close();
454                 }
455             }
456             
457             if (openReverse == 1) { //can't find it
458                 m->mothurOut("[WARNING]: can't find " + thisFileName2 + ", ignoring pair.\n");
459             }else{  in3.close();  }
460             
461             
462             if ((pieces.size() == 2) && (openForward != 1) && (openReverse != 1)) { //good pair and sff or fastq and oligos
463                     oligosfile = thisFileName2;
464                     if (m->debug) { m->mothurOut("[DEBUG]: about to read oligos\n"); }
465                     oligos.read(oligosfile);
466             }else if((pieces.size() == 3) && (openForward != 1) && (openReverse != 1)) { //good pair and paired read
467                 Groups.push_back(group);
468             }
469         }
470         in.close();
471         
472         Groups = oligos.getGroupNames();
473         
474         inputfile = file;
475         
476         return 0;
477     }
478         catch(exception& e) {
479                 m->errorOut(e, "GetMIMarksPackageCommand", "readFile");
480                 exit(1);
481         }
482 }
483 //**********************************************************************************************************************
484
485