2 // getmimarkspackagecommand.cpp
5 // Created by Sarah Westcott on 3/25/14.
6 // Copyright (c) 2014 Schloss Lab. All rights reserved.
9 #include "getmimarkspackagecommand.h"
12 //**********************************************************************************************************************
// Declares every parameter the get.mimarkspackage command accepts (group, file, oligos,
// package, requiredonly, inputdir, outputdir), appends each one to `parameters`, and
// copies the parameter names into myArray.
13 vector<string> GetMIMarksPackageCommand::setParameters(){
15 //files that have dependencies
16 CommandParameter pgroup("group", "InputTypes", "", "", "groupOligos", "none", "none","",false,false); parameters.push_back(pgroup);
17 CommandParameter pfile("file", "InputTypes", "", "", "groupOligos", "none", "none","",false,false); parameters.push_back(pfile);
18 CommandParameter poligos("oligos", "InputTypes", "", "", "groupOligos", "none", "none","",false,false); parameters.push_back(poligos);
//package selection and the requiredonly switch
19 CommandParameter ppackage("package", "Multiple", "air-host_associated-human_associated-human_gut-human_oral-human_skin-human_vaginal-microbial-miscellaneous-plant_associated-sediment-soil-wastewater-water", "miscellaneous", "", "", "","",false,false,true); parameters.push_back(ppackage);
20 CommandParameter prequiredonly("requiredonly", "Boolean", "", "F", "", "", "","",false,false, true); parameters.push_back(prequiredonly);
//directory overrides
21 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
22 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
//collect the parameter names in the order they were registered above
24 vector<string> myArray;
25 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
//report any exception together with the class and function it came from
29 m->errorOut(e, "GetMIMarksPackageCommand", "setParameters");
33 //**********************************************************************************************************************
// Builds the help text shown for the get.mimarkspackage command.
// The text lists every input the command accepts (file, oligos, group), the valid package
// names (matching the choices validated by the option constructor) and the requiredonly flag.
34 string GetMIMarksPackageCommand::getHelpString(){
36 string helpString = "";
37 helpString += "The get.mimarkspackage command creates a mimarks package form with your groups. The required fields are flagged with * characters. Fields marked with '**' indicate they are in a group where at least one of the fields is required.\n";
38 helpString += "Further documentation on the different packages and required formats can be found here, http://www.mothur.org/wiki/MIMarks_Data_Packages.\n";
//file is accepted alongside oligos and group, so it is listed here as well
39 helpString += "The get.mimarkspackage command parameters are: file, oligos, group, package and requiredonly. file, oligos or group is required.\n";
helpString += "The file parameter is used to provide a file with 2 or 3 columns so mothur can extract your group names. The 2 column files are sff file then oligos or fastqfile then oligos. The 3 column files are for paired read libraries, groupName forwardFastqFile reverseFastqFile.\n";
40 helpString += "The oligos parameter is used to provide your oligos file so mothur can extract your group names.\n";
41 helpString += "The group parameter is used to provide your group file so mothur can extract your group names.\n";
//the package names below must stay in sync with the choices accepted by the command
42 helpString += "The package parameter is used to select the mimarks package you would like to use. The choices are: air, host_associated, human_associated, human_gut, human_oral, human_skin, human_vaginal, microbial, miscellaneous, plant_associated, sediment, soil, wastewater or water. Default=miscellaneous.\n";
43 helpString += "The requiredonly parameter is used to indicate you only want the required mimarks fields printed. Default=F.\n";
44 helpString += "The get.mimarkspackage command should be in the following format: get.mimarkspackage(oligos=yourOligosFile, package=yourPackage)\n";
45 helpString += "get.mimarkspackage(oligos=GQY1XT001.oligos, package=human_gut)\n";
//report any exception together with the class and function it came from
49 m->errorOut(e, "GetMIMarksPackageCommand", "getHelpString");
53 //**********************************************************************************************************************
// Chooses the output filename pattern for the requested output type.
// Only "tsv" is defined ("[filename],tsv"); any other type logs an error and sets
// m->control_pressed.
54 string GetMIMarksPackageCommand::getOutputPattern(string type) {
58 if (type == "tsv") { pattern = "[filename],tsv"; }
59 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
//report any exception together with the class and function it came from
64 m->errorOut(e, "GetMIMarksPackageCommand", "getOutputPattern");
68 //**********************************************************************************************************************
// Default constructor: marks the command as aborted/help-only and registers an empty
// list of output files under the "tsv" output type.
69 GetMIMarksPackageCommand::GetMIMarksPackageCommand(){
71 abort = true; calledHelp = true;
//start with no output files recorded for the tsv type
73 vector<string> tempOutNames;
74 outputTypes["tsv"] = tempOutNames;
//report any exception together with the class and function it came from
77 m->errorOut(e, "GetMIMarksPackageCommand", "GetMIMarksPackageCommand");
81 //**********************************************************************************************************************
// Parses the option string for get.mimarkspackage: handles help/citation requests, validates
// the parameter names, resolves the input file (group, file or oligos), and reads the package
// and requiredonly settings. Sets abort when a file cannot be opened, when the inputs are
// inconsistent, or when the package name is not one of the accepted choices.
// Exactly one of group, file or oligos may be supplied. Supplying any two of them is rejected;
// previously only the case where all three were given was caught, although the error message
// already said "only one".
82 GetMIMarksPackageCommand::GetMIMarksPackageCommand(string option) {
85 abort = false; calledHelp = false;
87 //allow user to run help
88 if(option == "help") { help(); abort = true; calledHelp = true; }
89 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
92 //valid parameters for this command
93 vector<string> myArray = setParameters();
95 OptionParser parser(option);
96 map<string,string> parameters = parser.getParameters();
98 ValidParameters validParameter;
99 map<string,string>::iterator it;
100 //check to make sure all parameters are valid for command
101 for (it = parameters.begin(); it != parameters.end(); it++) {
102 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
//start with no output files recorded for the tsv type
105 vector<string> tempOutNames;
106 outputTypes["tsv"] = tempOutNames;
108 //if the user changes the input directory command factory will send this info to us in the output parameter
109 string inputDir = validParameter.validFile(parameters, "inputdir", false);
110 if (inputDir == "not found"){ inputDir = ""; }
114 it = parameters.find("oligos");
115 //user has given an oligos file
116 if(it != parameters.end()){
117 path = m->hasPath(it->second);
118 //if the user has not given a path then, add inputdir. else leave path alone.
119 if (path == "") { parameters["oligos"] = inputDir + it->second; }
122 it = parameters.find("group");
123 //user has given a group file
124 if(it != parameters.end()){
125 path = m->hasPath(it->second);
126 //if the user has not given a path then, add inputdir. else leave path alone.
127 if (path == "") { parameters["group"] = inputDir + it->second; }
130 it = parameters.find("file");
131 //user has given a file file
132 if(it != parameters.end()){
133 path = m->hasPath(it->second);
134 //if the user has not given a path then, add inputdir. else leave path alone.
//validate each input file; "not open" aborts, "not found" just means it was not supplied
139 groupfile = validParameter.validFile(parameters, "group", true);
140 if (groupfile == "not open") { groupfile = ""; abort = true; }
141 else if (groupfile == "not found") { groupfile = ""; }
142 else { m->setGroupFile(groupfile); inputfile = groupfile; }
144 file = validParameter.validFile(parameters, "file", true);
145 if (file == "not open") { file = ""; abort = true; }
146 else if (file == "not found") { file = ""; }
147 else { inputfile = file; }
//note: unlike group and file, oligosfile keeps the "not open" value when the file cannot be opened
149 oligosfile = validParameter.validFile(parameters, "oligos", true);
150 if (oligosfile == "not found") { oligosfile = ""; }
151 else if(oligosfile == "not open") { abort = true; }
152 else { m->setOligosFile(oligosfile); inputfile = oligosfile; }
//the three inputs are mutually exclusive: reject the request as soon as more than one is supplied
154 if ((((groupfile != "") ? 1 : 0) + ((oligosfile != "") ? 1 : 0) + ((file != "") ? 1 : 0)) > 1) {
155 m->mothurOut("[ERROR]: You may not use a group file, file and an oligos file, only one."); m->mothurOutEndLine(); abort = true;
//nothing supplied: fall back to the current oligos file, then the current group file
158 if ((groupfile == "") && (oligosfile == "") && (file == "")) {
159 oligosfile = m->getOligosFile();
160 if (oligosfile != "") { inputfile = oligosfile; m->mothurOut("Using " + oligosfile + " as input file for the oligos parameter."); m->mothurOutEndLine(); }
162 groupfile = m->getGroupFile();
163 if (groupfile != "") { inputfile = groupfile; m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
165 m->mothurOut("[ERROR]: You must provide file, groupfile or oligos file for the get.mimarkspackage command."); m->mothurOutEndLine(); abort = true;
//package defaults to miscellaneous and must be one of the known choices
170 package = validParameter.validFile(parameters, "package", false); if (package == "not found") { package = "miscellaneous"; }
172 if ((package == "air") || (package == "host_associated") || (package == "human_associated") || (package == "human_gut") || (package == "human_oral") || (package == "human_skin") || (package == "human_vaginal") || (package == "microbial") || (package == "miscellaneous") || (package == "plant_associated") || (package == "sediment") || (package == "soil") || (package == "wastewater") || (package == "water")) {}
174 m->mothurOut("[ERROR]: " + package + " is not a valid package selection. Choices are: air, host_associated, human_associated, human_gut, human_oral, human_skin, human_vaginal, microbial, miscellaneous, plant_associated, sediment, soil, wastewater or water. Aborting.\n"); abort = true;
//requiredonly defaults to F
178 temp = validParameter.validFile(parameters, "requiredonly", false); if(temp == "not found"){ temp = "F"; }
179 requiredonly = m->isTrue(temp);
183 catch(exception& e) {
184 m->errorOut(e, "GetMIMarksPackageCommand", "GetMIMarksPackageCommand");
188 //**********************************************************************************************************************
// Writes the MIMARKS template (.tsv) for the selected package: gathers the group names,
// opens the output file, writes the instruction comment lines and the package's column
// header, then writes one row per group.
// Returns 0 when aborting after a help/citation request and 2 for any other abort.
190 int GetMIMarksPackageCommand::execute(){
193 if (abort == true) { if (calledHelp) { return 0; } return 2; }
//group names come from the oligos file if set, otherwise the file file, otherwise the group file
195 if (oligosfile != "") { readOligos(); }
196 else if (file != "") { readFile(); }
197 else { GroupMap groupmap(groupfile); groupmap.readMap(); Groups = groupmap.getNamesOfGroups(); }
//when no output directory was given, write next to the input file
199 if (outputDir == "") { outputDir += m->hasPath(inputfile); }
200 map<string, string> variables;
201 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile));
202 string outputFileName = getOutputFileName("tsv", variables);
205 m->openOutputFile(outputFileName, out);
206 outputNames.push_back(outputFileName); outputTypes["tsv"].push_back(outputFileName);
//instruction lines at the top of the template, each starting with '#'
208 out << "#This is a tab-delimited file. Additional Documentation can be found at http://www.mothur.org/wiki/MIMarks_Data_Packages." << endl;
209 out << "#Please fill all the required fields indicated with '*'" << endl;
210 out << "#Fields marked with '**' indicated they are in a group where at least one of the fields is required." << endl;
211 out << "#Unknown or inapplicable fields can be assigned NA value." << endl;
212 out << "#You may add extra custom fields to this template. Make sure all the fields are separated by tabs." << endl;
213 out << "#You may remove any fields not required (marked with '*'). Make sure all the fields are separated by tabs." << endl;
214 out << "#You can edit this template using Microsoft Excel or any other editor. But while saving the file please make sure to save them as 'TAB-DELIMITED' TEXT FILE." << endl;
//each package writes its "#Environmental:" tag followed by a header row; two candidate rows
//are listed per package: the required ('*' / '**') columns only, and the full column list.
//NOTE(review): the condition choosing between the two rows is not visible here - presumably requiredonly; confirm.
216 if (package == "air") {
217 out << "#Environmental:MIMARKS.specimen.air.3.0" << endl;
219 out << "*sample_name *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods *altitude" << endl;
221 out << "*sample_name description bioproject_id sample_title *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods rel_to_oxygen samp_collect_device samp_mat_process *altitude barometric_press carb_dioxide carb_monoxide chem_administration elev humidity methane misc_param organism_count oxygen oxy_stat_samp perturbation pollutants resp_part_matter samp_size samp_salinity samp_store_dur samp_store_loc samp_store_temp solar_irradiance temp ventilation_rate ventilation_type volatile_org_comp wind_direction wind_speed" << endl;
223 }else if (package == "host_associated") {
224 out << "#Environmental:MIMARKS.specimen.host-associated.3.0" << endl;
226 out << "*sample_name *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods *host **clone **isolate **strain" << endl;
228 out << "*sample_name description bioproject_id sample_title *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods **clone **isolate **strain rel_to_oxygen samp_collect_device samp_mat_process *host age altitude blood_press_diast blood_press_syst body_habitat body_product tissue chem_administration depth diet disease_stat dry_mass elev family_relationship genotype gravidity height_or_length host_body_temp host_color host_growth_cond host_shape host_subject_id host_taxid infra_specific_name infra_specific_rank last_meal life_stage misc_param organism_count oxy_stat_samp perturbation phenotype samp_size samp_salinity samp_store_dur samp_store_loc samp_store_temp sex substrate temp tot_mass" << endl;
230 }else if (package == "human_associated") {
231 out << "#Environmental:MIMARKS.specimen.human-associated.3.0" << endl;
233 out << "*sample_name *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods *host" << endl;
235 out << "*sample_name description bioproject_id sample_title *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods rel_to_oxygen samp_collect_device samp_mat_process *host hiv_stat ihmc_ethnicity ihmc_medication_code age amniotic_fluid_color foetal_health_stat gestation_state maternal_health_stat blood_blood_disord body_product tissue body_mass_index chem_administration diet disease_stat drug_usage family_relationship genotype height host_body_temp host_subject_id last_meal nose_throat_disord pulmonary_disord diet_last_six_month medic_hist_perform misc_param occupation organism_count oxy_stat_samp perturbation phenotype pet_farm_animal pulse samp_size samp_salinity samp_store_dur samp_store_loc samp_store_temp sex smoker study_complt_stat temp tot_mass travel_out_six_month twin_sibling urine_collect_meth kidney_disord urogenit_tract_disor weight_loss_3_month" << endl;
237 }else if (package == "human_gut") {
238 out << "#Environmental:MIMARKS.specimen.human-gut.3.0" << endl;
240 out << "*sample_name *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods *host" << endl;
242 out << "*sample_name description bioproject_id sample_title *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods rel_to_oxygen samp_collect_device samp_mat_process *host ihmc_ethnicity ihmc_medication_code age body_product tissue body_mass_index chem_administration diet disease_stat family_relationship gastrointest_disord genotype height host_body_temp host_subject_id last_meal liver_disord medic_hist_perform misc_param occupation organism_count oxy_stat_samp perturbation phenotype pulse samp_size samp_salinity samp_store_dur samp_store_loc samp_store_temp sex special_diet temp tot_mass" << endl;
244 }else if (package == "human_oral") {
245 out << "#Environmental:MIMARKS.specimen.human-oral.3.0" << endl;
247 out << "*sample_name *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods *host" << endl;
249 out << "*sample_name description bioproject_id sample_title *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods rel_to_oxygen samp_collect_device samp_mat_process *host ihmc_ethnicity ihmc_medication_code age body_product tissue body_mass_index chem_administration diet disease_stat family_relationship genotype height host_body_temp host_subject_id last_meal medic_hist_perform misc_param nose_mouth_teeth_throat_disord occupation organism_count oxy_stat_samp perturbation phenotype pulse samp_size samp_salinity samp_store_dur samp_store_loc samp_store_temp sex temp time_last_toothbrush tot_mass" << endl;
251 }else if (package == "human_skin") {
252 out << "#Environmental:MIMARKS.specimen.human-skin.3.0" << endl;
254 out << "*sample_name *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods *host" << endl;
256 out << "*sample_name description bioproject_id sample_title *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods rel_to_oxygen samp_collect_device samp_mat_process *host ihmc_ethnicity ihmc_medication_code age body_product tissue body_mass_index chem_administration dermatology_disord diet disease_stat dominant_hand family_relationship genotype height host_body_temp host_subject_id last_meal medic_hist_perform misc_param occupation organism_count oxy_stat_samp perturbation phenotype pulse samp_size samp_salinity samp_store_dur samp_store_loc samp_store_temp sex temp time_since_last_wash tot_mass" << endl;
258 }else if (package == "human_vaginal") {
259 out << "#Environmental:MIMARKS.specimen.human-vaginal.3.0" << endl;
261 out << "*sample_name *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods *host" << endl;
263 out << "*sample_name description bioproject_id sample_title *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods rel_to_oxygen samp_collect_device samp_mat_process *host hrt ihmc_ethnicity ihmc_medication_code age birth_control body_product tissue body_mass_index chem_administration diet disease_stat douche family_relationship genotype gynecologic_disord height host_body_temp host_subject_id hysterectomy last_meal medic_hist_perform menarche menopause misc_param occupation organism_count oxy_stat_samp perturbation phenotype pregnancy pulse samp_size samp_salinity samp_store_dur samp_store_loc samp_store_temp sex sexual_act temp tot_mass urogenit_disord" << endl;
265 }else if (package == "microbial") {
266 out << "#Environmental:MIMARKS.specimen.microbial.3.0" << endl;
268 out << "*sample_name *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods *depth *elev" << endl;
270 out << "*sample_name description bioproject_id sample_title *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods rel_to_oxygen samp_collect_device samp_mat_process *depth *elev alkalinity alkyl_diethers altitude aminopept_act ammonium bacteria_carb_prod biomass bishomohopanol bromide calcium carb_nitro_ratio chem_administration chloride chlorophyll diether_lipids diss_carb_dioxide diss_hydrogen diss_inorg_carb diss_org_carb diss_org_nitro diss_oxygen glucosidase_act magnesium mean_frict_vel mean_peak_frict_vel methane misc_param n_alkanes nitrate nitrite nitro org_carb org_matter org_nitro organism_count oxy_stat_samp ph part_org_carb perturbation petroleum_hydrocarb phaeopigments phosphate phosplipid_fatt_acid potassium pressure redox_potential salinity samp_size samp_store_dur samp_store_loc samp_store_temp silicate sodium sulfate sulfide temp tot_carb tot_nitro tot_org_carb turbidity water_content" << endl;
272 }else if (package == "miscellaneous") {
273 out << "#Environmental:MIMARKS.specimen.miscellaneous.3.0" << endl;
275 out << "*sample_name *organism *collection_date *biome *feature *material *geo_loc_name *title *seq_methods *lat_lon" << endl;
277 out << "*sample_name description bioproject_id sample_title *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods rel_to_oxygen samp_collect_device samp_mat_process alkalinity altitude ammonium biomass bromide calcium chem_administration chloride chlorophyll current density depth diether_lipids diss_carb_dioxide diss_hydrogen diss_inorg_carb diss_org_nitro diss_oxygen elev misc_param nitrate nitrite nitro org_carb org_matter org_nitro organism_count oxy_stat_samp ph perturbation phosphate phosplipid_fatt_acid potassium pressure salinity samp_size samp_store_dur samp_store_loc samp_store_temp silicate sodium sulfate sulfide temp" << endl;
279 }else if (package == "plant_associated") {
280 out << "#Environmental:MIMARKS.specimen.plant-associated.3.0" << endl;
282 out << "*sample_name *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods *host" << endl;
284 out << "*sample_name description bioproject_id sample_title *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods rel_to_oxygen samp_collect_device samp_mat_process *host age air_temp_regm altitude antibiotic_regm body_product chem_administration chem_mutagen climate_environment depth disease_stat dry_mass elev fertilizer_regm fungicide_regm gaseous_environment genotype gravity growth_hormone_regm growth_med height_or_length herbicide_regm host_taxid humidity_regm infra_specific_name infra_specific_rank life_stage mechanical_damage mineral_nutr_regm misc_param non_mineral_nutr_regm organism_count oxy_stat_samp ph_regm perturbation pesticide_regm phenotype tissue plant_product radiation_regm rainfall_regm salt_regm samp_size samp_salinity samp_store_dur samp_store_loc samp_store_temp season_environment standing_water_regm temp tiss_cult_growth_med tot_mass water_temp_regm watering_regm wet_mass" << endl;
286 }else if (package == "sediment") {
287 out << "#Environmental:MIMARKS.specimen.sediment.3.0" << endl;
289 out << "*sample_name *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods *depth *elev" << endl;
291 out << "*sample_name description bioproject_id sample_title *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods rel_to_oxygen samp_collect_device samp_mat_process *depth *elev alkalinity alkyl_diethers aminopept_act ammonium bacteria_carb_prod biomass bishomohopanol bromide calcium carb_nitro_ratio chem_administration chloride chlorophyll density diether_lipids diss_carb_dioxide diss_hydrogen diss_inorg_carb diss_org_carb diss_org_nitro diss_oxygen glucosidase_act magnesium mean_frict_vel mean_peak_frict_vel methane misc_param n_alkanes nitrate nitrite nitro org_carb org_matter org_nitro organism_count oxy_stat_samp ph particle_class part_org_carb perturbation petroleum_hydrocarb phaeopigments phosphate phosplipid_fatt_acid porosity potassium pressure redox_potential salinity samp_size samp_store_dur samp_store_loc samp_store_temp sediment_type silicate sodium sulfate sulfide temp tidal_stage tot_carb tot_nitro tot_org_carb turbidity water_content" << endl;
293 }else if (package == "soil") {
294 out << "#Environmental:MIMARKS.specimen.soil.3.0" << endl;
296 out << "*sample_name *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods *depth *elev" << endl;
298 out << "*sample_name description bioproject_id sample_title *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods rel_to_oxygen samp_collect_device samp_mat_process *depth *elev altitude sieving cur_land_use cur_vegetation_meth cur_vegetation drainage_class al_sat al_sat_meth heavy_metals_meth heavy_metals salinity_meth extreme_salinity fao_class agrochem_addition crop_rotation extreme_event fire flooding previous_land_use_meth previous_land_use tillage horizon_meth horizon link_class_info link_climate_info link_addit_analys annual_season_precpt annual_season_temp microbial_biomass_meth microbial_biomass misc_param other ph_meth ph pool_dna_extracts profile_position samp_size samp_weight_dna_ext slope_aspect slope_gradient soil_type_meth soil_type local_class_meth local_class store_cond texture_meth texture tot_n_meth tot_n tot_org_c_meth tot_org_carb water_content_soil_meth water_content_soil" << endl;
300 }else if (package == "wastewater") {
301 out << "#Environmental:MIMARKS.specimen.wastewater.3.0" << endl;
303 out << "*sample_name *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods" << endl;
305 out << "*sample_name description bioproject_id sample_title *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods rel_to_oxygen samp_collect_device samp_mat_process alkalinity biochem_oxygen_dem chem_administration chem_oxygen_dem depth efficiency_percent emulsions gaseous_substances indust_eff_percent inorg_particles misc_param nitrate org_particles organism_count oxy_stat_samp ph perturbation phosphate pre_treatment primary_treatment reactor_type samp_size samp_salinity samp_store_dur samp_store_loc samp_store_temp secondary_treatment sewage_type sludge_retent_time sodium soluble_inorg_mat soluble_org_mat suspend_solids temp tertiary_treatment tot_nitro tot_phosphate wastewater_type" << endl;
307 }else if (package == "water") {
308 out << "#Environmental:MIMARKS.specimen.water.3.0" << endl;
310 out << "*sample_name *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods *depth" << endl;
312 out << "*sample_name description bioproject_id sample_title *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods rel_to_oxygen samp_collect_device samp_mat_process *depth alkalinity alkyl_diethers aminopept_act ammonium atmospheric_data bacteria_carb_prod biomass bishomohopanol bromide calcium carb_nitro_ratio chem_administration chloride chlorophyll current density diether_lipids diss_carb_dioxide diss_hydrogen diss_inorg_carb diss_inorg_nitro diss_inorg_phosp diss_org_carb diss_org_nitro diss_oxygen elev glucosidase_act light_intensity magnesium mean_frict_vel mean_peak_frict_vel misc_param n_alkanes nitrate nitrite nitro org_carb org_matter org_nitro organism_count oxy_stat_samp ph part_org_carb part_org_nitro perturbation petroleum_hydrocarb phaeopigments phosphate phosplipid_fatt_acid photon_flux potassium pressure primary_prod redox_potential salinity samp_size samp_store_dur samp_store_loc samp_store_temp silicate sodium soluble_react_phosp sulfate sulfide suspend_part_matter temp tidal_stage tot_depth_water_col tot_diss_nitro tot_inorg_nitro tot_nitro tot_part_carb tot_phosp" << endl;
//one row per group: the group name followed by a tab, leaving the remaining columns to be filled in
316 for (int i = 0; i < Groups.size(); i++) { out << Groups[i] << '\t' << endl; }
320 //output files created by command
321 m->mothurOutEndLine();
322 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
323 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
324 m->mothurOutEndLine();
328 catch(exception& e) {
329 m->errorOut(e, "GetMIMarksPackageCommand", "execute");
333 //***************************************************************************************************************
// Reads the oligos file and fills Groups with the unique group names it defines.
// Names read on FORWARD and PRIMER lines are collected in primerNameVector and names read on
// BARCODE lines in barcodeNameVector. Every barcode/primer pairing is then turned into a
// group name (barcode.primer, or just the one that is non-empty); pairings where either
// name is "ignore" or both are empty are skipped.
334 int GetMIMarksPackageCommand::readOligos(){
337 m->openInputFile(oligosfile, inOligos);
339 string type, oligo, roligo, group;
340 vector<string> primerNameVector, barcodeNameVector;
341 set<string> uniquePrimers;
342 set<string> uniqueBarcodes;
344 while(!inOligos.eof()){
348 if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
351 while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
356 //make type case insensitive
357 for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
361 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
//upper-case the oligo and treat U as T
363 for(int i=0;i<oligo.length();i++){
364 oligo[i] = toupper(oligo[i]);
365 if(oligo[i] == 'U') { oligo[i] = 'T'; }
368 if(type == "FORWARD"){
371 // get rest of line in case there is a primer name
//NOTE(review): c is a char, so the c == -1 end-of-file test only matches where char is signed - confirm on unsigned-char targets
372 while (!inOligos.eof()) {
373 char c = inOligos.get();
374 if (c == 10 || c == 13 || c == -1){ break; }
375 else if (c == 32 || c == 9){;} //space or tab
379 primerNameVector.push_back(group);
381 else if (type == "PRIMER"){
//upper-case the reverse oligo and treat U as T
386 for(int i=0;i<roligo.length();i++){
387 roligo[i] = toupper(roligo[i]);
388 if(roligo[i] == 'U') { roligo[i] = 'T'; }
393 // get rest of line in case there is a primer name
394 while (!inOligos.eof()) {
395 char c = inOligos.get();
396 if (c == 10 || c == 13 || c == -1){ break; }
397 else if (c == 32 || c == 9){;} //space or tab
401 primerNameVector.push_back(group);
402 }else if(type == "BARCODE"){
405 //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs
406 //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info
409 while (!inOligos.eof()) {
410 char c = inOligos.get();
411 if (c == 10 || c == 13 || c == -1){ break; }
412 else if (c == 32 || c == 9){;} //space or tab
416 //then this is illumina data with 4 columns
419 string reverseBarcode = group; //reverseOligo(group); //reverse barcode
422 barcodeNameVector.push_back(group);
424 barcodeNameVector.push_back(group);
432 //add in potential combos
//an empty placeholder keeps the pairing loops below running when only barcodes or only primers were named
433 if(barcodeNameVector.size() == 0){
434 barcodeNameVector.push_back("");
437 if(primerNameVector.size() == 0){
438 primerNameVector.push_back("");
//the set removes duplicate combined names
441 set<string> uniqueNames;
442 for(int i = 0; i < barcodeNameVector.size(); i++){
443 for(int j = 0; j < primerNameVector.size(); j++){
445 string primerName = primerNameVector[j];
446 string barcodeName = barcodeNameVector[i];
448 if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
449 else if ((primerName == "") && (barcodeName == "")) { }
451 string comboGroupName = "";
//use whichever name is present, or barcode.primer when both are
453 if(primerName == ""){
454 comboGroupName = barcodeNameVector[i];
457 if(barcodeName == ""){
458 comboGroupName = primerNameVector[j];
461 comboGroupName = barcodeNameVector[i] + "." + primerNameVector[j];
464 uniqueNames.insert(comboGroupName);
471 if (m->debug) { int count = 0; for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } }
//append the unique names to Groups
473 for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { Groups.push_back(*it); }
478 catch(exception& e) {
479 m->errorOut(e, "GetMIMarksPackageCommand", "readOligos");
483 //**********************************************************************************************************************
484 // going to have to rework this to allow for other options --
494 fastqfile1 oligosfile1
495 fastqfile2 oligosfile2
500 fastqfile fastqfile group
501 fastqfile fastqfile group
502 fastqfile fastqfile group
// Reads the file file line by line and collects group names from it.
// A 2 column line names a sequence file and its oligos file; a 3 column line names a group
// followed by the forward and reverse fastq files. Each named file is looked for as given,
// then in the default path, then in the output directory. For a good 3 column line the group
// name is appended to Groups; for a good 2 column line oligosfile is set to the second file.
// Lines with any other column count log an error and set m->control_pressed.
// Fix: the group name from a 3 column line is now assigned to the `group` variable that the
// debug message and Groups.push_back read, instead of to a new variable local to the branch
// (which left the pushed group name unset).
507 int GetMIMarksPackageCommand::readFile(){
509 //vector<string> theseFiles;
513 m->openInputFile(file, in);
517 if (m->control_pressed) { return 0; }
519 string line = m->getline(in); m->gobble(in);
520 vector<string> pieces = m->splitWhiteSpace(line);
523 string thisFileName1, thisFileName2; thisFileName1 = ""; thisFileName2 = "";
524 if (pieces.size() == 2) {
525 thisFileName1 = pieces[0];
526 thisFileName2 = pieces[1];
527 }else if (pieces.size() == 3) {
528 thisFileName1 = pieces[1];
529 thisFileName2 = pieces[2];
//assign (do not redeclare) so the name is still available after this branch
530 group = pieces[0];
532 m->mothurOut("[ERROR]: file lines can be 2 or 3 columns. The 2 column files are sff file then oligos or fastqfile then oligos. You may have multiple lines in the file. The 3 column files are for paired read libraries. The format is groupName, forwardFastqFile reverseFastqFile. \n"); m->control_pressed = true;
535 if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", thisFileName1 = " + thisFileName1 + ", thisFileName2 = " + thisFileName2 + ".\n"); }
537 //check to make sure both are able to be opened
539 int openForward = m->openInputFile(thisFileName1, in2, "noerror");
541 //if you can't open it, try default location
542 if (openForward == 1) {
543 if (m->getDefaultPath() != "") { //default path is set
544 string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName1);
545 m->mothurOut("Unable to open " + thisFileName1 + ". Trying default " + tryPath); m->mothurOutEndLine();
547 openForward = m->openInputFile(tryPath, in3, "noerror");
549 thisFileName1 = tryPath;
553 //if you can't open it, try output location
554 if (openForward == 1) {
555 if (m->getOutputDir() != "") { //output directory is set
556 string tryPath = m->getOutputDir() + m->getSimpleName(thisFileName1);
557 m->mothurOut("Unable to open " + thisFileName1 + ". Trying output directory " + tryPath); m->mothurOutEndLine();
559 openForward = m->openInputFile(tryPath, in4, "noerror");
560 thisFileName1 = tryPath;
565 if (openForward == 1) { //can't find it
566 m->mothurOut("[WARNING]: can't find " + thisFileName1 + ", ignoring.\n");
567 }else{ in2.close(); }
//repeat the same search for the second file on the line
572 openReverse = m->openInputFile(thisFileName2, in3, "noerror");
574 //if you can't open it, try default location
575 if (openReverse == 1) {
576 if (m->getDefaultPath() != "") { //default path is set
577 string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName2);
578 m->mothurOut("Unable to open " + thisFileName2 + ". Trying default " + tryPath); m->mothurOutEndLine();
580 openReverse = m->openInputFile(tryPath, in3, "noerror");
582 thisFileName2 = tryPath;
586 //if you can't open it, try output location
587 if (openReverse == 1) {
588 if (m->getOutputDir() != "") { //output directory is set
589 string tryPath = m->getOutputDir() + m->getSimpleName(thisFileName2);
590 m->mothurOut("Unable to open " + thisFileName2 + ". Trying output directory " + tryPath); m->mothurOutEndLine();
592 openReverse = m->openInputFile(tryPath, in4, "noerror");
593 thisFileName2 = tryPath;
598 if (openReverse == 1) { //can't find it
599 m->mothurOut("[WARNING]: can't find " + thisFileName2 + ", ignoring pair.\n");
600 }else{ in3.close(); }
//only lines whose two files could both be opened contribute group names
603 if ((pieces.size() == 2) && (openForward != 1) && (openReverse != 1)) { //good pair and sff or fastq and oligos
604 oligosfile = thisFileName2;
605 if (m->debug) { m->mothurOut("[DEBUG]: about to read oligos\n"); }
607 }else if((pieces.size() == 3) && (openForward != 1) && (openReverse != 1)) { //good pair and paired read
608 Groups.push_back(group);
617 catch(exception& e) {
618 m->errorOut(e, "GetMIMarksPackageCommand", "readFile");
622 //**********************************************************************************************************************