#include "getmimarkspackagecommand.h"
#include "groupmap.h"
+
//**********************************************************************************************************************
vector<string> GetMIMarksPackageCommand::setParameters(){
try {
string GetMIMarksPackageCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The get.mimarkspackage command creates a mimarks package form with your groups. The required fields are flagged with * characters. Fields marked with '**' indicated they are in a group where at least one of the fields is required.\n";
+ helpString += "The get.mimarkspackage command creates a mimarks package form with your groups. The required fields are flagged with * characters. \n";
helpString += "Further documentation on the different packages and required formats can be found here, http://www.mothur.org/wiki/MIMarks_Data_Packages.\n";
helpString += "The get.mimarkspackage command parameters are: oligos, group, package and requiredonly. oligos or group is required.\n";
helpString += "The oligos parameter is used to provide your oligos file so mothur can extract your group names.\n";
outputTypes["tsv"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
- string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ inputDir = validParameter.validFile(parameters, "inputdir", false);
if (inputDir == "not found"){ inputDir = ""; }
else {
if (abort == true) { if (calledHelp) { return 0; } return 2; }
- if (oligosfile != "") { readOligos(); }
+ if (oligosfile != "") { Oligos oligos(oligosfile); Groups = oligos.getGroupNames(); }
else if (file != "") { readFile(); }
else { GroupMap groupmap(groupfile); groupmap.readMap(); Groups = groupmap.getNamesOfGroups(); }
out << "#This is a tab-delimited file. Additional Documentation can be found at http://www.mothur.org/wiki/MIMarks_Data_Packages." << endl;
out << "#Please fill all the required fields indicated with '*'" << endl;
- out << "#Fields marked with '**' indicated they are in a group where at least one of the fields is required." << endl;
out << "#Unknown or inapplicable fields can be assigned NA value." << endl;
out << "#You may add extra custom fields to this template. Make sure all the fields are separated by tabs." << endl;
out << "#You may remove any fields not required (marked with '*'). Make sure all the fields are separated by tabs." << endl;
}else if (package == "host_associated") {
out << "#Environmental:MIMARKS.specimen.host-associated.3.0" << endl;
if (requiredonly) {
- out << "*sample_name *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods *host **clone **isolate **strain" << endl;
+ out << "*sample_name *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods *host " << endl;
}else {
- out << "*sample_name description bioproject_id sample_title *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods **clone **isolate **strain rel_to_oxygen samp_collect_device samp_mat_process *host age altitude blood_press_diast blood_press_syst body_habitat body_product tissue chem_administration depth diet disease_stat dry_mass elev family_relationship genotype gravidity height_or_length host_body_temp host_color host_growth_cond host_shape host_subject_id host_taxid infra_specific_name infra_specific_rank last_meal life_stage misc_param organism_count oxy_stat_samp perturbation phenotype samp_size samp_salinity samp_store_dur samp_store_loc samp_store_temp sex substrate temp tot_mass" << endl;
+ out << "*sample_name description bioproject_id sample_title *organism *collection_date *biome *feature *material *geo_loc_name *lat_lon *title *seq_methods rel_to_oxygen samp_collect_device samp_mat_process *host age altitude blood_press_diast blood_press_syst body_habitat body_product tissue chem_administration depth diet disease_stat dry_mass elev family_relationship genotype gravidity height_or_length host_body_temp host_color host_growth_cond host_shape host_subject_id host_taxid infra_specific_name infra_specific_rank last_meal life_stage misc_param organism_count oxy_stat_samp perturbation phenotype samp_size samp_salinity samp_store_dur samp_store_loc samp_store_temp sex substrate temp tot_mass" << endl;
}
}else if (package == "human_associated") {
out << "#Environmental:MIMARKS.specimen.human-associated.3.0" << endl;
}
}
//***************************************************************************************************************
-int GetMIMarksPackageCommand::readOligos(){ /*
- * Parses the oligos file named by `oligosfile` and appends the group names it
- * implies to `Groups`.
- *
- * Line handling (the type token is upper-cased before matching):
- *   - first token starting with '#': the rest of the line is discarded;
- *   - FORWARD: one oligo, then an optional primer name;
- *   - PRIMER:  two oligos, then an optional primer name;
- *   - BARCODE: one or two oligos, then the group name;
- *   - any other type: only the oligo token is consumed, nothing is recorded.
- *
- * The names pushed onto `Groups` are every barcode x primer combination
- * ("barcode.primer"), or just the barcode / primer name when the other side is
- * empty. Combinations involving the name "ignore" are dropped.
- *
- * Returns `true` (i.e. 1 through the int return type). If an exception is
- * caught, it is reported through m->errorOut and the process exits with
- * status 1.
- */
- try {
- ifstream inOligos;
- m->openInputFile(oligosfile, inOligos);
-
- string type, oligo, roligo, group;
- vector<string> primerNameVector, barcodeNameVector; // names collected per line; "" means the line carried no name
- set<string> uniquePrimers; // NOTE(review): declared but never used in this function
- set<string> uniqueBarcodes; // NOTE(review): declared but never used in this function
-
- while(!inOligos.eof()){ // NOTE(review): loop is driven by eof() only; the result of each extraction below is not checked
-
- inOligos >> type;
-
- if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
-
- if(type[0] == '#'){
- while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // comment line: discard characters up to the next LF (10) or CR (13)
- m->gobble(inOligos); // presumably skips trailing whitespace/newlines - confirm against m->gobble
- }
- else{
- m->gobble(inOligos);
- // upper-case the type so the comparisons below are case-insensitive
- for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
-
- inOligos >> oligo;
-
- if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
-
- for(int i=0;i<oligo.length();i++){ // normalise the oligo: upper-case, and U is rewritten as T
- oligo[i] = toupper(oligo[i]);
- if(oligo[i] == 'U') { oligo[i] = 'T'; }
- }
-
- if(type == "FORWARD"){
- group = "";
-
- // read the rest of the line as the primer name, dropping spaces and tabs
- while (!inOligos.eof()) {
- char c = inOligos.get();
- if (c == 10 || c == 13 || c == -1){ break; } // LF, CR, or -1 (NOTE(review): get() returns an int; matching -1 through a char relies on char being signed - confirm)
- else if (c == 32 || c == 9){;} //space or tab
- else { group += c; }
- }
-
- primerNameVector.push_back(group); // pushed even when no name was present (empty string)
- }
- else if (type == "PRIMER"){
- m->gobble(inOligos);
-
- inOligos >> roligo; // second oligo on a PRIMER line
-
- for(int i=0;i<roligo.length();i++){ // same normalisation as for the first oligo
- roligo[i] = toupper(roligo[i]);
- if(roligo[i] == 'U') { roligo[i] = 'T'; }
- }
-
- group = "";
-
- // read the rest of the line as the primer name, dropping spaces and tabs
- while (!inOligos.eof()) {
- char c = inOligos.get();
- if (c == 10 || c == 13 || c == -1){ break; }
- else if (c == 32 || c == 9){;} //space or tab
- else { group += c; }
- }
-
- primerNameVector.push_back(group);
- }else if(type == "BARCODE"){
- inOligos >> group; // token after the first barcode: the group name (3 columns) or a second barcode (4 columns)
-
- //barcode lines can look like BARCODE atgcatgc groupName - for 454 seqs
- //or BARCODE atgcatgc atgcatgc groupName - for illumina data that has forward and reverse info
-
- string temp = ""; // whatever is left on the line after that token, with spaces and tabs removed
- while (!inOligos.eof()) {
- char c = inOligos.get();
- if (c == 10 || c == 13 || c == -1){ break; }
- else if (c == 32 || c == 9){;} //space or tab
- else { temp += c; }
- }
-
- //then this is illumina data with 4 columns
- if (temp != "") {
-
- string reverseBarcode = group; //reverseOligo(group); //reverse barcode (not used after this assignment)
- group = temp; // the trailing text is the real group name
-
- barcodeNameVector.push_back(group);
- }else {
- barcodeNameVector.push_back(group);
- }
- }
- }
- m->gobble(inOligos);
- }
- inOligos.close();
-
- //add in potential combos
- if(barcodeNameVector.size() == 0){ // a single empty entry keeps the nested loops below running when one side has no names
- barcodeNameVector.push_back("");
- }
-
- if(primerNameVector.size() == 0){
- primerNameVector.push_back("");
- }
-
- set<string> uniqueNames; // a set, so a repeated combination is stored once
- for(int i = 0; i < barcodeNameVector.size(); i++){
- for(int j = 0; j < primerNameVector.size(); j++){
-
- string primerName = primerNameVector[j];
- string barcodeName = barcodeNameVector[i];
-
- if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
- else if ((primerName == "") && (barcodeName == "")) { } // neither side has a name: no group to record
- else {
- string comboGroupName = "";
-
- if(primerName == ""){
- comboGroupName = barcodeNameVector[i];
- }
- else{
- if(barcodeName == ""){
- comboGroupName = primerNameVector[j];
- }
- else{
- comboGroupName = barcodeNameVector[i] + "." + primerNameVector[j]; // both present: "barcode.primer"
- }
- }
- uniqueNames.insert(comboGroupName);
- }
- }
- }
-
-
-
- if (m->debug) { int count = 0; for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } }
-
- for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { Groups.push_back(*it); } // appended to Groups; existing entries are kept
-
- return true; // NOTE(review): int return type, so this yields 1
-
- }
- catch(exception& e) {
- m->errorOut(e, "GetMIMarksPackageCommand", "readOligos");
- exit(1);
- }
-}
-//**********************************************************************************************************************
+
// going to have to rework this to allow for other options --
/*
file option 1
int GetMIMarksPackageCommand::readFile(){
try {
- //vector<string> theseFiles;
+ Oligos oligos;
inputfile = file;
ifstream in;
if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", thisFileName1 = " + thisFileName1 + ", thisFileName2 = " + thisFileName2 + ".\n"); }
+ if (inputDir != "") {
+ string path = m->hasPath(thisFileName2);
+ if (path == "") { thisFileName2 = inputDir + thisFileName2; }
+
+ path = m->hasPath(thisFileName1);
+ if (path == "") { thisFileName1 = inputDir + thisFileName1; }
+ }
+
//check to make sure both are able to be opened
ifstream in2;
int openForward = m->openInputFile(thisFileName1, in2, "noerror");
if ((pieces.size() == 2) && (openForward != 1) && (openReverse != 1)) { //good pair and sff or fastq and oligos
oligosfile = thisFileName2;
if (m->debug) { m->mothurOut("[DEBUG]: about to read oligos\n"); }
- readOligos();
+ oligos.read(oligosfile);
}else if((pieces.size() == 3) && (openForward != 1) && (openReverse != 1)) { //good pair and paired read
Groups.push_back(group);
}
}
in.close();
+ Groups = oligos.getGroupNames();
+
inputfile = file;
return 0;