X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=heatmapsimcommand.cpp;h=61e29390c21ae0a851a3acaede4c9da681d4d7d4;hb=260ae19c36cb11a53ddc5a75b5e507f8dd8b31d6;hp=0c9502f3845341471bb8e549b516cfe2e16792b8;hpb=0470f6d037aacb3563c3f7010708120a4a67d4e6;p=mothur.git diff --git a/heatmapsimcommand.cpp b/heatmapsimcommand.cpp index 0c9502f..61e2939 100644 --- a/heatmapsimcommand.cpp +++ b/heatmapsimcommand.cpp @@ -22,83 +22,129 @@ //********************************************************************************************************************** -HeatMapSimCommand::HeatMapSimCommand(string option){ +HeatMapSimCommand::HeatMapSimCommand(string option) { try { globaldata = GlobalData::getInstance(); abort = false; allLines = 1; - lines.clear(); labels.clear(); Groups.clear(); Estimators.clear(); - + //allow user to run help if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; } else { //valid paramters for this command - string AlignArray[] = {"groups","line","label", "calc"}; + string AlignArray[] = {"groups","label", "calc","phylip","column","name","outputdir","inputdir"}; vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); - parser = new OptionParser(); - parser->parse(option, parameters); delete parser; + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + map::iterator it; - ValidParameters* validParameter = new ValidParameters(); - //check to make sure all parameters are valid for command for (it = parameters.begin(); it != parameters.end(); it++) { - if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } } - //make sure the user has already run the read.otu command - if (globaldata->getSharedFile() == "") { - cout << "You must read a list and a group, or a shared before you can use the heatmap.sim command." << endl; abort = true; + format = ""; + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("phylip"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["phylip"] = inputDir + it->second; } + } + + it = parameters.find("column"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["column"] = inputDir + it->second; } + } + + it = parameters.find("name"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["name"] = inputDir + it->second; } + } } - //check for optional parameter and set defaults - // ...at some point should added some additional type checking... - line = validParameter->validFile(parameters, "line", false); - if (line == "not found") { line = ""; } - else { - if(line != "all") { splitAtDash(line, lines); allLines = 0; } - else { allLines = 1; } - } + //required parameters + phylipfile = validParameter.validFile(parameters, "phylip", true); + if (phylipfile == "not open") { abort = true; } + else if (phylipfile == "not found") { phylipfile = ""; } + else { format = "phylip"; if (outputDir == "") { outputDir += m->hasPath(phylipfile); } } - label = validParameter->validFile(parameters, "label", false); - if (label == "not found") { label = ""; } - else { - if(label != "all") { splitAtDash(label, labels); allLines = 0; } - else { allLines = 1; } - } + columnfile = validParameter.validFile(parameters, "column", true); + if (columnfile == "not open") { abort = true; } + else if (columnfile == "not found") { columnfile = ""; } + else { format = "column"; if (outputDir == "") { outputDir += m->hasPath(columnfile); } } - //make sure user did not use both the line and label parameters - if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. " << endl; abort = true; } - //if the user has not specified any line or labels use the ones from read.otu - else if ((line == "") && (label == "")) { - allLines = globaldata->allLines; - labels = globaldata->labels; - lines = globaldata->lines; - } + namefile = validParameter.validFile(parameters, "name", true); + if (namefile == "not open") { abort = true; } + else if (namefile == "not found") { namefile = ""; } - calc = validParameter->validFile(parameters, "calc", false); - if (calc == "not found") { calc = "jclass-thetayc"; } - else { - if (calc == "default") { calc = "jclass-thetayc"; } - } - splitAtDash(calc, Estimators); - groups = validParameter->validFile(parameters, "groups", false); - if (groups == "not found") { groups = ""; } - else { - splitAtDash(groups, Groups); - globaldata->Groups = Groups; + //error checking on files + if ((globaldata->getSharedFile() == "") && ((phylipfile == "") && (columnfile == ""))) { m->mothurOut("You must run the read.otu command or provide a distance file before running the heatmap.sim command."); m->mothurOutEndLine(); abort = true; } + else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When running the heatmap.sim command with a distance file you may not use both the column and the phylip parameters."); m->mothurOutEndLine(); abort = true; } + + if (columnfile != "") { + if (namefile == "") { m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine(); abort = true; } } - delete validParameter; + if (format == "") { format = "shared"; } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + if (format == "shared") { + if (outputDir == "") { outputDir += m->hasPath(globaldata->getSharedFile()); } + + label = validParameter.validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { m->splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + + //if the user has not specified any labels use the ones from read.otu + if (label == "") { + allLines = globaldata->allLines; + labels = globaldata->labels; + } + + calc = validParameter.validFile(parameters, "calc", false); + if (calc == "not found") { calc = "jest-thetayc"; } + else { + if (calc == "default") { calc = "jest-thetayc"; } + } + m->splitAtDash(calc, Estimators); + + groups = validParameter.validFile(parameters, "groups", false); + if (groups == "not found") { groups = ""; } + else { + m->splitAtDash(groups, Groups); + globaldata->Groups = Groups; + } + } if (abort == false) { validCalculator = new ValidCalculators(); - heatmap = new HeatMapSim(); int i; for (i=0; ierrorOut(e, "HeatMapSimCommand", "HeatMapSimCommand"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the HeatMapSimCommand class function HeatMapSimCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } } //********************************************************************************************************************** void HeatMapSimCommand::help(){ try { - cout << "The heatmap.sim command can only be executed after a successful read.otu command." << "\n"; - cout << "The heatmap.sim command parameters are groups, calc, line and label. No parameters are required, but you may not use line and label at the same time." << "\n"; - cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like included in your heatmap." << "\n"; - cout << "The group names are separated by dashes. The line and label allow you to select what distance levels you would like a heatmap created for, and are also separated by dashes." << "\n"; - cout << "The heatmap.sim command should be in the following format: heatmap.sim(groups=yourGroups, calc=yourCalc, line=yourLines, label=yourLabels)." << "\n"; - cout << "Example heatmap.sim(groups=A-B-C, line=1-3-5, calc=jabund)." << "\n"; - cout << "The default value for groups is all the groups in your groupfile, and all lines in your inputfile will be used." << "\n"; + m->mothurOut("The heatmap.sim command can only be executed after a successful read.otu command, or by providing a distance file.\n"); + m->mothurOut("The heatmap.sim command parameters are phylip, column, name, groups, calc and label. No parameters are required.\n"); + m->mothurOut("There are two ways to use the heatmap.sim command. The first is with the read.otu command. \n"); + m->mothurOut("With the read.otu command you may use the groups, label and calc parameters. \n"); + m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included in your heatmap.\n"); + m->mothurOut("The group names are separated by dashes. The label parameter allows you to select what distance levels you would like a heatmap created for, and is also separated by dashes.\n"); + m->mothurOut("The heatmap.sim command should be in the following format: heatmap.sim(groups=yourGroups, calc=yourCalc, label=yourLabels).\n"); + m->mothurOut("Example heatmap.sim(groups=A-B-C, calc=jabund).\n"); + m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n"); validCalculator->printCalc("heat", cout); - cout << "The default value for calc is jclass-thetayc." << "\n"; - cout << "The heatmap.sim command outputs a .svg file for each calculator you choose at each line or label you specify." << "\n"; - cout << "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups)." << "\n" << "\n"; + m->mothurOut("The default value for calc is jclass-thetayc.\n"); + m->mothurOut("The heatmap.sim command outputs a .svg file for each calculator you choose at each label you specify.\n"); + m->mothurOut("The second way to use the heatmap.sim command is with a distance file representing the distance bewteen your groups. \n"); + m->mothurOut("Using the command this way, the phylip or column parameter are required, and only one may be used. If you use a column file the name filename is required. \n"); + m->mothurOut("The heatmap.sim command should be in the following format: heatmap.sim(phylip=yourDistanceFile).\n"); + m->mothurOut("Example heatmap.sim(phylip=amazonGroups.dist).\n"); + m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n"); } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the HeatMapSimCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + m->errorOut(e, "HeatMapSimCommand", "help"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the HeatMapSimCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } } //********************************************************************************************************************** -HeatMapSimCommand::~HeatMapSimCommand(){ - delete input; - delete read; - delete heatmap; - delete validCalculator; -} +HeatMapSimCommand::~HeatMapSimCommand(){} //********************************************************************************************************************** @@ -186,10 +225,41 @@ int HeatMapSimCommand::execute(){ if (abort == true) { return 0; } - int count = 1; + heatmap = new HeatMapSim(outputDir); + + if (format == "shared") { + runCommandShared(); + }else if (format == "phylip") { + globaldata->inputFileName = phylipfile; + runCommandDist(); + }else if (format == "column") { + globaldata->inputFileName = columnfile; + runCommandDist(); + } + + delete heatmap; + delete validCalculator; + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + return 0; + } + catch(exception& e) { + m->errorOut(e, "HeatMapSimCommand", "execute"); + exit(1); + } +} + +//********************************************************************************************************************** +int HeatMapSimCommand::runCommandShared() { + try { //if the users entered no valid calculators don't execute command - if (heatCalculators.size() == 0) { cout << "No valid calculators." << endl; return 0; } + if (heatCalculators.size() == 0) { m->mothurOut("No valid calculators."); m->mothurOutEndLine(); return 0; } //you have groups read = new ReadOTUFile(globaldata->inputFileName); @@ -197,80 +267,234 @@ int HeatMapSimCommand::execute(){ input = globaldata->ginput; lookup = input->getSharedRAbundVectors(); - vector lastLookup = lookup; + string lastLabel = lookup[0]->getLabel(); - if (lookup.size() < 2) { cout << "You have not provided enough valid groups. I cannot run the command." << endl; return 0;} + if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups. I cannot run the command."); m->mothurOutEndLine(); return 0;} //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set processedLabels; set userLabels = labels; - set userLines = lines; - - //as long as you are not at the end of the file or done wih the lines you want - while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) { + if (m->control_pressed) { delete read; delete input; globaldata->ginput = NULL; for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } globaldata->Groups.clear(); return 0; } - if(allLines == 1 || lines.count(count) == 1 || labels.count(lookup[0]->getLabel()) == 1){ + //as long as you are not at the end of the file or done wih the lines you want + while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { + + if (m->control_pressed) { delete read; delete input; globaldata->ginput = NULL; for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } globaldata->Groups.clear(); return 0; } + + if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ - cout << lookup[0]->getLabel() << '\t' << count << endl; - heatmap->getPic(lookup, heatCalculators); + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + vector outfilenames = heatmap->getPic(lookup, heatCalculators); + for(int i = 0; i < outfilenames.size(); i++) { outputNames.push_back(outfilenames[i]); } processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); - userLines.erase(count); } - if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLookup[0]->getLabel()) != 1)) { - cout << lastLookup[0]->getLabel() << '\t' << count << endl; - heatmap->getPic(lastLookup, heatCalculators); + if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = lookup[0]->getLabel(); + + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + lookup = input->getSharedRAbundVectors(lastLabel); + + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + vector outfilenames = heatmap->getPic(lookup, heatCalculators); + for(int i = 0; i < outfilenames.size(); i++) { outputNames.push_back(outfilenames[i]); } - processedLabels.insert(lastLookup[0]->getLabel()); - userLabels.erase(lastLookup[0]->getLabel()); + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + + //restore real lastlabel to save below + lookup[0]->setLabel(saveLabel); } //prevent memory leak - if (count != 1) { for (int i = 0; i < lastLookup.size(); i++) { delete lastLookup[i]; } } - lastLookup = lookup; + + lastLabel = lookup[0]->getLabel(); //get next line to process + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } lookup = input->getSharedRAbundVectors(); - count++; } + + if (m->control_pressed) { delete read; delete input; globaldata->ginput = NULL; globaldata->Groups.clear(); return 0; } + //output error messages about any remaining user labels set::iterator it; bool needToRun = false; for (it = userLabels.begin(); it != userLabels.end(); it++) { - cout << "Your file does not include the label "<< *it; - if (processedLabels.count(lastLookup[0]->getLabel()) != 1) { - cout << ". I will use " << lastLookup[0]->getLabel() << "." << endl; + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); needToRun = true; }else { - cout << ". Please refer to " << lastLookup[0]->getLabel() << "." << endl; + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); } } - //run last line if you need to + if (m->control_pressed) { delete read; delete input; globaldata->ginput = NULL; globaldata->Groups.clear(); return 0; } + + //run last label if you need to if (needToRun == true) { - cout << lastLookup[0]->getLabel() << '\t' << count << endl; - heatmap->getPic(lastLookup, heatCalculators); + for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } } + lookup = input->getSharedRAbundVectors(lastLabel); + + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + vector outfilenames = heatmap->getPic(lookup, heatCalculators); + for(int i = 0; i < outfilenames.size(); i++) { outputNames.push_back(outfilenames[i]); } + + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } - for (int i = 0; i < lastLookup.size(); i++) { delete lastLookup[i]; } + if (m->control_pressed) { delete read; delete input; globaldata->ginput = NULL; globaldata->Groups.clear(); return 0; } //reset groups parameter globaldata->Groups.clear(); + delete input; globaldata->ginput = NULL; + delete read; + return 0; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the HeatMapSimCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + m->errorOut(e, "HeatMapSimCommand", "runCommandShared"); exit(1); } - catch(...) { - cout << "An unknown error has occurred in the HeatMapSimCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } } +//********************************************************************************************************************** +int HeatMapSimCommand::runCommandDist() { + try { + + vector< vector > matrix; + vector names; + ifstream in; + + //read distance file and create distance vector and names vector + if (format == "phylip") { + //read phylip file + m->openInputFile(phylipfile, in); + + string name; + int numSeqs; + in >> numSeqs >> name; + + //save name + names.push_back(name); + + //resize the matrix and fill with zeros + matrix.resize(numSeqs); + for(int i = 0; i < numSeqs; i++) { + matrix[i].resize(numSeqs, 0.0); + } + + //determine if matrix is square or lower triangle + //if it is square read the distances for the first sequence + char d; + bool square; + while((d=in.get()) != EOF){ + + //is d a number meaning its square + if(isalnum(d)){ + square = true; + in.putback(d); + + for(int i=0;i> matrix[0][i]; + } + break; + } + + //is d a line return meaning its lower triangle + if(d == '\n'){ + square = false; + break; + } + } + + //read rest of matrix + if (square == true) { + for(int i=1;i> name; + names.push_back(name); + + if (m->control_pressed) { return 0; } + + for(int j=0;j> matrix[i][j]; } + m->gobble(in); + } + }else { + double dist; + for(int i=1;i> name; + names.push_back(name); + + if (m->control_pressed) { return 0; } + + for(int j=0;j> dist; + matrix[i][j] = dist; matrix[j][i] = dist; + } + m->gobble(in); + } + } + in.close(); + }else { + //read names file + NameAssignment* nameMap = new NameAssignment(namefile); + nameMap->readMap(); + + //put names in order in vector + for (int i = 0; i < nameMap->size(); i++) { + names.push_back(nameMap->get(i)); + } + + //resize matrix + matrix.resize(nameMap->size()); + for (int i = 0; i < nameMap->size(); i++) { + matrix[i].resize(nameMap->size(), 0.0); + } + + //read column file + string first, second; + double dist; + m->openInputFile(columnfile, in); + + while (!in.eof()) { + in >> first >> second >> dist; m->gobble(in); + + if (m->control_pressed) { return 0; } + + map::iterator itA = nameMap->find(first); + map::iterator itB = nameMap->find(second); + + if(itA == nameMap->end()){ cerr << "AAError: Sequence '" << first << "' was not found in the names file, please correct\n"; exit(1); } + if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << second << "' was not found in the names file, please correct\n"; exit(1); } + + //save distance + matrix[itA->second][itB->second] = dist; + matrix[itB->second][itA->second] = dist; + } + in.close(); + + delete nameMap; + } + + outputNames.push_back(heatmap->getPic(matrix, names)); //vector>, vector + + return 0; + } + catch(exception& e) { + m->errorOut(e, "HeatMapSimCommand", "runCommandDist"); + exit(1); + } +} //********************************************************************************************************************** + + + + + +