return better;
}
//**********************************************************************************************************************
-vector<string> ClusterFragmentsCommand::getValidParameters(){
+vector<string> ClusterFragmentsCommand::setParameters(){ // Appends this command's six CommandParameter entries to `parameters` and returns the name of every entry in `parameters`.
try {
- string AlignArray[] = {"fasta","name","diffs","percent","outputdir","inputdir"};
- vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
+ CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta); // NOTE(review): the final `true` differs from every other parameter here - presumably marks fasta as required; confirm against CommandParameter
+ CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname); // the option is spelled "name" (singular)
+ CommandParameter pdiffs("diffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pdiffs); // NOTE(review): "0" is presumably the default value - confirm against CommandParameter
+ CommandParameter ppercent("percent", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppercent); // same argument pattern as diffs
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+
+ vector<string> myArray; // replaces the hard-coded name array removed above
+ for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } // names are emitted in registration order
return myArray;
}
catch(exception& e) {
- m->errorOut(e, "ClusterFragmentsCommand", "getValidParameters");
+ m->errorOut(e, "ClusterFragmentsCommand", "setParameters"); // error is reported under the renamed method
exit(1);
}
}
//**********************************************************************************************************************
-ClusterFragmentsCommand::ClusterFragmentsCommand(){
+string ClusterFragmentsCommand::getHelpString(){ // Builds and returns the user-facing help text for cluster.fragments as one newline-separated string.
try {
- abort = true;
- //initialize outputTypes
- vector<string> tempOutNames;
- outputTypes["fasta"] = tempOutNames;
- outputTypes["name"] = tempOutNames;
+ string helpString = "";
+ helpString += "The cluster.fragments command groups sequences that are part of a larger sequence.\n";
+ helpString += "The cluster.fragments command outputs a new fasta and name file.\n";
+ helpString += "The cluster.fragments command parameters are fasta, name, diffs and percent. The fasta parameter is required, unless you have a valid current file. \n";
+ helpString += "The name parameter allows you to give a list of seqs that are identical. This file is 2 columns, first column is name or representative sequence, second column is a list of its identical sequences separated by commas.\n"; // option is spelled "name", matching the parameter this command registers and reads
+ helpString += "The diffs parameter allows you to set the number of differences allowed, default=0. \n";
+ helpString += "The percent parameter allows you to set percentage of differences allowed, default=0. percent=2 means if the number of difference is less than or equal to two percent of the length of the fragment, then cluster.\n";
+ helpString += "You may use diffs and percent at the same time to say something like: If the number of differences is greater than 1 or more than 2% of the fragment length, don't merge. \n";
+ helpString += "The cluster.fragments command should be in the following format: \n";
+ helpString += "cluster.fragments(fasta=yourFastaFile, name=yourNamesFile) \n"; // usage line must show the real option name so it can be copied as-is
+ helpString += "Example cluster.fragments(fasta=amazon.fasta).\n";
+ helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
+ return helpString;
}
catch(exception& e) {
- m->errorOut(e, "ClusterFragmentsCommand", "ClusterFragmentsCommand");
+ m->errorOut(e, "ClusterFragmentsCommand", "getHelpString"); // error is reported under this method's name
exit(1);
}
}
//**********************************************************************************************************************
-vector<string> ClusterFragmentsCommand::getRequiredParameters(){
+string ClusterFragmentsCommand::getOutputFileNameTag(string type, string inputName=""){ // Returns the file-name tag for an output of the given type, or "" when no tag applies; inputName is not read in this body.
try {
- string Array[] = {"fasta"};
- vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
- return myArray;
+ string outputFileName = ""; // stays empty on either error path below
+ map<string, vector<string> >::iterator it;
+
+ //is this a type this command creates
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } // type absent from outputTypes: only a message is printed, control_pressed is left alone
+ else {
+ if (type == "fasta") { outputFileName = "fragclust.fasta"; }
+ else if (type == "name") { outputFileName = "fragclust.names"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } // type is in outputTypes but has no tag defined here: report it and set control_pressed
+ }
+ return outputFileName;
}
catch(exception& e) {
- m->errorOut(e, "ClusterFragmentsCommand", "getRequiredParameters");
+ m->errorOut(e, "ClusterFragmentsCommand", "getOutputFileNameTag"); // error is reported under this method's name
exit(1);
}
}
+
//**********************************************************************************************************************
-vector<string> ClusterFragmentsCommand::getRequiredFiles(){
+ClusterFragmentsCommand::ClusterFragmentsCommand(){ // Default constructor: marks the command as aborted/help-only, registers its parameters and creates empty output-type lists.
try {
- vector<string> myArray;
- return myArray;
+ abort = true; calledHelp = true; // with both flags set, execute() returns 0 before doing any work
+ setParameters(); // called to fill `parameters`; the returned name list is discarded
+ vector<string> tempOutNames; // empty list used to seed each output type
+ outputTypes["fasta"] = tempOutNames;
+ outputTypes["name"] = tempOutNames;
}
catch(exception& e) {
- m->errorOut(e, "ClusterFragmentsCommand", "getRequiredFiles");
+ m->errorOut(e, "ClusterFragmentsCommand", "ClusterFragmentsCommand"); // error is reported under the constructor's name
exit(1);
}
}
//**********************************************************************************************************************
ClusterFragmentsCommand::ClusterFragmentsCommand(string option) {
try {
- abort = false;
+ abort = false; calledHelp = false; // calledHelp lets execute() tell a help/citation request (returns 0) from a real abort (returns 2)
//allow user to run help
- if(option == "help") { help(); abort = true; }
+ if(option == "help") { help(); abort = true; calledHelp = true; }
+ else if(option == "citation") { citation(); abort = true; calledHelp = true;} // citation is handled like help: nothing else is parsed
else {
- //valid paramters for this command
- string Array[] = {"fasta","name","diffs","percent","outputdir","inputdir"};
- vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+ vector<string> myArray = setParameters(); // parameter names now come from setParameters() instead of a hard-coded array
OptionParser parser(option);
map<string, string> parameters = parser.getParameters();
//check for required parameters
fastafile = validParameter.validFile(parameters, "fasta", true);
- if (fastafile == "not found") { m->mothurOut("fasta is a required parameter for the cluster.fragments command."); m->mothurOutEndLine(); abort = true; }
- else if (fastafile == "not open") { abort = true; }
+ if (fastafile == "not found") { // fasta option not supplied: fall back to m->getFastaFile() instead of aborting outright
+ fastafile = m->getFastaFile();
+ if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
+ else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; } // no fallback available either: abort
+ }
+ else if (fastafile == "not open") { fastafile = ""; abort = true; } // replace the "not open" sentinel with an empty name before aborting
+ else { m->setFastaFile(fastafile); } // NOTE(review): presumably records this file as the current fasta file - confirm against setFastaFile
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(fastafile); }
// ...at some point should added some additional type checking...
namefile = validParameter.validFile(parameters, "name", true);
if (namefile == "not found") { namefile = ""; }
- else if (namefile == "not open") { abort = true; }
- else { readNameFile(); }
+ else if (namefile == "not open") { namefile = ""; abort = true; } // replace the "not open" sentinel with an empty name before aborting
+ else { readNameFile(); m->setNameFile(namefile); } // the name file is read first, then passed to m->setNameFile
string temp;
temp = validParameter.validFile(parameters, "diffs", false); if (temp == "not found"){ temp = "0"; }
- convert(temp, diffs);
+ m->mothurConvert(temp, diffs); // NOTE(review): presumably parses temp into diffs, replacing the free convert() - confirm
temp = validParameter.validFile(parameters, "percent", false); if (temp == "not found"){ temp = "0"; }
- convert(temp, percent);
+ m->mothurConvert(temp, percent); // same conversion for percent
+
+ if (namefile == "") { // true both when no name option was given and when the name file failed to open (namefile is reset to "" above)
+ vector<string> files; files.push_back(fastafile);
+ parser.getNameFile(files); // NOTE(review): presumably lets the parser look for a name file matching the fasta file - confirm; the result is not stored here
+ }
}
exit(1);
}
}
-
-//**********************************************************************************************************************
-ClusterFragmentsCommand::~ClusterFragmentsCommand(){}
-//**********************************************************************************************************************
-void ClusterFragmentsCommand::help(){
- try {
- m->mothurOut("The cluster.fragments command groups sequences that are part of a larger sequence.\n");
- m->mothurOut("The cluster.fragments command outputs a new fasta and name file.\n");
- m->mothurOut("The cluster.fragments command parameters are fasta, name, diffs and percent. The fasta parameter is required. \n");
- m->mothurOut("The names parameter allows you to give a list of seqs that are identical. This file is 2 columns, first column is name or representative sequence, second column is a list of its identical sequences separated by commas.\n");
- m->mothurOut("The diffs parameter allows you to set the number of differences allowed, default=0. \n");
- m->mothurOut("The percent parameter allows you to set percentage of differences allowed, default=0. percent=2 means if the number of difference is less than or equal to two percent of the length of the fragment, then cluster.\n");
- m->mothurOut("You may use diffs and percent at the same time to say something like: If the number or differences is greater than 1 or more than 2% of the fragment length, don't merge. \n");
- m->mothurOut("The cluster.fragments command should be in the following format: \n");
- m->mothurOut("cluster.fragments(fasta=yourFastaFile, names=yourNamesFile) \n");
- m->mothurOut("Example cluster.fragments(fasta=amazon.fasta).\n");
- m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
- }
- catch(exception& e) {
- m->errorOut(e, "ClusterFragmentsCommand", "help");
- exit(1);
- }
-}
//**********************************************************************************************************************
int ClusterFragmentsCommand::execute(){
try {
- if (abort == true) { return 0; }
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
int start = time(NULL);
if (alignSeqs[j].active) { //this sequence has not been merged yet
string jBases = alignSeqs[j].seq.getUnaligned();
-
+
if (isFragment(iBases, jBases)) {
//merge
alignSeqs[i].names += ',' + alignSeqs[j].names;
string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile));
- string newFastaFile = fileroot + "fragclust.fasta";
- string newNamesFile = fileroot + "names";
+ string newFastaFile = fileroot + getOutputFileNameTag("fasta");
+ string newNamesFile = fileroot + getOutputFileNameTag("name");
if (m->control_pressed) { return 0; }
m->mothurOut("It took " + toString(time(NULL) - start) + " secs to cluster " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
- if (m->control_pressed) { remove(newFastaFile.c_str()); remove(newNamesFile.c_str()); return 0; }
+ if (m->control_pressed) { m->mothurRemove(newFastaFile); m->mothurRemove(newNamesFile); return 0; }
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
m->mothurOut(newNamesFile); m->mothurOutEndLine();
outputNames.push_back(newFastaFile); outputNames.push_back(newNamesFile); outputTypes["fasta"].push_back(newFastaFile); outputTypes["name"].push_back(newNamesFile);
m->mothurOutEndLine();
+
+ //set fasta file as new current fastafile
+ string current = "";
+ itTypes = outputTypes.find("fasta");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
+ }
+
+ itTypes = outputTypes.find("name");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
+ }
return 0;
int percentDiff = floor((seq2.length() * (percent / 100.0)));
if (percentDiff < totalDiffs) { totalDiffs = percentDiff; }
}
-
+
Alignment* alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (seq1.length()+totalDiffs+1));
//use needleman to align
}
if (numDiffs <= totalDiffs) { fragment = true; }
+
}
return fragment;