//**********************************************************************************************************************
vector<string> ClusterFragmentsCommand::setParameters(){
try {
- CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
- CommandParameter pdiffs("diffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pdiffs);
- CommandParameter ppercent("percent", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppercent);
- CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
- CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+ CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta-name",false,true,true); parameters.push_back(pfasta);
+ CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","name",false,false,true); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","count",false,false,true); parameters.push_back(pcount);
+ CommandParameter pdiffs("diffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pdiffs);
+ CommandParameter ppercent("percent", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppercent);
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
vector<string> myArray;
for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
try {
string helpString = "";
helpString += "The cluster.fragments command groups sequences that are part of a larger sequence.\n";
- helpString += "The cluster.fragments command outputs a new fasta and name file.\n";
- helpString += "The cluster.fragments command parameters are fasta, name, diffs and percent. The fasta parameter is required, unless you have a valid current file. \n";
+ helpString += "The cluster.fragments command outputs a new fasta and name or count file.\n";
+ helpString += "The cluster.fragments command parameters are fasta, name, count, diffs and percent. The fasta parameter is required, unless you have a valid current file. \n";
helpString += "The names parameter allows you to give a list of seqs that are identical. This file is 2 columns, first column is name or representative sequence, second column is a list of its identical sequences separated by commas.\n";
helpString += "The diffs parameter allows you to set the number of differences allowed, default=0. \n";
helpString += "The percent parameter allows you to set percentage of differences allowed, default=0. percent=2 means if the number of difference is less than or equal to two percent of the length of the fragment, then cluster.\n";
}
}
//**********************************************************************************************************************
+string ClusterFragmentsCommand::getOutputPattern(string type) {
+    // Returns the output filename pattern for the requested output type.
+    //   type: one of "fasta", "name" or "count".
+    // For any other type an error message is written through m->mothurOut,
+    // m->control_pressed is set to true, and an empty pattern is returned.
+    // Any std::exception raised here is reported via m->errorOut and the
+    // process exits with status 1.
+    try {
+        if (type == "fasta") { return "[filename],fragclust.fasta"; }
+        if (type == "name")  { return "[filename],fragclust.names"; }
+        if (type == "count") { return "[filename],fragclust.count_table"; }
+
+        // Unrecognized output type: report it and raise the control_pressed flag.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ClusterFragmentsCommand", "getOutputPattern");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
ClusterFragmentsCommand::ClusterFragmentsCommand(){
try {
abort = true; calledHelp = true;
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
outputTypes["name"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "ClusterFragmentsCommand", "ClusterFragmentsCommand");
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true; }
+ else if(option == "citation") { citation(); abort = true; calledHelp = true;}
else {
vector<string> myArray = setParameters();
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
outputTypes["name"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["name"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a count file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
//check for required parameters
else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
}
else if (fastafile == "not open") { fastafile = ""; abort = true; }
+ else { m->setFastaFile(fastafile); }
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(fastafile); }
// ...at some point should added some additional type checking...
namefile = validParameter.validFile(parameters, "name", true);
if (namefile == "not found") { namefile = ""; }
- else if (namefile == "not open") { abort = true; }
- else { readNameFile(); }
+ else if (namefile == "not open") { namefile = ""; abort = true; }
+ else { readNameFile(); m->setNameFile(namefile); }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { abort = true; countfile = ""; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { ct.readTable(countfile, true); m->setCountTableFile(countfile); }
+
+ if ((countfile != "") && (namefile != "")) { m->mothurOut("When executing a cluster.fragments command you must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
string temp;
temp = validParameter.validFile(parameters, "diffs", false); if (temp == "not found"){ temp = "0"; }
- convert(temp, diffs);
+ m->mothurConvert(temp, diffs);
temp = validParameter.validFile(parameters, "percent", false); if (temp == "not found"){ temp = "0"; }
- convert(temp, percent);
+ m->mothurConvert(temp, percent);
+
+ if (countfile == "") {
+ if (namefile == "") {
+ vector<string> files; files.push_back(fastafile);
+ parser.getNameFile(files);
+ }
+ }
}
string jBases = alignSeqs[j].seq.getUnaligned();
if (isFragment(iBases, jBases)) {
- //merge
- alignSeqs[i].names += ',' + alignSeqs[j].names;
- alignSeqs[i].numIdentical += alignSeqs[j].numIdentical;
-
+ if (countfile != "") {
+ ct.mergeCounts(alignSeqs[i].names, alignSeqs[j].names);
+ }else {
+ //merge
+ alignSeqs[i].names += ',' + alignSeqs[j].names;
+ alignSeqs[i].numIdentical += alignSeqs[j].numIdentical;
+ }
alignSeqs[j].active = 0;
alignSeqs[j].numIdentical = 0;
count++;
alignSeqs[i].active = 0;
}//end if active i
- if(i % 100 == 0) { m->mothurOut(toString(i) + "\t" + toString(numSeqs - count) + "\t" + toString(count)); m->mothurOutEndLine(); }
+ if(i % 100 == 0) { m->mothurOutJustToScreen(toString(i) + "\t" + toString(numSeqs - count) + "\t" + toString(count)+"\n"); }
}
- if(numSeqs % 100 != 0) { m->mothurOut(toString(numSeqs) + "\t" + toString(numSeqs - count) + "\t" + toString(count)); m->mothurOutEndLine(); }
+ if(numSeqs % 100 != 0) { m->mothurOutJustToScreen(toString(numSeqs) + "\t" + toString(numSeqs - count) + "\t" + toString(count)+"\n"); }
string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile));
-
- string newFastaFile = fileroot + "fragclust.fasta";
- string newNamesFile = fileroot + "names";
+ map<string, string> variables;
+ variables["[filename]"] = fileroot;
+ string newFastaFile = getOutputFileName("fasta", variables);
+ string newNamesFile = getOutputFileName("name", variables);
+ if (countfile != "") { newNamesFile = getOutputFileName("count", variables); }
if (m->control_pressed) { return 0; }
m->mothurOut("It took " + toString(time(NULL) - start) + " secs to cluster " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
- if (m->control_pressed) { remove(newFastaFile.c_str()); remove(newNamesFile.c_str()); return 0; }
+ if (m->control_pressed) { m->mothurRemove(newFastaFile); m->mothurRemove(newNamesFile); return 0; }
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
if (itTypes != outputTypes.end()) {
if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
}
+
+ itTypes = outputTypes.find("count");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+ }
return 0;
else{
seqRNode tempNode(itSize->second, seq, names[seq.getName()], seq.getUnaligned().length());
alignSeqs.push_back(tempNode);
- }
+ }
+ }else if(countfile != "") {
+ seqRNode tempNode(ct.getNumSeqs(seq.getName()), seq, seq.getName(), seq.getUnaligned().length());
+ alignSeqs.push_back(tempNode);
}else { //no names file, you are identical to yourself
seqRNode tempNode(1, seq, seq.getName(), seq.getUnaligned().length());
alignSeqs.push_back(tempNode);
ofstream outNames;
m->openOutputFile(newfasta, outFasta);
- m->openOutputFile(newname, outNames);
+ if (countfile == "") { m->openOutputFile(newname, outNames); }
for (int i = 0; i < alignSeqs.size(); i++) {
if (alignSeqs[i].numIdentical != 0) {
alignSeqs[i].seq.printSequence(outFasta);
- outNames << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl;
+ if (countfile == "") { outNames << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl; }
}
}
outFasta.close();
- outNames.close();
+ if (countfile == "") { outNames.close(); }
+ else { ct.printTable(newname); }
}
catch(exception& e) {
m->errorOut(e, "ClusterFragmentsCommand", "printData");
exit(1);
}
}
-
/**************************************************************************************************/