names.push_back(temp.getName());
database->addSequence(temp);
}
- database->generateDB();
+ if ((method == "kmer") && (!shortcuts)) {;} //don't print
+ else {database->generateDB(); }
}else if ((method == "kmer") && (!needToGenerate)) {
ifstream kmerFileTest(kmerDBName.c_str());
database->readKmerDB(kmerFileTest);
}
fastaFile.close();
- database->generateDB();
+ if ((method == "kmer") && (!shortcuts)) {;} //don't print
+ else {database->generateDB(); }
}else if ((method == "kmer") && (!needToGenerate)) {
ifstream kmerFileTest(kmerDBName.c_str());
}
}
/************************************************************/
+//returns the names of all unique sequences in file mapped to their seqCounts
+map<string, int> CountTable::getNameMap() {
+ try {
+ map<string, int> names;
+ for (map<string, int>::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) {
+ names[it->first] = totals[it->second];
+ }
+
+ return names;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountTable", "getNameMap");
+ exit(1);
+ }
+}
+/************************************************************/
//returns the names of all unique sequences in file
vector<string> CountTable::getNamesOfSeqs(string group) {
try {
vector<string> getNamesOfSeqs(string);
int mergeCounts(string, string); //combines counts for 2 seqs, saving under the first name passed in.
ListVector getListVector();
+ map<string, int> getNameMap();
private:
string filename;
else if (type == "taxonomy") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "name") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "group") { outputFileName = "pick" + m->getExtension(inputName); }
- else if (type == "count") { outputFileName = "pick" + m->getExtension(inputName); }
+ else if (type == "count") { outputFileName = "pick.count.table"; }
else if (type == "list") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "shared") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "design") { outputFileName = "pick" + m->getExtension(inputName); }
vector<string> GetLineageCommand::setParameters(){
try {
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
- CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pgroup);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none",false,false); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none",false,false); parameters.push_back(pgroup);
CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,true); parameters.push_back(ptaxonomy);
CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
string GetLineageCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The get.lineage command reads a taxonomy file and any of the following file types: fasta, name, group, list or alignreport file.\n";
+ helpString += "The get.lineage command reads a taxonomy file and any of the following file types: fasta, name, group, count, list or alignreport file.\n";
helpString += "It outputs a file containing only the sequences from the taxonomy file that are from the taxon requested.\n";
- helpString += "The get.lineage command parameters are taxon, fasta, name, group, list, taxonomy, alignreport and dups. You must provide taxonomy unless you have a valid current taxonomy file.\n";
+ helpString += "The get.lineage command parameters are taxon, fasta, name, group, count, list, taxonomy, alignreport and dups. You must provide taxonomy unless you have a valid current taxonomy file.\n";
helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
helpString += "The taxon parameter allows you to select the taxons you would like to get and is required.\n";
helpString += "You may enter your taxons with confidence scores, doing so will get only those sequences that belong to the taxonomy and whose cofidence scores is above the scores you give.\n";
if (type == "fasta") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "taxonomy") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "name") { outputFileName = "pick" + m->getExtension(inputName); }
+ else if (type == "count") { outputFileName = "pick.count.table"; }
else if (type == "group") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "list") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "alignreport") { outputFileName = "pick.align.report"; }
outputTypes["group"] = tempOutNames;
outputTypes["alignreport"] = tempOutNames;
outputTypes["list"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "GetLineageCommand", "GetLineageCommand");
outputTypes["group"] = tempOutNames;
outputTypes["alignreport"] = tempOutNames;
outputTypes["list"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
else { temp = "false"; usedDups = ""; }
}
dups = m->isTrue(temp);
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { countfile = ""; abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if ((namefile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+ }
+
+ if ((groupfile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+ }
taxons = validParameter.validFile(parameters, "taxon", false);
if (taxons == "not found") { taxons = ""; m->mothurOut("No taxons given, please correct."); m->mothurOutEndLine(); abort = true; }
}
m->splitAtChar(taxons, listOfTaxons, '-');
- if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
+ if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (countfile == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
- if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
- vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
- parser.getNameFile(files);
- }
+ if (countfile == "") {
+ if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+ vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+ parser.getNameFile(files);
+ }
+ }
}
}
if (abort == true) { if (calledHelp) { return 0; } return 2; }
if (m->control_pressed) { return 0; }
+
+ if (countfile != "") {
+ if ((fastafile != "") || (listfile != "") || (taxfile != "")) {
+ m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
+ }
+ }
//read through the correct file and output lines you want to keep
if (taxfile != "") { readTax(); } //fills the set of names to get
if (namefile != "") { readName(); }
if (fastafile != "") { readFasta(); }
+ if (countfile != "") { readCount(); }
if (groupfile != "") { readGroup(); }
if (alignfile != "") { readAlign(); }
if (listfile != "") { readList(); }
itTypes = outputTypes.find("taxonomy");
if (itTypes != outputTypes.end()) {
if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
- }
+ }
+
+ itTypes = outputTypes.find("count");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+ }
}
return 0;
in.close();
out.close();
- if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine(); }
+ if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine(); }
outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
return 0;
}
}
//**********************************************************************************************************************
+int GetLineageCommand::readCount(){
+ try {
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(countfile); }
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
+
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+
+ ifstream in;
+ m->openInputFile(countfile, in);
+
+ bool wroteSomething = false;
+
+ string headers = m->getline(in); m->gobble(in);
+ out << headers << endl;
+
+ string name, rest; int thisTotal;
+ while (!in.eof()) {
+
+ if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
+
+ in >> name; m->gobble(in);
+ in >> thisTotal; m->gobble(in);
+ rest = m->getline(in); m->gobble(in);
+ if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
+
+ if (names.count(name) != 0) {
+ out << name << '\t' << thisTotal << '\t' << rest << endl;
+ wroteSomething = true;
+ }
+ }
+ in.close();
+ out.close();
+
+ if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine(); }
+ outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetLineageCommand", "readCount");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
int GetLineageCommand::readList(){
try {
string thisOutputDir = outputDir;
in.close();
out.close();
- if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine(); }
+ if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine(); }
outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
return 0;
in.close();
out.close();
- if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine(); }
+ if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine(); }
outputNames.push_back(outputFileName); outputTypes["name"].push_back(outputFileName);
return 0;
in.close();
out.close();
- if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine(); }
+ if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine(); }
outputNames.push_back(outputFileName); outputTypes["group"].push_back(outputFileName);
return 0;
in.close();
out.close();
- if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine(); }
+ if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine(); }
outputNames.push_back(outputFileName); outputTypes["alignreport"].push_back(outputFileName);
return 0;
private:
set<string> names;
vector<string> outputNames, listOfTaxons;
- string fastafile, namefile, groupfile, alignfile, listfile, taxfile, outputDir, taxons;
+ string fastafile, namefile, groupfile, alignfile, countfile, listfile, taxfile, outputDir, taxons;
bool abort, dups;
int readFasta();
int readName();
+ int readCount();
int readGroup();
int readAlign();
int readList();
vector<string> GetSeqsCommand::setParameters(){
try {
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
- CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pgroup);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none",false,false); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none",false,false); parameters.push_back(pgroup);
CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
string GetSeqsCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, list, taxonomy, quality or alignreport file.\n";
+ helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, count, list, taxonomy, quality or alignreport file.\n";
helpString += "It outputs a file containing only the sequences in the .accnos file.\n";
helpString += "The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups. You must provide accnos unless you have a valid current accnos file, and at least one of the other parameters.\n";
helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
outputTypes["alignreport"] = tempOutNames;
outputTypes["list"] = tempOutNames;
outputTypes["qfile"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
outputTypes["accnosreport"] = tempOutNames;
}
catch(exception& e) {
if (type == "fasta") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "taxonomy") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "name") { outputFileName = "pick" + m->getExtension(inputName); }
+ else if (type == "count") { outputFileName = "pick.count.table"; }
else if (type == "group") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "list") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "qfile") { outputFileName = "pick" + m->getExtension(inputName); }
outputTypes["list"] = tempOutNames;
outputTypes["qfile"] = tempOutNames;
outputTypes["accnosreport"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["qfile"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
if (accnosfile2 == "not open") { abort = true; }
else if (accnosfile2 == "not found") { accnosfile2 = ""; }
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { countfile = ""; abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if ((namefile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+ }
+
+ if ((groupfile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+ }
+
string usedDups = "true";
string temp = validParameter.validFile(parameters, "dups", false); if (temp == "not found") { temp = "true"; usedDups = ""; }
dups = m->isTrue(temp);
- if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy, quality or listfile."); m->mothurOutEndLine(); abort = true; }
-
- if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
- vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
- parser.getNameFile(files);
- }
+ if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == "") && (countfile == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy, quality or listfile."); m->mothurOutEndLine(); abort = true; }
+
+ if (countfile == "") {
+ if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+ vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+ parser.getNameFile(files);
+ }
+ }
}
}
names = m->readAccnos(accnosfile);
if (m->control_pressed) { return 0; }
+
+ if (countfile != "") {
+ if ((fastafile != "") || (listfile != "") || (taxfile != "")) {
+ m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
+ }
+ }
//read through the correct file and output lines you want to keep
if (namefile != "") { readName(); }
if (fastafile != "") { readFasta(); }
if (groupfile != "") { readGroup(); }
+ if (countfile != "") { readCount(); }
if (alignfile != "") { readAlign(); }
if (listfile != "") { readList(); }
if (taxfile != "") { readTax(); }
if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
}
+ itTypes = outputTypes.find("count");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+ }
}
return 0;
exit(1);
}
}
+//**********************************************************************************************************************
+int GetSeqsCommand::readCount(){
+ try {
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(countfile); }
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
+
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+
+ ifstream in;
+ m->openInputFile(countfile, in);
+
+ bool wroteSomething = false;
+ int selectedCount = 0;
+
+ string headers = m->getline(in); m->gobble(in);
+ out << headers << endl;
+
+ string name, rest; int thisTotal;
+ while (!in.eof()) {
+
+ if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
+
+ in >> name; m->gobble(in);
+ in >> thisTotal; m->gobble(in);
+ rest = m->getline(in); m->gobble(in);
+ if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
+
+ if (names.count(name) != 0) {
+ out << name << '\t' << thisTotal << '\t' << rest << endl;
+ wroteSomething = true;
+ selectedCount+= thisTotal;
+ }
+ }
+ in.close();
+ out.close();
+
+ if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine(); }
+ outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
+
+ m->mothurOut("Selected " + toString(selectedCount) + " sequences from your count file."); m->mothurOutEndLine();
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetSeqsCommand", "readCount");
+ exit(1);
+ }
+}
+
//**********************************************************************************************************************
int GetSeqsCommand::readList(){
try {
private:
set<string> names;
vector<string> outputNames;
- string accnosfile, accnosfile2, fastafile, namefile, groupfile, alignfile, listfile, taxfile, qualfile, outputDir;
+ string accnosfile, accnosfile2, fastafile, namefile, countfile, groupfile, alignfile, listfile, taxfile, qualfile, outputDir;
bool abort, dups;
//for debug
int readFasta();
int readName();
int readGroup();
+ int readCount();
int readAlign();
int readList();
int readTax();
try {
CommandParameter pshared("shared", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "none",false,false); parameters.push_back(pshared);
CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "none",false,false); parameters.push_back(pphylip);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName",false,false); parameters.push_back(pname);
+ CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "ColumnName",false,false); parameters.push_back(pcolumn);
CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
try {
string helpString = "";
ValidCalculators validCalculator;
- helpString += "The heatmap.sim command parameters are shared, phylip, column, name, groups, calc, fontsize and label. shared or phylip or column and name are required unless valid current files exist.\n";
- helpString += "There are two ways to use the heatmap.sim command. The first is with the read.otu command. \n";
- helpString += "With the read.otu command you may use the groups, label and calc parameters. \n";
+ helpString += "The heatmap.sim command parameters are shared, phylip, column, name, count, groups, calc, fontsize and label. shared or phylip or column and name are required unless valid current files exist.\n";
+ helpString += "There are two ways to use the heatmap.sim command. The first is with a shared file, and you may use the groups, label and calc parameter. \n";
helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included in your heatmap.\n";
helpString += "The group names are separated by dashes. The label parameter allows you to select what distance levels you would like a heatmap created for, and is also separated by dashes.\n";
helpString += "The fontsize parameter allows you to adjust the font size of the picture created, default=24.\n";
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["shared"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
//required parameters
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { abort = true; countfile = ""; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
//error checking on files
if ((sharedfile == "") && ((phylipfile == "") && (columnfile == ""))) {
namefile = m->getNameFile();
if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
else {
- m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine();
- abort = true;
+ countfile = m->getCountTableFile();
+ if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+ else {
+ m->mothurOut("You need to provide a name or count file if you are going to use the column format."); m->mothurOutEndLine();
+ abort = true;
+ }
}
}
}
in.close();
}else {
//read names file
- NameAssignment* nameMap = new NameAssignment(namefile);
- nameMap->readMap();
-
- //put names in order in vector
- for (int i = 0; i < nameMap->size(); i++) {
- names.push_back(nameMap->get(i));
- }
-
- //resize matrix
- matrix.resize(nameMap->size());
- for (int i = 0; i < nameMap->size(); i++) {
- matrix[i].resize(nameMap->size(), 0.0);
- }
-
+ NameAssignment* nameMap;
+ CountTable ct;
+ if (namefile != "") {
+ nameMap = new NameAssignment(namefile);
+ nameMap->readMap();
+
+ //put names in order in vector
+ for (int i = 0; i < nameMap->size(); i++) {
+ names.push_back(nameMap->get(i));
+ }
+ }else if (countfile != "") {
+ nameMap = NULL;
+ ct.readTable(countfile);
+ names = ct.getNamesOfSeqs();
+ }
+
+ //resize matrix
+ matrix.resize(names.size());
+ for (int i = 0; i < names.size(); i++) {
+ matrix[i].resize(names.size(), 0.0);
+ }
+
//read column file
string first, second;
double dist;
if (m->control_pressed) { return 0; }
- map<string, int>::iterator itA = nameMap->find(first);
- map<string, int>::iterator itB = nameMap->find(second);
-
- if(itA == nameMap->end()){ m->mothurOut("AAError: Sequence '" + first + "' was not found in the names file, please correct\n"); exit(1); }
- if(itB == nameMap->end()){ m->mothurOut("ABError: Sequence '" + second + "' was not found in the names file, please correct\n"); exit(1); }
-
- //save distance
- matrix[itA->second][itB->second] = dist;
- matrix[itB->second][itA->second] = dist;
+ if (namefile != "") {
+ map<string, int>::iterator itA = nameMap->find(first);
+ map<string, int>::iterator itB = nameMap->find(second);
+
+ if(itA == nameMap->end()){ m->mothurOut("AAError: Sequence '" + first + "' was not found in the names file, please correct\n"); exit(1); }
+ if(itB == nameMap->end()){ m->mothurOut("ABError: Sequence '" + second + "' was not found in the names file, please correct\n"); exit(1); }
+
+ //save distance
+ matrix[itA->second][itB->second] = dist;
+ matrix[itB->second][itA->second] = dist;
+ }else if (countfile != "") {
+ int itA = ct.get(first);
+ int itB = ct.get(second);
+ matrix[itA][itB] = dist;
+ matrix[itB][itA] = dist;
+ }
}
in.close();
- delete nameMap;
+ if (namefile != "") { delete nameMap; }
}
OptionParser* parser;
bool abort, allLines;
set<string> labels; //holds labels to be used
- string format, groups, label, calc, sharedfile, phylipfile, columnfile, namefile, outputDir, inputfile;
+ string format, groups, label, calc, sharedfile, phylipfile, columnfile, countfile, namefile, outputDir, inputfile;
vector<string> Estimators, Groups, outputNames;
int fontsize;
#include "listseqscommand.h"
#include "sequence.hpp"
#include "listvector.hpp"
+#include "counttable.h"
//**********************************************************************************************************************
try {
CommandParameter pfasta("fasta", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(pfasta);
CommandParameter pname("name", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(pcount);
CommandParameter pgroup("group", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(pgroup);
CommandParameter plist("list", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(plist);
CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
string ListSeqsCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The list.seqs command reads a fasta, name, group, list, taxonomy or alignreport file and outputs a .accnos file containing sequence names.\n";
- helpString += "The list.seqs command parameters are fasta, name, group, list, taxonomy and alignreport. You must provide one of these parameters.\n";
+ helpString += "The list.seqs command reads a fasta, name, group, count, list, taxonomy or alignreport file and outputs a .accnos file containing sequence names.\n";
+ helpString += "The list.seqs command parameters are fasta, name, group, count, list, taxonomy and alignreport. You must provide one of these parameters.\n";
helpString += "The list.seqs command should be in the following format: list.seqs(fasta=yourFasta).\n";
helpString += "Example list.seqs(fasta=amazon.fasta).\n";
helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
//check for required parameters
if (taxfile == "not open") { abort = true; }
else if (taxfile == "not found") { taxfile = ""; }
else { m->setTaxonomyFile(taxfile); }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
- if ((fastafile == "") && (namefile == "") && (listfile == "") && (groupfile == "") && (alignfile == "") && (taxfile == "")) { m->mothurOut("You must provide a file."); m->mothurOutEndLine(); abort = true; }
+ if ((countfile == "") && (fastafile == "") && (namefile == "") && (listfile == "") && (groupfile == "") && (alignfile == "") && (taxfile == "")) { m->mothurOut("You must provide a file."); m->mothurOutEndLine(); abort = true; }
int okay = 1;
if (outputDir != "") { okay++; }
else if (alignfile != "") { inputFileName = alignfile; readAlign(); }
else if (listfile != "") { inputFileName = listfile; readList(); }
else if (taxfile != "") { inputFileName = taxfile; readTax(); }
+ else if (countfile != "") { inputFileName = countfile; readCount(); }
if (m->control_pressed) { outputTypes.clear(); return 0; }
Sequence currSeq(in);
name = currSeq.getName();
- //if (lastName == "") { lastName = name; }
- //if (name != lastName) { count = 1; }
- // lastName = name;
-
- //Sequence newSeq(name+"_"+toString(count), currSeq.getAligned());
- //newSeq.printSequence(out);
if (name != "") { names.push_back(name); }
exit(1);
}
}
-
+//**********************************************************************************************************************
+int ListSeqsCommand::readCount(){
+ try {
+ CountTable ct;
+ ct.readTable(countfile);
+
+ if (m->control_pressed) { return 0; }
+
+ names = ct.getNamesOfSeqs();
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ListSeqsCommand", "readCount");
+ exit(1);
+ }
+}
//**********************************************************************************************************************
//alignreport file has a column header line then all other lines contain 16 columns. we just want the first column since that contains the name
int ListSeqsCommand::readAlign(){
private:
vector<string> names, outputNames;
- string fastafile, namefile, groupfile, alignfile, inputFileName, outputDir, listfile, taxfile;
+ string fastafile, namefile, groupfile, countfile, alignfile, inputFileName, outputDir, listfile, taxfile;
bool abort;
int readFasta();
int readAlign();
int readList();
int readTax();
+ int readCount();
};
MOTHUR_FILES="\"Enter_your_default_path_here\""
RELEASE_DATE = "\"7/9/2012\""
VERSION = "\"1.26.0\""
-FORTAN_COMPILER = gfortran
+FORTAN_COMPILER = /usr/local/gfortran/bin/gfortran
FORTRAN_FLAGS =
# Optimize to level 3:
else if (type == "taxonomy") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "name") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "group") { outputFileName = "pick" + m->getExtension(inputName); }
- else if (type == "count") { outputFileName = "pick" + m->getExtension(inputName); }
+ else if (type == "count") { outputFileName = "pick.count.table"; }
else if (type == "list") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "shared") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "design") { outputFileName = "pick" + m->getExtension(inputName); }
vector<string> RemoveLineageCommand::setParameters(){
try {
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
- CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pgroup);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none",false,false); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none",false,false); parameters.push_back(pgroup);
CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,true); parameters.push_back(ptaxonomy);
CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
string RemoveLineageCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The remove.lineage command reads a taxonomy file and any of the following file types: fasta, name, group, list or alignreport file.\n";
+ helpString += "The remove.lineage command reads a taxonomy file and any of the following file types: fasta, name, group, count, list or alignreport file.\n";
helpString += "It outputs a file containing only the sequences from the taxonomy file that are not from the taxon you requested to be removed.\n";
- helpString += "The remove.lineage command parameters are taxon, fasta, name, group, list, taxonomy, alignreport and dups. You must provide taxonomy unless you have a valid current taxonomy file.\n";
+ helpString += "The remove.lineage command parameters are taxon, fasta, name, group, list, taxonomy, count, alignreport and dups. You must provide taxonomy unless you have a valid current taxonomy file.\n";
helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
helpString += "The taxon parameter allows you to select the taxons you would like to remove, and is required.\n";
helpString += "You may enter your taxons with confidence scores, doing so will remove only those sequences that belong to the taxonomy and whose cofidence scores fall below the scores you give.\n";
else if (type == "name") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "group") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "list") { outputFileName = "pick" + m->getExtension(inputName); }
+ else if (type == "count") { outputFileName = "pick.count.table"; }
else if (type == "alignreport") { outputFileName = "pick.align.report"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
}
outputTypes["group"] = tempOutNames;
outputTypes["alignreport"] = tempOutNames;
outputTypes["list"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "RemoveLineageCommand", "RemoveLineageCommand");
outputTypes["group"] = tempOutNames;
outputTypes["alignreport"] = tempOutNames;
outputTypes["list"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
else { m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
}else { m->setTaxonomyFile(taxfile); }
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { countfile = ""; abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if ((namefile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+ }
+
+ if ((groupfile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+ }
+
string usedDups = "true";
string temp = validParameter.validFile(parameters, "dups", false);
if (temp == "not found") {
}
m->splitAtChar(taxons, listOfTaxons, '-');
- if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
+ if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (countfile == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
if ((usedDups != "") && (namefile == "")) { m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine(); abort = true; }
- if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
- vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
- parser.getNameFile(files);
- }
+ if (countfile == "") {
+ if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+ vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+ parser.getNameFile(files);
+ }
+ }
}
if (abort == true) { if (calledHelp) { return 0; } return 2; }
if (m->control_pressed) { return 0; }
+
+ if (countfile != "") {
+ if ((fastafile != "") || (listfile != "") || (taxfile != "")) {
+ m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
+ }
+ }
//read through the correct file and output lines you want to keep
if (taxfile != "") { readTax(); } //fills the set of names to remove
if (groupfile != "") { readGroup(); }
if (alignfile != "") { readAlign(); }
if (listfile != "") { readList(); }
+ if (countfile != "") { readCount(); }
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
if (itTypes != outputTypes.end()) {
if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
}
+
+ itTypes = outputTypes.find("count");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+ }
}
return 0;
exit(1);
}
}
-
+//**********************************************************************************************************************
+int RemoveLineageCommand::readCount(){
+ try {
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(countfile); }
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
+
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+
+ ifstream in;
+ m->openInputFile(countfile, in);
+
+ bool wroteSomething = false;
+
+ string headers = m->getline(in); m->gobble(in);
+ out << headers << endl;
+
+ string name, rest; int thisTotal;
+ while (!in.eof()) {
+
+ if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
+
+ in >> name; m->gobble(in);
+ in >> thisTotal; m->gobble(in);
+ rest = m->getline(in); m->gobble(in);
+ if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
+
+ if (names.count(name) == 0) {
+ out << name << '\t' << thisTotal << '\t' << rest << endl;
+ wroteSomething = true;
+ }
+ }
+ in.close();
+ out.close();
+
+ if (wroteSomething == false) { m->mothurOut("Your group file contains only sequences from " + taxons + "."); m->mothurOutEndLine(); }
+ outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "RemoveLineageCommand", "readCount");
+ exit(1);
+ }
+}
//**********************************************************************************************************************
int RemoveLineageCommand::readGroup(){
try {
private:
set<string> names;
vector<string> outputNames, listOfTaxons;
- string fastafile, namefile, groupfile, alignfile, listfile, taxfile, outputDir, taxons;
+ string fastafile, namefile, groupfile, alignfile, listfile, countfile, taxfile, outputDir, taxons;
bool abort, dups;
int readFasta();
int readName();
int readGroup();
+ int readCount();
int readAlign();
int readList();
int readTax();
vector<string> RemoveSeqsCommand::setParameters(){
try {
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
- CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pgroup);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none",false,false); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none",false,false); parameters.push_back(pgroup);
CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
string RemoveSeqsCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, list, taxonomy, quality or alignreport file.\n";
+ helpString += "The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, count, list, taxonomy, quality or alignreport file.\n";
helpString += "It outputs a file containing the sequences NOT in the .accnos file.\n";
- helpString += "The remove.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups. You must provide accnos and at least one of the file parameters.\n";
+ helpString += "The remove.seqs command parameters are accnos, fasta, name, group, count, list, taxonomy, qfile, alignreport and dups. You must provide accnos and at least one of the file parameters.\n";
helpString += "The dups parameter allows you to remove the entire line from a name file if you remove any name from the line. default=true. \n";
helpString += "The remove.seqs command should be in the following format: remove.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
helpString += "Example remove.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
else if (type == "list") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "qfile") { outputFileName = "pick" + m->getExtension(inputName); }
else if (type == "alignreport") { outputFileName = "pick.align.report"; }
+ else if (type == "count") { outputFileName = "pick.count.table"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
}
return outputFileName;
outputTypes["alignreport"] = tempOutNames;
outputTypes["list"] = tempOutNames;
outputTypes["qfile"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "RemoveSeqsCommand", "RemoveSeqsCommand");
outputTypes["alignreport"] = tempOutNames;
outputTypes["list"] = tempOutNames;
outputTypes["qfile"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["qfile"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
else { temp = "false"; usedDups = ""; }
}
dups = m->isTrue(temp);
-
- if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "")) { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, quality, alignreport or list."); m->mothurOutEndLine(); abort = true; }
-
- if ((fastafile != "") && (namefile == "")) {
- vector<string> files; files.push_back(fastafile);
- parser.getNameFile(files);
- }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { countfile = ""; abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if ((namefile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+ }
+
+ if ((groupfile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+ }
+
+ if ((countfile == "") && (fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "")) { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, quality, alignreport or list."); m->mothurOutEndLine(); abort = true; }
+
+ if (countfile == "") {
+ if ((fastafile != "") && (namefile == "")) {
+ vector<string> files; files.push_back(fastafile);
+ parser.getNameFile(files);
+ }
+ }
}
}
names = m->readAccnos(accnosfile);
if (m->control_pressed) { return 0; }
+
+ if (countfile != "") {
+ if ((fastafile != "") || (listfile != "") || (taxfile != "")) {
+ m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
+ }
+ }
//read through the correct file and output lines you want to keep
if (namefile != "") { readName(); }
if (listfile != "") { readList(); }
if (taxfile != "") { readTax(); }
if (qualfile != "") { readQual(); }
+ if (countfile != "") { readCount(); }
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
itTypes = outputTypes.find("qfile");
if (itTypes != outputTypes.end()) {
if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
- }
+ }
+
+ itTypes = outputTypes.find("count");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+ }
}
return 0;
}
}
//**********************************************************************************************************************
+int RemoveSeqsCommand::readCount(){
+ try {
+
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(countfile); }
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
+
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+
+ ifstream in;
+ m->openInputFile(countfile, in);
+
+ bool wroteSomething = false;
+ int removedCount = 0;
+
+ string headers = m->getline(in); m->gobble(in);
+ out << headers << endl;
+
+ string name, rest; int thisTotal;
+ while (!in.eof()) {
+
+ if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
+
+ in >> name; m->gobble(in);
+ in >> thisTotal; m->gobble(in);
+ rest = m->getline(in); m->gobble(in);
+ if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
+
+ if (names.count(name) == 0) {
+ out << name << '\t' << thisTotal << '\t' << rest << endl;
+ wroteSomething = true;
+ }else { removedCount += thisTotal; }
+ }
+ in.close();
+ out.close();
+
+ if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
+ outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
+
+ m->mothurOut("Removed " + toString(removedCount) + " sequences from your count file."); m->mothurOutEndLine();
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "RemoveSeqsCommand", "readCount");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
int RemoveSeqsCommand::readList(){
try {
string thisOutputDir = outputDir;
private:
set<string> names;
- string accnosfile, fastafile, namefile, groupfile, alignfile, listfile, taxfile, qualfile, outputDir;
+ string accnosfile, fastafile, namefile, groupfile, countfile, alignfile, listfile, taxfile, qualfile, outputDir;
bool abort, dups;
vector<string> outputNames;
int readFasta();
int readName();
int readGroup();
+ int readCount();
int readAlign();
int readList();
int readTax();
#include "secondarystructurecommand.h"
#include "sequence.hpp"
+#include "counttable.h"
//**********************************************************************************************************************
vector<string> AlignCheckCommand::setParameters(){
try {
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
CommandParameter pmap("map", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pmap);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
string AlignCheckCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The align.check command reads a fasta file and map file as well as an optional name file.\n";
+ helpString += "The align.check command reads a fasta file and map file as well as an optional name or count file.\n";
helpString += "It outputs a file containing the secondary structure matches in the .align.check file.\n";
helpString += "The align.check command parameters are fasta and map, both are required.\n";
helpString += "The align.check command should be in the following format: align.check(fasta=yourFasta, map=yourMap).\n";
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["name"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
//check for required parameters
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { abort = true; countfile = ""; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
+
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
outputDir = "";
outputDir += m->hasPath(fastafile); //if user entered a file with a path then preserve it
}
- if ((namefile == "") && (fastafile != "")){
- vector<string> files; files.push_back(fastafile);
- parser.getNameFile(files);
- }
+ if (countfile == "") {
+ if ((namefile == "") && (fastafile != "")){
+ vector<string> files; files.push_back(fastafile);
+ parser.getNameFile(files);
+ }
+ }
}
}
readMap();
if (namefile != "") { nameMap = m->readNames(namefile); }
+ else if (countfile != "") {
+ CountTable ct;
+ ct.readTable(countfile);
+ nameMap = ct.getNameMap();
+ }
if (m->control_pressed) { return 0; }
if (haderror == 1) { m->control_pressed = true; break; }
int num = 1;
- if (namefile != "") {
+ if ((namefile != "") || (countfile != "")) {
//make sure this sequence is in the namefile, else error
map<string, int>::iterator it = nameMap.find(seq.getName());
m->mothurOut("75%-tile:\t" + toString(pound[ptile75]) + "\t" + toString(dash[ptile75]) + "\t" + toString(plus[ptile75]) + "\t" + toString(equal[ptile75]) + "\t" + toString(loop[ptile75]) + "\t" + toString(tilde[ptile75]) + "\t" + toString(total[ptile75])); m->mothurOutEndLine();
m->mothurOut("97.5%-tile:\t" + toString(pound[ptile97_5]) + "\t" + toString(dash[ptile97_5]) + "\t" + toString(plus[ptile97_5]) + "\t" + toString(equal[ptile97_5]) + "\t" + toString(loop[ptile97_5]) + "\t" + toString(tilde[ptile97_5]) + "\t" + toString(total[ptile97_5])); m->mothurOutEndLine();
m->mothurOut("Maximum:\t" + toString(pound[ptile100]) + "\t" + toString(dash[ptile100]) + "\t" + toString(plus[ptile100]) + "\t" + toString(equal[ptile100]) + "\t" + toString(loop[ptile100]) + "\t" + toString(tilde[ptile100]) + "\t" + toString(total[ptile100])); m->mothurOutEndLine();
- if (namefile == "") { m->mothurOut("# of Seqs:\t" + toString(count)); m->mothurOutEndLine(); }
+ if ((namefile == "") && (countfile == "")) { m->mothurOut("# of Seqs:\t" + toString(count)); m->mothurOutEndLine(); }
else { m->mothurOut("# of unique seqs:\t" + toString(count)); m->mothurOutEndLine(); m->mothurOut("total # of seqs:\t" + toString(size)); m->mothurOutEndLine(); }
private:
vector<int> structMap;
- string mapfile, fastafile, outputDir, namefile;
+ string mapfile, fastafile, outputDir, namefile, countfile;
bool abort;
int seqLength, haderror;
vector<string> outputNames;
*/
#include "seqsummarycommand.h"
-
+#include "counttable.h"
//**********************************************************************************************************************
vector<string> SeqSummaryCommand::setParameters(){
try {
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
try {
string helpString = "";
helpString += "The summary.seqs command reads a fastafile and summarizes the sequences.\n";
- helpString += "The summary.seqs command parameters are fasta, name and processors, fasta is required, unless you have a valid current fasta file.\n";
+ helpString += "The summary.seqs command parameters are fasta, name, count and processors, fasta is required, unless you have a valid current fasta file.\n";
helpString += "The name parameter allows you to enter a name file associated with your fasta file. \n";
+ helpString += "The count parameter allows you to enter a count file associated with your fasta file. \n";
helpString += "The summary.seqs command should be in the following format: \n";
helpString += "summary.seqs(fasta=yourFastaFile, processors=2) \n";
helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["name"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
//initialize outputTypes
if (namefile == "not open") { namefile = ""; abort = true; }
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { abort = true; countfile = ""; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
m->setProcessors(temp);
m->mothurConvert(temp, processors);
- if (namefile == "") {
- vector<string> files; files.push_back(fastafile);
- parser.getNameFile(files);
- }
-
+ if (countfile == "") {
+ if (namefile == "") {
+ vector<string> files; files.push_back(fastafile);
+ parser.getNameFile(files);
+ }
+ }
}
}
catch(exception& e) {
vector<int> longHomoPolymer;
if (namefile != "") { nameMap = m->readNames(namefile); }
+ else if (countfile != "") {
+ CountTable ct;
+ ct.readTable(countfile);
+ nameMap = ct.getNameMap();
+ }
if (m->control_pressed) { return 0; }
int size = startPosition.size();
//find means
- float meanStartPosition, meanEndPosition, meanSeqLength, meanAmbigBases, meanLongHomoPolymer;
+ double meanStartPosition, meanEndPosition, meanSeqLength, meanAmbigBases, meanLongHomoPolymer;
meanStartPosition = 0; meanEndPosition = 0; meanSeqLength = 0; meanAmbigBases = 0; meanLongHomoPolymer = 0;
for (int i = 0; i < size; i++) {
meanStartPosition += startPosition[i];
meanAmbigBases += ambigBases[i];
meanLongHomoPolymer += longHomoPolymer[i];
}
+
//this is an int divide so the remainder is lost
meanStartPosition /= (float) size; meanEndPosition /= (float) size; meanLongHomoPolymer /= (float) size; meanSeqLength /= (float) size; meanAmbigBases /= (float) size;
m->mothurOut("Maximum:\t" + toString(startPosition[ptile100]) + "\t" + toString(endPosition[ptile100]) + "\t" + toString(seqLength[ptile100]) + "\t" + toString(ambigBases[ptile100]) + "\t" + toString(longHomoPolymer[ptile100]) + "\t" + toString(ptile100+1)); m->mothurOutEndLine();
m->mothurOut("Mean:\t" + toString(meanStartPosition) + "\t" + toString(meanEndPosition) + "\t" + toString(meanSeqLength) + "\t" + toString(meanAmbigBases) + "\t" + toString(meanLongHomoPolymer)); m->mothurOutEndLine();
- if (namefile == "") { m->mothurOut("# of Seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); }
+ if ((namefile == "") && (countfile == "")) { m->mothurOut("# of Seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); }
else { m->mothurOut("# of unique seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); m->mothurOut("total # of seqs:\t" + toString(startPosition.size())); m->mothurOutEndLine(); }
if (m->control_pressed) { m->mothurRemove(summaryFile); return 0; }
if (current.getName() != "") {
int num = 1;
- if (namefile != "") {
+ if ((namefile != "") || (countfile != "")) {
//make sure this sequence is in the namefile, else error
map<string, int>::iterator it = nameMap.find(current.getName());
- if (it == nameMap.end()) { m->mothurOut("[ERROR]: '" + current.getName() + "' is not in your namefile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
+ if (it == nameMap.end()) { m->mothurOut("[ERROR]: '" + current.getName() + "' is not in your name or count file, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
else { num = it->second; }
}
if (current.getName() != "") {
int num = 1;
- if (namefile != "") {
+ if ((namefile != "") || (countfile != "")) {
//make sure this sequence is in the namefile, else error
map<string, int>::iterator it = nameMap.find(current.getName());
- if (it == nameMap.end()) { cout << "[ERROR]: " << current.getName() << " is not in your namefile, please correct." << endl; m->control_pressed = true; }
+ if (it == nameMap.end()) { cout << "[ERROR]: " << current.getName() << " is not in your name or count file, please correct." << endl; m->control_pressed = true; }
else { num = it->second; }
}
vector<seqSumData*> pDataArray;
DWORD dwThreadIdArray[processors-1];
HANDLE hThreadArray[processors-1];
-
+
+ bool hasNameMap = false;
+ if ((namefile !="") || (countfile != "")) { hasNameMap = true; }
+
//Create processor worker threads.
for( int i=0; i<processors-1; i++ ){
string extension = "";
if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
// Allocate memory for thread data.
- seqSumData* tempSum = new seqSumData(filename, (sumFile+extension), m, lines[i]->start, lines[i]->end, namefile, nameMap);
+ seqSumData* tempSum = new seqSumData(filename, (sumFile+extension), m, lines[i]->start, lines[i]->end, hasNameMap, nameMap);
pDataArray.push_back(tempSum);
//MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
void help() { m->mothurOut(getHelpString()); }
private:
bool abort;
- string fastafile, outputDir, namefile;
+ string fastafile, outputDir, namefile, countfile;
int processors;
vector<string> outputNames;
map<string, int> nameMap;
unsigned long long end;
int count;
MothurOut* m;
- string namefile;
+ bool hasNameMap;
map<string, int> nameMap;
seqSumData(){}
- seqSumData(string f, string sf, MothurOut* mout, unsigned long long st, unsigned long long en, string na, map<string, int> nam) {
+ seqSumData(string f, string sf, MothurOut* mout, unsigned long long st, unsigned long long en, bool na, map<string, int> nam) {
filename = f;
sumFile = sf;
m = mout;
start = st;
end = en;
- namefile = na;
+ hasNameMap = na;
nameMap = nam;
count = 0;
}
if (current.getName() != "") {
int num = 1;
- if (pDataArray->namefile != "") {
+ if (pDataArray->hasNameMap){
//make sure this sequence is in the namefile, else error
map<string, int>::iterator it = pDataArray->nameMap.find(current.getName());
- if (it == pDataArray->nameMap.end()) { pDataArray->m->mothurOut("[ERROR]: " + current.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; }
+ if (it == pDataArray->nameMap.end()) { pDataArray->m->mothurOut("[ERROR]: " + current.getName() + " is not in your name or count file, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; }
else { num = it->second; }
}
*/
#include "summaryqualcommand.h"
-
+#include "counttable.h"
//**********************************************************************************************************************
vector<string> SummaryQualCommand::setParameters(){
try {
CommandParameter pqual("qfile", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pqual);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
string SummaryQualCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The summary.qual command reads a quality file and an optional name file, and summarizes the quality information.\n";
- helpString += "The summary.tax command parameters are qfile, name and processors. qfile is required, unless you have a valid current quality file.\n";
+ helpString += "The summary.qual command reads a quality file and an optional name or count file, and summarizes the quality information.\n";
+ helpString += "The summary.tax command parameters are qfile, name, count and processors. qfile is required, unless you have a valid current quality file.\n";
helpString += "The name parameter allows you to enter a name file associated with your quality file. \n";
+ helpString += "The count parameter allows you to enter a count file associated with your quality file. \n";
helpString += "The summary.qual command should be in the following format: \n";
helpString += "summary.qual(qfile=yourQualityFile) \n";
helpString += "Note: No spaces between parameter labels (i.e. qfile), '=' and parameters (i.e.yourQualityFile).\n";
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["name"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
//initialize outputTypes
if (namefile == "not open") { namefile = ""; abort = true; }
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { abort = true; countfile = ""; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
m->setProcessors(temp);
m->mothurConvert(temp, processors);
- if (namefile == "") {
- vector<string> files; files.push_back(qualfile);
- parser.getNameFile(files);
- }
+
+ if (countfile == "") {
+ if (namefile == "") {
+ vector<string> files; files.push_back(qualfile);
+ parser.getNameFile(files);
+ }
+ }
}
}
catch(exception& e) {
if (m->control_pressed) { return 0; }
if (namefile != "") { nameMap = m->readNames(namefile); }
-
+ else if (countfile != "") {
+ CountTable ct;
+ ct.readTable(countfile);
+ nameMap = ct.getNameMap();
+ }
+
vector<unsigned long long> positions;
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
positions = m->divideFile(qualfile, processors);
if (current.getName() != "") {
int num = 1;
- if (namefile != "") {
+ if ((namefile != "") || (countfile != "")) {
//make sure this sequence is in the namefile, else error
map<string, int>::iterator it = nameMap.find(current.getName());
DWORD dwThreadIdArray[processors];
HANDLE hThreadArray[processors];
+ bool hasNameMap = false;
+ if ((namefile !="") || (countfile != "")) { hasNameMap = true; }
+
//Create processor worker threads.
for( int i=0; i<processors; i++ ){
// Allocate memory for thread data.
- seqSumQualData* tempSum = new seqSumQualData(filename, m, lines[i].start, lines[i].end, namefile, nameMap);
+ seqSumQualData* tempSum = new seqSumQualData(filename, m, lines[i].start, lines[i].end, hasNameMap, nameMap);
pDataArray.push_back(tempSum);
processIDS.push_back(i);
if (m->control_pressed) { out.close(); return 0; }
- float average = averageQ[i] / (float) position[i];
+ double average = averageQ[i] / (float) position[i];
out << i << '\t' << position[i] << '\t' << average << '\t';
for (int j = 0; j < 41; j++) {
private:
bool abort;
- string qualfile, outputDir, namefile;
+ string qualfile, outputDir, namefile, countfile;
vector<string> outputNames;
map<string, int> nameMap;
int processors;
vector<int> position;
vector<int> averageQ;
vector< vector<int> > scores;
- string filename, namefile;
+ string filename;
unsigned long long start;
unsigned long long end;
int count;
MothurOut* m;
+ bool hasNameMap;
map<string, int> nameMap;
~seqSumQualData(){}
- seqSumQualData(string f, MothurOut* mout, unsigned long long st, unsigned long long en, string n, map<string, int> nam) {
+ seqSumQualData(string f, MothurOut* mout, unsigned long long st, unsigned long long en, bool n, map<string, int> nam) {
filename = f;
m = mout;
start = st;
end = en;
- namefile = n;
+ hasNameMap = n;
nameMap = nam;
count = 0;
}
if (current.getName() != "") {
int num = 1;
- if (pDataArray->namefile != "") {
+ if (pDataArray->hasNameMap) {
//make sure this sequence is in the namefile, else error
map<string, int>::iterator it = pDataArray->nameMap.find(current.getName());
#include "needlemanoverlap.hpp"
#include "trimoligos.h"
+
//**********************************************************************************************************************
vector<string> TrimSeqsCommand::setParameters(){
try {
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
string helpString = "";
helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
- helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
+ helpString += "The trim.seqs command parameters are fasta, name, count, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
helpString += "The fasta parameter is required.\n";
helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
helpString += "The oligos parameter allows you to provide an oligos file.\n";
helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
+ helpString += "The count parameter allows you to provide a count file with your fasta file.\n";
helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
else if (type == "fasta") { outputFileName = "fasta"; }
else if (type == "group") { outputFileName = "groups"; }
else if (type == "name") { outputFileName = "names"; }
+ else if (type == "count") { outputFileName = "count.table"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
}
return outputFileName;
outputTypes["qfile"] = tempOutNames;
outputTypes["group"] = tempOutNames;
outputTypes["name"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
outputTypes["qfile"] = tempOutNames;
outputTypes["group"] = tempOutNames;
outputTypes["name"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["name"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
if (temp == "not found") { nameFile = ""; }
else if(temp == "not open") { nameFile = ""; abort = true; }
else { nameFile = temp; m->setNameFile(nameFile); }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { abort = true; countfile = ""; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if ((countfile != "") && (nameFile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; }
m->mothurConvert(temp, qThreshold);
abort = true;
}
- if (nameFile == "") {
- vector<string> files; files.push_back(fastaFile);
- parser.getNameFile(files);
- }
+ if (countfile == "") {
+ if (nameFile == "") {
+ vector<string> files; files.push_back(fastaFile);
+ parser.getNameFile(files);
+ }
+ }
}
}
outputTypes["name"].push_back(trimNameFile);
outputTypes["name"].push_back(scrapNameFile);
}
+
+ string trimCountFile = outputDir + m->getRootName(m->getSimpleName(countfile)) + "trim." + getOutputFileNameTag("count");
+ string scrapCountFile = outputDir + m->getRootName(m->getSimpleName(countfile)) + "scrap." + getOutputFileNameTag("count");
+
+ if (countfile != "") {
+ CountTable ct;
+ ct.readTable(countfile);
+ nameCount = ct.getNameMap();
+ outputNames.push_back(trimCountFile);
+ outputNames.push_back(scrapCountFile);
+ outputTypes["count"].push_back(trimCountFile);
+ outputTypes["count"].push_back(scrapCountFile);
+ }
+
if (m->control_pressed) { return 0; }
string outputGroupFileName;
if(oligoFile != ""){
createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
- if (createGroup) {
+ if ((createGroup) && (countfile == "")){
outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + getOutputFileNameTag("group");
outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
}
setLines(fastaFile, qFileName);
if(processors == 1){
- driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
+ driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
}else{
- createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames);
+ createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames);
}
for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
outputNames = outputNames2;
- for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
- ifstream in;
- m->openInputFile(it->first, in);
-
- ofstream out;
- string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + getOutputFileNameTag("group");
- outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
- m->openOutputFile(thisGroupName, out);
-
- while (!in.eof()){
- if (m->control_pressed) { break; }
-
- Sequence currSeq(in); m->gobble(in);
- out << currSeq.getName() << '\t' << it->second << endl;
+ for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
+ ifstream in;
+ m->openInputFile(it->first, in);
+
+ ofstream out;
+ string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first));
+ if (countfile == "") { thisGroupName += getOutputFileNameTag("group"); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); }
+ else { thisGroupName += getOutputFileNameTag("count"); outputNames.push_back(thisGroupName); outputTypes["count"].push_back(thisGroupName); }
+ m->openOutputFile(thisGroupName, out);
+
+ if (countfile != "") { out << "Representative_Sequence\ttotal\t" << it->second << endl; }
+
+ while (!in.eof()){
+ if (m->control_pressed) { break; }
- if (nameFile != "") {
- map<string, string>::iterator itName = nameMap.find(currSeq.getName());
- if (itName != nameMap.end()) {
- vector<string> thisSeqsNames;
- m->splitAtChar(itName->second, thisSeqsNames, ',');
- for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
- out << thisSeqsNames[k] << '\t' << it->second << endl;
- }
- }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
+ Sequence currSeq(in); m->gobble(in);
+ if (countfile == "") {
+ out << currSeq.getName() << '\t' << it->second << endl;
+
+ if (nameFile != "") {
+ map<string, string>::iterator itName = nameMap.find(currSeq.getName());
+ if (itName != nameMap.end()) {
+ vector<string> thisSeqsNames;
+ m->splitAtChar(itName->second, thisSeqsNames, ',');
+ for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
+ out << thisSeqsNames[k] << '\t' << it->second << endl;
+ }
+ }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
+ }
+ }else {
+ map<string, int>::iterator itTotalReps = nameCount.find(currSeq.getName());
+ if (itTotalReps != nameCount.end()) { out << currSeq.getName() << '\t' << itTotalReps->second << '\t' << itTotalReps->second << endl; }
+ else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
}
- }
- in.close();
- out.close();
- }
+ }
+ in.close();
+ out.close();
+ }
+
+ if (countfile != "") { //create countfile with group info included
+ CountTable* ct = new CountTable();
+ ct->readTable(trimCountFile);
+ map<string, int> justTrimmedNames = ct->getNameMap();
+ delete ct;
+
+ CountTable newCt;
+ for (map<string, int>::iterator itCount = groupCounts.begin(); itCount != groupCounts.end(); itCount++) { newCt.addGroup(itCount->first); }
+ vector<int> tempCounts; tempCounts.resize(groupCounts.size(), 0);
+ for (map<string, int>::iterator itNames = justTrimmedNames.begin(); itNames != justTrimmedNames.end(); itNames++) {
+ newCt.push_back(itNames->first, tempCounts); //add it to the table with no abundance so we can set the groups abundance
+ map<string, string>::iterator it2 = groupMap.find(itNames->first);
+ if (it2 != groupMap.end()) { newCt.setAbund(itNames->first, it2->second, itNames->second); }
+ else { m->mothurOut("[ERROR]: missing group info for " + itNames->first + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+ }
+ newCt.printTable(trimCountFile);
+ }
}
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
if (itTypes != outputTypes.end()) {
if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
}
+
+ itTypes = outputTypes.find("count");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+ }
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
}
/**************************************************************************************/
-
-int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {
+int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string trimCFileName, string scrapCFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {
try {
m->openOutputFile(scrapNFileName, scrapNameFile);
}
+ ofstream trimCountFile;
+ ofstream scrapCountFile;
+ if(countfile != ""){
+ m->openOutputFile(trimCFileName, trimCountFile);
+ m->openOutputFile(scrapCFileName, scrapCountFile);
+ if (line.start == 0) { trimCountFile << "Representative_Sequence\ttotal" << endl; scrapCountFile << "Representative_Sequence\ttotal" << endl; }
+ }
ofstream outGroupsFile;
- if (createGroup){ m->openOutputFile(groupFileName, outGroupsFile); }
+ if ((createGroup) && (countfile == "")){ m->openOutputFile(groupFileName, outGroupsFile); }
if(allFiles){
for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
if (m->control_pressed) {
inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
- if (createGroup) { outGroupsFile.close(); }
-
- if(qFileName != ""){
- qFile.close();
- }
- for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
-
- return 0;
+ if ((createGroup) && (countfile == "")) { outGroupsFile.close(); }
+ if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
+ if(nameFile != "") { scrapNameFile.close(); trimNameFile.close(); }
+ if(countfile != "") { scrapCountFile.close(); trimCountFile.close(); }
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0;
}
int success = 1;
if (itName != nameMap.end()) { trimNameFile << itName->first << '\t' << itName->second << endl; }
else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
}
+
+ int numRedundants = 0;
+ if (countfile != "") {
+ map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
+ if (itCount != nameCount.end()) {
+ trimCountFile << itCount->first << '\t' << itCount->second << endl;
+ numRedundants = itCount->second-1;
+ }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
+ }
if (createGroup) {
if(barcodes.size() != 0){
if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
- outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
+ if (countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
+ else { groupMap[currSeq.getName()] = thisGroup; }
- int numRedundants = 0;
if (nameFile != "") {
map<string, string>::iterator itName = nameMap.find(currSeq.getName());
if (itName != nameMap.end()) {
if (itName != nameMap.end()) { scrapNameFile << itName->first << '\t' << itName->second << endl; }
else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
}
+ if (countfile != "") {
+ map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
+ if (itCount != nameCount.end()) {
+ trimCountFile << itCount->first << '\t' << itCount->second << endl;
+ }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
+ }
+
currSeq.setName(currSeq.getName() + '|' + trashCode);
currSeq.setUnaligned(origSeq);
currSeq.setAligned(origSeq);
if (createGroup) { outGroupsFile.close(); }
if(qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
if(nameFile != "") { scrapNameFile.close(); trimNameFile.close(); }
+ if(countfile != "") { scrapCountFile.close(); trimCountFile.close(); }
return count;
}
/**************************************************************************************************/
-int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
+int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string trimCountFileName, string scrapCountFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
try {
int process = 1;
(scrapQualFileName + toString(getpid()) + ".temp"),
(trimNameFileName + toString(getpid()) + ".temp"),
(scrapNameFileName + toString(getpid()) + ".temp"),
+ (trimCountFileName + toString(getpid()) + ".temp"),
+ (scrapCountFileName + toString(getpid()) + ".temp"),
(groupFile + toString(getpid()) + ".temp"),
tempFASTAFileNames,
tempPrimerQualFileNames,
for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
out << it->first << '\t' << it->second << endl;
}
+
+ out << groupMap.size() << endl;
+ for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
+ out << it->first << '\t' << it->second << endl;
+ }
out.close();
}
exit(0);
m->openOutputFile(trimNameFileName, temp); temp.close();
m->openOutputFile(scrapNameFileName, temp); temp.close();
}
+ if (countfile != "") {
+ m->openOutputFile(trimCountFileName, temp); temp.close();
+ m->openOutputFile(scrapCountFileName, temp); temp.close();
+ }
- driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
+ driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, trimCountFileName, scrapCountFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
//force parent to wait until all the processes are done
for (int i=0;i<processIDS.size();i++) {
trimData* tempTrim = new trimData(filename,
- qFileName, nameFile,
+ qFileName, nameFile, countfile,
(trimFASTAFileName+extension),
(scrapFASTAFileName+extension),
(trimQualFileName+extension),
(scrapQualFileName+extension),
(trimNameFileName+extension),
(scrapNameFileName+extension),
+ (trimCountFileName+extension),
+ (scrapCountFileName+extension),
(groupFile+extension),
tempFASTAFileNames,
tempPrimerQualFileNames,
pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer,
primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
- minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap);
+ minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap, nameCount);
pDataArray.push_back(tempTrim);
hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);
m->openOutputFile(scrapNameFileName, temp); temp.close();
}
- driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
+ driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (trimCountFileName + toString(processors-1) + ".temp"), (scrapCountFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
processIDS.push_back(processors-1);
if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
else { groupCounts[it->first] += it->second; }
}
+ for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
+ map<string, string>::iterator it2 = groupMap.find(it->first);
+ if (it2 == groupMap.end()) { groupMap[it->first] = it->second; }
+ else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n"); }
+ }
CloseHandle(hThreadArray[i]);
delete pDataArray[i];
}
m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
}
+
+ if(countfile != ""){
+ m->appendFiles((trimCountFileName + toString(processIDS[i]) + ".temp"), trimCountFileName);
+ m->mothurRemove((trimCountFileName + toString(processIDS[i]) + ".temp"));
+ m->appendFiles((scrapCountFileName + toString(processIDS[i]) + ".temp"), scrapCountFileName);
+ m->mothurRemove((scrapCountFileName + toString(processIDS[i]) + ".temp"));
+ }
- if(createGroup){
+ if((createGroup)&&(countfile == "")){
m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
}
in >> tempNum; m->gobble(in);
if (tempNum != 0) {
- while (!in.eof()) {
- in >> group >> tempNum; m->gobble(in);
+ for (int i = 0; i < tempNum; i++) {
+ int groupNum;
+ in >> group >> groupNum; m->gobble(in);
map<string, int>::iterator it = groupCounts.find(group);
- if (it == groupCounts.end()) { groupCounts[group] = tempNum; }
- else { groupCounts[it->first] += tempNum; }
+ if (it == groupCounts.end()) { groupCounts[group] = groupNum; }
+ else { groupCounts[it->first] += groupNum; }
}
}
+ in >> tempNum; m->gobble(in);
+ if (tempNum != 0) {
+ for (int i = 0; i < tempNum; i++) {
+ string group, seqName;
+ in >> seqName >> group; m->gobble(in);
+
+ map<string, string>::iterator it = groupMap.find(seqName);
+ if (it == groupMap.end()) { groupMap[seqName] = group; }
+ else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n"); }
+ }
+ }
+
in.close(); m->mothurRemove(tempFile);
}
#endif
string fastaFileName = "";
string qualFileName = "";
string nameFileName = "";
+ string countFileName = "";
if(primerName == ""){
comboGroupName = barcodeNameVector[itBar->second];
nameFileNames[itBar->second][itPrimer->second] = nameFileName;
m->openOutputFile(nameFileName, temp); temp.close();
}
-
}
}
}
#include "command.hpp"
#include "sequence.hpp"
#include "qualityscores.h"
-#include "groupmap.h"
#include "trimoligos.h"
+#include "counttable.h"
class TrimSeqsCommand : public Command {
void help() { m->mothurOut(getHelpString()); }
private:
-
- GroupMap* groupMap;
-
struct linePair {
unsigned long long start;
unsigned long long end;
linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
linePair() {}
};
-
+
bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
bool keepFirstTrim(Sequence&, QualityScores&);
bool removeLastTrim(Sequence&, QualityScores&);
string reverseOligo(string);
bool abort, createGroup;
- string fastaFile, oligoFile, qFileName, groupfile, nameFile, outputDir;
+ string fastaFile, oligoFile, qFileName, groupfile, nameFile, countfile, outputDir;
bool flip, allFiles, qtrim, keepforward;
int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts;
vector<string> barcodeNameVector; //needed here?
map<string, int> groupCounts;
map<string, string> nameMap;
+ map<string, int> nameCount; //for countfile name -> repCount
+ map<string, string> groupMap; //for countfile name -> group
vector<int> processIDS; //processid
vector<linePair> lines;
vector<linePair> qLines;
- int driverCreateTrim(string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >, linePair, linePair);
- int createProcessesCreateTrim(string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >);
+ int driverCreateTrim(string, string, string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >, linePair, linePair);
+ int createProcessesCreateTrim(string, string, string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >);
int setLines(string, string);
};
struct trimData {
unsigned long long start, end;
MothurOut* m;
- string filename, qFileName, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, groupFileName, nameFile;
+ string filename, qFileName, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, trimCFileName, scrapCFileName, groupFileName, nameFile, countfile;
vector<vector<string> > fastaFileNames;
vector<vector<string> > qualFileNames;
vector<vector<string> > nameFileNames;
map<string, int> barcodes;
map<string, int> rbarcodes;
map<string, int> primers;
+ map<string, int> nameCount;
vector<string> linker;
vector<string> spacer;
map<string, int> combos;
vector<string> barcodeNameVector;
map<string, int> groupCounts;
map<string, string> nameMap;
+ map<string, string> groupMap;
trimData(){}
- trimData(string fn, string qn, string nf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string gn, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<vector<string> > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend, MothurOut* mout,
+ trimData(string fn, string qn, string nf, string cf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string tcn, string scn,string gn, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<vector<string> > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend, MothurOut* mout,
int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, map<string, int> rbar, vector<string> revP, vector<string> li, vector<string> spa,
vector<string> priNameVector, vector<string> barNameVector, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL,
int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage,
- int minL, int maxA, int maxH, int maxL, bool fli, map<string, string> nm) {
+ int minL, int maxA, int maxH, int maxL, bool fli, map<string, string> nm, map<string, int> ncount) {
filename = fn;
qFileName = qn;
nameFile = nf;
+ countfile = cf;
trimFileName = tn;
scrapFileName = sn;
trimQFileName = tqn;
scrapQFileName = sqn;
trimNFileName = tnn;
scrapNFileName = snn;
+ trimCFileName = tcn;
+ scrapCFileName = scn;
groupFileName = gn;
fastaFileNames = ffn;
qualFileNames = qfn;
qlineStart = qstart;
qlineEnd = qend;
m = mout;
+ nameCount = ncount;
pdiffs = pd;
bdiffs = bd;
ofstream outGroupsFile;
- if (pDataArray->createGroup){ pDataArray->m->openOutputFile(pDataArray->groupFileName, outGroupsFile); }
+ if ((pDataArray->createGroup) && (pDataArray->countfile == "")){ pDataArray->m->openOutputFile(pDataArray->groupFileName, outGroupsFile); }
if(pDataArray->allFiles){
for (int i = 0; i < pDataArray->fastaFileNames.size(); i++) { //clears old file
for (int j = 0; j < pDataArray->fastaFileNames[i].size(); j++) { //clears old file
}
}
+ ofstream trimCountFile;
+ ofstream scrapCountFile;
+ if(pDataArray->countfile != ""){
+ pDataArray->m->openOutputFile(pDataArray->trimCFileName, trimCountFile);
+ pDataArray->m->openOutputFile(pDataArray->scrapCFileName, scrapCountFile);
+ if ((pDataArray->lineStart == 0) || (pDataArray->lineStart == 1)) { trimCountFile << "Representative_Sequence\ttotal" << endl; scrapCountFile << "Representative_Sequence\ttotal" << endl; }
+ }
+
ifstream inFASTA;
pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
if ((pDataArray->lineStart == 0) || (pDataArray->lineStart == 1)) {
if (pDataArray->m->control_pressed) {
inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
- if (pDataArray->createGroup) { outGroupsFile.close(); }
+ if ((pDataArray->createGroup) && (pDataArray->countfile == "")) { outGroupsFile.close(); }
+ if(pDataArray->qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); }
+ if(pDataArray->nameFile != "") { scrapNameFile.close(); trimNameFile.close(); }
+ if(pDataArray->countfile != "") { scrapCountFile.close(); trimCountFile.close(); }
+
if(pDataArray->qFileName != ""){ qFile.close(); }
return 0;
}
else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
}
+ int numRedundants = 0;
+ if (pDataArray->countfile != "") {
+ map<string, int>::iterator itCount = pDataArray->nameCount.find(currSeq.getName());
+ if (itCount != pDataArray->nameCount.end()) {
+ trimCountFile << itCount->first << '\t' << itCount->second << endl;
+ numRedundants = itCount->second-1;
+ }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); pDataArray->m->mothurOutEndLine(); }
+ }
+
if (pDataArray->createGroup) {
if(pDataArray->barcodes.size() != 0){
string thisGroup = pDataArray->barcodeNameVector[barcodeIndex];
}
}
- outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
+ if (pDataArray->countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
+ else { pDataArray->groupMap[currSeq.getName()] = thisGroup; }
- int numRedundants = 0;
if (pDataArray->nameFile != "") {
map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
if (itName != pDataArray->nameMap.end()) {
if (itName != pDataArray->nameMap.end()) { scrapNameFile << itName->first << '\t' << itName->second << endl; }
else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
}
+ if (pDataArray->countfile != "") {
+ map<string, int>::iterator itCount = pDataArray->nameCount.find(currSeq.getName());
+ if (itCount != pDataArray->nameCount.end()) {
+ trimCountFile << itCount->first << '\t' << itCount->second << endl;
+ }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); pDataArray->m->mothurOutEndLine(); }
+ }
currSeq.setName(currSeq.getName() + '|' + trashCode);
currSeq.setUnaligned(origSeq);
currSeq.setAligned(origSeq);