CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "NameCount", "none","",false,false,true); parameters.push_back(pcount);
CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+ CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
+
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
CommandParameter pcutoff("cutoff", "Number", "", "0.5", "", "", "","",false,false); parameters.push_back(pcutoff);
try {
string helpString = "";
helpString += "The chimera.perseus command reads a fastafile and namefile or countfile and outputs potentially chimeric sequences.\n";
- helpString += "The chimera.perseus command parameters are fasta, name, group, cutoff, processors, alpha and beta.\n";
+ helpString += "The chimera.perseus command parameters are fasta, name, group, cutoff, processors, dereplicate, alpha and beta.\n";
helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
helpString += "The name parameter allows you to provide a name file associated with your fasta file.\n";
helpString += "The count parameter allows you to provide a count file associated with your fasta file. A count or name file is required. \n";
helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
helpString += "The group parameter allows you to provide a group file. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
+ helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
helpString += "The alpha parameter .... The default is -5.54. \n";
helpString += "The beta parameter .... The default is 0.33. \n";
helpString += "The cutoff parameter .... The default is 0.50. \n";
temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found"){ temp = "0.33"; }
m->mothurConvert(temp, beta);
+
+ temp = validParameter.validFile(parameters, "dereplicate", false);
+ if (temp == "not found") {
+ if (groupfile != "") { temp = "false"; }
+ else { temp = "true"; }
+ }
+ dups = m->isTrue(temp);
}
}
catch(exception& e) {
if (m->control_pressed) { delete ct; delete cparser; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
map<string, string> uniqueNames = cparser->getAllSeqsMap();
- numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName);
+ if (!dups) {
+ numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName);
+ }
delete cparser;
m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine();
if (m->control_pressed) { delete parser; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
map<string, string> uniqueNames = parser->getAllSeqsMap();
- numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName);
+ if (!dups) {
+ numChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName);
+ }
delete parser;
m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine();
linePair(int i, int j) : start(i), end(j) {}
};
- bool abort, hasName, hasCount;
+ bool abort, hasName, hasCount, dups;
string fastafile, groupfile, countfile, outputDir, namefile;
int processors, alignLength;
double cutoff, alpha, beta;
CommandParameter pminbs("minbs", "Number", "", "90", "", "", "","",false,false); parameters.push_back(pminbs);
CommandParameter psearch("search", "Multiple", "kmer-blast", "blast", "", "", "","",false,false); parameters.push_back(psearch);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+
CommandParameter prealign("realign", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(prealign);
CommandParameter ptrim("trim", "Boolean", "", "F", "", "", "","fasta",false,false); parameters.push_back(ptrim);
CommandParameter psplit("split", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psplit);
CommandParameter pnumwanted("numwanted", "Number", "", "15", "", "", "","",false,false); parameters.push_back(pnumwanted);
CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
CommandParameter pdivergence("divergence", "Number", "", "1.007", "", "", "","",false,false); parameters.push_back(pdivergence);
+ CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups);
CommandParameter pparents("parents", "Number", "", "3", "", "", "","",false,false); parameters.push_back(pparents);
CommandParameter pincrement("increment", "Number", "", "5", "", "", "","",false,false); parameters.push_back(pincrement);
CommandParameter pblastlocation("blastlocation", "String", "", "", "", "", "","",false,false); parameters.push_back(pblastlocation);
string helpString = "";
helpString += "The chimera.slayer command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
helpString += "This command was modeled after the chimeraSlayer written by the Broad Institute.\n";
- helpString += "The chimera.slayer command parameters are fasta, name, group, template, processors, trim, ksize, window, match, mismatch, divergence. minsim, mincov, minbs, minsnp, parents, search, iters, increment, numwanted, blastlocation and realign.\n";
+ helpString += "The chimera.slayer command parameters are fasta, name, group, template, processors, dereplicate, trim, ksize, window, match, mismatch, divergence. minsim, mincov, minbs, minsnp, parents, search, iters, increment, numwanted, blastlocation and realign.\n";
helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
helpString += "The name parameter allows you to provide a name file, if you are using reference=self. \n";
helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
#ifdef USE_MPI
helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
#endif
+ helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n";
helpString += "The trim parameter allows you to output a new fasta file containing your sequences with the chimeric ones trimmed to include only their longest piece, default=F. \n";
helpString += "The split parameter allows you to check both pieces of non-chimeric sequence for chimeras, thus looking for trimeras and quadmeras. default=F. \n";
helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=50. \n";
temp = validParameter.validFile(parameters, "numwanted", false); if (temp == "not found") { temp = "15"; }
m->mothurConvert(temp, numwanted);
+
+ temp = validParameter.validFile(parameters, "dereplicate", false);
+ if (temp == "not found") {
+ if (groupfile != "") { temp = "false"; }
+ else { temp = "true"; }
+ }
+ dups = m->isTrue(temp);
blastlocation = validParameter.validFile(parameters, "blastlocation", false);
if (blastlocation == "not found") { blastlocation = ""; }
if (pid == 0) {
#endif
- totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, trimFastaFileName);
- m->mothurOutEndLine(); m->mothurOut(toString(totalChimeras) + " chimera found."); m->mothurOutEndLine();
+ if (!dups) {
+ totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, trimFastaFileName);
+ m->mothurOutEndLine(); m->mothurOut(toString(totalChimeras) + " chimera found."); m->mothurOutEndLine();
+ }
#ifdef USE_MPI
}
MPI_Barrier(MPI_COMM_WORLD); //make everyone wait
int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long long>&, string, map<string, int>&, bool);
#endif
- bool abort, realign, trim, trimera, save, hasName, hasCount;
+ bool abort, realign, trim, trimera, save, hasName, hasCount, dups;
string fastafile, groupfile, templatefile, outputDir, search, namefile, countfile, blastlocation;
int processors, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs, templateSeqsLength;
float divR;
temp = validParameter.validFile(parameters, "skipgaps2", false); if (temp == "not found") { temp = "t"; }
skipgaps2 = m->isTrue(temp);
- string usedDups = "false";
+
temp = validParameter.validFile(parameters, "dereplicate", false);
if (temp == "not found") {
if (groupfile != "") { temp = "false"; }
- else { temp = "true"; usedDups = ""; }
+ else { temp = "true"; }
}
dups = m->isTrue(temp);
map<string, string> variables;
variables["[filename]"] = fileroot;
- if (countfile != "") { variables["[tag2]"] = "unique_list"; }
variables["[clustertag]"] = tag;
string sabundFileName = getOutputFileName("sabund", variables);
string rabundFileName = getOutputFileName("rabund", variables);
+ if (countfile != "") { variables["[tag2]"] = "unique_list"; }
string listFileName = getOutputFileName("list", variables);
if (countfile == "") {
map<string, string> variables;
variables["[filename]"] = fileroot;
- if (countfile != "") { variables["[tag2]"] = "unique_list"; }
variables["[clustertag]"] = tag;
string sabundFileName = getOutputFileName("sabund", variables);
string rabundFileName = getOutputFileName("rabund", variables);
+ if (countfile != "") { variables["[tag2]"] = "unique_list"; }
string listFileName = getOutputFileName("list", variables);
if (countfile == "") {
map<string, string> variables;
variables["[filename]"] = fileroot;
- if (countfile != "") { variables["[tag2]"] = "unique_list"; }
variables["[clustertag]"] = tag;
string sabundFileName = getOutputFileName("sabund", variables);
string rabundFileName = getOutputFileName("rabund", variables);
+ if (countfile != "") { variables["[tag2]"] = "unique_list"; }
string listFileName = getOutputFileName("list", variables);
if (countfile == "") {
CommandParameter pminpercent("minpercent", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pminpercent);
CommandParameter pminabund("minabund", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pminabund);
CommandParameter pmintotal("mintotal", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pmintotal);
+ CommandParameter pminnumsamples("minnumsamples", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pminnumsamples);
+ CommandParameter pminpercentsamples("minpercentsamples", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pminpercentsamples);
CommandParameter pmakerare("makerare", "Boolean", "", "T", "", "", "","",false,false,true); parameters.push_back(pmakerare);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
try {
string helpString = "";
helpString += "The filter.shared command is used to remove OTUs based on various critieria.\n";
- helpString += "The filter.shared command parameters are shared, minpercent, minabund, mintotal, makerare, groups and label. You must provide a shared file.\n";
+ helpString += "The filter.shared command parameters are shared, minpercent, minabund, mintotal, minnumsamples, minpercentsamples, makerare, groups and label. You must provide a shared file.\n";
helpString += "The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes.\n";
helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
helpString += "The minabund parameter allows you indicate the minimum abundance required for each sample in a given OTU. If any samples abundance falls below the minimum, the OTU is removed. Default=0\n";
helpString += "The minpercent parameter allows you indicate the minimum relative abundance of an OTU. For example, if the OTUs total abundance across all samples is 8, and the total abundance across all OTUs is 1000, and minpercent=1. The OTU's relative abundance is 0.008, the minimum is 0.01, so the OTU will be removed. Default=0.\n";
+ helpString += "The minnumsamples parameter allows you indicate the minimum number of samples present in an OTU. If the number of samples present falls below the minimum, the OTU is removed. Default=0.\n";
+ helpString += "The minpercentsamples parameter allows you indicate the minimum percent of sample present in an OTU. For example, if the total number of samples is 10, the number present is 3, and the minpercentsamples=50. The OTU's precent of samples is 0.333, the minimum is 0.50, so the OTU will be removed. Default=0.\n";
helpString += "The mintotal parameter allows you indicate the minimum abundance required for a given OTU. If abundance across all samples falls below the minimum, the OTU is removed. Default=0.\n";
helpString += "The makerare parameter allows you indicate you want the abundances of any removed OTUs to be saved and a new \"rare\" OTU created with its abundances equal to the sum of the OTUs removed. This will preserve the number of reads in your dataset. Default=T\n";
else { setSomething = true; }
m->mothurConvert(temp, minTotal);
+ temp = validParameter.validFile(parameters, "minnumsamples", false);
+ if (temp == "not found"){ temp = "-1"; }
+ else { setSomething = true; }
+ m->mothurConvert(temp, minSamples);
+
temp = validParameter.validFile(parameters, "minpercent", false);
if (temp == "not found"){ temp = "-1"; }
else { setSomething = true; }
m->mothurConvert(temp, minPercent);
if (minPercent < 1) {} //already in percent form
else { minPercent = minPercent / 100.0; } //user gave us a whole number version so convert to %
+
+ temp = validParameter.validFile(parameters, "minpercentsamples", false);
+ if (temp == "not found"){ temp = "-1"; }
+ else { setSomething = true; }
+
+ m->mothurConvert(temp, minPercentSamples);
+ if (minPercentSamples < 1) {} //already in percent form
+ else { minPercentSamples = minPercentSamples / 100.0; } //user gave us a whole number version so convert to %
temp = validParameter.validFile(parameters, "makerare", false); if (temp == "not found"){ temp = "T"; }
makeRare = m->isTrue(temp);
if (percent < minPercent) { okay = false; }
}
+
+ if (okay && (minSamples != -1)) {
+ int samples = 0;
+ for (int j = 0; j < thislookup.size(); j++) {
+ if (thislookup[j]->getAbundance(i) != 0) { samples++; }
+ }
+ if (samples < minSamples) { okay = false; }
+ }
+
+ if (okay && (minPercentSamples != -0.01)) {
+ double samples = 0;
+ double total = thislookup.size();
+ for (int j = 0; j < thislookup.size(); j++) {
+ if (thislookup[j]->getAbundance(i) != 0) { samples++; }
+ }
+ double percent = samples / total;
+ if (percent < minPercentSamples) { okay = false; }
+ }
+
//did this OTU pass the filter criteria
if (okay) {
filteredLabels.push_back(saveBinLabels[i]);
set<string> labels; //holds labels to be used
string groups, label, outputDir, sharedfile;
vector<string> Groups, outputNames;
- int minAbund, minTotal;
- float minPercent;
+ int minAbund, minTotal, minSamples;
+ float minPercent, minPercentSamples;
int processShared(vector<SharedRAbundVector*>&);
map<string, string> variables;
variables["[filename]"] = fileroot;
- if (countfile != "") { variables["[tag2]"] = "unique_list"; }
variables["[clustertag]"] = tag;
string sabundFileName = getOutputFileName("sabund", variables);
string rabundFileName = getOutputFileName("rabund", variables);
+ if (countfile != "") { variables["[tag2]"] = "unique_list"; }
string listFileName = getOutputFileName("list", variables);
if (countfile == "") {
vector<string> ParseListCommand::setParameters(){
try {
CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","list",false,true,true); parameters.push_back(plist);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pgroup);
+ CommandParameter pcount("count", "InputTypes", "", "", "CountGroup", "CountGroup", "none","",false,false,true); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "CountGroup", "none","",false,false,true); parameters.push_back(pgroup);
CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
string ParseListCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The parse.list command reads a list and group file and generates a list file for each group in the groupfile. \n";
- helpString += "The parse.list command parameters are list, group and label.\n";
- helpString += "The list and group parameters are required.\n";
+ helpString += "The parse.list command reads a list and group or count file and generates a list file for each group in the group or count file. \n";
+ helpString += "The parse.list command parameters are list, group, count and label.\n";
+ helpString += "The list and group or count parameters are required.\n";
+ helpString += "If a count file is provided, mothur assumes the list file contains only unique names.\n";
+ helpString += "If a group file is provided, mothur assumes the list file contains all names.\n";
helpString += "The label parameter is used to read specific labels in your input you want to use.\n";
helpString += "The parse.list command should be used in the following format: parse.list(list=yourListFile, group=yourGroupFile, label=yourLabels).\n";
helpString += "Example: parse.list(list=abrecovery.fn.list, group=abrecovery.groups, label=0.03).\n";
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["group"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
- //if the user changes the output directory command factory will send this info to us in the output parameter
- outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
+
//check for required parameters
listfile = validParameter.validFile(parameters, "list", true);
}
}else { m->setListFile(listfile); }
+
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(listfile); }
- groupfile = validParameter.validFile(parameters, "group", true);
- if (groupfile == "not open") { abort = true; }
- else if (groupfile == "not found") {
- groupfile = m->getListFile();
- if (groupfile != "") {
- m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine();
- groupMap = new GroupMap(groupfile);
-
- int error = groupMap->readMap();
- if (error == 1) { abort = true; }
- }else { m->mothurOut("No valid current group file. You must provide a group file."); m->mothurOutEndLine(); abort = true; }
- }else {
- m->setGroupFile(groupfile);
+ groupfile = validParameter.validFile(parameters, "group", true);
+ if (groupfile == "not found") { groupfile = ""; groupMap = NULL; }
+ else if (groupfile == "not open") { abort = true; groupfile = ""; groupMap = NULL; }
+ else {
+ m->setGroupFile(groupfile);
groupMap = new GroupMap(groupfile);
int error = groupMap->readMap();
if (error == 1) { abort = true; }
- }
-
- //do you have all files needed
- if ((listfile == "") || (groupfile == "")) { m->mothurOut("You must enter both a listfile and groupfile for the parse.list command. "); m->mothurOutEndLine(); abort = true; }
+ }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not found") { countfile = ""; }
+ else if (countfile == "not open") { abort = true; countfile = ""; }
+ else {
+ m->setCountTableFile(countfile);
+ ct.readTable(countfile);
+ if (!ct.hasGroupInfo()) {
+ abort = true;
+ m->mothurOut("[ERROR]: The parse.list command requires group info to be present in your countfile, quitting."); m->mothurOutEndLine();
+ }
+
+ }
+
+ if ((groupfile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+ }else if ((groupfile == "") && (countfile == "")) {
+ m->mothurOut("[ERROR]: you must provide one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+ }
//check for optional parameter and set defaults
// ...at some point should added some additional type checking...
//fill filehandles with neccessary ofstreams
int i;
ofstream* temp;
- vector<string> gGroups = groupMap->getNamesOfGroups();
+ vector<string> gGroups;
+ if (groupfile != "") { gGroups = groupMap->getNamesOfGroups(); }
+ else { gGroups = ct.getNamesOfGroups(); }
+
for (i=0; i<gGroups.size(); i++) {
temp = new ofstream;
filehandles[gGroups[i]] = temp;
set<string> processedLabels;
set<string> userLabels = labels;
- input = new InputData(listfile, "list");
- list = input->getListVector();
+ InputData input(listfile, "list");
+ list = input.getListVector();
string lastLabel = list->getLabel();
if (m->control_pressed) {
- delete input; delete list; delete groupMap;
- vector<string> gGroups = groupMap->getNamesOfGroups();
+ delete list; if (groupfile != "") { delete groupMap; }
for (i=0; i<gGroups.size(); i++) { (*(filehandles[gGroups[i]])).close(); delete filehandles[gGroups[i]]; }
for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
return 0;
while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
if (m->control_pressed) {
- delete input; delete list; delete groupMap;
+ delete list; if (groupfile != "") { delete groupMap; }
for (i=0; i<gGroups.size(); i++) { (*(filehandles[gGroups[i]])).close(); delete filehandles[gGroups[i]]; }
for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
return 0;
if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
string saveLabel = list->getLabel();
- delete list;
- list = input->getListVector(lastLabel); //get new list vector to process
+ list = input.getListVector(lastLabel); //get new list vector to process
m->mothurOut(list->getLabel()); m->mothurOutEndLine();
parse(list);
lastLabel = list->getLabel();
delete list;
- list = input->getListVector(); //get new list vector to process
+ list = input.getListVector(); //get new list vector to process
}
if (m->control_pressed) {
- delete input; delete groupMap;
+ if (groupfile != "") { delete groupMap; }
for (i=0; i<gGroups.size(); i++) { (*(filehandles[gGroups[i]])).close(); delete filehandles[gGroups[i]]; }
for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
return 0;
}
if (m->control_pressed) {
- delete input; delete groupMap;
+ if (groupfile != "") { delete groupMap; }
for (i=0; i<gGroups.size(); i++) { (*(filehandles[gGroups[i]])).close(); delete filehandles[gGroups[i]]; }
for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
return 0;
//run last label if you need to
if (needToRun == true) {
if (list != NULL) { delete list; }
- list = input->getListVector(lastLabel); //get new list vector to process
+ list = input.getListVector(lastLabel); //get new list vector to process
m->mothurOut(list->getLabel()); m->mothurOutEndLine();
parse(list);
delete it3->second;
}
-
- delete groupMap;
- delete input;
+ if (groupfile != "") { delete groupMap; }
if (m->control_pressed) {
for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
//parse bin into list of sequences in each group
for (int j = 0; j < names.size(); j++) {
- string group = groupMap->getGroup(names[j]);
+ if (groupfile != "") {
+ string group = groupMap->getGroup(names[j]);
- if (group == "not found") { m->mothurOut(names[j] + " is not in your groupfile. please correct."); m->mothurOutEndLine(); exit(1); }
+ if (group == "not found") { m->mothurOut(names[j] + " is not in your groupfile. please correct."); m->mothurOutEndLine(); exit(1); }
- itGroup = groupBins.find(group);
- if(itGroup == groupBins.end()) {
- groupBins[group] = names[j]; //add first name
- groupNumBins[group]++;
- }else{ //add another name
- groupBins[group] = groupBins[group] + "," + names[j];
- }
+ itGroup = groupBins.find(group);
+ if(itGroup == groupBins.end()) {
+ groupBins[group] = names[j]; //add first name
+ groupNumBins[group]++;
+ }else{ //add another name
+ groupBins[group] = groupBins[group] + "," + names[j];
+ }
+ }else{
+ vector<string> thisSeqsGroups = ct.getGroups(names[j]);
+
+ for (int k = 0; k < thisSeqsGroups.size(); k++) {
+ string group = thisSeqsGroups[k];
+ itGroup = groupBins.find(group);
+ if(itGroup == groupBins.end()) {
+ groupBins[group] = names[j]; //add first name
+ groupNumBins[group]++;
+ }else{ //add another name
+ groupBins[group] = groupBins[group] + "," + names[j];
+ }
+
+ }
+ }
}
//print parsed bin info to files
ListVector* list;
GroupMap* groupMap;
- InputData* input;
+ CountTable ct;
ofstream out;
- string outputDir, listfile, groupfile, label;
+ string outputDir, listfile, groupfile, label, countfile;
set<string> labels;
bool abort, allLines;
vector<string> outputNames;
//somehow the parent is getting one too many accnos
//use print to reassign the taxa id
taxon = getNextTaxon(seqTaxonomy, seqName);
+
+ if (m->debug) { m->mothurOut(seqName +'\t' + taxon +'\n'); }
if (taxon == "") { m->mothurOut(seqName + " has an error in the taxonomy. This may be due to a ;;"); m->mothurOutEndLine(); if (currentNode != 0) { uniqueTaxonomies.insert(currentNode); } break; }
int counter = 1;
for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
+
+ if (m->debug) { m->mothurOut(toString(index) +'\t' + tree[it->second].name +'\n'); }
+
tree[it->second].heirarchyID = tree[index].heirarchyID + '.' + toString(counter);
counter++;
tree[it->second].level = tree[index].level + 1;
}
}
+ if (m->debug) { m->mothurOut("maxLevel = " + toString(maxLevel) +'\n'); }
+
int copyNodes = copy.size();
//go through the seqs and if a sequence finest taxon is not the same level as the most finely defined taxon then classify it as unclassified where necessary
int level = copy[itLeaf->second].level;
int currentNode = itLeaf->second;
+
+ if (m->debug) { m->mothurOut(copy[currentNode].name +'\n'); }
//this sequence is unclassified at some levels
while(level < maxLevel){
level++;
+ if (m->debug) { m->mothurOut("level = " + toString(level) +'\n'); }
string taxon = "unclassified";
int SensSpecCommand::execute(){
try{
if (abort == true) { if (calledHelp) { return 0; } return 2; }
-
+
+ int startTime = time(NULL);
+
+ //create list file with only unique names, saves time and memory by removing redundant names from list file that are not in the distance file.
+ string newListFile = preProcessList();
+ if (newListFile != "") { listFile = newListFile; }
+
setUpOutput();
outputNames.push_back(sensSpecFileName); outputTypes["sensspec"].push_back(sensSpecFileName);
if(format == "phylip") { processPhylip(); }
else if(format == "column") { processColumn(); }
+ //remove temp file if created
+ if (newListFile != "") { m->mothurRemove(newListFile); }
+
if (m->control_pressed) { m->mothurRemove(sensSpecFileName); return 0; }
-
+
+ m->mothurOut("It took " + toString(time(NULL) - startTime) + " to run sens.spec."); m->mothurOutEndLine();
+
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
m->mothurOut(sensSpecFileName); m->mothurOutEndLine();
exit(1);
}
}
+//***************************************************************************************************************
+
+string SensSpecCommand::preProcessList(){
+ try {
+ set<string> uniqueNames;
+ //get unique names from distance file
+ if (format == "phylip") {
+
+ ifstream phylipFile;
+ m->openInputFile(distFile, phylipFile);
+ string numTest;
+ int pNumSeqs;
+ phylipFile >> numTest;
+
+ if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
+ else {
+ m->mothurConvert(numTest, pNumSeqs);
+ }
+ phylipFile >> pNumSeqs; m->gobble(phylipFile);
+
+ string seqName;
+ double distance;
+
+ for(int i=0;i<pNumSeqs;i++){
+
+ if (m->control_pressed) { return ""; }
+
+ phylipFile >> seqName;
+ uniqueNames.insert(seqName);
+
+ for(int j=0;j<i;j++){
+ phylipFile >> distance;
+ }
+ m->gobble(phylipFile);
+ }
+ phylipFile.close();
+ }else {
+ ifstream columnFile;
+ m->openInputFile(distFile, columnFile);
+ string seqNameA, seqNameB;
+ double distance;
+
+ while(columnFile){
+ if (m->control_pressed) { return ""; }
+ columnFile >> seqNameA >> seqNameB >> distance;
+ uniqueNames.insert(seqNameA); uniqueNames.insert(seqNameB);
+ m->gobble(columnFile);
+ }
+ columnFile.close();
+ }
+
+ //read list file, if numSeqs > unique names then remove redundant names
+ string newListFile = listFile + ".temp";
+ ofstream out;
+ m->openOutputFile(newListFile, out);
+ ifstream in;
+ m->openInputFile(listFile, in);
+
+ bool wroteSomething = false;
+
+ while(!in.eof()){
+
+ if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(newListFile); return ""; }
+
+ //read in list vector
+ ListVector list(in);
+
+ //listfile is already unique
+ if (list.getNumSeqs() == uniqueNames.size()) { in.close(); out.close(); m->mothurRemove(newListFile); return ""; }
+
+ //make a new list vector
+ ListVector newList;
+ newList.setLabel(list.getLabel());
+
+ //for each bin
+ for (int i = 0; i < list.getNumBins(); i++) {
+
+ //parse out names that are in accnos file
+ string binnames = list.get(i);
+ vector<string> bnames;
+ m->splitAtComma(binnames, bnames);
+
+ string newNames = "";
+ for (int i = 0; i < bnames.size(); i++) {
+ string name = bnames[i];
+ //if that name is in the .accnos file, add it
+ if (uniqueNames.count(name) != 0) { newNames += name + ","; }
+ }
+
+ //if there are names in this bin add to new list
+ if (newNames != "") {
+ newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
+ newList.push_back(newNames);
+ }
+ }
+
+ //print new listvector
+ if (newList.getNumBins() != 0) {
+ wroteSomething = true;
+ newList.print(out);
+ }
+
+ m->gobble(in);
+ }
+ in.close();
+ out.close();
+
+ if (wroteSomething) { return newListFile; }
+ return "";
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SensSpecCommand", "preProcessList");
+ exit(1);
+ }
+}
+
//***************************************************************************************************************
int fillSeqPairSet(set<string>&, ListVector*&);
int process(map<string, int>&, string, bool&, string&);
int process(set<string>&, string, bool&, string&, int);
+ string preProcessList();
};