//prepare filenames and open files
filename = globaldata->getFastaFile();
outputFileName = (getRootName(filename) + "names");
+ outFastafile = (getRootName(filename) + "uni_fasta");
+
openInputFile(filename, in);
openOutputFile(outputFileName, out);
+ openOutputFile(outFastafile, outFasta);
//constructor reads in file and store internally
fastamap = new FastaMap();
//file contains 2 columns separated by tabs. the first column is the groupname(name of first sequence found.
//the second column is the list of names of identical sequences separated by ','.
fastamap->print(out);
+ fastamap->printCondensedFasta(outFasta);
return 0;
}
GlobalData* globaldata;
FastaMap* fastamap;
ifstream in;
- ofstream out;
- string filename, outputFileName;
+ ofstream out, outFasta;
+ string filename, outputFileName, outFastafile;
};
cout << endl << "mothur > ";
getline(cin, input);
+ if (cin.eof()) { input = "quit()"; }
+
errorFree = errorCheckor->checkInput(input);
if (errorFree == true) {
CommandOptionParser parser(input);
while(quitCommandCalled == 0){
getline(inputBatchFile, input);
- cout << endl << "dotur > " << input << endl;
+ if (inputBatchFile.eof()) { input = "quit()"; }
+
+ cout << endl << "mothur > " << input << endl;
errorFree = errorCheckor->checkInput(input);
if (errorFree == true) {
CommandOptionParser parser(input);
if (parameter == "fileroot" ) { fileroot = value; }
if (parameter == "line" ) { line = value; }
if (parameter == "label" ) { label = value; }
- if (parameter == "randomtree" ) { randomtree = value; }
+ if (parameter == "random" ) { randomtree = value; }
}
if (parameter == "fileroot" ) { fileroot = value; }
if (parameter == "line" ) { line = value; }
if (parameter == "label" ) { label = value; }
- if (parameter == "randomtree" ) { randomtree = value; }
+ if (parameter == "random" ) { randomtree = value; }
}
}
//make sure the user does not use both the line and label parameters
if ((line != "") && (label != "")) { cout << "You may use either the line or label parameters, but not both." << endl; return false; }
- //make sure you have a valid random tree value
- if ((randomtree != "0") && (randomtree != "1")) { cout << randomtree << " is not a valid randomtree value. Valid values for randomtree are 0, (meaning you have read your own trees) or 1 (meaning you want to random distribution of trees)." << endl; return false; }
-
if (commandName == "read.dist") {
validateReadFiles();
validateReadDist();
if (commandName == "parsimony") {
//are you trying to use parsimony without reading a tree or saying you want random distribution
- if (randomtree == "0") {
+ //an empty random value means no random-distribution output file was requested, so user trees must already be loaded
+ if (randomtree == "") {
if (globaldata->gTree.size() == 0) {
- cout << "You must read a treefile and a groupfile or set the randomtree parameter to 1, before you may execute the parsimony command." << endl; return false; }
+ cout << "You must read a treefile and a groupfile or set the random parameter to the output filename you wish, before you may execute the parsimony command." << endl; return false; }
}
}
}
}
/*******************************************************************************/
+//Writes one FASTA-style record per entry of data to the given stream:
+//a ">" header line carrying the entry's groupname, followed by a line holding the entry's key
+//(presumably the sequence text the map is keyed on - confirm against readFastaFile).
+//Any exception is reported on stdout and terminates the program with exit status 1.
+void FastaMap::printCondensedFasta(ostream& out){ //prints data
+	try {
+		// one record per entry: header line with the groupname, then the entry's key
+		for (it = data.begin(); it != data.end(); ++it) {
+			out << ">" << it->second.groupname << endl;
+			out << it->first << endl;
+		}
+	}
+	catch(exception& e) {
+		cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function printCondensedFasta. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+	catch(...) {
+		cout << "An unknown error has occurred in the FastaMap class function printCondensedFasta. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+		exit(1);
+	}
+}
+/*******************************************************************************/
+
void clear();
int size(); //returns number of unique sequences
void print(ostream&); //produces a 2 column file with the groupname in the first column and the names in the second column.
+ void printCondensedFasta(ostream&); //produces a fasta file.
void readFastaFile(ifstream&);
private:
allLines = 1;
commandName = commandString; //save command name to be used by other classes
- //set all non filename paramters to default values
+ //set all non filename parameters to default
reset();
//clears out data from previous read
clear();
gGroupmap = NULL;
gTree.clear();
+ labels.clear(); lines.clear(); groups.clear();
+
}
//saves help request
if (key == "freq" ) { freq = value; }
if (key == "method" ) { method = value; }
if (key == "fileroot" ) { fileroot = value; }
- if (key == "randomtree" ) { randomtree = value; }
+ if (key == "random" ) { randomtree = value; }
if (key == "groups" ) { groups = value; }
if (key == "calc") { calc = value; }
if (key == "freq" ) { freq = value; }
if (key == "method" ) { method = value; }
if (key == "fileroot" ) { fileroot = value; }
- if (key == "randomtree" ) { randomtree = value; }
+ if (key == "random" ) { randomtree = value; }
if (key == "groups" ) { groups = value; }
if (key == "calc") { calc = value; }
label = "";
groups = "";
jumble = "1"; //0 means don't jumble, 1 means jumble.
- randomtree = "0"; //0 means user will enter some user trees, 1 means they just want the random tree distribution.
+ randomtree = ""; //"" means user will enter some user trees, "outputfile" means they just want the random tree distribution to be outputted to outputfile.
freq = "100";
method = "furthest";
fileroot = "";
label = "";
groups = "";
jumble = "1"; //0 means don't jumble, 1 means jumble.
- randomtree = "0"; //0 means user will enter some user trees, 1 means they just want the random tree distribution.
+ randomtree = ""; //"" means user will enter some user trees, "outputfile" means they just want the random tree distribution to be outputted to outputfile.
freq = "100";
method = "furthest";
calc = "";
}else if (globaldata->helpRequest == "read.otu") {
cout << "The read.otu command must be run before you execute a collect.single, rarefaction.single, summary.single, " << "\n";
cout << "collect.shared, rarefaction.shared or summary.shared command. Mothur will generate a .list, .rabund and .sabund upon completion of the cluster command " << "\n";
- cout << "or you may use your own. The read.otu command parameter options are list, rabund, sabund, group and order." << "\n";
+ cout << "or you may use your own. The read.otu command parameter options are list, rabund, sabund, group, order, line and label." << "\n";
cout << "The read.otu command can be used in two ways. The first is to read a list, rabund or sabund and run the collect.single, rarefaction.single or summary.single." << "\n";
- cout << "For this use the read.otu command should be in the following format: read.otu(list=yourListFile, order=yourOrderFile)." << "\n";
+ cout << "For this use the read.otu command should be in the following format: read.otu(list=yourListFile, order=yourOrderFile, label=yourLabels)." << "\n";
cout << "The list, rabund or sabund parameter is required, but you may only use one of them." << "\n";
+ cout << "The line and label parameters are optional but you may not use both the line and label parameters at the same time." << "\n";
+ cout << "The label and line parameters are used to read specific lines in your input." << "\n";
cout << "The second way to use the read.otu command is to read a list and a group so you can use the collect.shared, rarefaction.shared or summary.shared commands." << "\n";
- cout << "In this case the read.otu command should be in the following format: read.otu(list=yourListFile, group=yourGroupFile). " << "\n";
+ cout << "In this case the read.otu command should be in the following format: read.otu(list=yourListFile, group=yourGroupFile, line=yourLines). " << "\n";
cout << "The list parameter and group paramaters are required. When using the command the second way read.otu command parses the .list file" << "\n";
cout << "and separates it into groups. It outputs a .shared file containing the OTU information for each group. The read.otu command also outputs a .list file for each group. " << "\n";
cout << "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile)." << "\n" << "\n";
cout << "The label and line parameters are used to analyze specific lines in your input." << "\n";
cout << "Note: No spaces between parameter labels (i.e. line), '=' and parameters (i.e.yourLines)." << "\n" << "\n";
}else if (globaldata->helpRequest == "parsimony") {
- cout << "The parsimony command can only be executed after a successful read.tree command, unless you use the randomtree parameter." << "\n";
- cout << "The parsimony command parameters are randomtree and iters. No parameters are required." << "\n";
- cout << "The parsimony command should be in the following format: parsimony(randomtree=yourRandomTreeValue, iters=yourIters)." << "\n";
- cout << "Example parsimony(randomtree=1, iters=500)." << "\n";
- cout << "The default value for randomTree is 0 (meaning you want to use the trees in your inputfile, randomtree=1 means you just want the random distribution of trees)," << "\n";
+ //help text for the parsimony command; the quotes around the empty default are escaped so they actually print
+ cout << "The parsimony command can only be executed after a successful read.tree command, unless you use the random parameter." << "\n";
+ cout << "The parsimony command parameters are random and iters. No parameters are required." << "\n";
+ cout << "The parsimony command should be in the following format: parsimony(random=yourOutputFilename, iters=yourIters)." << "\n";
+ cout << "Example parsimony(random=out, iters=500)." << "\n";
+ cout << "The default value for random is \"\" (meaning you want to use the trees in your inputfile, random=out means you just want the random distribution of trees outputted to out.rd_parsimony)," << "\n";
cout << "and iters is 1000. The parsimony command output three files: .parsimony, .psummary and .pdistrib, their descriptions are in the manual." << "\n";
- cout << "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile)." << "\n" << "\n";
+ cout << "Note: No spaces between parameter labels (i.e. random), '=' and parameters (i.e.yourOutputFilename)." << "\n" << "\n";
}else if (globaldata->helpRequest == "unifrac.weighted") {
cout << "The unifrac.weighted command can only be executed after a successful read.tree command." << "\n";
cout << "The unifrac.weighted command parameters are groups and iters. No parameters are required." << "\n";
int ParseListCommand::execute(){
try{
globaldata = GlobalData::getInstance();
+ int count = 1;
//read in listfile
read = new ReadPhilFile(globaldata->inputFileName);
//parses and sets each groups listvector
while(list != NULL){
label = list->getLabel();
- for(i=0; i<list->size(); i++) {
- parse(i); //parses data[i] list of sequence names
- for (it=listGroups.begin(); it != listGroups.end(); it++) { //loop through map and set new list vectors
- seq = it->second;
- seq = seq.substr(1, seq.length()); //rips off extra comma
- groupOfLists[it->first]->push_back(seq); //sets new listvector for each group
+
+ if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(label) == 1){
+
+ for(i=0; i<list->size(); i++) {
+ parse(i); //parses data[i] list of sequence names
+ for (it=listGroups.begin(); it != listGroups.end(); it++) { //loop through map and set new list vectors
+ seq = it->second;
+ seq = seq.substr(1, seq.length()); //rips off extra comma
+ groupOfLists[it->first]->push_back(seq); //sets new listvector for each group
+ }
+ listGroups.clear();
}
- listGroups.clear();
- }
- //prints each new list file
- for (i=0; i<groupMap->getNumGroups(); i++) {
- groupOfLists[groupMap->namesOfGroups[i]]->setLabel(label);
- groupOfLists[groupMap->namesOfGroups[i]]->print(*(filehandles[groupMap->namesOfGroups[i]]));
- groupOfLists[groupMap->namesOfGroups[i]]->clear();
+ //prints each new list file
+ for (i=0; i<groupMap->getNumGroups(); i++) {
+ groupOfLists[groupMap->namesOfGroups[i]]->setLabel(label);
+ groupOfLists[groupMap->namesOfGroups[i]]->print(*(filehandles[groupMap->namesOfGroups[i]]));
+ groupOfLists[groupMap->namesOfGroups[i]]->clear();
+ }
+
+ cout << label << '\t' << count << endl;
}
+
list = input->getSharedListVector();
+ count++;
}
//set groupmap for .shared commands
globaldata = GlobalData::getInstance();
//randomtree will tell us if user had their own treefile or if they just want the random distribution
- convert(globaldata->getRandomTree(), randomtree);
+ randomtree = globaldata->getRandomTree();
//user has entered their own tree
- if (randomtree == 0) {
+ if (randomtree == "") {
T = globaldata->gTree;
tmap = globaldata->gTreemap;
parsFile = globaldata->getTreeFile() + ".parsimony";
openOutputFile(distFile, outDist);
}else { //user wants random distribution
+ savetmap = globaldata->gTreemap;
getUserInput();
- parsFile = "rd_parsimony";
+ parsFile = randomtree + ".rd_parsimony";
openOutputFile(parsFile, out);
}
outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
outDist << "RandomTree#" << '\t' << "ParsScore" << endl;
- if (randomtree == 0) {
+ if (randomtree == "") {
//get pscores for users trees
for (int i = 0; i < T.size(); i++) {
cout << "Processing tree " << i+1 << endl;
//this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
for (it = validScores.begin(); it != validScores.end(); it++) {
- if (randomtree == 0) {
+ if (randomtree == "") {
it2 = uscoreFreq.find(it->first);
//user data has that score
if (it2 != uscoreFreq.end()) { uscoreFreq[it->first] /= T.size(); ucumul+= it2->second; }
printParsimonyFile();
printUSummaryFile();
- //reset randomTree parameter to 0
- globaldata->setRandomTree("0");
+ //reset globaldata's treemap if you just did random distrib
+ if (randomtree != "") { globaldata->gTreemap = savetmap; }
+
+ //reset randomTree parameter to ""
+ globaldata->setRandomTree("");
return 0;
void ParsimonyCommand::printParsimonyFile() {
try {
//column headers
- if (randomtree == 0) {
+ if (randomtree == "") {
out << "Score" << '\t' << "UserFreq" << '\t' << "UserCumul" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl;
}else {
out << "Score" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl;
//print each line
for (it = validScores.begin(); it != validScores.end(); it++) {
- if (randomtree == 0) {
+ if (randomtree == "") {
out << setprecision(6) << it->first << '\t' << '\t' << uscoreFreq[it->first] << '\t' << uCumul[it->first] << '\t' << rscoreFreq[it->first] << '\t' << rCumul[it->first] << endl;
}else{
out << setprecision(6) << it->first << '\t' << '\t' << rscoreFreq[it->first] << '\t' << rCumul[it->first] << endl;
GlobalData* globaldata;
vector<Tree*> T; //user trees
Tree* randT; //random tree
- TreeMap* tmap;
+ TreeMap* tmap;
+ TreeMap* savetmap;
Parsimony* pars;
- string parsFile, sumFile, distFile;
- int iters, randomtree, numGroups;
+ string parsFile, sumFile, distFile, randomtree;
+ int iters, numGroups;
vector<int> numEachGroup; //vector containing the number of sequences in each group the users wants for random distrib.
vector<float> userTreeScores; //scores for users trees
vector<float> UScoreSig; //tree score signifigance when compared to random trees - percentage of random trees with that score or lower.
shared = new Shared();
int i = 0;
while(SharedList != NULL){
- shared->getSharedVectors(i, SharedList); //fills sharedGroups with new info and updates sharedVector
+
+ if(globaldata->allLines == 1 || globaldata->lines.count(i+1) == 1 || globaldata->labels.count(SharedList->getLabel()) == 1){
+
+ shared->getSharedVectors(i, SharedList); //fills sharedGroups with new info and updates sharedVector
+ printSharedData(); //prints info to the .shared file
+ }
+
SharedList = input->getSharedListVector(); //get new list vector to process
- printSharedData(); //prints info to the .shared file
i++;
}
return 0;
parameters["shared"] = "shared";
parameters["summary"] = "summary";
parameters["sharedsummary"] = "sharedsummary";
- parameters["randomtree"] = "randomtree";
+ parameters["random"] = "random";
parameters["groups"] = "groups";
parameters["calc"] = "calc";