//**********************************************************************************************************************
string CooccurrenceCommand::getHelpString(){
try {
- string helpString = "help!";
-
+ string helpString = "The cooccurrence command calculates four metrics and tests their significance to assess whether presence-absence patterns are different than what one would expect by chance.";
+ helpString += "The cooccurrence command parameters are shared, metric, matrixmodel, iters, label and groups.";
+ helpString += "The matrixmodel parameter options are sim1, sim2, sim3, sim4, sim5, sim6, sim7, sim8 and sim9. Default=sim2";
+ helpString += "The metric parameter options are cscore, checker, combo and vratio. Default=cscore";
+ helpString += "The label parameter is used to analyze specific labels in your input.\n";
+ helpString += "The groups parameter allows you to specify which of the groups you would like analyzed.\n";
+ helpString += "The cooccurrence command should be in the following format: \n";
+ helpString += "cooccurrence(shared=yourSharedFile) \n";
+ helpString += "Example cooccurrence(shared=final.an.shared).\n";
+ helpString += "Note: No spaces between parameter labels (i.e. shared), '=' and parameters (i.e.yourShared).\n";
return helpString;
}
catch(exception& e) {
string getCommandCategory() { return "Hypothesis Testing"; }
string getHelpString();
string getCitation() { return "http://www.mothur.org/wiki/Cooccurrence"; }
- string getDescription() { return "Cooccurrence"; }
+ string getDescription() { return "calculates four metrics and tests their significance to assess whether presence-absence patterns are different than what one would expect by chance."; } // one-sentence summary of what the command computes, replacing the bare command name
int execute();
void help() { m->mothurOut(getHelpString()); }
MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
- vector<unsigned long long> positions = savedPositions[s];
+
+ vector<unsigned long long> positions; // stays empty unless one of the branches below fills it
+ if (savedPositions.size() != 0) { positions = savedPositions[s]; } // use the stored entry for index s; NOTE(review): only non-emptiness is checked, not that s is in range - confirm against the caller
+ else { // nothing stored: compute the positions for fastafileNames[s] here
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ positions = m->divideFile(fastafileNames[s], processors);
+#else
+ if(processors != 1){ // when processors == 1 this branch leaves positions empty
+ int numFastaSeqs = 0; // handed to setFilePosFasta; not read again inside this scope
+ positions = m->setFilePosFasta(fastafileNames[s], numFastaSeqs);
+ }
+#endif
+ }
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
//vector<unsigned long long> positions = m->divideFile(fastafileNames[s], processors);
#include "linearalgebra.h"
+// This class references functions used from "Numerical Recipes in C++" //
/*********************************************************************************************************************************/
inline double SIGN(const double a, const double b)
CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(psff);
CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos);
CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "",false,false); parameters.push_back(psfftxt);
- CommandParameter pflow("flow", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflow);
+ CommandParameter pflow("flow", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pflow);
CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(ptrim);
CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pfasta);
- CommandParameter pqfile("name", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqfile);
+ CommandParameter pqfile("qfile", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqfile); // registered under "qfile", the name the help text documents for the quality-file switch
helpString += "The sff parameter allows you to enter the sff file you would like to extract data from. You may enter multiple files by separating them by -'s.\n";
helpString += "The fasta parameter allows you to indicate if you would like a fasta formatted file generated. Default=True. \n";
helpString += "The qfile parameter allows you to indicate if you would like a quality file generated. Default=True. \n";
- helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated. Default=False. \n";
+ helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated. Default=True. \n";
helpString += "The sfftxt parameter allows you to indicate if you would like a sff.txt file generated. Default=False. \n";
helpString += "If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n";
helpString += "The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values. Default=True. \n";
temp = validParameter.validFile(parameters, "fasta", false); if (temp == "not found"){ temp = "T"; }
fasta = m->isTrue(temp);
- temp = validParameter.validFile(parameters, "flow", false); if (temp == "not found"){ temp = "F"; }
+ temp = validParameter.validFile(parameters, "flow", false); if (temp == "not found"){ temp = "T"; }
flow = m->isTrue(temp);
temp = validParameter.validFile(parameters, "trim", false); if (temp == "not found"){ temp = "T"; }
//read through the correct file and output lines you want to keep
if (accnosfile != "") { readAccnos(); }
if (fastafile != "") { readFasta(); }
- if (flowfile != "") { readFlow(); }
+ if (flowfile != "") { readFlow(); }
if (qualfile != "") { readQual(); }
if (namefile != "") { readName(); }
if (groupfile != "") { readGroup(); }
outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
}
}
-
+
//fills lines and qlines
setLines(fastaFile, qFileName);
//set file positions for fasta file
fastaFilePos = m->divideFile(filename, processors);
- if (qfilename == "") { return processors; }
-
//get name of first sequence in each chunk
map<string, int> firstSeqNames;
for (int i = 0; i < (fastaFilePos.size()-1); i++) {
in.close();
}
-
- //seach for filePos of each first name in the qfile and save in qfileFilePos
- ifstream inQual;
- m->openInputFile(qfilename, inQual);
-
- string input;
- while(!inQual.eof()){
- input = m->getline(inQual);
-
- if (input.length() != 0) {
- if(input[0] == '>'){ //this is a sequence name line
- istringstream nameStream(input);
-
- string sname = ""; nameStream >> sname;
- sname = sname.substr(1);
-
- map<string, int>::iterator it = firstSeqNames.find(sname);
-
- if(it != firstSeqNames.end()) { //this is the start of a new chunk
- unsigned long long pos = inQual.tellg();
- qfileFilePos.push_back(pos - input.length() - 1);
- firstSeqNames.erase(it);
- }
- }
- }
-
- if (firstSeqNames.size() == 0) { break; }
- }
- inQual.close();
-
-
- if (firstSeqNames.size() != 0) {
- for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
- m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
- }
- qFileName = "";
- return processors;
- }
-
- //get last file position of qfile
- FILE * pFile;
- unsigned long long size;
-
- //get num bytes in file
- pFile = fopen (qfilename.c_str(),"rb");
- if (pFile==NULL) perror ("Error opening file");
- else{
- fseek (pFile, 0, SEEK_END);
- size=ftell (pFile);
- fclose (pFile);
- }
- qfileFilePos.push_back(size);
+ // Quality-file pass: for every first-sequence name collected from the fasta chunks, record the byte
+ // offset of its header line in the quality file, then append the file's total size as the final
+ // boundary. Skipped entirely when no quality file name was given.
+ if(qfilename != "") {
+     //search for filePos of each first name in the qfile and save in qfileFilePos
+     ifstream inQual;
+     m->openInputFile(qfilename, inQual);
+
+     string input;
+     while(!inQual.eof()){
+         input = m->getline(inQual);
+
+         if (input.length() != 0) {
+             if(input[0] == '>'){ //this is a sequence name line
+                 istringstream nameStream(input);
+
+                 string sname = ""; nameStream >> sname;
+                 sname = sname.substr(1); //drop the leading '>'
+
+                 map<string, int>::iterator it = firstSeqNames.find(sname);
+
+                 if(it != firstSeqNames.end()) { //this is the start of a new chunk
+                     unsigned long long pos = inQual.tellg();
+                     //NOTE(review): assumes the stream sits exactly one byte past the header line - confirm against m->getline
+                     qfileFilePos.push_back(pos - input.length() - 1);
+                     firstSeqNames.erase(it);
+                 }
+             }
+         }
+
+         if (firstSeqNames.size() == 0) { break; } //every chunk start has been located
+     }
+     inQual.close();
+
+     //any name still listed was never seen in the quality file
+     if (firstSeqNames.size() != 0) {
+         for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
+             m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
+         }
+         qFileName = "";
+         return processors;
+     }
+
+     //get last file position of qfile
+     //start from 0 so a failed open, seek or tell never pushes an indeterminate value
+     unsigned long long size = 0;
+
+     //get num bytes in file
+     FILE * pFile = fopen (qfilename.c_str(),"rb");
+     if (pFile==NULL) perror ("Error opening file");
+     else{
+         if (fseek (pFile, 0, SEEK_END) == 0) {
+             long endPos = ftell (pFile); //ftell reports failure as -1
+             if (endPos >= 0) { size = endPos; }
+         }
+         fclose (pFile);
+     }
+
+     qfileFilePos.push_back(size);
+ }
for (int i = 0; i < (fastaFilePos.size()-1); i++) {
lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));