A741FAD215D1688E0067BCC5 /* sequencecountparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A741FAD115D1688E0067BCC5 /* sequencecountparser.cpp */; };
A7496D2E167B531B00CC7D7C /* kruskalwalliscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7496D2C167B531B00CC7D7C /* kruskalwalliscommand.cpp */; };
A74A9A9F148E881E00AB5E3E /* spline.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74A9A9E148E881E00AB5E3E /* spline.cpp */; };
+ A74C06E916A9C0A9008390A3 /* primerdesigncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74C06E816A9C0A8008390A3 /* primerdesigncommand.cpp */; };
A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */; };
A74D59A4159A1E2000043046 /* counttable.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D59A3159A1E2000043046 /* counttable.cpp */; };
A754149714840CF7005850D1 /* summaryqualcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A754149614840CF7005850D1 /* summaryqualcommand.cpp */; };
A7496D2D167B531B00CC7D7C /* kruskalwalliscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = kruskalwalliscommand.h; sourceTree = "<group>"; };
A74A9A9D148E881E00AB5E3E /* spline.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = spline.h; sourceTree = "<group>"; };
A74A9A9E148E881E00AB5E3E /* spline.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = spline.cpp; sourceTree = "<group>"; };
+ A74C06E616A9C097008390A3 /* primerdesigncommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = primerdesigncommand.h; sourceTree = "<group>"; };
+ A74C06E816A9C0A8008390A3 /* primerdesigncommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = primerdesigncommand.cpp; sourceTree = "<group>"; };
A74D36B6137DAFAA00332B0C /* chimerauchimecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimerauchimecommand.h; sourceTree = "<group>"; };
A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimerauchimecommand.cpp; sourceTree = "<group>"; };
A74D59A3159A1E2000043046 /* counttable.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = counttable.cpp; sourceTree = "<group>"; };
A7E9B79512D37EC400DA6239 /* pipelinepdscommand.cpp */,
A7E9B79812D37EC400DA6239 /* preclustercommand.h */,
A7E9B79712D37EC400DA6239 /* preclustercommand.cpp */,
+ A74C06E616A9C097008390A3 /* primerdesigncommand.h */,
+ A74C06E816A9C0A8008390A3 /* primerdesigncommand.cpp */,
A7E9B7A212D37EC400DA6239 /* quitcommand.h */,
A7E9B7A112D37EC400DA6239 /* quitcommand.cpp */,
A7E9B7AC12D37EC400DA6239 /* rarefactcommand.h */,
834D9D5C1656DEC800E7FAB9 /* regularizeddecisiontree.cpp in Sources */,
A7496D2E167B531B00CC7D7C /* kruskalwalliscommand.cpp in Sources */,
A79EEF8616971D4A0006DEC1 /* filtersharedcommand.cpp in Sources */,
+ A74C06E916A9C0A9008390A3 /* primerdesigncommand.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
}
#endif
- if (!m->isBlank(newaccnosFile)) { m->mothurOutEndLine(); m->mothurOut("[WARNING]: mothur suspects some of your sequences may be reversed, please check " + newaccnosFile + " for the list of the sequences."); m->mothurOutEndLine();
+ if (!m->isBlank(newaccnosFile)) { m->mothurOutEndLine(); m->mothurOut("[WARNING]: mothur reversed some your sequences for a better classification. If you would like to take a closer look, please check " + newaccnosFile + " for the list of the sequences."); m->mothurOutEndLine();
outputNames.push_back(newaccnosFile); outputTypes["accnos"].push_back(newaccnosFile);
}else { m->mothurRemove(newaccnosFile); }
#include "sffmultiplecommand.h"
#include "classifysharedcommand.h"
#include "filtersharedcommand.h"
+#include "primerdesigncommand.h"
/*******************************************************/
commands["quit"] = "MPIEnabled";
commands["classify.shared"] = "classify.shared";
commands["filter.shared"] = "filter.shared";
+ commands["primer.design"] = "primer.design";
}
else if(commandName == "sff.multiple") { command = new SffMultipleCommand(optionString); }
else if(commandName == "classify.shared") { command = new ClassifySharedCommand(optionString); }
else if(commandName == "filter.shared") { command = new FilterSharedCommand(optionString); }
+ else if(commandName == "primer.design") { command = new PrimerDesignCommand(optionString); }
else { command = new NoCommand(optionString); }
return command;
else if(commandName == "sff.multiple") { pipecommand = new SffMultipleCommand(optionString); }
else if(commandName == "classify.shared") { pipecommand = new ClassifySharedCommand(optionString); }
else if(commandName == "filter.shared") { pipecommand = new FilterSharedCommand(optionString); }
+ else if(commandName == "primer.design") { pipecommand = new PrimerDesignCommand(optionString); }
else { pipecommand = new NoCommand(optionString); }
return pipecommand;
else if(commandName == "sff.multiple") { shellcommand = new SffMultipleCommand(); }
else if(commandName == "classify.shared") { shellcommand = new ClassifySharedCommand(); }
else if(commandName == "filter.shared") { shellcommand = new FilterSharedCommand(); }
+ else if(commandName == "primer.design") { shellcommand = new PrimerDesignCommand(); }
else { shellcommand = new NoCommand(); }
return shellcommand;
if (abort == true) { if (calledHelp) { return 0; } return 2; }
+ int start = time(NULL);
+
readFasta();
if (m->control_pressed) { return 0; }
delete input;
}
+ m->mothurOut("It took " + toString(time(NULL) - start) + " secs to find the consensus sequences.");
+
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
string firstCol, secondCol;
in >> firstCol; m->gobble(in); in >> secondCol; m->gobble(in);
-
+ //cout << firstCol << '\t' << secondCol << endl;
+ m->checkName(firstCol);
+ m->checkName(secondCol);
+ //cout << firstCol << '\t' << secondCol << endl;
+
vector<string> names;
m->splitAtChar(secondCol, names, ',');
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ m->checkName(firstCol);
+ m->checkName(secondCol);
//parse names into vector
vector<string> theseNames;
m->splitAtComma(secondCol, theseNames);
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ m->checkName(firstCol);
+ m->checkName(secondCol);
//parse names into vector
vector<string> theseNames;
m->splitAtComma(secondCol, theseNames);
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ m->checkName(firstCol);
it = groupIndex.find(secondCol);
if (it == groupIndex.end()) { //add group, assigning the group and number so we can use vectors above
groupIndex[secondCol] = count;
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ m->checkName(firstCol);
it = groupIndex.find(secondCol);
if (it == groupIndex.end()) { //add group, assigning the group and number so we can use vectors above
groupIndex[secondCol] = count;
string firstCol, secondCol;
in >> firstCol; m->gobble(in); in >> secondCol; m->gobble(in);
+ m->checkName(firstCol);
+ m->checkName(secondCol);
+
vector<string> names;
m->splitAtChar(secondCol, names, ',');
if (pid == 0) {
#endif
-
+
+ if (mout->changedSeqNames) { mout->mothurOut("[WARNING]: your sequence names contained ':'. I changed them to '_' to avoid problems in your downstream analysis.\n"); }
+
mout->mothurOutEndLine();
input = getCommand();
//cout << pid << " is in execute " << commandName << endl;
#endif
//executes valid command
+ mout->changedSeqNames = false;
mout->runParse = true;
mout->clearGroups();
mout->clearAllGroups();
if (input[0] != '#') {
-
+ if (mout->changedSeqNames) { mout->mothurOut("[WARNING]: your sequence names contained ':'. I changed them to '_' to avoid problems in your downstream analysis.\n"); }
mout->mothurOutEndLine();
mout->mothurOut("mothur > " + input);
mout->mothurOutEndLine();
if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
#endif
//executes valid command
+ mout->changedSeqNames = false;
mout->runParse = true;
mout->clearGroups();
mout->clearAllGroups();
input = getNextCommand(listOfCommands);
if (input == "") { input = "quit()"; }
+
+ if (mout->changedSeqNames) { mout->mothurOut("[WARNING]: your sequence names contained ':'. I changed them to '_' to avoid problems in your downstream analysis.\n"); }
if (mout->gui) {
if ((input.find("quit") != string::npos) || (input.find("set.logfile") != string::npos)) {}
//cout << pid << " is in execute" << endl;
#endif
//executes valid command
+ mout->changedSeqNames = false;
mout->runParse = true;
mout->clearGroups();
mout->clearAllGroups();
bool FlowData::getNext(ifstream& flowFile){
try {
- flowFile >> seqName >> endFlow;
- if (seqName.length() != 0) {
- //cout << "in Flowdata " + seqName << endl;
+ seqName = getSequenceName(flowFile);
+ flowFile >> endFlow;
+ if (!m->control_pressed) {
for(int i=0;i<numFlows;i++) { flowFile >> flowData[i]; }
- //cout << "in Flowdata read " << seqName + " done" << endl;
updateEndFlow();
translateFlow();
m->gobble(flowFile);
- }else{ m->mothurOut("Error in reading your flowfile, at position " + toString(flowFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
+ }
if(flowFile){ return 1; }
else { return 0; }
}
}
+//********************************************************************************************************************
+string FlowData::getSequenceName(ifstream& flowFile) {
+ try {
+ string name = "";
+
+ flowFile >> name;
+
+ if (name.length() != 0) {
+ for (int i = 0; i < name.length(); i++) {
+ if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
+ }
+ }else{ m->mothurOut("Error in reading your flowfile, at position " + toString(flowFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true; }
+
+ return name;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "FlowData", "getSequenceName");
+ exit(1);
+ }
+}
//**********************************************************************************************************************
string seqName, locationString, sequence, baseFlow;
int numFlows, maxFlows, endFlow;
vector<float> flowData;
+ string getSequenceName(ifstream&);
};
#endif
/************************************************************/
GroupMap::~GroupMap(){}
-
/************************************************************/
int GroupMap::readMap() {
try {
setNamesOfGroups(seqGroup);
if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
+ m->checkName(seqName);
it = groupmap.find(seqName);
if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); }
setNamesOfGroups(seqGroup);
if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
-
+ m->checkName(seqName);
it = groupmap.find(seqName);
if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); }
setNamesOfGroups(seqGroup);
if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
-
+ m->checkName(seqName);
it = groupmap.find(seqName);
if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); }
setNamesOfGroups(seqGroup);
if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
-
+ m->checkName(seqName);
it = groupmap.find(seqName);
if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); }
setNamesOfGroups(seqGroup);
if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
-
+ m->checkName(seqName);
it = groupmap.find(seqName);
if (it != groupmap.end()) { error = 1; m->mothurOut("Your group file contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); }
setNamesOfGroups(seqGroup);
if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
-
+ m->checkName(seqName);
it = groupmap.find(seqName);
if (it != groupmap.end()) { error = 1; m->mothurOut("Your group file contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); }
setNamesOfGroups(seqGroup);
if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
-
+ m->checkName(seqName);
it = groupmap.find(seqName);
if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); }
setNamesOfGroups(seqGroup);
if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
-
+ m->checkName(seqName);
it = groupmap.find(seqName);
if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); }
void GroupMap::setGroup(string sequenceName, string groupN) {
setNamesOfGroups(groupN);
-
+ m->checkName(sequenceName);
it = groupmap.find(sequenceName);
if (it != groupmap.end()) { m->mothurOut("Your groupfile contains more than 1 sequence named " + sequenceName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); }
CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pgapextend);
CommandParameter pthreshold("threshold", "Number", "", "40", "", "", "","",false,false); parameters.push_back(pthreshold);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+ CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa", "sanger", "", "", "","",false,false,true); parameters.push_back(pformat);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
string helpString = "";
helpString += "The make.contigs command reads a file, forward fastq file and a reverse fastq file or forward fasta and reverse fasta files and outputs new fasta. It will also provide new quality files if the fastq or file parameter is used.\n";
helpString += "If an oligos file is provided barcodes and primers will be trimmed, and a group file will be created.\n";
- helpString += "The make.contigs command parameters are ffastq, rfastq, oligos, tdiffs, bdiffs, ldiffs, sdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, allfiles and processors.\n";
+ helpString += "The make.contigs command parameters are ffastq, rfastq, oligos, format, tdiffs, bdiffs, ldiffs, sdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, allfiles and processors.\n";
helpString += "The ffastq and rfastq, file, or ffasta and rfasta parameters are required.\n";
helpString += "The file parameter is 2 column file containing the forward fastq files in the first column and their matching reverse fastq files in the second column. Mothur will process each pair and create a combined fasta and qual file with all the sequences.\n";
helpString += "The ffastq and rfastq parameters are used to provide a forward fastq and reverse fastq file to process. If you provide one, you must provide the other.\n";
helpString += "The ffasta and rfasta parameters are used to provide a forward fasta and reverse fasta file to process. If you provide one, you must provide the other.\n";
helpString += "The fqfile and rqfile parameters are used to provide a forward quality and reverse quality files to process with the ffasta and rfasta parameters. If you provide one, you must provide the other.\n";
- helpString += "The align parameter allows you to specify the alignment method to use. Your options are: gotoh and needleman. The default is needleman.\n";
+ helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa or illumina, default=sanger.\n";
+ helpString += "The align parameter allows you to specify the alignment method to use. Your options are: gotoh and needleman. The default is needleman.\n";
helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
align = validParameter.validFile(parameters, "align", false); if (align == "not found"){ align = "needleman"; }
if ((align != "needleman") && (align != "gotoh")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman or gotoh. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; }
+
+ format = validParameter.validFile(parameters, "format", false); if (format == "not found"){ format = "sanger"; }
+
+ if ((format != "sanger") && (format != "illumina") && (format != "solexa")) {
+ m->mothurOut(format + " is not a valid format. Your format choices are sanger, solexa and illumina, aborting." ); m->mothurOutEndLine();
+ abort=true;
+ }
+
+ //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference.
+ for (int i = -64; i < 65; i++) {
+ char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499));
+ convertTable.push_back(temp);
+ }
}
}
if (name2 != "") { if (name != name2) { m->mothurOut("[WARNING]: names do not match. read " + name + " for fasta and " + name2 + " for quality, ignoring."); ignore=true; } }
if (quality.length() != sequence.length()) { m->mothurOut("[WARNING]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores, ignoring read."); ignore=true; }
- vector<int> qualScores;
- int controlChar = int('!');
- for (int i = 0; i < quality.length(); i++) {
- int temp = int(quality[i]);
- temp -= controlChar;
-
- qualScores.push_back(temp);
- }
-
+ vector<int> qualScores = convertQual(quality);
+
read.name = name;
read.sequence = sequence;
read.scores = qualScores;
}
}
//**********************************************************************************************************************
+vector<int> MakeContigsCommand::convertQual(string qual) {
+ try {
+ vector<int> qualScores;
+
+ for (int i = 0; i < qual.length(); i++) {
+
+ int temp = 0;
+ temp = int(qual[i]);
+ if (format == "illumina") {
+ temp -= 64; //char '@'
+ }else if (format == "solexa") {
+ temp = int(convertTable[temp]); //convert to sanger
+ temp -= int('!'); //char '!'
+ }else {
+ temp -= int('!'); //char '!'
+ }
+ qualScores.push_back(temp);
+ }
+
+ return qualScores;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeContigsCommand", "convertQual");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
private:
bool abort, allFiles, createGroup;
- string outputDir, ffastqfile, rfastqfile, align, oligosfile, rfastafile, ffastafile, rqualfile, fqualfile, file;
+ string outputDir, ffastqfile, rfastqfile, align, oligosfile, rfastafile, ffastafile, rqualfile, fqualfile, file, format;
float match, misMatch, gapOpen, gapExtend;
int processors, longestBase, threshold, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs;
vector<string> outputNames;
vector<string> linker;
vector<string> spacer;
vector<string> primerNameVector;
- vector<string> barcodeNameVector;
+ vector<string> barcodeNameVector;
+ vector<char> convertTable;
map<string, int> groupCounts;
map<string, string> groupMap;
+ vector<int> convertQual(string);
fastqRead readFastq(ifstream&, bool&);
vector< vector< vector<string> > > preProcessData(unsigned long int&);
vector< vector<string> > readFileNames(string);
if (iters != 0) {
//we need to find the average distance and standard deviation for each groups distance
+ vector< vector<seqDist> > calcAverages = m->getAverages(calcDistsTotals, mode);
- vector< vector<seqDist> > calcAverages; calcAverages.resize(matrixCalculators.size());
- for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
- calcAverages[i].resize(calcDistsTotals[0][i].size());
-
- for (int j = 0; j < calcAverages[i].size(); j++) {
- calcAverages[i][j].seq1 = calcDistsTotals[0][i][j].seq1;
- calcAverages[i][j].seq2 = calcDistsTotals[0][i][j].seq2;
- calcAverages[i][j].dist = 0.0;
- }
- }
- if (mode == "average") {
- for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
- for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
- for (int j = 0; j < calcAverages[i].size(); j++) {
- calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
- if (m->debug) { m->mothurOut("[DEBUG]: Totaling for average calc: iter = " + toString(thisIter) + ", " + thisLookup[calcDistsTotals[thisIter][i][j].seq1]->getGroup() + " - " + thisLookup[calcDistsTotals[thisIter][i][j].seq2]->getGroup() + " distance = " + toString(calcDistsTotals[thisIter][i][j].dist) + ". New total = " + toString(calcAverages[i][j].dist) + ".\n"); }
- }
- }
- }
-
- for (int i = 0; i < calcAverages.size(); i++) { //finds average.
- for (int j = 0; j < calcAverages[i].size(); j++) {
- calcAverages[i][j].dist /= (float) iters;
- }
- }
- }else { //find median
- for (int i = 0; i < calcAverages.size(); i++) { //for each calc
- for (int j = 0; j < calcAverages[i].size(); j++) { //for each comparison
- vector<double> dists;
- for (int thisIter = 0; thisIter < iters; thisIter++) { //for each subsample
- dists.push_back(calcDistsTotals[thisIter][i][j].dist);
- }
- sort(dists.begin(), dists.end());
- calcAverages[i][j].dist = dists[(iters/2)];
- }
- }
- }
//find standard deviation
- vector< vector<seqDist> > stdDev; stdDev.resize(matrixCalculators.size());
- for (int i = 0; i < stdDev.size(); i++) { //initialize sums to zero.
- stdDev[i].resize(calcDistsTotals[0][i].size());
-
- for (int j = 0; j < stdDev[i].size(); j++) {
- stdDev[i][j].seq1 = calcDistsTotals[0][i][j].seq1;
- stdDev[i][j].seq2 = calcDistsTotals[0][i][j].seq2;
- stdDev[i][j].dist = 0.0;
- }
- }
-
- for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
- for (int i = 0; i < stdDev.size(); i++) {
- for (int j = 0; j < stdDev[i].size(); j++) {
- stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
- }
- }
- }
-
- for (int i = 0; i < stdDev.size(); i++) { //finds average.
- for (int j = 0; j < stdDev[i].size(); j++) {
- stdDev[i][j].dist /= (float) iters;
- stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
- }
- }
+ vector< vector<seqDist> > stdDev = m->getStandardDeviation(calcDistsTotals, calcAverages);
//print results
for (int i = 0; i < calcDists.size(); i++) {
try {
string pattern = "";
- if (type == "metastats") { pattern = "[filename],[distance],[groups],metastats"; }
+ if (type == "metastats") { pattern = "[filename],[distance],[group],metastats"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
return pattern;
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
//are there confidence scores, if so remove them
if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); }
map<string, string>::iterator itTax = taxMap.find(firstCol);
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
//are there confidence scores, if so remove them
if (secondCol.find_first_of('(') != -1) { removeConfidences(secondCol); }
map<string, string>::iterator itTax = taxMap.find(firstCol);
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
+
//parse names into vector
vector<string> theseNames;
splitAtComma(secondCol, theseNames);
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
+
//parse names into vector
vector<string> theseNames;
splitAtComma(secondCol, theseNames);
- for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
+ for (int i = 0; i < theseNames.size(); i++) { nameMap[theseNames[i]] = firstCol; }
pairDone = false;
}
}
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
nameMap[secondCol] = firstCol;
pairDone = false;
}
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
nameMap[secondCol] = firstCol;
pairDone = false;
}
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
//parse names into vector
vector<string> theseNames;
splitAtComma(secondCol, theseNames);
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
//parse names into vector
vector<string> theseNames;
splitAtComma(secondCol, theseNames);
if (columnOne) { firstCol = pieces[i]; columnOne=false; }
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
- if (pairDone) { nameMap[firstCol] = secondCol; pairDone = false; }
+ if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
+ nameMap[firstCol] = secondCol; pairDone = false; }
}
}
in.close();
if (columnOne) { firstCol = pieces[i]; columnOne=false; }
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
- if (pairDone) { nameMap[firstCol] = secondCol; pairDone = false; }
+ if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
+ nameMap[firstCol] = secondCol; pairDone = false; }
}
}
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
vector<string> temp;
splitAtComma(secondCol, temp);
nameMap[firstCol] = temp;
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
vector<string> temp;
splitAtComma(secondCol, temp);
nameMap[firstCol] = temp;
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
+ int num = getNumNames(secondCol);
+ nameMap[firstCol] = num;
+ pairDone = false;
+ }
+ }
+ }
+ in.close();
+
+ if (rest != "") {
+ vector<string> pieces = splitWhiteSpace(rest);
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
+ int num = getNumNames(secondCol);
+ nameMap[firstCol] = num;
+ pairDone = false;
+ }
+ }
+ }
+
+ return nameMap;
+
+ }
+ catch(exception& e) {
+ errorOut(e, "MothurOut", "readNames");
+ exit(1);
+ }
+}
+/**********************************************************************************************************************/
+map<string, int> MothurOut::readNames(string namefile, unsigned long int& numSeqs) {
+ try {
+ map<string, int> nameMap;
+ numSeqs = 0;
+
+ //open input file
+ ifstream in;
+ openInputFile(namefile, in);
+
+ string rest = "";
+ char buffer[4096];
+ bool pairDone = false;
+ bool columnOne = true;
+ string firstCol, secondCol;
+
+ while (!in.eof()) {
+ if (control_pressed) { break; }
+
+ in.read(buffer, 4096);
+ vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
+
+ for (int i = 0; i < pieces.size(); i++) {
+ if (columnOne) { firstCol = pieces[i]; columnOne=false; }
+ else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+
+ if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
int num = getNumNames(secondCol);
nameMap[firstCol] = num;
pairDone = false;
+ numSeqs += num;
}
}
}
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
int num = getNumNames(secondCol);
nameMap[firstCol] = num;
pairDone = false;
+ numSeqs += num;
}
}
}
exit(1);
}
}
+/************************************************************/
+int MothurOut::checkName(string& name) {
+ try {
+ for (int i = 0; i < name.length(); i++) {
+ if (name[i] == ':') { name[i] = '_'; changedSeqNames = true; }
+ }
+ return 0;
+ }
+ catch(exception& e) {
+ errorOut(e, "MothurOut", "checkName");
+ exit(1);
+ }
+}
/**********************************************************************************************************************/
int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, map<string, string>& fastamap) {
try {
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
int num = getNumNames(secondCol);
map<string, string>::iterator it = fastamap.find(firstCol);
else { secondCol = pieces[i]; pairDone = true; columnOne=true; }
if (pairDone) {
+ checkName(firstCol);
+ checkName(secondCol);
int num = getNumNames(secondCol);
map<string, string>::iterator it = fastamap.find(firstCol);
in.read(buffer, 4096);
vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
- for (int i = 0; i < pieces.size(); i++) { names.insert(pieces[i]); }
+ for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.insert(pieces[i]); }
}
in.close();
if (rest != "") {
vector<string> pieces = splitWhiteSpace(rest);
- for (int i = 0; i < pieces.size(); i++) { names.insert(pieces[i]); }
+ for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.insert(pieces[i]); }
}
return names;
}
in.read(buffer, 4096);
vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
- for (int i = 0; i < pieces.size(); i++) { names.push_back(pieces[i]); }
+ for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); }
}
in.close();
if (rest != "") {
vector<string> pieces = splitWhiteSpace(rest);
- for (int i = 0; i < pieces.size(); i++) { names.push_back(pieces[i]); }
+ for (int i = 0; i < pieces.size(); i++) { checkName(pieces[i]); names.push_back(pieces[i]); }
}
return 0;
exit(1);
}
}
+/**************************************************************************************************/
+vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals, string mode) {
+ try{
+
+ vector< vector<seqDist> > calcAverages; //calcAverages.resize(calcDistsTotals[0].size());
+ for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
+ //calcAverages[i].resize(calcDistsTotals[0][i].size());
+ vector<seqDist> temp;
+ for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
+ seqDist tempDist;
+ tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
+ tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
+ tempDist.dist = 0.0;
+ temp.push_back(tempDist);
+ }
+ calcAverages.push_back(temp);
+ }
+
+ if (mode == "average") {
+ for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
+ for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
+ for (int j = 0; j < calcAverages[i].size(); j++) {
+ calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
+ }
+ }
+ }
+
+ for (int i = 0; i < calcAverages.size(); i++) { //finds average.
+ for (int j = 0; j < calcAverages[i].size(); j++) {
+ calcAverages[i][j].dist /= (float) calcDistsTotals.size();
+ }
+ }
+ }else { //find median
+ for (int i = 0; i < calcAverages.size(); i++) { //for each calc
+ for (int j = 0; j < calcAverages[i].size(); j++) { //for each comparison
+ vector<double> dists;
+ for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //for each subsample
+ dists.push_back(calcDistsTotals[thisIter][i][j].dist);
+ }
+ sort(dists.begin(), dists.end());
+ calcAverages[i][j].dist = dists[(calcDistsTotals.size()/2)];
+ }
+ }
+ }
+
+ return calcAverages;
+ }
+ catch(exception& e) {
+ errorOut(e, "MothurOut", "getAverages");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals) {
+ try{
+
+ vector< vector<seqDist> > calcAverages; //calcAverages.resize(calcDistsTotals[0].size());
+ for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
+ //calcAverages[i].resize(calcDistsTotals[0][i].size());
+ vector<seqDist> temp;
+ for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
+ seqDist tempDist;
+ tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
+ tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
+ tempDist.dist = 0.0;
+ temp.push_back(tempDist);
+ }
+ calcAverages.push_back(temp);
+ }
+
+
+ for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
+ for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
+ for (int j = 0; j < calcAverages[i].size(); j++) {
+ calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
+ }
+ }
+ }
+
+ for (int i = 0; i < calcAverages.size(); i++) { //finds average.
+ for (int j = 0; j < calcAverages[i].size(); j++) {
+ calcAverages[i][j].dist /= (float) calcDistsTotals.size();
+ }
+ }
+
+ return calcAverages;
+ }
+ catch(exception& e) {
+ errorOut(e, "MothurOut", "getAverages");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals) {
+ try{
+
+ vector< vector<seqDist> > calcAverages = getAverages(calcDistsTotals);
+
+ //find standard deviation
+ vector< vector<seqDist> > stdDev;
+ for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
+ vector<seqDist> temp;
+ for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
+ seqDist tempDist;
+ tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
+ tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
+ tempDist.dist = 0.0;
+ temp.push_back(tempDist);
+ }
+ stdDev.push_back(temp);
+ }
+
+ for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
+ for (int i = 0; i < stdDev.size(); i++) {
+ for (int j = 0; j < stdDev[i].size(); j++) {
+ stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
+ }
+ }
+ }
+
+ for (int i = 0; i < stdDev.size(); i++) { //finds average.
+ for (int j = 0; j < stdDev[i].size(); j++) {
+ stdDev[i][j].dist /= (float) calcDistsTotals.size();
+ stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
+ }
+ }
+
+ return stdDev;
+ }
+ catch(exception& e) {
+ errorOut(e, "MothurOut", "getAverages");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals, vector< vector<seqDist> >& calcAverages) {
+ try{
+ //find standard deviation
+ vector< vector<seqDist> > stdDev;
+ for (int i = 0; i < calcDistsTotals[0].size(); i++) { //initialize sums to zero.
+ vector<seqDist> temp;
+ for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
+ seqDist tempDist;
+ tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
+ tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
+ tempDist.dist = 0.0;
+ temp.push_back(tempDist);
+ }
+ stdDev.push_back(temp);
+ }
+
+ for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
+ for (int i = 0; i < stdDev.size(); i++) {
+ for (int j = 0; j < stdDev[i].size(); j++) {
+ stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
+ }
+ }
+ }
+
+ for (int i = 0; i < stdDev.size(); i++) { //finds average.
+ for (int j = 0; j < stdDev[i].size(); j++) {
+ stdDev[i][j].dist /= (float) calcDistsTotals.size();
+ stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
+ }
+ }
+
+ return stdDev;
+ }
+ catch(exception& e) {
+ errorOut(e, "MothurOut", "getAverages");
+ exit(1);
+ }
+}
+
/**************************************************************************************************/
bool MothurOut::isContainingOnlyDigits(string input) {
try{
vector<string> binLabelsInFile;
vector<string> currentBinLabels;
string saveNextLabel, argv, sharedHeaderMode, groupMode;
- bool printedHeaders, commandInputsConvertError;
+ bool printedHeaders, commandInputsConvertError, changedSeqNames;
//functions from mothur.h
//file operations
set<string> readAccnos(string);
int readAccnos(string, vector<string>&);
map<string, int> readNames(string);
+ map<string, int> readNames(string, unsigned long int&);
int readTax(string, map<string, string>&);
int readNames(string, map<string, string>&, map<string, int>&);
int readNames(string, map<string, string>&);
string removeQuotes(string);
string makeList(vector<string>&);
bool isSubset(vector<string>, vector<string>); //bigSet, subset
+ int checkName(string&);
//math operation
int factorial(int num);
vector<double> getStandardDeviation(vector< vector<double> >&);
vector<double> getStandardDeviation(vector< vector<double> >&, vector<double>&);
vector<double> getAverages(vector< vector<double> >&);
+ vector< vector<seqDist> > getStandardDeviation(vector< vector< vector<seqDist> > >&);
+ vector< vector<seqDist> > getStandardDeviation(vector< vector< vector<seqDist> > >&, vector< vector<seqDist> >&);
+ vector< vector<seqDist> > getAverages(vector< vector< vector<seqDist> > >&, string);
+ vector< vector<seqDist> > getAverages(vector< vector< vector<seqDist> > >&);
int control_pressed;
bool executing, runParse, jumble, gui, mothurCalling, debug;
debug = false;
sharedHeaderMode = "";
groupMode = "group";
+ changedSeqNames = false;
}
~MothurOut();
///variables for examples below that you will most likely want to put in the header for
//use by the other class functions.
- string phylipfile, columnfile, namefile, fastafile, sharedfile, method;
+ string phylipfile, columnfile, namefile, fastafile, sharedfile, method, countfile;
int processors;
bool useTiming, allLines;
vector<string> Estimators, Groups;
//saved by mothur that is associated with the other files you are using as inputs.
//You can do so by adding the files associated with the namefile to the files vector and then asking parser to check.
//This saves our users headaches over file mismatches because they forgot to include the namefile, :)
- if (namefile == "") {
- vector<string> files; files.push_back(fastafile);
- parser.getNameFile(files);
- }
+ if (countfile == "") {
+ if (namefile == "") {
+ vector<string> files; files.push_back(fastafile);
+ parser.getNameFile(files);
+ }
+ }
+
}
--- /dev/null
+//
+// primerdesigncommand.cpp
+// Mothur
+//
+// Created by Sarah Westcott on 1/18/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "primerdesigncommand.h"
+
+//**********************************************************************************************************************
+vector<string> PrimerDesignCommand::setParameters(){
+ try {
+ CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+ CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","summary-list",false,true,true); parameters.push_back(plist);
+ CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","",false,true, true); parameters.push_back(pfasta);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pcount);
+ CommandParameter plength("length", "Number", "", "18", "", "", "","",false,false); parameters.push_back(plength);
+ CommandParameter pmintm("mintm", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmintm);
+ CommandParameter pmaxtm("maxtm", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxtm);
+ CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pprocessors);
+ CommandParameter potunumber("otunumber", "Number", "", "-1", "", "", "","",false,true,true); parameters.push_back(potunumber);
+ CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
+ CommandParameter pcutoff("cutoff", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pcutoff);
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+
+ vector<string> myArray;
+ for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PrimerDesignCommand", "setParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string PrimerDesignCommand::getHelpString(){
+ try {
+ string helpString = "";
+ helpString += "The primer.design allows you to design sequence fragments that are specific to particular OTUs.\n";
+ helpString += "The primer.design command parameters are: list, fasta, name, count, otunumber, cutoff, length, pdiffs, mintm, maxtm, processors and label.\n";
+ helpString += "The list parameter allows you to provide a list file and is required.\n";
+ helpString += "The fasta parameter allows you to provide a fasta file and is required.\n";
+ helpString += "The name parameter allows you to provide a name file associated with your fasta file.\n";
+ helpString += "The count parameter allows you to provide a count file associated with your fasta file.\n";
+ helpString += "The label parameter is used to indicate the label you want to use from your list file.\n";
+ helpString += "The otunumber parameter is used to indicate the otu you want to use from your list file. It is required.\n";
+ helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
+ helpString += "The length parameter is used to indicate the length of the primer. The default is 18.\n";
+ helpString += "The mintm parameter is used to indicate minimum melting temperature.\n";
+ helpString += "The maxtm parameter is used to indicate maximum melting temperature.\n";
+ helpString += "The processors parameter allows you to indicate the number of processors you want to use. Default=1.\n";
+ helpString += "The cutoff parameter allows you set a percentage of sequences that support the base. For example: cutoff=97 would only return a sequence that only showed ambiguities for bases that were not supported by at least 97% of sequences.\n";
+ helpString += "The primer.desing command should be in the following format: primer.design(list=yourListFile, fasta=yourFastaFile, name=yourNameFile)\n";
+ helpString += "primer.design(list=final.an.list, fasta=final.fasta, name=final.names, label=0.03)\n";
+ return helpString;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PrimerDesignCommand", "getHelpString");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string PrimerDesignCommand::getOutputPattern(string type) {
+ try {
+ string pattern = "";
+
+ if (type == "fasta") { pattern = "[filename],[distance],otu.cons.fasta"; }
+ else if (type == "summary") { pattern = "[filename],[distance],primer.summary"; }
+ else if (type == "list") { pattern = "[filename],pick,[extension]"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
+
+ return pattern;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PrimerDesignCommand", "getOutputPattern");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+PrimerDesignCommand::PrimerDesignCommand(){
+ try {
+ abort = true; calledHelp = true;
+ setParameters();
+ vector<string> tempOutNames;
+ outputTypes["summary"] = tempOutNames;
+ outputTypes["fasta"] = tempOutNames;
+ outputTypes["list"] = tempOutNames;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PrimerDesignCommand", "PrimerDesignCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+PrimerDesignCommand::PrimerDesignCommand(string option) {
+ try {
+ abort = false; calledHelp = false;
+
+ //allow user to run help
+ if(option == "help") { help(); abort = true; calledHelp = true; }
+ else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+
+ else {
+ //valid paramters for this command
+ vector<string> myArray = setParameters();
+
+ OptionParser parser(option);
+ map<string,string> parameters = parser.getParameters();
+
+ ValidParameters validParameter;
+ map<string,string>::iterator it;
+ //check to make sure all parameters are valid for command
+ for (it = parameters.begin(); it != parameters.end(); it++) {
+ if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
+ }
+
+ vector<string> tempOutNames;
+ outputTypes["summary"] = tempOutNames;
+ outputTypes["fasta"] = tempOutNames;
+ outputTypes["list"] = tempOutNames;
+
+ //if the user changes the input directory command factory will send this info to us in the output parameter
+ string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ if (inputDir == "not found"){ inputDir = ""; }
+ else {
+ string path;
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("fasta");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["fasta"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("name");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["name"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("list");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["list"] = inputDir + it->second; }
+ }
+ }
+
+ //check for parameters
+ namefile = validParameter.validFile(parameters, "name", true);
+ if (namefile == "not open") { abort = true; }
+ else if (namefile == "not found") { namefile = ""; }
+ else { m->setNameFile(namefile); }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { countfile = ""; abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ //get fastafile - it is required
+ fastafile = validParameter.validFile(parameters, "fasta", true);
+ if (fastafile == "not open") { fastafile = ""; abort=true; }
+ else if (fastafile == "not found") {
+ fastafile = m->getFastaFile();
+ if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
+ else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
+ }else { m->setFastaFile(fastafile); }
+
+ //get listfile - it is required
+ listfile = validParameter.validFile(parameters, "list", true);
+ if (listfile == "not open") { listfile = ""; abort=true; }
+ else if (listfile == "not found") {
+ listfile = m->getListFile();
+ if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
+ else { m->mothurOut("You have no current listfile and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
+ }else { m->setListFile(listfile); }
+
+
+ if ((namefile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+ }
+
+
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
+ outputDir = m->hasPath(listfile); //if user entered a file with a path then preserve it
+ }
+
+ string temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "100"; }
+ m->mothurConvert(temp, cutoff);
+
+ temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
+ m->mothurConvert(temp, pdiffs);
+
+ temp = validParameter.validFile(parameters, "length", false); if (temp == "not found") { temp = "18"; }
+ m->mothurConvert(temp, length);
+
+ temp = validParameter.validFile(parameters, "mintm", false); if (temp == "not found") { temp = "-1"; }
+ m->mothurConvert(temp, minTM);
+
+ temp = validParameter.validFile(parameters, "maxtm", false); if (temp == "not found") { temp = "-1"; }
+ m->mothurConvert(temp, maxTM);
+
+ temp = validParameter.validFile(parameters, "otunumber", false); if (temp == "not found") { temp = "-1"; }
+ m->mothurConvert(temp, otunumber);
+ if (otunumber < 1) { m->mothurOut("[ERROR]: You must provide an OTU number, aborting.\n"); abort = true; }
+
+ temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
+ m->setProcessors(temp);
+ m->mothurConvert(temp, processors);
+
+ label = validParameter.validFile(parameters, "label", false);
+ if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }
+
+ if (countfile == "") {
+ if (namefile == "") {
+ vector<string> files; files.push_back(fastafile);
+ parser.getNameFile(files);
+ }
+ }
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PrimerDesignCommand", "PrimerDesignCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int PrimerDesignCommand::execute(){
+ try {
+
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
+ int start = time(NULL);
+ //////////////////////////////////////////////////////////////////////////////
+ // get file inputs //
+ //////////////////////////////////////////////////////////////////////////////
+
+ //reads list file and selects the label the users specified or the first label
+ getListVector();
+ if (otunumber > list->getNumBins()) { m->mothurOut("[ERROR]: You selected an OTU number larger than the number of OTUs you have in your list file, quitting.\n"); return 0; }
+
+ map<string, int> nameMap;
+ unsigned long int numSeqs; //used to sanity check the files. numSeqs = total seqs for namefile and uniques for count.
+ //list file should have all seqs if namefile was used to create it and only uniques in count file was used.
+
+ if (namefile != "") { nameMap = m->readNames(namefile, numSeqs); }
+ else if (countfile != "") { nameMap = readCount(numSeqs); }
+ else { numSeqs = list->getNumSeqs(); }
+
+ //sanity check
+ if (numSeqs != list->getNumSeqs()) {
+ if (namefile != "") { m->mothurOut("[ERROR]: Your list file contains " + toString(list->getNumSeqs()) + " sequences, and your name file contains " + toString(numSeqs) + " sequences, aborting. Do you have the correct files? Perhaps you forgot to include the name file when you clustered? \n"); }
+ else if (countfile != "") {
+ m->mothurOut("[ERROR]: Your list file contains " + toString(list->getNumSeqs()) + " sequences, and your count file contains " + toString(numSeqs) + " unique sequences, aborting. Do you have the correct files? Perhaps you forgot to include the count file when you clustered? \n");
+ }
+ m->control_pressed = true;
+ }
+
+ if (m->control_pressed) { delete list; return 0; }
+
+ //////////////////////////////////////////////////////////////////////////////
+ // process data //
+ //////////////////////////////////////////////////////////////////////////////
+ m->mothurOut("\nFinding consensus sequences for each otu..."); cout.flush();
+
+ vector<Sequence> conSeqs = createProcessesConSeqs(nameMap, numSeqs);
+
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
+ variables["[distance]"] = list->getLabel();
+ string consFastaFile = getOutputFileName("fasta", variables);
+ outputNames.push_back(consFastaFile); outputTypes["fasta"].push_back(consFastaFile);
+ ofstream out;
+ m->openOutputFile(consFastaFile, out);
+ for (int i = 0; i < conSeqs.size(); i++) { conSeqs[i].printSequence(out); }
+ out.close();
+
+ m->mothurOut("Done.\n\n");
+
+ set<string> primers = getPrimer(conSeqs[otunumber-1]);
+
+ if (m->control_pressed) { delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+ string consSummaryFile = getOutputFileName("summary", variables);
+ outputNames.push_back(consSummaryFile); outputTypes["summary"].push_back(consSummaryFile);
+ ofstream outSum;
+ m->openOutputFile(consSummaryFile, outSum);
+
+ outSum << "PrimerOtu: " << otunumber << " Members: " << list->get(otunumber-1) << endl << "Primers\tminTm\tmaxTm" << endl;
+
+ //find min and max melting points
+ vector<double> minTms;
+ vector<double> maxTms;
+ string primerString = "";
+ for (set<string>::iterator it = primers.begin(); it != primers.end();) {
+
+ double minTm, maxTm;
+ findMeltingPoint(*it, minTm, maxTm);
+ if ((minTM == -1) && (maxTM == -1)) { //user did not set min or max Tm so save this primer
+ minTms.push_back(minTm);
+ maxTms.push_back(maxTm);
+ outSum << *it << '\t' << minTm << '\t' << maxTm << endl;
+ it++;
+ }else if ((minTM == -1) && (maxTm <= maxTM)){ //user set max and no min, keep if below max
+ minTms.push_back(minTm);
+ maxTms.push_back(maxTm);
+ outSum << *it << '\t' << minTm << '\t' << maxTm << endl;
+ it++;
+ }else if ((maxTM == -1) && (minTm >= minTM)){ //user set min and no max, keep if above min
+ minTms.push_back(minTm);
+ maxTms.push_back(maxTm);
+ outSum << *it << '\t' << minTm << '\t' << maxTm << endl;
+ it++;
+ }else if ((maxTm <= maxTM) && (minTm >= minTM)) { //keep if above min and below max
+ minTms.push_back(minTm);
+ maxTms.push_back(maxTm);
+ outSum << *it << '\t' << minTm << '\t' << maxTm << endl;
+ it++;
+ }else { primers.erase(it++); } //erase because it didn't qualify
+ }
+
+ outSum << "\nOTUNumber\tPrimer\tStart\tEnd\tLength\tMismatches\tminTm\tmaxTm\n";
+ outSum.close();
+
+ //check each otu's conseq for each primer in otunumber
+ set<int> otuToRemove = createProcesses(consSummaryFile, minTms, maxTms, primers, conSeqs);
+
+ if (m->control_pressed) { delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+ //print new list file
+ map<string, string> mvariables;
+ mvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
+ mvariables["[extension]"] = m->getExtension(listfile);
+ string newListFile = getOutputFileName("list", mvariables);
+ outputNames.push_back(newListFile); outputTypes["list"].push_back(newListFile);
+ ofstream outList;
+ m->openOutputFile(newListFile, outList);
+
+ outList << list->getLabel() << '\t' << (list->getNumBins()-otuToRemove.size()) << '\t';
+ for (int j = 0; j < list->getNumBins(); j++) {
+ if (m->control_pressed) { break; }
+ //good otus
+ if (otuToRemove.count(j) == 0) {
+ string bin = list->get(j);
+ if (bin != "") { outList << bin << '\t'; }
+ }
+ }
+ outList << endl;
+ outList.close();
+
+ if (m->control_pressed) { delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+ delete list;
+
+ m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(list->getNumBins()) + " OTUs.\n");
+
+
+ //output files created by command
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PrimerDesignCommand", "execute");
+ exit(1);
+ }
+}
+//********************************************************************/
+//used http://www.biophp.org/minitools/melting_temperature/ as a reference to substitute degenerate bases
+// in order to find the min and max Tm values.
+//Tm = 64.9°C + 41°C x (number of G’s and C’s in the primer – 16.4)/N
+
+/* A = adenine
+ * C = cytosine
+ * G = guanine
+ * T = thymine
+ * R = G A (purine)
+ * Y = T C (pyrimidine)
+ * K = G T (keto)
+ * M = A C (amino)
+ * S = G C (strong bonds)
+ * W = A T (weak bonds)
+ * B = G T C (all but A)
+ * D = G A T (all but C)
+ * H = A C T (all but G)
+ * V = G C A (all but T)
+ * N = A G C T (any) */
+
+int PrimerDesignCommand::findMeltingPoint(string primer, double& minTm, double& maxTm){
+ try {
+ string minTmprimer = primer;
+ string maxTmprimer = primer;
+
+ //find minimum Tm string substituting for degenerate bases
+ for (int i = 0; i < minTmprimer.length(); i++) {
+ minTmprimer[i] = toupper(minTmprimer[i]);
+
+ if (minTmprimer[i] == 'Y') { minTmprimer[i] = 'A'; }
+ else if (minTmprimer[i] == 'R') { minTmprimer[i] = 'A'; }
+ else if (minTmprimer[i] == 'W') { minTmprimer[i] = 'A'; }
+ else if (minTmprimer[i] == 'K') { minTmprimer[i] = 'A'; }
+ else if (minTmprimer[i] == 'M') { minTmprimer[i] = 'A'; }
+ else if (minTmprimer[i] == 'D') { minTmprimer[i] = 'A'; }
+ else if (minTmprimer[i] == 'V') { minTmprimer[i] = 'A'; }
+ else if (minTmprimer[i] == 'H') { minTmprimer[i] = 'A'; }
+ else if (minTmprimer[i] == 'B') { minTmprimer[i] = 'A'; }
+ else if (minTmprimer[i] == 'N') { minTmprimer[i] = 'A'; }
+ else if (minTmprimer[i] == 'S') { minTmprimer[i] = 'G'; }
+ }
+
+ //find maximum Tm string substituting for degenerate bases
+ for (int i = 0; i < maxTmprimer.length(); i++) {
+ maxTmprimer[i] = toupper(maxTmprimer[i]);
+
+ if (maxTmprimer[i] == 'Y') { maxTmprimer[i] = 'G'; }
+ else if (maxTmprimer[i] == 'R') { maxTmprimer[i] = 'G'; }
+ else if (maxTmprimer[i] == 'W') { maxTmprimer[i] = 'A'; }
+ else if (maxTmprimer[i] == 'K') { maxTmprimer[i] = 'G'; }
+ else if (maxTmprimer[i] == 'M') { maxTmprimer[i] = 'G'; }
+ else if (maxTmprimer[i] == 'D') { maxTmprimer[i] = 'G'; }
+ else if (maxTmprimer[i] == 'V') { maxTmprimer[i] = 'G'; }
+ else if (maxTmprimer[i] == 'H') { maxTmprimer[i] = 'G'; }
+ else if (maxTmprimer[i] == 'B') { maxTmprimer[i] = 'G'; }
+ else if (maxTmprimer[i] == 'N') { maxTmprimer[i] = 'G'; }
+ else if (maxTmprimer[i] == 'S') { maxTmprimer[i] = 'G'; }
+ }
+
+ int numGC = 0;
+ for (int i = 0; i < minTmprimer.length(); i++) {
+ if (minTmprimer[i] == 'G') { numGC++; }
+ else if (minTmprimer[i] == 'C') { numGC++; }
+ }
+
+ minTm = 64.9 + 41 * (numGC - 16.4) / (double) minTmprimer.length();
+
+ numGC = 0;
+ for (int i = 0; i < maxTmprimer.length(); i++) {
+ if (maxTmprimer[i] == 'G') { numGC++; }
+ else if (maxTmprimer[i] == 'C') { numGC++; }
+ }
+
+ maxTm = 64.9 + 41 * (numGC - 16.4) / (double) maxTmprimer.length();
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PrimerDesignCommand", "findMeltingPoint");
+ exit(1);
+ }
+}
+//********************************************************************/
+//search for a primer over the sequence string
+bool PrimerDesignCommand::findPrimer(string rawSequence, string primer, vector<int>& primerStart, vector<int>& primerEnd, vector<int>& mismatches){
+ try {
+ bool foundAtLeastOne = false; //innocent til proven guilty
+
+ //look for exact match
+ if(rawSequence.length() < primer.length()) { return false; }
+
+ //search for primer
+ for (int j = 0; j < rawSequence.length()-length; j++){
+
+ if (m->control_pressed) { return foundAtLeastOne; }
+
+ string rawChunk = rawSequence.substr(j, length);
+
+ int numDiff = countDiffs(primer, rawChunk);
+
+ if(numDiff <= pdiffs){
+ primerStart.push_back(j);
+ primerEnd.push_back(j+length);
+ mismatches.push_back(numDiff);
+ foundAtLeastOne = true;
+ }
+ }
+
+ return foundAtLeastOne;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PrimerDesignCommand", "findPrimer");
+ exit(1);
+ }
+}
+//********************************************************************/
+//find all primers for the given sequence
+set<string> PrimerDesignCommand::getPrimer(Sequence primerSeq){
+ try {
+ set<string> primers;
+
+ string rawSequence = primerSeq.getUnaligned();
+
+ for (int j = 0; j < rawSequence.length()-length; j++){
+ if (m->control_pressed) { break; }
+
+ string primer = rawSequence.substr(j, length);
+ primers.insert(primer);
+ }
+
+ return primers;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PrimerDesignCommand", "getPrimer");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+set<int> PrimerDesignCommand::createProcesses(string newSummaryFile, vector<double>& minTms, vector<double>& maxTms, set<string>& primers, vector<Sequence>& conSeqs) {
+ try {
+
+ vector<int> processIDS;
+ int process = 1;
+ set<int> otusToRemove;
+ int numBinsProcessed = 0;
+
+ //sanity check
+ int numBins = conSeqs.size();
+ if (numBins < processors) { processors = numBins; }
+
+ //divide the otus between the processors
+ vector<linePair> lines;
+ int numOtusPerProcessor = numBins / processors;
+ for (int i = 0; i < processors; i++) {
+ int startIndex = i * numOtusPerProcessor;
+ int endIndex = (i+1) * numOtusPerProcessor;
+ if(i == (processors - 1)){ endIndex = numBins; }
+ lines.push_back(linePair(startIndex, endIndex));
+ }
+
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+
+ //loop through and create all the processes you want
+ while (process != processors) {
+ int pid = fork();
+
+ if (pid > 0) {
+ processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
+ process++;
+ }else if (pid == 0){
+ //clear old file because we append in driver
+ m->mothurRemove(newSummaryFile + toString(getpid()) + ".temp");
+
+ otusToRemove = driver(newSummaryFile + toString(getpid()) + ".temp", minTms, maxTms, primers, conSeqs, lines[process].start, lines[process].end, numBinsProcessed);
+
+ string tempFile = toString(getpid()) + ".otus2Remove.temp";
+ ofstream outTemp;
+ m->openOutputFile(tempFile, outTemp);
+
+ outTemp << numBinsProcessed << endl;
+ outTemp << otusToRemove.size() << endl;
+ for (set<int>::iterator it = otusToRemove.begin(); it != otusToRemove.end(); it++) { outTemp << *it << endl; }
+ outTemp.close();
+
+ exit(0);
+ }else {
+ m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
+ for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+ exit(0);
+ }
+ }
+
+ //do my part
+ otusToRemove = driver(newSummaryFile, minTms, maxTms, primers, conSeqs, lines[0].start, lines[0].end, numBinsProcessed);
+
+ //force parent to wait until all the processes are done
+ for (int i=0;i<processIDS.size();i++) {
+ int temp = processIDS[i];
+ wait(&temp);
+ }
+
+ for (int i = 0; i < processIDS.size(); i++) {
+ string tempFile = toString(processIDS[i]) + ".otus2Remove.temp";
+ ifstream intemp;
+ m->openInputFile(tempFile, intemp);
+
+ int num;
+ intemp >> num; m->gobble(intemp);
+ if (num != (lines[i+1].end - lines[i+1].start)) { m->mothurOut("[ERROR]: process " + toString(processIDS[i]) + " did not complete processing all OTUs assigned to it, quitting.\n"); m->control_pressed = true; }
+ intemp >> num; m->gobble(intemp);
+ for (int k = 0; k < num; k++) {
+ int otu;
+ intemp >> otu; m->gobble(intemp);
+ otusToRemove.insert(otu);
+ }
+ intemp.close();
+ m->mothurRemove(tempFile);
+ }
+
+
+ #else
+
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+ //Windows version shared memory, so be careful when passing variables through the primerDesignData struct.
+ //Above fork() will clone, so memory is separate, but that's not the case with windows,
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ vector<primerDesignData*> pDataArray;
+ DWORD dwThreadIdArray[processors-1];
+ HANDLE hThreadArray[processors-1];
+
+ //Create processor worker threads.
+ for( int i=1; i<processors; i++ ){
+ // Allocate memory for thread data.
+ string extension = toString(i) + ".temp";
+ m->mothurRemove(newSummaryFile+extension);
+
+ primerDesignData* tempPrimer = new primerDesignData((newSummaryFile+extension), m, lines[i].start, lines[i].end, minTms, maxTms, primers, conSeqs, pdiffs, otunumber, length, i);
+ pDataArray.push_back(tempPrimer);
+ processIDS.push_back(i);
+
+ //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
+ //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
+ hThreadArray[i-1] = CreateThread(NULL, 0, MyPrimerThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
+ }
+
+
+ //using the main process as a worker saves time and memory
+ otusToRemove = driver(newSummaryFile, minTms, maxTms, primers, conSeqs, lines[0].start, lines[0].end, numBinsProcessed);
+
+ //Wait until all threads have terminated.
+ WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+
+ //Close all thread handles and free memory allocations.
+ for(int i=0; i < pDataArray.size(); i++){
+ for (set<int>::iterator it = pDataArray[i]->otusToRemove.begin(); it != pDataArray[i]->otusToRemove.end(); it++) {
+ otusToRemove.insert(*it);
+ }
+ int num = pDataArray[i]->numBinsProcessed;
+ if (num != (lines[processIDS[i]].end - lines[processIDS[i]].start)) { m->mothurOut("[ERROR]: process " + toString(processIDS[i]) + " did not complete processing all OTUs assigned to it, quitting.\n"); m->control_pressed = true; }
+ CloseHandle(hThreadArray[i]);
+ delete pDataArray[i];
+ }
+
+#endif
+
+ //append output files
+ for(int i=0;i<processIDS.size();i++){
+ m->appendFiles((newSummaryFile + toString(processIDS[i]) + ".temp"), newSummaryFile);
+ m->mothurRemove((newSummaryFile + toString(processIDS[i]) + ".temp"));
+ }
+
+ return otusToRemove;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PrimerDesignCommand", "createProcesses");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+set<int> PrimerDesignCommand::driver(string summaryFileName, vector<double>& minTms, vector<double>& maxTms, set<string>& primers, vector<Sequence>& conSeqs, int start, int end, int& numBinsProcessed){
+ try {
+ set<int> otuToRemove;
+
+ ofstream outSum;
+ m->openOutputFileAppend(summaryFileName, outSum);
+
+ for (int i = start; i < end; i++) {
+
+ if (m->control_pressed) { break; }
+
+ if (i != (otunumber-1)) {
+ int primerIndex = 0;
+ for (set<string>::iterator it = primers.begin(); it != primers.end(); it++) {
+ vector<int> primerStarts;
+ vector<int> primerEnds;
+ vector<int> mismatches;
+
+ bool found = findPrimer(conSeqs[i].getUnaligned(), (*it), primerStarts, primerEnds, mismatches);
+
+ //if we found it report to the table
+ if (found) {
+ for (int j = 0; j < primerStarts.size(); j++) {
+ outSum << (i+1) << '\t' << *it << '\t' << primerStarts[j] << '\t' << primerEnds[j] << '\t' << length << '\t' << mismatches[j] << '\t' << minTms[primerIndex] << '\t' << maxTms[primerIndex] << endl;
+ }
+ otuToRemove.insert(i);
+ }
+ primerIndex++;
+ }
+ }
+ numBinsProcessed++;
+ }
+ outSum.close();
+
+
+ return otuToRemove;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PrimerDesignCommand", "driver");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+vector< vector< vector<unsigned int> > > PrimerDesignCommand::driverGetCounts(map<string, int>& nameMap, unsigned long int& fastaCount, vector<unsigned int>& otuCounts, unsigned long long& start, unsigned long long& end){
+ try {
+ vector< vector< vector<unsigned int> > > counts;
+ map<string, int> seq2Bin;
+ alignedLength = 0;
+
+ ifstream in;
+ m->openInputFile(fastafile, in);
+
+ in.seekg(start);
+
+ bool done = false;
+ fastaCount = 0;
+
+ while (!done) {
+ if (m->control_pressed) { in.close(); return counts; }
+
+ Sequence seq(in); m->gobble(in);
+
+ if (seq.getName() != "") {
+ if (fastaCount == 0) { alignedLength = seq.getAligned().length(); initializeCounts(counts, alignedLength, seq2Bin, nameMap, otuCounts); }
+ else if (alignedLength != seq.getAligned().length()) {
+ m->mothurOut("[ERROR]: your sequences are not all the same length. primer.design requires sequences to be aligned."); m->mothurOutEndLine(); m->control_pressed = true; break;
+ }
+
+ int num = 1;
+ map<string, int>::iterator itCount;
+ if (namefile != "") {
+ itCount = nameMap.find(seq.getName());
+ if (itCount == nameMap.end()) { m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file and not in your name file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; break; }
+ else { num = itCount->second; }
+ fastaCount+=num;
+ }else if (countfile != "") {
+ itCount = nameMap.find(seq.getName());
+ if (itCount == nameMap.end()) { m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file and not in your count file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; break; }
+ else { num = itCount->second; }
+ fastaCount++;
+ }else {
+ fastaCount++;
+ }
+
+ //increment counts
+ itCount = seq2Bin.find(seq.getName());
+ if (itCount == seq2Bin.end()) {
+ if ((namefile != "") || (countfile != "")) {
+ m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file and not in your list file, aborting. Perhaps you forgot to include your name or count file while clustering.\n"); m->mothurOutEndLine(); m->control_pressed = true; break;
+ }else{
+ m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file and not in your list file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; break;
+ }
+ }else {
+ otuCounts[itCount->second] += num;
+ string aligned = seq.getAligned();
+ for (int i = 0; i < alignedLength; i++) {
+ char base = toupper(aligned[i]);
+ if (base == 'A') { counts[itCount->second][i][0]+=num; }
+ else if (base == 'T') { counts[itCount->second][i][1]+=num; }
+ else if (base == 'G') { counts[itCount->second][i][2]+=num; }
+ else if (base == 'C') { counts[itCount->second][i][3]+=num; }
+ else { counts[itCount->second][i][4]+=num; }
+ }
+ }
+
+ }
+
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+ unsigned long long pos = in.tellg();
+ if ((pos == -1) || (pos >= end)) { break; }
+#else
+ if (in.eof()) { break; }
+#endif
+ }
+
+ in.close();
+
+ return counts;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PrimerDesignCommand", "driverGetCounts");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+vector<Sequence> PrimerDesignCommand::createProcessesConSeqs(map<string, int>& nameMap, unsigned long int& numSeqs) {
+ try {
+ vector< vector< vector<unsigned int> > > counts;
+ vector<unsigned int> otuCounts;
+ vector<int> processIDS;
+ int process = 1;
+ unsigned long int fastaCount = 0;
+
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+
+ vector<unsigned long long> positions;
+ vector<fastaLinePair> lines;
+ positions = m->divideFile(fastafile, processors);
+ for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(fastaLinePair(positions[i], positions[(i+1)])); }
+
+ //loop through and create all the processes you want
+ while (process != processors) {
+ int pid = fork();
+
+ if (pid > 0) {
+ processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
+ process++;
+ }else if (pid == 0){
+ counts = driverGetCounts(nameMap, fastaCount, otuCounts, lines[process].start, lines[process].end);
+
+ string tempFile = toString(getpid()) + ".cons_counts.temp";
+ ofstream outTemp;
+ m->openOutputFile(tempFile, outTemp);
+
+ outTemp << fastaCount << endl;
+ //pass counts
+ outTemp << counts.size() << endl;
+ for (int i = 0; i < counts.size(); i++) {
+ outTemp << counts[i].size() << endl;
+ for (int j = 0; j < counts[i].size(); j++) {
+ for (int k = 0; k < 5; k++) { outTemp << counts[i][j][k] << '\t'; }
+ outTemp << endl;
+ }
+ }
+ //pass otuCounts
+ outTemp << otuCounts.size() << endl;
+ for (int i = 0; i < otuCounts.size(); i++) { outTemp << otuCounts[i] << '\t'; }
+ outTemp << endl;
+ outTemp.close();
+
+ exit(0);
+ }else {
+ m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
+ for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+ exit(0);
+ }
+ }
+
+ //do my part
+ counts = driverGetCounts(nameMap, fastaCount, otuCounts, lines[0].start, lines[0].end);
+
+ //force parent to wait until all the processes are done
+ for (int i=0;i<processIDS.size();i++) {
+ int temp = processIDS[i];
+ wait(&temp);
+ }
+
+ for (int i = 0; i < processIDS.size(); i++) {
+ string tempFile = toString(processIDS[i]) + ".cons_counts.temp";
+ ifstream intemp;
+ m->openInputFile(tempFile, intemp);
+
+ unsigned long int num;
+ intemp >> num; m->gobble(intemp); fastaCount += num;
+ intemp >> num; m->gobble(intemp);
+ if (num != counts.size()) { m->mothurOut("[ERROR]: " + tempFile + " was not built correctly by the child process, quitting.\n"); m->control_pressed = true; }
+ else {
+ //read counts
+ for (int k = 0; k < num; k++) {
+ int alength;
+ intemp >> alength; m->gobble(intemp);
+ if (alength != alignedLength) { m->mothurOut("[ERROR]: your sequences are not all the same length. primer.design requires sequences to be aligned."); m->mothurOutEndLine(); m->control_pressed = true; }
+ else {
+ for (int j = 0; j < alength; j++) {
+ for (int l = 0; l < 5; l++) { unsigned int numTemp; intemp >> numTemp; m->gobble(intemp); counts[k][j][l] += numTemp; }
+ }
+ }
+ }
+ //read otuCounts
+ intemp >> num; m->gobble(intemp);
+ for (int k = 0; k < num; k++) {
+ unsigned int numTemp; intemp >> numTemp; m->gobble(intemp);
+ otuCounts[k] += numTemp;
+ }
+ }
+ intemp.close();
+ m->mothurRemove(tempFile);
+ }
+
+
+#else
+ counts = driverGetCounts(nameMap, fastaCount, otuCounts, 0, 1000);
+#endif
+
+ //you will have a nameMap error if there is a namefile or countfile, but if those aren't given we want to make sure the fasta and list file match.
+ if (fastaCount != numSeqs) {
+ if ((namefile == "") && (countfile == "")) { m->mothurOut("[ERROR]: Your list file contains " + toString(list->getNumSeqs()) + " sequences, and your fasta file contains " + toString(fastaCount) + " sequences, aborting. Do you have the correct files? Perhaps you forgot to include the name or count file? \n"); }
+ m->control_pressed = true;
+ }
+
+ vector<Sequence> conSeqs;
+
+ if (m->control_pressed) { return conSeqs; }
+
+ //build consensus seqs
+ string snumBins = toString(counts.size());
+ for (int i = 0; i < counts.size(); i++) {
+ if (m->control_pressed) { break; }
+
+ string otuLabel = "Otu";
+ string sbinNumber = toString(i+1);
+ if (sbinNumber.length() < snumBins.length()) {
+ int diff = snumBins.length() - sbinNumber.length();
+ for (int h = 0; h < diff; h++) { otuLabel += "0"; }
+ }
+ otuLabel += sbinNumber;
+
+ string cons = "";
+ for (int j = 0; j < counts[i].size(); j++) {
+ cons += getBase(counts[i][j], otuCounts[i]);
+ }
+ Sequence consSeq(otuLabel, cons);
+ conSeqs.push_back(consSeq);
+ }
+
+ if (m->control_pressed) { conSeqs.clear(); return conSeqs; }
+
+ return conSeqs;
+
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PrimerDesignCommand", "createProcessesConSeqs");
+ exit(1);
+ }
+}
+//***************************************************************************************************************
+
+char PrimerDesignCommand::getBase(vector<unsigned int> counts, int size){ //A,T,G,C,Gap
+ try{
+ /* A = adenine
+ * C = cytosine
+ * G = guanine
+ * T = thymine
+ * R = G A (purine)
+ * Y = T C (pyrimidine)
+ * K = G T (keto)
+ * M = A C (amino)
+ * S = G C (strong bonds)
+ * W = A T (weak bonds)
+ * B = G T C (all but A)
+ * D = G A T (all but C)
+ * H = A C T (all but G)
+ * V = G C A (all but T)
+ * N = A G C T (any) */
+
+ char conBase = 'N';
+
+ //zero out counts that don't make the cutoff
+ float percentage = (100.0 - cutoff) / 100.0;
+
+ for (int i = 0; i < counts.size(); i++) {
+ float countPercentage = counts[i] / (float) size;
+ if (countPercentage < percentage) { counts[i] = 0; }
+ }
+
+ //any
+ if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) { conBase = 'n'; }
+ //any no gap
+ else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) { conBase = 'N'; }
+ //all but T
+ else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) { conBase = 'v'; }
+ //all but T no gap
+ else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) { conBase = 'V'; }
+ //all but G
+ else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) { conBase = 'h'; }
+ //all but G no gap
+ else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) { conBase = 'H'; }
+ //all but C
+ else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) { conBase = 'd'; }
+ //all but C no gap
+ else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) { conBase = 'D'; }
+ //all but A
+ else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) { conBase = 'b'; }
+ //all but A no gap
+ else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) { conBase = 'B'; }
+ //W = A T (weak bonds)
+ else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) { conBase = 'w'; }
+ //W = A T (weak bonds) no gap
+ else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) { conBase = 'W'; }
+ //S = G C (strong bonds)
+ else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) { conBase = 's'; }
+ //S = G C (strong bonds) no gap
+ else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) { conBase = 'S'; }
+ //M = A C (amino)
+ else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) { conBase = 'm'; }
+ //M = A C (amino) no gap
+ else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) { conBase = 'M'; }
+ //K = G T (keto)
+ else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) { conBase = 'k'; }
+ //K = G T (keto) no gap
+ else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) { conBase = 'K'; }
+ //Y = T C (pyrimidine)
+ else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) { conBase = 'y'; }
+ //Y = T C (pyrimidine) no gap
+ else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) { conBase = 'Y'; }
+ //R = G A (purine)
+ else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) { conBase = 'r'; }
+ //R = G A (purine) no gap
+ else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) { conBase = 'R'; }
+ //only A
+ else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) { conBase = 'a'; }
+ //only A no gap
+ else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) { conBase = 'A'; }
+ //only T
+ else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) { conBase = 't'; }
+ //only T no gap
+ else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) { conBase = 'T'; }
+ //only G
+ else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) { conBase = 'g'; }
+ //only G no gap
+ else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) { conBase = 'G'; }
+ //only C
+ else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) { conBase = 'c'; }
+ //only C no gap
+ else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) { conBase = 'C'; }
+ //only gap
+ else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) { conBase = '-'; }
+ //cutoff removed all counts
+ else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) { conBase = 'N'; }
+ else{ m->mothurOut("[ERROR]: cannot find consensus base."); m->mothurOutEndLine(); }
+
+ return conBase;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PrimerDesignCommand", "getBase");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+int PrimerDesignCommand::initializeCounts(vector< vector< vector<unsigned int> > >& counts, int length, map<string, int>& seq2Bin, map<string, int>& nameMap, vector<unsigned int>& otuCounts){
+ try {
+ counts.clear();
+ otuCounts.clear();
+ seq2Bin.clear();
+
+ //vector< vector< vector<unsigned int> > > counts - otu < spot_in_alignment < counts_for_A,T,G,C,Gap > > >
+ for (int i = 0; i < list->getNumBins(); i++) {
+ string binNames = list->get(i);
+ vector<string> names;
+ m->splitAtComma(binNames, names);
+ otuCounts.push_back(0);
+
+ //lets be smart and only map the unique names if a name or count file was given to save search time and memory
+ if ((namefile != "") || (countfile != "")) {
+ for (int j = 0; j < names.size(); j++) {
+ map<string, int>::iterator itNames = nameMap.find(names[j]);
+ if (itNames != nameMap.end()) { //add name because its a unique one
+ seq2Bin[names[j]] = i;
+ }
+ }
+ }else { //map everyone
+ for (int j = 0; j < names.size(); j++) { seq2Bin[names[j]] = i; }
+ }
+
+ vector<unsigned int> temp; temp.resize(5, 0); //A,T,G,C,Gap
+ vector< vector<unsigned int> > temp2;
+ for (int j = 0; j < length; j++) {
+ temp2.push_back(temp);
+ }
+ counts.push_back(temp2);
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PrimerDesignCommand", "initializeCounts");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+map<string, int> PrimerDesignCommand::readCount(unsigned long int& numSeqs){
+ try {
+ map<string, int> nameMap;
+
+ CountTable ct;
+ ct.readTable(countfile);
+ vector<string> namesOfSeqs = ct.getNamesOfSeqs();
+ numSeqs = ct.getNumUniqueSeqs();
+
+ for (int i = 0; i < namesOfSeqs.size(); i++) {
+ if (m->control_pressed) { break; }
+
+ nameMap[namesOfSeqs[i]] = ct.getNumSeqs(namesOfSeqs[i]);
+ }
+
+ return nameMap;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PrimerDesignCommand", "readCount");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int PrimerDesignCommand::getListVector(){
+ try {
+ InputData input(listfile, "list");
+ list = input.getListVector();
+ string lastLabel = list->getLabel();
+
+ if (label == "") { label = lastLabel; return 0; }
+
+ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+ set<string> labels; labels.insert(label);
+ set<string> processedLabels;
+ set<string> userLabels = labels;
+
+ //as long as you are not at the end of the file or done wih the lines you want
+ while((list != NULL) && (userLabels.size() != 0)) {
+ if (m->control_pressed) { return 0; }
+
+ if(labels.count(list->getLabel()) == 1){
+ processedLabels.insert(list->getLabel());
+ userLabels.erase(list->getLabel());
+ break;
+ }
+
+ if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+ string saveLabel = list->getLabel();
+
+ delete list;
+ list = input.getListVector(lastLabel);
+
+ processedLabels.insert(list->getLabel());
+ userLabels.erase(list->getLabel());
+
+ //restore real lastlabel to save below
+ list->setLabel(saveLabel);
+ break;
+ }
+
+ lastLabel = list->getLabel();
+
+ //get next line to process
+ //prevent memory leak
+ delete list;
+ list = input.getListVector();
+ }
+
+
+ if (m->control_pressed) { return 0; }
+
+ //output error messages about any remaining user labels
+ set<string>::iterator it;
+ bool needToRun = false;
+ for (it = userLabels.begin(); it != userLabels.end(); it++) {
+ m->mothurOut("Your file does not include the label " + *it);
+ if (processedLabels.count(lastLabel) != 1) {
+ m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+ needToRun = true;
+ }else {
+ m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+ }
+ }
+
+ //run last label if you need to
+ if (needToRun == true) {
+ delete list;
+ list = input.getListVector(lastLabel);
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PrimerDesignCommand", "getListVector");
+ exit(1);
+ }
+}
+//********************************************************************/
+/* A = adenine
+ * C = cytosine
+ * G = guanine
+ * T = thymine
+ * R = G A (purine)
+ * Y = T C (pyrimidine)
+ * K = G T (keto)
+ * M = A C (amino)
+ * S = G C (strong bonds)
+ * W = A T (weak bonds)
+ * B = G T C (all but A)
+ * D = G A T (all but C)
+ * H = A C T (all but G)
+ * V = G C A (all but T)
+ * N = A G C T (any) */
+int PrimerDesignCommand::countDiffs(string oligo, string seq){
+ try {
+
+ int length = oligo.length();
+ int countDiffs = 0;
+
+ for(int i=0;i<length;i++){
+
+ oligo[i] = toupper(oligo[i]);
+ seq[i] = toupper(seq[i]);
+
+ if(oligo[i] != seq[i]){
+ if(oligo[i] == 'A' && (seq[i] != 'A' && seq[i] != 'M' && seq[i] != 'R' && seq[i] != 'W' && seq[i] != 'D' && seq[i] != 'H' && seq[i] != 'V')) { countDiffs++; }
+ else if(oligo[i] == 'C' && (seq[i] != 'C' && seq[i] != 'Y' && seq[i] != 'M' && seq[i] != 'S' && seq[i] != 'B' && seq[i] != 'H' && seq[i] != 'V')) { countDiffs++; }
+ else if(oligo[i] == 'G' && (seq[i] != 'G' && seq[i] != 'R' && seq[i] != 'K' && seq[i] != 'S' && seq[i] != 'B' && seq[i] != 'D' && seq[i] != 'V')) { countDiffs++; }
+ else if(oligo[i] == 'T' && (seq[i] != 'T' && seq[i] != 'Y' && seq[i] != 'K' && seq[i] != 'W' && seq[i] != 'B' && seq[i] != 'D' && seq[i] != 'H')) { countDiffs++; }
+ else if((oligo[i] == '.' || oligo[i] == '-')) { countDiffs++; }
+ else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { countDiffs++; }
+ else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { countDiffs++; }
+ else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { countDiffs++; }
+ else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { countDiffs++; }
+ else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
+ else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { countDiffs++; }
+ else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
+ else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
+ else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { countDiffs++; }
+ else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { countDiffs++; }
+ else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { countDiffs++; }
+ }
+
+ }
+
+ return countDiffs;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PrimerDesignCommand", "countDiffs");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+
+
--- /dev/null
+//
+// primerdesigncommand.h
+// Mothur
+//
+// Created by Sarah Westcott on 1/18/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_primerdesigncommand_h
+#define Mothur_primerdesigncommand_h
+
+#include "command.hpp"
+#include "listvector.hpp"
+#include "inputdata.h"
+#include "sequence.hpp"
+#include "alignment.hpp"
+#include "needlemanoverlap.hpp"
+
+/**************************************************************************************************/
+
+class PrimerDesignCommand : public Command {
+public:
+ PrimerDesignCommand(string);
+ PrimerDesignCommand();
+ ~PrimerDesignCommand(){}
+
+ vector<string> setParameters();
+ string getCommandName() { return "primer.design"; }
+ string getCommandCategory() { return "OTU-Based Approaches"; }
+
+ string getOutputPattern(string);
+ string getHelpString();
+ string getCitation() { return "http://www.mothur.org/wiki/Primer.design"; }
+ string getDescription() { return "design sequence fragments that are specific to particular OTUs"; }
+
+ int execute();
+ void help() { m->mothurOut(getHelpString()); }
+
+private:
+
+ struct linePair {
+ int start;
+ int end;
+ linePair(int i, int j) : start(i), end(j) {}
+ };
+ struct fastaLinePair {
+ unsigned long long start;
+ unsigned long long end;
+ fastaLinePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
+ };
+
+ bool abort, allLines, large;
+ int cutoff, pdiffs, length, otunumber, processors, alignedLength;
+ string outputDir, listfile, namefile, countfile, fastafile, label;
+ double minTM, maxTM;
+ ListVector* list;
+ vector<string> outputNames;
+
+ int initializeCounts(vector< vector< vector<unsigned int> > >& counts, int length, map<string, int>&, map<string, int>&, vector<unsigned int>&);
+ map<string, int> readCount(unsigned long int&);
+ char getBase(vector<unsigned int> counts, int size);
+ int getListVector();
+ int countDiffs(string, string);
+ set<string> getPrimer(Sequence);
+ bool findPrimer(string, string, vector<int>&, vector<int>&, vector<int>&);
+ int findMeltingPoint(string primer, double&, double&);
+
+ set<int> createProcesses(string, vector<double>&, vector<double>&, set<string>&, vector<Sequence>&);
+ set<int> driver(string, vector<double>&, vector<double>&, set<string>&, vector<Sequence>&, int, int, int&);
+ vector< vector< vector<unsigned int> > > driverGetCounts(map<string, int>&, unsigned long int&, vector<unsigned int>&, unsigned long long&, unsigned long long&);
+ vector<Sequence> createProcessesConSeqs(map<string, int>&, unsigned long int&);
+
+};
+
+/**************************************************************************************************/
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+struct primerDesignData {
+ string summaryFileName;
+ MothurOut* m;
+ int start;
+ int end;
+ int pdiffs, threadID, otunumber, length;
+ set<string> primers;
+ vector<double> minTms, maxTms;
+ set<int> otusToRemove;
+ vector<Sequence> consSeqs;
+ int numBinsProcessed;
+
+ primerDesignData(){}
+ primerDesignData(string sf, MothurOut* mout, int st, int en, vector<double> min, vector<double> max, set<string> pri, vector<Sequence> seqs, int d, int otun, int l, int tid) {
+ summaryFileName = sf;
+ m = mout;
+ start = st;
+ end = en;
+ pdiffs = d;
+ minTms = min;
+ maxTms = max;
+ primers = pri;
+ consSeqs = seqs;
+ otunumber = otun;
+ length = l;
+ threadID = tid;
+ numBinsProcessed = 0;
+ }
+};
+
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MyPrimerThreadFunction(LPVOID lpParam){
+ primerDesignData* pDataArray;
+ pDataArray = (primerDesignData*)lpParam;
+
+ try {
+ ofstream outSum;
+ pDataArray->m->openOutputFileAppend(pDataArray->summaryFileName, outSum);
+
+ for (int i = pDataArray->start; i < pDataArray->end; i++) {
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ if (i != (pDataArray->otunumber-1)) {
+ int primerIndex = 0;
+ for (set<string>::iterator it = pDataArray->primers.begin(); it != pDataArray->primers.end(); it++) {
+ vector<int> primerStarts;
+ vector<int> primerEnds;
+ vector<int> mismatches;
+
+ //bool found = findPrimer(conSeqs[i].getUnaligned(), (*it), primerStarts, primerEnds, mismatches);
+ ///////////////////////////////////////////////////////////////////////////////////////////////////
+ bool found = false; //innocent til proven guilty
+
+ string rawSequence = pDataArray->consSeqs[i].getUnaligned();
+ string primer = *it;
+
+ //look for exact match
+ if(rawSequence.length() < primer.length()) { found = false; }
+ else {
+ //search for primer
+ for (int j = 0; j < rawSequence.length()-pDataArray->length; j++){
+
+ if (pDataArray->m->control_pressed) { found = false; break; }
+
+ string rawChunk = rawSequence.substr(j, pDataArray->length);
+
+ //int numDiff = countDiffs(primer, rawchuck);
+ ///////////////////////////////////////////////////////////////////////
+ int numDiff = 0;
+ string oligo = primer;
+ string seq = rawChunk;
+
+ for(int k=0;k<pDataArray->length;k++){
+
+ oligo[k] = toupper(oligo[k]);
+ seq[k] = toupper(seq[k]);
+
+ if(oligo[k] != seq[k]){
+
+ if((oligo[k] == 'N' || oligo[k] == 'I') && (seq[k] == 'N')) { numDiff++; }
+ else if(oligo[k] == 'R' && (seq[k] != 'A' && seq[k] != 'G')) { numDiff++; }
+ else if(oligo[k] == 'Y' && (seq[k] != 'C' && seq[k] != 'T')) { numDiff++; }
+ else if(oligo[k] == 'M' && (seq[k] != 'C' && seq[k] != 'A')) { numDiff++; }
+ else if(oligo[k] == 'K' && (seq[k] != 'T' && seq[k] != 'G')) { numDiff++; }
+ else if(oligo[k] == 'W' && (seq[k] != 'T' && seq[k] != 'A')) { numDiff++; }
+ else if(oligo[k] == 'S' && (seq[k] != 'C' && seq[k] != 'G')) { numDiff++; }
+ else if(oligo[k] == 'B' && (seq[k] != 'C' && seq[k] != 'T' && seq[k] != 'G')) { numDiff++; }
+ else if(oligo[k] == 'D' && (seq[k] != 'A' && seq[k] != 'T' && seq[k] != 'G')) { numDiff++; }
+ else if(oligo[k] == 'H' && (seq[k] != 'A' && seq[k] != 'T' && seq[k] != 'C')) { numDiff++; }
+ else if(oligo[k] == 'V' && (seq[k] != 'A' && seq[k] != 'C' && seq[k] != 'G')) { numDiff++; }
+ else if(oligo[k] == 'A' && (seq[k] != 'A' && seq[k] != 'M' && seq[k] != 'R' && seq[k] != 'W' && seq[k] != 'D' && seq[k] != 'H' && seq[k] != 'V')) { numDiff++; }
+ else if(oligo[k] == 'C' && (seq[k] != 'C' && seq[k] != 'Y' && seq[k] != 'M' && seq[k] != 'S' && seq[k] != 'B' && seq[k] != 'H' && seq[k] != 'V')) { numDiff++; }
+ else if(oligo[k] == 'G' && (seq[k] != 'G' && seq[k] != 'R' && seq[k] != 'K' && seq[k] != 'S' && seq[k] != 'B' && seq[k] != 'D' && seq[k] != 'V')) { numDiff++; }
+ else if(oligo[k] == 'T' && (seq[k] != 'T' && seq[k] != 'Y' && seq[k] != 'K' && seq[k] != 'W' && seq[k] != 'B' && seq[k] != 'D' && seq[k] != 'H')) { numDiff++; }
+ else if((oligo[k] == '.' || oligo[k] == '-')) { numDiff++; }
+ }
+ }
+ ///////////////////////////////////////////////////////////////////////
+
+ if(numDiff <= pDataArray->pdiffs){
+ primerStarts.push_back(j);
+ primerEnds.push_back(j+pDataArray->length);
+ mismatches.push_back(numDiff);
+ found = true;
+ }
+ }
+ }
+ ///////////////////////////////////////////////////////////////////////////////////////////////////
+
+ //if we found it report to the table
+ if (found) {
+ for (int j = 0; j < primerStarts.size(); j++) {
+ outSum << (i+1) << '\t' << *it << '\t' << primerStarts[j] << '\t' << primerEnds[j] << '\t' << pDataArray->length << '\t' << mismatches[j] << '\t' << pDataArray->minTms[primerIndex] << '\t' << pDataArray->maxTms[primerIndex] << endl;
+ }
+ pDataArray->otusToRemove.insert(i);
+ }
+ primerIndex++;
+ }
+ }
+ pDataArray->numBinsProcessed++;
+ }
+ outSum.close();
+
+ }
+ catch(exception& e) {
+ pDataArray->m->errorOut(e, "PrimerDesignCommand", "MyPrimerThreadFunction");
+ exit(1);
+ }
+}
+#endif
+
+/**************************************************************************************************/
+
+
+
+
+
+#endif
try {
m = MothurOut::getInstance();
-
- seqName = "";
+
int score;
+ seqName = getSequenceName(qFile);
+
+ if (!m->control_pressed) {
+ string qScoreString = m->getline(qFile);
+ //cout << qScoreString << endl;
+ while(qFile.peek() != '>' && qFile.peek() != EOF){
+ if (m->control_pressed) { break; }
+ string temp = m->getline(qFile);
+ //cout << temp << endl;
+ qScoreString += ' ' + temp;
+ }
+ //cout << "done reading " << endl;
+ istringstream qScoreStringStream(qScoreString);
+ int count = 0;
+ while(!qScoreStringStream.eof()){
+ if (m->control_pressed) { break; }
+ string temp;
+ qScoreStringStream >> temp; m->gobble(qScoreStringStream);
+
+ //check temp to make sure its a number
+ if (!m->isContainingOnlyDigits(temp)) { m->mothurOut("[ERROR]: In sequence " + seqName + "'s quality scores, expected a number and got " + temp + ", setting score to 0."); m->mothurOutEndLine(); temp = "0"; }
+ convert(temp, score);
+
+ //cout << count << '\t' << score << endl;
+ qScores.push_back(score);
+ count++;
+ }
+ }
- qFile >> seqName;
- m->getline(qFile);
- //cout << seqName << endl;
- if (seqName == "") {
- m->mothurOut("Error reading quality file, name blank at position, " + toString(qFile.tellg()));
- m->mothurOutEndLine();
- }
- else{
- seqName = seqName.substr(1);
- }
-
- string qScoreString = m->getline(qFile);
- //cout << qScoreString << endl;
- while(qFile.peek() != '>' && qFile.peek() != EOF){
- if (m->control_pressed) { break; }
- string temp = m->getline(qFile);
- //cout << temp << endl;
- qScoreString += ' ' + temp;
- }
- //cout << "done reading " << endl;
- istringstream qScoreStringStream(qScoreString);
- int count = 0;
- while(!qScoreStringStream.eof()){
- if (m->control_pressed) { break; }
- string temp;
- qScoreStringStream >> temp; m->gobble(qScoreStringStream);
-
- //check temp to make sure its a number
- if (!m->isContainingOnlyDigits(temp)) { m->mothurOut("[ERROR]: In sequence " + seqName + "'s quality scores, expected a number and got " + temp + ", setting score to 0."); m->mothurOutEndLine(); temp = "0"; }
- convert(temp, score);
-
- //cout << count << '\t' << score << endl;
- qScores.push_back(score);
- count++;
- }
- //qScores.pop_back();
-
-// string scores = "";
-//
-// while(!qFile.eof()){
-//
-// qFile >> seqName;
-//
-// //get name
-// if (seqName.length() != 0) {
-// seqName = seqName.substr(1);
-// while (!qFile.eof()) {
-// char c = qFile.get();
-// //gobble junk on line
-// if (c == 10 || c == 13){ break; }
-// }
-// m->gobble(qFile);
-// }
-//
-// //get scores
-// while(qFile){
-// char letter=qFile.get();
-// if((letter == '>')){ qFile.putback(letter); break; }
-// else if (isprint(letter)) { scores += letter; }
-// }
-// m->gobble(qFile);
-//
-// break;
-// }
-//
-// //convert scores string to qScores
-// istringstream qScoreStringStream(scores);
-//
-// int score;
-// while(!qScoreStringStream.eof()){
-//
-// if (m->control_pressed) { break; }
-//
-// qScoreStringStream >> score;
-// qScores.push_back(score);
-// }
-//
-// qScores.pop_back();
-
seqLength = qScores.size();
//cout << "seqlength = " << seqLength << '\t' << count << endl;
}
}
-
+//********************************************************************************************************************
+string QualityScores::getSequenceName(ifstream& qFile) {
+ try {
+ string name = "";
+
+ qFile >> name;
+ m->getline(qFile);
+
+ if (name.length() != 0) {
+
+ name = name.substr(1);
+
+ for (int i = 0; i < name.length(); i++) {
+ if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
+ }
+
+ }else{ m->mothurOut("Error in reading your qfile, at position " + toString(qFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true; }
+
+ return name;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "QualityScores", "getSequenceName");
+ exit(1);
+ }
+}
+//********************************************************************************************************************
+void QualityScores::setName(string name) {
+ try {
+
+ for (int i = 0; i < name.length(); i++) {
+ if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
+ }
+
+ seqName = name;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "QualityScores", "setName");
+ exit(1);
+ }
+}
/**************************************************************************************************/
string QualityScores::getName(){
void updateQScoreErrorMap(map<char, vector<int> >&, string, int, int, int);
void updateForwardMap(vector<vector<int> >&, int, int, int);
void updateReverseMap(vector<vector<int> >&, int, int, int);
- void setName(string n) { seqName = n; }
+ void setName(string n);
void setScores(vector<int> qs) { qScores = qs; seqLength = qScores.size(); }
string seqName;
int seqLength;
+
+ string getSequenceName(ifstream&);
};
/**************************************************************************************************/
while(!inputNames.eof()){
if (m->control_pressed) { goodNameOut.close(); inputNames.close(); m->mothurRemove(goodNameFile); return 0; }
- inputNames >> seqName >> seqList;
+ inputNames >> seqName; m->gobble(inputNames); inputNames >> seqList;
it = badSeqNames.find(seqName);
if(it != badSeqNames.end()){
while(!inputGroups.eof()){
if (m->control_pressed) { goodGroupOut.close(); inputGroups.close(); m->mothurRemove(goodNameFile); m->mothurRemove(goodGroupFile); return 0; }
- inputGroups >> seqName >> group;
+ inputGroups >> seqName; m->gobble(inputGroups); inputGroups >> group;
it = badSeqGroups.find(seqName);
while(!inputGroups.eof()){
if (m->control_pressed) { goodGroupOut.close(); inputGroups.close(); m->mothurRemove(goodGroupFile); return 0; }
- inputGroups >> seqName >> group;
+ inputGroups >> seqName; m->gobble(inputGroups); inputGroups >> group;
it = badSeqNames.find(seqName);
if(it != badSeqNames.end()){
while(!input.eof()){
if (m->control_pressed) { goodTaxOut.close(); input.close(); m->mothurRemove(goodTaxFile); return 0; }
- input >> seqName >> tax;
+ input >> seqName; m->gobble(input); input >> tax;
it = badSeqNames.find(seqName);
if(it != badSeqNames.end()){ badSeqNames.erase(it); }
m = MothurOut::getInstance();
initialize();
name = newName;
+
+ for (int i = 0; i < name.length(); i++) {
+ if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
+ }
//setUnaligned removes any gap characters for us
setUnaligned(sequence);
m = MothurOut::getInstance();
initialize();
name = newName;
+
+ for (int i = 0; i < name.length(); i++) {
+ if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
+ }
//setUnaligned removes any gap characters for us
setUnaligned(sequence);
m = MothurOut::getInstance();
initialize();
- fastaString >> name;
-
- if (name.length() != 0) {
+ name = getSequenceName(fastaString);
- name = name.substr(1);
+ if (!m->control_pressed) {
string sequence;
//read comments
setUnaligned(sequence);
if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); }
-
- }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaString.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
+ }
}
catch(exception& e) {
m = MothurOut::getInstance();
initialize();
- fastaString >> name;
-
- if (name.length() != 0) {
+ name = getSequenceName(fastaString);
- name = name.substr(1);
+ if (!m->control_pressed) {
string sequence;
//read comments
if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); }
- }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaString.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
+ }
}
catch(exception& e) {
try {
m = MothurOut::getInstance();
initialize();
- fastaFile >> name;
-
- if (name.length() != 0) {
+ name = getSequenceName(fastaFile);
- name = name.substr(1);
+ if (!m->control_pressed) {
string sequence;
if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); }
- }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
+ }
}
catch(exception& e) {
try {
m = MothurOut::getInstance();
initialize();
- fastaFile >> name;
extraInfo = "";
- if (name.length() != 0) {
-
- name = name.substr(1);
-
+ name = getSequenceName(fastaFile);
+
+ if (!m->control_pressed) {
string sequence;
//read comments
setUnaligned(sequence);
if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); }
-
- }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
+ }
}
catch(exception& e) {
try {
m = MothurOut::getInstance();
initialize();
- fastaFile >> name;
+ name = getSequenceName(fastaFile);
- if (name.length() != 0) {
- name = name.substr(1);
+ if (!m->control_pressed) {
string sequence;
//read comments
if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); }
- }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
+ }
}
catch(exception& e) {
exit(1);
}
}
-
+//********************************************************************************************************************
+string Sequence::getSequenceName(ifstream& fastaFile) {
+ try {
+ string name = "";
+
+ fastaFile >> name;
+
+ if (name.length() != 0) {
+
+ name = name.substr(1);
+
+ for (int i = 0; i < name.length(); i++) {
+ if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
+ }
+
+ }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true; }
+
+ return name;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Sequence", "getSequenceName");
+ exit(1);
+ }
+}
+//********************************************************************************************************************
+string Sequence::getSequenceName(istringstream& fastaFile) {
+ try {
+ string name = "";
+
+ fastaFile >> name;
+
+ if (name.length() != 0) {
+
+ name = name.substr(1);
+
+ for (int i = 0; i < name.length(); i++) {
+ if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
+ }
+
+ }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true; }
+
+ return name;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Sequence", "getSequenceName");
+ exit(1);
+ }
+}
//********************************************************************************************************************
string Sequence::getSequenceString(ifstream& fastaFile, int& numAmbig) {
try {
string getCommentString(ifstream&);
string getSequenceString(istringstream&, int&);
string getCommentString(istringstream&);
+ string getSequenceName(ifstream&);
+ string getSequenceName(istringstream&);
string name;
string unaligned;
string aligned;
if (iters != 0) {
//we need to find the average distance and standard deviation for each groups distance
-
- vector< vector<seqDist> > calcAverages; calcAverages.resize(sumCalculators.size());
- for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
- calcAverages[i].resize(calcDistsTotals[0][i].size());
-
- for (int j = 0; j < calcAverages[i].size(); j++) {
- calcAverages[i][j].seq1 = calcDists[i][j].seq1;
- calcAverages[i][j].seq2 = calcDists[i][j].seq2;
- calcAverages[i][j].dist = 0.0;
- }
- }
-
- for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
- for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
- for (int j = 0; j < calcAverages[i].size(); j++) {
- calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
- }
- }
- }
-
- for (int i = 0; i < calcAverages.size(); i++) { //finds average.
- for (int j = 0; j < calcAverages[i].size(); j++) {
- calcAverages[i][j].dist /= (float) iters;
- }
- }
+ vector< vector<seqDist> > calcAverages = m->getAverages(calcDistsTotals);
//find standard deviation
- vector< vector<seqDist> > stdDev; stdDev.resize(sumCalculators.size());
- for (int i = 0; i < stdDev.size(); i++) { //initialize sums to zero.
- stdDev[i].resize(calcDistsTotals[0][i].size());
-
- for (int j = 0; j < stdDev[i].size(); j++) {
- stdDev[i][j].seq1 = calcDists[i][j].seq1;
- stdDev[i][j].seq2 = calcDists[i][j].seq2;
- stdDev[i][j].dist = 0.0;
- }
- }
-
- for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
- for (int i = 0; i < stdDev.size(); i++) {
- for (int j = 0; j < stdDev[i].size(); j++) {
- stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
- }
- }
- }
-
- for (int i = 0; i < stdDev.size(); i++) { //finds average.
- for (int j = 0; j < stdDev[i].size(); j++) {
- stdDev[i][j].dist /= (float) iters;
- stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
- }
- }
+ vector< vector<seqDist> > stdDev = m->getStandardDeviation(calcDistsTotals, calcAverages);
//print results
for (int i = 0; i < calcDists.size(); i++) {
if (iters != 1) {
//we need to find the average distance and standard deviation for each groups distance
-
- vector< vector<seqDist> > calcAverages; calcAverages.resize(treeCalculators.size());
- for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
- calcAverages[i].resize(calcDistsTotals[0][i].size());
-
- for (int j = 0; j < calcAverages[i].size(); j++) {
- calcAverages[i][j].seq1 = calcDists[i][j].seq1;
- calcAverages[i][j].seq2 = calcDists[i][j].seq2;
- calcAverages[i][j].dist = 0.0;
- }
- }
-
- for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
- for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero.
- for (int j = 0; j < calcAverages[i].size(); j++) {
- calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
- }
- }
- }
-
- for (int i = 0; i < calcAverages.size(); i++) { //finds average.
- for (int j = 0; j < calcAverages[i].size(); j++) {
- calcAverages[i][j].dist /= (float) iters;
- }
- }
+ vector< vector<seqDist> > calcAverages = m->getAverages(calcDistsTotals);
//create average tree for each calc
for (int i = 0; i < calcDists.size(); i++) {
try {
//we need to find the average distance and standard deviation for each groups distance
//finds sum
- vector<double> averages; //averages.resize(numComp, 0.0);
- for (int i = 0; i < numComp; i++) { averages.push_back(0.0); }
-
- if (m->debug) { m->mothurOut("[DEBUG]: numcomparisons = " + toString(numComp) + ", subsampleIters = " + toString(subsampleIters) + "\n"); }
-
- for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
- for (int i = 0; i < dists[thisIter].size(); i++) {
- averages[i] += dists[thisIter][i];
- }
- }
-
- if (m->debug) { m->mothurOut("[DEBUG]: numcomparisons = " + toString(numComp) + ", subsampleIters = " + toString(subsampleIters) + "\n"); }
-
- //finds average.
- for (int i = 0; i < averages.size(); i++) {
- averages[i] /= (float) subsampleIters;
- if (m->debug) { m->mothurOut("[DEBUG]: i = " + toString(i) + ", averages[i] = " + toString(averages[i]) + "\n"); }
- }
+ vector<double> averages = m->getAverages(dists);
//find standard deviation
- vector<double> stdDev; //stdDev.resize(numComp, 0.0);
- for (int i = 0; i < numComp; i++) { stdDev.push_back(0.0); }
-
- for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
- for (int j = 0; j < dists[thisIter].size(); j++) {
- stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
- }
- }
- for (int i = 0; i < stdDev.size(); i++) {
- stdDev[i] /= (float) subsampleIters;
- stdDev[i] = sqrt(stdDev[i]);
- if (m->debug) { m->mothurOut("[DEBUG]: i = " + toString(i) + ", stdDev[i] = " + toString(stdDev[i]) + "\n"); }
- }
+ vector<double> stdDev = m->getStandardDeviation(dists, averages);
//make matrix with scores in it
vector< vector<double> > avedists; //avedists.resize(m->getNumGroups());
//we need to find the average distance and standard deviation for each groups distance
//finds sum
- vector<double> averages; averages.resize(numComp, 0);
- for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
- for (int i = 0; i < dists[thisIter].size(); i++) {
- averages[i] += dists[thisIter][i];
- }
- }
-
- //finds average.
- for (int i = 0; i < averages.size(); i++) { averages[i] /= (float) subsampleIters; }
+ vector<double> averages = m->getAverages(dists);
//find standard deviation
- vector<double> stdDev; stdDev.resize(numComp, 0);
-
- for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
- for (int j = 0; j < dists[thisIter].size(); j++) {
- stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
- }
- }
- for (int i = 0; i < stdDev.size(); i++) {
- stdDev[i] /= (float) subsampleIters;
- stdDev[i] = sqrt(stdDev[i]);
- }
+ vector<double> stdDev = m->getStandardDeviation(dists, averages);
//make matrix with scores in it
- vector< vector<double> > avedists; avedists.resize(m->getNumGroups());
+ vector< vector<double> > avedists; //avedists.resize(m->getNumGroups());
for (int i = 0; i < m->getNumGroups(); i++) {
- avedists[i].resize(m->getNumGroups(), 0.0);
+ vector<double> temp;
+ for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); }
+ avedists.push_back(temp);
}
//make matrix with scores in it
- vector< vector<double> > stddists; stddists.resize(m->getNumGroups());
+ vector< vector<double> > stddists; //stddists.resize(m->getNumGroups());
for (int i = 0; i < m->getNumGroups(); i++) {
- stddists[i].resize(m->getNumGroups(), 0.0);
+ vector<double> temp;
+ for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); }
+ //stddists[i].resize(m->getNumGroups(), 0.0);
+ stddists.push_back(temp);
}
+
//flip it so you can print it
int count = 0;
//in essence you want to run it like a single
if (vCalcs[i]->getName() == "sharedsobs") {
singleCalc = new Sobs();
- if (sharedOtus) {
+ if (sharedOtus && (labels.size() != 0)) {
string filenameShared = outputDir + m->getRootName(m->getSimpleName(inputfile)) + lookup[0]->getLabel() + "." + vCalcs[i]->getName() + ".sharedotus";
outputNames.push_back(filenameShared);
subset.push_back(lookup[0]); subset.push_back(lookup[1]);
vector<string> labels;
vector<double> sharedab = vCalcs[i]->getValues(subset, labels);
- if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+ if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") && (labels.size() != 0)) {
outShared << lookup[0]->getGroup() + "-" + lookup[1]->getGroup() << '\t' << labels.size() << '\t';
for (int k = 0; k < labels.size()-1; k++) {
outShared << labels[k] << ",";
subset.clear();
subset.push_back(lookup[0]); subset.push_back(lookup[2]);
vector<double> sharedac = vCalcs[i]->getValues(subset, labels);
- if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+ if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") && (labels.size() != 0)) {
outShared << lookup[0]->getGroup() + "-" + lookup[2]->getGroup() << '\t'<< labels.size() << '\t';
for (int k = 0; k < labels.size()-1; k++) {
outShared << labels[k] << ",";
subset.clear();
subset.push_back(lookup[1]); subset.push_back(lookup[2]);
vector<double> sharedbc = vCalcs[i]->getValues(subset, labels);
- if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+ if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") && (labels.size() != 0)) {
outShared << lookup[1]->getGroup() + "-" + lookup[2]->getGroup() << '\t'<< labels.size() << '\t';
for (int k = 0; k < labels.size()-1; k++) {
outShared << labels[k] << ",";
subset.clear();
subset.push_back(lookup[0]); subset.push_back(lookup[1]); subset.push_back(lookup[2]);
vector<double> sharedabc = vCalcs[i]->getValues(subset, labels);
- if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+ if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") && (labels.size() != 0)) {
outShared << lookup[0]->getGroup() + "-" + lookup[1]->getGroup() + "-" + lookup[2]->getGroup() << '\t'<< labels.size() << '\t';
for (int k = 0; k < labels.size()-1; k++) {
outShared << labels[k] << ",";
subset.push_back(lookup[0]); subset.push_back(lookup[1]);
data = vCalcs[i]->getValues(subset, labels);
sharedAB = data[0];
- if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+ if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") && (labels.size() != 0)) {
outShared << lookup[0]->getGroup() + "-" + lookup[1]->getGroup() << '\t'<< labels.size() << '\t';
for (int k = 0; k < labels.size()-1; k++) {
outShared << labels[k] << ",";
subset.push_back(lookup[0]); subset.push_back(lookup[2]);
data = vCalcs[i]->getValues(subset, labels);
sharedAC = data[0];
- if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+ if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") && (labels.size() != 0)) {
outShared << lookup[0]->getGroup() + "-" + lookup[2]->getGroup() << '\t'<< labels.size() << '\t';
for (int k = 0; k < labels.size()-1; k++) {
outShared << labels[k] << ",";
subset.push_back(lookup[0]); subset.push_back(lookup[3]);
data = vCalcs[i]->getValues(subset, labels);
sharedAD = data[0];
- if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+ if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") && (labels.size() != 0)) {
outShared << lookup[0]->getGroup() + "-" + lookup[3]->getGroup() << '\t'<< labels.size() << '\t';
for (int k = 0; k < labels.size()-1; k++) {
outShared << labels[k] << ",";
subset.push_back(lookup[1]); subset.push_back(lookup[2]);
data = vCalcs[i]->getValues(subset, labels);
sharedBC = data[0];
- if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+ if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") && (labels.size() != 0)) {
outShared << lookup[1]->getGroup() + "-" + lookup[2]->getGroup() << '\t'<< labels.size() << '\t';
for (int k = 0; k < labels.size()-1; k++) {
outShared << labels[k] << ",";
subset.push_back(lookup[1]); subset.push_back(lookup[3]);
data = vCalcs[i]->getValues(subset, labels);
sharedBD = data[0];
- if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+ if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") && (labels.size() != 0)) {
outShared << lookup[1]->getGroup() + "-" + lookup[3]->getGroup() << '\t'<< labels.size() << '\t';
for (int k = 0; k < labels.size()-1; k++) {
outShared << labels[k] << ",";
subset.push_back(lookup[2]); subset.push_back(lookup[3]);
data = vCalcs[i]->getValues(subset, labels);
sharedCD = data[0];
- if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+ if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") && (labels.size() != 0)) {
outShared << lookup[2]->getGroup() + "-" + lookup[3]->getGroup() << '\t'<< labels.size() << '\t';
for (int k = 0; k < labels.size()-1; k++) {
outShared << labels[k] << ",";
subset.push_back(lookup[0]); subset.push_back(lookup[1]); subset.push_back(lookup[2]);
data = vCalcs[i]->getValues(subset, labels);
sharedABC = data[0];
- if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+ if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") && (labels.size() != 0)) {
outShared << lookup[0]->getGroup() + "-" + lookup[1]->getGroup()+ "-" + lookup[2]->getGroup()<< '\t'<< labels.size() << '\t';
for (int k = 0; k < labels.size()-1; k++) {
outShared << labels[k] << ",";
subset.push_back(lookup[0]); subset.push_back(lookup[2]); subset.push_back(lookup[3]);
data = vCalcs[i]->getValues(subset, labels);
sharedACD = data[0];
- if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+ if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") && (labels.size() != 0)) {
outShared << lookup[0]->getGroup() + "-" + lookup[2]->getGroup()+ "-" + lookup[3]->getGroup()<< '\t'<< labels.size() << '\t';
for (int k = 0; k < labels.size()-1; k++) {
outShared << labels[k] << ",";
subset.push_back(lookup[1]); subset.push_back(lookup[2]); subset.push_back(lookup[3]);
data = vCalcs[i]->getValues(subset,labels);
sharedBCD = data[0];
- if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+ if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") && (labels.size() != 0)) {
outShared << lookup[1]->getGroup() + "-" + lookup[2]->getGroup()+ "-" + lookup[3]->getGroup()<< '\t'<< labels.size() << '\t';
for (int k = 0; k < labels.size()-1; k++) {
outShared << labels[k] << ",";
}
- if (labels.size() != 0) { outShared << labels[labels.size()-1]; }
+ outShared << labels[labels.size()-1];
outShared << endl;
}
//cout << "num bcd = " << sharedBCD << endl;
subset.push_back(lookup[0]); subset.push_back(lookup[1]); subset.push_back(lookup[3]);
data = vCalcs[i]->getValues(subset, labels);
sharedABD = data[0];
- if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+ if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") && (labels.size() != 0)) {
outShared << lookup[0]->getGroup() + "-" + lookup[1]->getGroup()+ "-" + lookup[3]->getGroup()<< '\t'<< labels.size() << '\t';
for (int k = 0; k < labels.size()-1; k++) {
outShared << labels[k] << ",";
}
- if (labels.size() != 0) { outShared << labels[labels.size()-1]; }
+ outShared << labels[labels.size()-1];
outShared << endl;
}
//cout << "num abd = " << sharedABD << endl;
//get estimate for all four
data = vCalcs[i]->getValues(lookup, labels);
sharedABCD = data[0];
- if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+ if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") && (labels.size() != 0)) {
outShared << lookup[0]->getGroup() + "-" + lookup[1]->getGroup() + "-" + lookup[2]->getGroup()+ "-" + lookup[3]->getGroup()<< '\t'<< labels.size() << '\t';
for (int k = 0; k < labels.size()-1; k++) {
outShared << labels[k] << ",";