A73DDC3813C4BF64006AAE38 /* mothurmetastats.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73DDC3713C4BF64006AAE38 /* mothurmetastats.cpp */; };
A741744C175CD9B1007DF49B /* makelefsecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A741744A175CD9B1007DF49B /* makelefsecommand.cpp */; };
A741FAD215D1688E0067BCC5 /* sequencecountparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A741FAD115D1688E0067BCC5 /* sequencecountparser.cpp */; };
+ A747EC71181EA0F900345732 /* sracommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A747EC70181EA0F900345732 /* sracommand.cpp */; };
A7496D2E167B531B00CC7D7C /* kruskalwalliscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7496D2C167B531B00CC7D7C /* kruskalwalliscommand.cpp */; };
A74A9A9F148E881E00AB5E3E /* spline.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74A9A9E148E881E00AB5E3E /* spline.cpp */; };
A74C06E916A9C0A9008390A3 /* primerdesigncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74C06E816A9C0A8008390A3 /* primerdesigncommand.cpp */; };
A741744B175CD9B1007DF49B /* makelefsecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makelefsecommand.h; sourceTree = "<group>"; };
A741FAD115D1688E0067BCC5 /* sequencecountparser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sequencecountparser.cpp; sourceTree = "<group>"; };
A741FAD415D168A00067BCC5 /* sequencecountparser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sequencecountparser.h; sourceTree = "<group>"; };
+ A747EC6F181EA0E500345732 /* sracommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = sracommand.h; sourceTree = "<group>"; };
+ A747EC70181EA0F900345732 /* sracommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sracommand.cpp; sourceTree = "<group>"; };
A7496D2C167B531B00CC7D7C /* kruskalwalliscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = kruskalwalliscommand.cpp; sourceTree = "<group>"; };
A7496D2D167B531B00CC7D7C /* kruskalwalliscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = kruskalwalliscommand.h; sourceTree = "<group>"; };
A74A9A9D148E881E00AB5E3E /* spline.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = spline.h; sourceTree = "<group>"; };
A7E9B83F12D37EC400DA6239 /* splitabundcommand.cpp */,
A7E9B84212D37EC400DA6239 /* splitgroupscommand.h */,
A7E9B84112D37EC400DA6239 /* splitgroupscommand.cpp */,
+ A747EC6F181EA0E500345732 /* sracommand.h */,
+ A747EC70181EA0F900345732 /* sracommand.cpp */,
A7E9B85012D37EC400DA6239 /* subsamplecommand.h */,
A7E9B84F12D37EC400DA6239 /* subsamplecommand.cpp */,
A7E9B85812D37EC400DA6239 /* summarycommand.h */,
A77916E8176F7F7600EEFE18 /* designmap.cpp in Sources */,
A7D9378A17B146B5001E90B0 /* wilcox.cpp in Sources */,
A7F24FC317EA36600021DC9A /* classifyrfsharedcommand.cpp in Sources */,
+ A747EC71181EA0F900345732 /* sracommand.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
"DSTROOT[sdk=*]" = TARGET_BUILD_DIR;
GCC_DYNAMIC_NO_PIC = NO;
GCC_MODEL_TUNING = G5;
- GCC_OPTIMIZATION_LEVEL = 3;
+ GCC_OPTIMIZATION_LEVEL = 0;
"INSTALL_PATH[sdk=*]" = TARGET_BUILD_DIR;
PRODUCT_NAME = mothur;
SDKROOT = macosx;
DEPLOYMENT_LOCATION = YES;
DSTROOT = TARGET_BUILD_DIR;
GCC_MODEL_TUNING = G5;
- GCC_OPTIMIZATION_LEVEL = 3;
+ GCC_OPTIMIZATION_LEVEL = 0;
GCC_WARN_UNUSED_VALUE = YES;
PRODUCT_NAME = mothur;
SDKROOT = macosx;
GCC_ENABLE_SSE3_EXTENSIONS = NO;
GCC_ENABLE_SSE41_EXTENSIONS = NO;
GCC_ENABLE_SSE42_EXTENSIONS = NO;
- GCC_OPTIMIZATION_LEVEL = s;
+ GCC_OPTIMIZATION_LEVEL = 3;
GCC_PREPROCESSOR_DEFINITIONS = (
"MOTHUR_FILES=\"\\\"../../release\\\"\"",
- "VERSION=\"\\\"1.31.0\\\"\"",
- "RELEASE_DATE=\"\\\"5/24/2013\\\"\"",
+ "VERSION=\"\\\"1.32.0\\\"\"",
+ "RELEASE_DATE=\"\\\"10/31/2013\\\"\"",
);
GCC_VERSION = "";
"GCC_VERSION[arch=*]" = "";
GCC_C_LANGUAGE_STANDARD = gnu99;
GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
GCC_MODEL_TUNING = "";
- GCC_OPTIMIZATION_LEVEL = s;
+ GCC_OPTIMIZATION_LEVEL = 3;
GCC_PREPROCESSOR_DEFINITIONS = (
- "VERSION=\"\\\"1.32.0\\\"\"",
- "RELEASE_DATE=\"\\\"10/01/2013\\\"\"",
+ "VERSION=\"\\\"1.32.1\\\"\"",
+ "RELEASE_DATE=\"\\\"10/16/2013\\\"\"",
);
GCC_VERSION = "";
GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
m->mothurOut(list->getLabel()); m->mothurOutEndLine();
//for each bin in the list vector
+ vector<string> binLabels = list->getLabels();
for (int i = 0; i < list->size(); i++) {
if (m->control_pressed) { return 1; }
}
if (groups.size() != 0) { groupInfo += groups[groups.size()-1]; }
else { groupInfo = "not found"; }
- name = name + "\t" + groupInfo + "\t" + toString(i+1)+ "\tNumRep=" + toString(ct.getNumSeqs(name));
+ name = name + "\t" + groupInfo + "\t" + binLabels[i] + "\tNumRep=" + toString(ct.getNumSeqs(name));
out << ">" << name << endl;
out << sequence << endl;
}else {
- name = name + "\t" + toString(i+1) + "\tNumRep=" + toString(ct.getNumSeqs(name));
+ name = name + "\t" + binLabels[i] + "\tNumRep=" + toString(ct.getNumSeqs(name));
out << ">" << name << endl;
out << sequence << endl;
}
if (sequence != "not found") {
//if you don't have groups
if (groupfile == "") {
- name = name + "\t" + toString(i+1);
+ name = name + "\t" + binLabels[i];
out << ">" << name << endl;
out << sequence << endl;
}else {//if you do have groups
m->mothurOut(name + " is missing from your group file. Please correct. "); m->mothurOutEndLine();
return 1;
}else{
- name = name + "\t" + group + "\t" + toString(i+1);
+ name = name + "\t" + group + "\t" + binLabels[i];
out << ">" << name << endl;
out << sequence << endl;
}
if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
+ if (hasCount && (templatefile != "self")) { m->mothurOut("You have provided a countfile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
//look for uchime exe
//for each bin in the list vector
string snumBins = toString(processList->getNumBins());
+ vector<string> binLabels = processList->getLabels();
for (int i = 0; i < processList->getNumBins(); i++) {
if (m->control_pressed) { break; }
names = findConsensusTaxonomy(thisNames, size, conTax);
if (m->control_pressed) { break; }
-
- //output to new names file
- string binLabel = "Otu";
- string sbinNumber = toString(i+1);
- if (sbinNumber.length() < snumBins.length()) {
- int diff = snumBins.length() - sbinNumber.length();
- for (int h = 0; h < diff; h++) { binLabel += "0"; }
- }
- binLabel += sbinNumber;
- out << binLabel << '\t' << size << '\t' << conTax << endl;
+ out << binLabels[i] << '\t' << size << '\t' << conTax << endl;
string noConfidenceConTax = conTax;
m->removeConfidences(noConfidenceConTax);
if (m->control_pressed) { break; }
- //output to new names file
- string binLabel = "Otu";
- string sbinNumber = toString(i+1);
- if (sbinNumber.length() < snumBins.length()) {
- int diff = snumBins.length() - sbinNumber.length();
- for (int h = 0; h < diff; h++) { binLabel += "0"; }
- }
- binLabel += sbinNumber;
- (*outs[groupIndex[itParsed->first]]) << binLabel << '\t' << size << '\t' << conTax << endl;
+ (*outs[groupIndex[itParsed->first]]) << binLabels[i] << '\t' << size << '\t' << conTax << endl;
string noConfidenceConTax = conTax;
m->removeConfidences(noConfidenceConTax);
//make classify
Classify* myclassify;
string outputMethodTag = pDataArray->method + ".";
- if(pDataArray->method == "bayesian"){ myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip, pDataArray->writeShortcuts); }
+ if(pDataArray->method == "wang"){ myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip, pDataArray->writeShortcuts); }
else if(pDataArray->method == "knn"){ myclassify = new Knn(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->numWanted, pDataArray->threadID); }
else if(pDataArray->method == "zap"){
outputMethodTag = pDataArray->search + "_" + outputMethodTag;
else { myclassify = new AlignTree(pDataArray->templateFileName, pDataArray->taxonomyFileName, pDataArray->cutoff); }
}
else {
- pDataArray->m->mothurOut(pDataArray->search + " is not a valid method option. I will run the command using bayesian.");
+ pDataArray->m->mothurOut(pDataArray->method + " is not a valid method option. I will run the command using wang.");
pDataArray->m->mothurOutEndLine();
myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip, pDataArray->writeShortcuts);
}
}
m->openOutputFile(listFileName, listFile);
outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
-
+ list->printHeaders(listFile);
time_t estart = time(NULL);
float previousDist = 0.00000;
}
m->openOutputFile(listFileName, listFile);
outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
+ list->printHeaders(listFile);
float previousDist = 0.00000;
float rndPreviousDist = 0.00000;
m->openOutputFile(listFileName, outList);
outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
-
map<float, int>::iterator itLabel;
+
+ //clears out junk for autocompleting of list files above. Perhaps there is a better way to handle this from within the data structure?
+ m->printedListHeaders = false;
//for each label needed
for(itLabel = userLabels.begin(); itLabel != userLabels.end(); itLabel++) {
rabund->print(outRabund);
}
//outList << endl;
+ if (!m->printedListHeaders) {
+ m->listBinLabelsInFile.clear(); completeList.printHeaders(outList); }
completeList.print(outList);
if (rabund != NULL) { delete rabund; }
helpString += "The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n";
helpString += "If you use sharedchao and run into memory issues, set all to false. \n";
helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
- helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n";
+ helpString += "Note: No spaces between parameter labels (i.e. shared), '=' and parameters (i.e.yourSharedfile).\n";
return helpString;
}
catch(exception& e) {
#include "makelefsecommand.h"
#include "lefsecommand.h"
#include "kruskalwalliscommand.h"
+#include "sracommand.h"
/*******************************************************/
commands["make.lefse"] = "make.lefse";
commands["lefse"] = "lefse";
commands["kruskal.wallis"] = "kruskal.wallis";
+ commands["sra"] = "sra";
}
else { optionString += "inputdir=" + inputDir; }
}
- if(commandName == "cluster") { command = new ClusterCommand(optionString); }
+ if(commandName == "cluster") { command = new ClusterCommand(optionString); }
else if(commandName == "unique.seqs") { command = new DeconvoluteCommand(optionString); }
else if(commandName == "parsimony") { command = new ParsimonyCommand(optionString); }
else if(commandName == "help") { command = new HelpCommand(optionString); }
else if(commandName == "make.contigs") { command = new MakeContigsCommand(optionString); }
else if(commandName == "load.logfile") { command = new LoadLogfileCommand(optionString); }
else if(commandName == "sff.multiple") { command = new SffMultipleCommand(optionString); }
- else if(commandName == "classify.rf") { command = new ClassifyRFSharedCommand(optionString); }
+ else if(commandName == "classify.rf") { command = new ClassifyRFSharedCommand(optionString); }
else if(commandName == "filter.shared") { command = new FilterSharedCommand(optionString); }
else if(commandName == "primer.design") { command = new PrimerDesignCommand(optionString); }
else if(commandName == "get.dists") { command = new GetDistsCommand(optionString); }
else if(commandName == "make.lefse") { command = new MakeLefseCommand(optionString); }
else if(commandName == "lefse") { command = new LefseCommand(optionString); }
else if(commandName == "kruskal.wallis") { command = new KruskalWallisCommand(optionString); }
+ else if(commandName == "sra") { command = new SRACommand(optionString); }
else { command = new NoCommand(optionString); }
return command;
else if(commandName == "make.lefse") { pipecommand = new MakeLefseCommand(optionString); }
else if(commandName == "lefse") { pipecommand = new LefseCommand(optionString); }
else if(commandName == "kruskal.wallis") { pipecommand = new KruskalWallisCommand(optionString); }
+ else if(commandName == "sra") { pipecommand = new SRACommand(optionString); }
else { pipecommand = new NoCommand(optionString); }
return pipecommand;
else if(commandName == "make.lefse") { shellcommand = new MakeLefseCommand(); }
else if(commandName == "lefse") { shellcommand = new LefseCommand(); }
else if(commandName == "kruskal.wallis") { shellcommand = new KruskalWallisCommand(); }
+ else if(commandName == "sra") { shellcommand = new SRACommand(); }
else { shellcommand = new NoCommand(); }
return shellcommand;
bool isValidCommand(string, string);\r
void printCommands(ostream&);\r
void printCommandsCategories(ostream&);\r
- void setOutputDirectory(string o) { outputDir = o; m->setOutputDir(o); }\r
- void setInputDirectory(string i) { inputDir = i; }\r
+ void setOutputDirectory(string o) { if(m->dirCheck(o) || (o == "")) { outputDir = o; m->setOutputDir(o); } }\r
+ void setInputDirectory(string i) { if(m->dirCheck(i) || (i == "")) { inputDir = i; } }\r
void setLogfileName(string n, bool a) { logFileName = n; append = a; }\r
string getLogfileName() { return logFileName; }\r
bool getAppend() { return append; }\r
optionString = input.substr((openParen+1), (closeParen-openParen-1)); //optionString contains everything between "(" and ")".
}
else if (openParen == -1) { m->mothurOut("[ERROR]: You are missing ("); m->mothurOutEndLine(); }
- else if (closeParen == -1) { m->mothurOut("[ERROR]:You are missing )"); m->mothurOutEndLine(); }
+ else if (closeParen == -1) { m->mothurOut("[ERROR]: You are missing )"); m->mothurOutEndLine(); }
}
catch(exception& e) {
m->errorOut(e, "CommandOptionParser", "CommandOptionParser");
outSummary << "OTU#\tPositioninAlignment\tA\tT\tG\tC\tGap\tNumberofSeqs\tConsensusBase" << endl;
string snumBins = toString(list->getNumBins());
+ vector<string> binLabels = list->getLabels();
for (int i = 0; i < list->getNumBins(); i++) {
if (m->control_pressed) { outSummary.close(); outName.close(); outFasta.close(); return 0; }
string bin = list->get(i);
string consSeq = getConsSeq(bin, outSummary, i);
-
- string seqName = "Otu";
- string sbinNumber = toString(i+1);
- if (sbinNumber.length() < snumBins.length()) {
- int diff = snumBins.length() - sbinNumber.length();
- for (int h = 0; h < diff; h++) { seqName += "0"; }
- }
- seqName += sbinNumber;
- outFasta << ">" << seqName << endl << consSeq << endl;
- outName << seqName << '\t' << seqName << "," << bin << endl;
+ outFasta << ">" << binLabels[i] << endl << consSeq << endl;
+ outName << binLabels[i] << '\t' << binLabels[i] << "," << bin << endl;
}
outSummary.close(); outName.close(); outFasta.close();
~CooccurrenceCommand(){}
vector<string> setParameters();
- string getCommandName() { return "Cooccurrence"; }
+ string getCommandName() { return "cooccurrence"; }
string getCommandCategory() { return "Hypothesis Testing"; }
string getHelpString();
//for each otu
for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
- if (metadatafile == "") { out << m->currentBinLabels[i]; }
+ if (metadatafile == "") { out << m->currentSharedBinLabels[i]; }
else { out << metadataLabels[i]; }
//find the averages this otu - Y
//for each otu
for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
- if (metadatafile == "") { out << m->currentBinLabels[i]; }
+ if (metadatafile == "") { out << m->currentSharedBinLabels[i]; }
else { out << metadataLabels[i]; }
//find the ranks of this otu - Y
//for each otu
for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
- if (metadatafile == "") { out << m->currentBinLabels[i]; }
+ if (metadatafile == "") { out << m->currentSharedBinLabels[i]; }
else { out << metadataLabels[i]; }
//find the ranks of this otu - Y
for (int h = 0; h < diff; h++) { binLabel += "0"; }
}
binLabel += sbinNumber;
- if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; }
+ if (i < m->currentSharedBinLabels.size()) { binLabel = m->currentSharedBinLabels[i]; }
newBinLabels.push_back(binLabel);
}
for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; }
thislookup = newLookup;
- m->currentBinLabels = newBinLabels;
+ m->currentSharedBinLabels = newBinLabels;
return 0;
}
}
in.close();
- out.close();
+
if (rest != "") {
vector<string> pieces = m->splitWhiteSpace(rest);
}
}
+ out.close();
return indexToNames;
}
}
}
in.close();
- out.close();
+
if (rest != "") {
vector<string> pieces = m->splitWhiteSpace(rest);
}
}
}
+ out.close();
for (it = groupIndex.begin(); it != groupIndex.end(); it++) { indexToGroups[it->second] = it->first; }
header += "repSeqName\trepSeq\tOTUConTaxonomy";
out << header << endl;
+ vector<string> binLabels = list->getLabels();
for (int i = 0; i < list->getNumBins(); i++) {
+ int index = findIndex(otuLabels, binLabels[i]);
+ if (index == -1) { m->mothurOut("[ERROR]: " + binLabels[i] + " is not in your constaxonomy file, aborting.\n"); m->control_pressed = true; }
+
if (m->control_pressed) { break; }
- out << otuLabels[i] << '\t';
+ out << otuLabels[index] << '\t';
vector<string> binNames;
string bin = list->get(i);
map<string, string>::iterator it = repNames.find(bin);
if (it == repNames.end()) {
- m->mothurOut("[ERROR: OTU " + otuLabels[i] + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->control_pressed = true; break;
+ m->mothurOut("[ERROR: OTU " + otuLabels[index] + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->control_pressed = true; break;
}else { seqRepName = it->second; numSeqsRep = binNames.size(); }
//sanity check
- if (binNames.size() != classifyOtuSizes[i]) {
- m->mothurOut("[ERROR: OTU " + otuLabels[i] + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[i]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true; break;
+ if (binNames.size() != classifyOtuSizes[index]) {
+ m->mothurOut("[ERROR: OTU " + otuLabels[index] + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true; break;
}
}else {
//find rep sequence in bin
}
if (seqRepName == "") {
- m->mothurOut("[ERROR: OTU " + otuLabels[i] + " is not in the count file. Make sure you are using files for the same distance.\n"); m->control_pressed = true; break;
+ m->mothurOut("[ERROR: OTU " + otuLabels[index] + " is not in the count file. Make sure you are using files for the same distance.\n"); m->control_pressed = true; break;
}
+ //sanity check - compare against classifyOtuSizes[index], the same entry the message reports and the same
+ //subscript the binNames.size() check earlier in this loop uses; i is the bin's position in the list file,
+ //index is where findIndex located this bin's label in otuLabels
- if (numSeqsRep != classifyOtuSizes[i]) {
- m->mothurOut("[ERROR: OTU " + otuLabels[i] + " contains " + toString(numSeqsRep) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[i]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true; break;
+ if (numSeqsRep != classifyOtuSizes[index]) {
+ m->mothurOut("[ERROR: OTU " + otuLabels[index] + " contains " + toString(numSeqsRep) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true; break;
}
}
}else { out << numSeqsRep << '\t'; }
//output repSeq
- out << seqRepName << '\t' << seqs[i].getAligned() << '\t' << taxonomies[i] << endl;
+ out << seqRepName << '\t' << seqs[index].getAligned() << '\t' << taxonomies[index] << endl;
}
if (m->control_pressed) { break; }
- int index = findIndex(otuLabels, m->currentBinLabels[h]);
- if (index == -1) { m->mothurOut("[ERROR]: " + m->currentBinLabels[h] + " is not in your constaxonomy file, aborting.\n"); m->control_pressed = true; }
+ int index = findIndex(otuLabels, m->currentSharedBinLabels[h]);
+ if (index == -1) { m->mothurOut("[ERROR]: " + m->currentSharedBinLabels[h] + " is not in your constaxonomy file, aborting.\n"); m->control_pressed = true; }
if (m->control_pressed) { break; }
//sanity check
if (totalAbund != classifyOtuSizes[index]) {
- m->mothurOut("[WARNING]: OTU " + m->currentBinLabels[h] + " contains " + toString(totalAbund) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); //m->control_pressed = true; break;
+ m->mothurOut("[WARNING]: OTU " + m->currentSharedBinLabels[h] + " contains " + toString(totalAbund) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); //m->control_pressed = true; break;
}
//output repSeq
public:
DataVector(){ m = MothurOut::getInstance(); }// : maxRank(0), numBins(0), numSeqs(0){};
- DataVector(string l) : label(l) {};
- DataVector(const DataVector& dv) : label(dv.label){};//, maxRank(dv.maxRank), numBins(dv.numBins), numSeqs(dv.numSeqs) {};
- DataVector(ifstream&);
- DataVector(ifstream&, GroupMap*);
+ DataVector(string l) : label(l) { m = MothurOut::getInstance();};
+ DataVector(const DataVector& dv) : label(dv.label){ m = MothurOut::getInstance();};//, maxRank(dv.maxRank), numBins(dv.numBins), numSeqs(dv.numSeqs) {};
+ DataVector(ifstream&) {m = MothurOut::getInstance();}
+ DataVector(ifstream&, GroupMap*){m = MothurOut::getInstance();}
virtual ~DataVector(){};
// virtual int getNumBins() { return numBins; }
#include "deuniqueseqscommand.h"
#include "sequence.hpp"
+#include "counttable.h"
//**********************************************************************************************************************
vector<string> DeUniqueSeqsCommand::setParameters(){
try {
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pname);
+ CommandParameter pname("name", "InputTypes", "", "", "namecount", "namecount", "none","name",false,false,true); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "namecount", "namecount", "none","group",false,false,true); parameters.push_back(pcount);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
string DeUniqueSeqsCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The deunique.seqs command reads a fastafile and namefile, and creates a fastafile containing all the sequences.\n";
- helpString += "The deunique.seqs command parameters are fasta and name, both are required, unless you have valid current name and fasta files.\n";
+ helpString += "The deunique.seqs command reads a fastafile and namefile or countfile, and creates a fastafile containing all the sequences. It you provide a count file with group information a group file is also created.\n";
+ helpString += "The deunique.seqs command parameters are fasta, name and count. Fasta is required and you must provide either a name or count file.\n";
helpString += "The deunique.seqs command should be in the following format: \n";
helpString += "deunique.seqs(fasta=yourFastaFile, name=yourNameFile) \n";
helpString += "Example deunique.seqs(fasta=abrecovery.unique.fasta, name=abrecovery.names).\n";
try {
string pattern = "";
- if (type == "fasta") { pattern = "[filename],redundant.fasta"; }
+ if (type == "fasta") { pattern = "[filename],redundant.fasta"; }
+ else if (type == "group") { pattern = "[filename],redundant.groups"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
return pattern;
setParameters();
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
+ outputTypes["group"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "DeUniqueSeqsCommand", "DeconvoluteCommand");
//initialize outputTypes
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
+ outputTypes["group"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["name"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
outputDir = "";
- outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it
}
nameFile = validParameter.validFile(parameters, "name", true);
if (nameFile == "not open") { abort = true; }
- else if (nameFile == "not found"){
- nameFile = m->getNameFile();
+ else if (nameFile == "not found"){ nameFile = ""; }
+ else { m->setNameFile(nameFile); }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if ((countfile != "") && (nameFile != "")) { m->mothurOut("When executing a deunique.seqs command you must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
+
+
+ if ((countfile == "") && (nameFile == "")) { //look for currents
+ nameFile = m->getNameFile();
if (nameFile != "") { m->mothurOut("Using " + nameFile + " as input file for the name parameter."); m->mothurOutEndLine(); }
- else { m->mothurOut("You have no current namefile and the name parameter is required."); m->mothurOutEndLine(); abort = true; }
- }else { m->setNameFile(nameFile); }
+ else {
+ countfile = m->getCountTableFile();
+ if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+ else { m->mothurOut("[ERROR]: You have no current name or count files one is required."); m->mothurOutEndLine(); abort = true; }
+ }
+ }
+
}
}
//prepare filenames and open files
ofstream out;
+ //pick the directory once: the user's outputdir if given, otherwise the directory of the fasta file
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(fastaFile); }
+ //look for "unique" in the name without the directory prefix (as the pre-change code did) so the search
+ //cannot cut the path at a directory name, then prepend the directory exactly one time
string outFastaFile = m->getRootName(m->getSimpleName(fastaFile));
int pos = outFastaFile.find("unique");
- if (pos != string::npos) { outFastaFile = outputDir + outFastaFile.substr(0, pos); }
- else { outFastaFile = outputDir + outFastaFile; }
+ if (pos != string::npos) { outFastaFile = outFastaFile.substr(0, pos); }
+ outFastaFile = thisOutputDir + outFastaFile;
outFastaFile = getOutputFileName("fasta", variables);
m->openOutputFile(outFastaFile, out);
- readNamesFile();
- if (m->control_pressed) { out.close(); outputTypes.clear(); m->mothurRemove(outFastaFile); return 0; }
+ map<string, string> nameMap;
+ CountTable ct;
+ ofstream outGroup;
+ string outGroupFile;
+ vector<string> groups;
+ if (nameFile != "") { m->readNames(nameFile, nameMap); }
+ else {
+ ct.readTable(countfile, true, false);
+
+ if (ct.hasGroupInfo()) {
+ thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(countfile); }
+ outGroupFile = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
+ variables["[filename]"] = outGroupFile;
+ outGroupFile = getOutputFileName("group", variables);
+ m->openOutputFile(outGroupFile, outGroup);
+ groups = ct.getNamesOfGroups();
+ }
+ }
+
+ if (m->control_pressed) { out.close(); outputTypes.clear(); m->mothurRemove(outFastaFile); if (countfile != "") { if (ct.hasGroupInfo()) { outGroup.close(); m->mothurRemove(outGroupFile); } } return 0; }
ifstream in;
m->openInputFile(fastaFile, in);
while (!in.eof()) {
- if (m->control_pressed) { in.close(); out.close(); outputTypes.clear(); m->mothurRemove(outFastaFile); return 0; }
+ if (m->control_pressed) { in.close(); out.close(); outputTypes.clear(); m->mothurRemove(outFastaFile); if (countfile != "") { if (ct.hasGroupInfo()) { outGroup.close(); m->mothurRemove(outGroupFile); } } return 0; }
Sequence seq(in); m->gobble(in);
if (seq.getName() != "") {
- //look for sequence name in nameMap
- map<string, string>::iterator it = nameMap.find(seq.getName());
-
- if (it == nameMap.end()) { m->mothurOut("[ERROR]: Your namefile does not contain " + seq.getName() + ", aborting."); m->mothurOutEndLine(); m->control_pressed = true; }
- else {
- vector<string> names;
- m->splitAtComma(it->second, names);
-
- //output sequences
- for (int i = 0; i < names.size(); i++) {
- out << ">" << names[i] << endl;
- out << seq.getAligned() << endl;
- }
-
- //remove seq from name map so we can check for seqs in namefile not in fastafile later
- nameMap.erase(it);
- }
+ if (nameFile != "") {
+ //look for sequence name in nameMap
+ map<string, string>::iterator it = nameMap.find(seq.getName());
+
+ if (it == nameMap.end()) { m->mothurOut("[ERROR]: Your namefile does not contain " + seq.getName() + ", aborting."); m->mothurOutEndLine(); m->control_pressed = true; }
+ else {
+ vector<string> names;
+ m->splitAtComma(it->second, names);
+
+ //output sequences
+ for (int i = 0; i < names.size(); i++) {
+ out << ">" << names[i] << endl;
+ out << seq.getAligned() << endl;
+ }
+
+ //remove seq from name map so we can check for seqs in namefile not in fastafile later
+ nameMap.erase(it);
+ }
+ }else {
+ if (ct.hasGroupInfo()) {
+ vector<int> groupCounts = ct.getGroupCounts(seq.getName());
+ int count = 1;
+ for (int i = 0; i < groups.size(); i++) {
+ for (int j = 0; j < groupCounts[i]; j++) {
+ outGroup << seq.getName()+"_"+toString(count) << '\t' << groups[i] << endl; count++;
+ }
+ }
+
+ }
+
+ int numReps = ct.getNumSeqs(seq.getName()); //will report error and set m->control_pressed if not found
+ for (int i = 0; i < numReps; i++) {
+ out << ">" << seq.getName()+"_"+toString(i+1) << endl;
+ out << seq.getAligned() << endl;
+ }
+ }
}
}
in.close();
- out.close();
+ out.close();
+ if (countfile != "") { if (ct.hasGroupInfo()) { outGroup.close(); } }
- if (nameMap.size() != 0) { //then there are names in the namefile not in the fastafile
- for (map<string, string>::iterator it = nameMap.begin(); it != nameMap.end(); it++) {
- m->mothurOut(it->first + " is not in your fasta file, but is in your name file. Please correct."); m->mothurOutEndLine();
- }
- }
-
- if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outFastaFile); return 0; }
+
+ if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outFastaFile); if (countfile != "") { if (ct.hasGroupInfo()) { m->mothurRemove(outGroupFile); } }return 0; }
+ outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
+ if (countfile != "") { if (ct.hasGroupInfo()) { outputNames.push_back(outGroupFile); outputTypes["group"].push_back(outGroupFile); } }
+
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
- m->mothurOut(outFastaFile); m->mothurOutEndLine();
- outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
- m->mothurOutEndLine();
+ for(int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+
//set fasta file as new current fastafile
string current = "";
if (itTypes != outputTypes.end()) {
if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
}
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "DeUniqueSeqsCommand", "execute");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-int DeUniqueSeqsCommand::readNamesFile() {
- try {
-
- ifstream inNames;
- m->openInputFile(nameFile, inNames);
-
- string name, names;
- map<string, string>::iterator it;
-
- while(inNames){
-
- if(m->control_pressed) { break; }
-
- inNames >> name; m->gobble(inNames);
- inNames >> names;
-
- it = nameMap.find(name);
-
- if (it == nameMap.end()) { nameMap[name] = names; }
- else { m->mothurOut("[ERROR]: Your namefile already contains " + name + ", aborting."); m->mothurOutEndLine(); m->control_pressed = true; }
-
- m->gobble(inNames);
+
+ itTypes = outputTypes.find("group");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
}
- inNames.close();
+
return 0;
-
}
catch(exception& e) {
- m->errorOut(e, "DeUniqueSeqsCommand", "readNamesFile");
+ m->errorOut(e, "DeUniqueSeqsCommand", "execute");
exit(1);
}
}
-
/**************************************************************************************/
private:
- string fastaFile, nameFile, outputDir;
+ string fastaFile, nameFile, outputDir, countfile;
vector<string> outputNames;
bool abort;
-
- map<string, string> nameMap;
-
- int readNamesFile();
+
};
mout->clearAllGroups();
mout->Treenames.clear();
mout->saveNextLabel = "";
- mout->printedHeaders = false;
- mout->commandInputsConvertError = false;
- mout->currentBinLabels.clear();
- mout->binLabelsInFile.clear();
+ mout->commandInputsConvertError = false;
+ mout->printedSharedHeaders = false;
+ mout->currentSharedBinLabels.clear();
+ mout->sharedBinLabelsInFile.clear();
+ mout->printedListHeaders = false;
+ mout->listBinLabelsInFile.clear();
Command* command = cFactory->getCommand(commandName, options);
if (mout->commandInputsConvertError) { quitCommandCalled = 2; }
mout->clearAllGroups();
mout->Treenames.clear();
mout->saveNextLabel = "";
- mout->printedHeaders = false;
mout->commandInputsConvertError = false;
- mout->currentBinLabels.clear();
- mout->binLabelsInFile.clear();
+ mout->printedSharedHeaders = false;
+ mout->currentSharedBinLabels.clear();
+ mout->sharedBinLabelsInFile.clear();
+ mout->printedListHeaders = false;
+ mout->listBinLabelsInFile.clear();
Command* command = cFactory->getCommand(commandName, options);
mout->clearAllGroups();
mout->Treenames.clear();
mout->saveNextLabel = "";
- mout->printedHeaders = false;
- mout->commandInputsConvertError = false;
- mout->currentBinLabels.clear();
- mout->binLabelsInFile.clear();
+ mout->commandInputsConvertError = false;
+ mout->printedSharedHeaders = false;
+ mout->currentSharedBinLabels.clear();
+ mout->sharedBinLabelsInFile.clear();
+ mout->printedListHeaders = false;
+ mout->listBinLabelsInFile.clear();
Command* command = cFactory->getCommand(commandName, options);
if (mout->commandInputsConvertError) { quitCommandCalled = 2; }
try {
//save mothurOut's binLabels to restore for next label
- vector<string> saveBinLabels = m->currentBinLabels;
+ vector<string> saveBinLabels = m->currentSharedBinLabels;
map<string, string> variables;
variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
m->openOutputFile(outputFileName, out);
outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
- m->currentBinLabels = filteredLabels;
+ m->currentSharedBinLabels = filteredLabels;
filteredLookup[0]->printHeaders(out);
//save mothurOut's binLabels to restore for next label
- m->currentBinLabels = saveBinLabels;
+ m->currentSharedBinLabels = saveBinLabels;
for (int j = 0; j < filteredLookup.size(); j++) { delete filteredLookup[j]; }
for (int k = 0; k < counts[j]; k++) { table[j][k]++; }
if ((abund == -1) && (samples != -1)) { //we want all OTUs with this number of samples
- if (counts[j] >= samples) { otuNames[j].push_back(m->currentBinLabels[i]); }
+ if (counts[j] >= samples) { otuNames[j].push_back(m->currentSharedBinLabels[i]); }
}else if ((abund != -1) && (samples == -1)) { //we want all OTUs with this relabund
if (j == abund) {
- for (int k = 0; k < counts[j]; k++) { otuNames[k+1].push_back(m->currentBinLabels[i]); }
+ for (int k = 0; k < counts[j]; k++) { otuNames[k+1].push_back(m->currentSharedBinLabels[i]); }
}
}else if ((abund != -1) && (samples != -1)) { //we want only OTUs with this relabund for this number of samples
if ((j == abund) && (counts[j] >= samples)) {
- otuNames[j].push_back(m->currentBinLabels[i]);
+ otuNames[j].push_back(m->currentSharedBinLabels[i]);
}
}
}
else if (type == "name") { pattern = "[filename],pick,[extension]"; }
else if (type == "group") { pattern = "[filename],pick,[extension]"; }
else if (type == "count") { pattern = "[filename],pick,[extension]"; }
- else if (type == "list") { pattern = "[filename],pick,[extension]"; }
+ else if (type == "list") { pattern = "[filename],[tag],pick,[extension]"; }
else if (type == "shared") { pattern = "[filename],[tag],pick,[extension]"; }
else if (type == "design") { pattern = "[filename],pick,[extension]"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
map<string, string> variables;
variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
variables["[extension]"] = m->getExtension(listfile);
- string outputFileName = getOutputFileName("list", variables);
-
- ofstream out;
- m->openOutputFile(outputFileName, out);
ifstream in;
m->openInputFile(listfile, in);
while(!in.eof()){
selectedCount = 0;
-
+
//read in list vector
ListVector list(in);
+
+ variables["[tag]"] = list.getLabel();
+ string outputFileName = getOutputFileName("list", variables);
+
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+ outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
+
+ vector<string> binLabels = list.getLabels();
+ vector<string> newBinLabels;
//make a new list vector
ListVector newList;
//parse out names that are in accnos file
string binnames = list.get(i);
+ vector<string> thisBinNames;
+ m->splitAtComma(binnames, thisBinNames);
string newNames = "";
- while (binnames.find_first_of(',') != -1) {
- string name = binnames.substr(0,binnames.find_first_of(','));
- binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
-
- //if that name is in the .accnos file, add it
+ for (int j = 0; j < thisBinNames.size(); j++) {
+ string name = thisBinNames[j];
+
+ //if that name is in the .accnos file, add it
if (names.count(name) != 0) { newNames += name + ","; selectedCount++; }
else{
//if you are not in the accnos file check if you are a name that needs to be changed
selectedCount++;
}
}
- }
-
- //get last name
- if (names.count(binnames) != 0) { newNames += binnames + ","; selectedCount++; }
- else{
- //if you are not in the accnos file check if you are a name that needs to be changed
- map<string, string>::iterator it = uniqueToRedundant.find(binnames);
- if (it != uniqueToRedundant.end()) {
- newNames += it->second + ",";
- selectedCount++;
- }
- }
-
+ }
+
//if there are names in this bin add to new list
if (newNames != "") {
newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
- newList.push_back(newNames);
+ newList.push_back(newNames);
+ newBinLabels.push_back(binLabels[i]);
}
}
//print new listvector
if (newList.getNumBins() != 0) {
wroteSomething = true;
+ newList.setLabels(newBinLabels);
+ newList.printHeaders(out);
newList.print(out);
}
m->gobble(in);
+ out.close();
}
- in.close();
- out.close();
+ in.close();
if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine(); }
- outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
m->mothurOut("Selected " + toString(selectedCount) + " sequences from your list file."); m->mothurOutEndLine();
else if (type == "name") { pattern = "[filename],pick,[extension]"; }
else if (type == "group") { pattern = "[filename],pick,[extension]"; }
else if (type == "count") { pattern = "[filename],pick,[extension]"; }
- else if (type == "list") { pattern = "[filename],pick,[extension]-[filename],[distance],pick,[extension]"; }
+ else if (type == "list") { pattern = "[filename],[distance],pick,[extension]"; }
else if (type == "shared") { pattern = "[filename],[distance],pick,[extension]"; }
else if (type == "alignreport") { pattern = "[filename],pick.align.report"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
map<string, string> variables;
variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
variables["[extension]"] = m->getExtension(listfile);
- string outputFileName = getOutputFileName("list", variables);
- ofstream out;
- m->openOutputFile(outputFileName, out);
-
+
ifstream in;
m->openInputFile(listfile, in);
bool wroteSomething = false;
while(!in.eof()){
-
- if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
//read in list vector
ListVector list(in);
//make a new list vector
ListVector newList;
newList.setLabel(list.getLabel());
+
+ variables["[distance]"] = list.getLabel();
+ string outputFileName = getOutputFileName("list", variables);
+
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+ outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
+
+ if (m->control_pressed) { in.close(); out.close(); return 0; }
+
+ vector<string> binLabels = list.getLabels();
+ vector<string> newBinLabels;
//for each bin
for (int i = 0; i < list.getNumBins(); i++) {
//if there are names in this bin add to new list
if (newNames != "") {
newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
- newList.push_back(newNames);
+ newList.push_back(newNames);
+ newBinLabels.push_back(binLabels[i]);
}
}
//print new listvector
if (newList.getNumBins() != 0) {
wroteSomething = true;
+ newList.setLabels(newBinLabels);
+ newList.printHeaders(out);
newList.print(out);
}
m->gobble(in);
+ out.close();
}
in.close();
- out.close();
+
if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine(); }
- outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
return 0;
bool wroteSomething = false;
string snumBins = toString(list->getNumBins());
+ vector<string> binLabels = list->getLabels();
+ vector<string> newBinLabels;
for (int i = 0; i < list->getNumBins(); i++) {
if (m->control_pressed) { delete list; return 0;}
if (names.count(m->getSimpleLabel(otuLabel)) != 0) {
selectedCount++;
newList.push_back(list->get(i));
+ newBinLabels.push_back(binLabels[i]);
}
}
//print new listvector
if (newList.getNumBins() != 0) {
wroteSomething = true;
+ newList.setLabels(newBinLabels);
+ newList.printHeaders(out);
newList.print(out);
}
out.close();
if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } return 0; }
//is this otu on the list
- if (names.count(m->getSimpleLabel(m->currentBinLabels[i])) != 0) {
+ if (names.count(m->getSimpleLabel(m->currentSharedBinLabels[i])) != 0) {
numSelected++; wroteSomething = true;
- newLabels.push_back(m->currentBinLabels[i]);
+ newLabels.push_back(m->currentSharedBinLabels[i]);
for (int j = 0; j < newLookup.size(); j++) { //add this OTU to the new lookup
newLookup[j]->push_back(lookup[j]->getAbundance(i), lookup[j]->getGroup());
}
for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; }
- m->currentBinLabels = newLabels;
+ m->currentSharedBinLabels = newLabels;
newLookup[0]->printHeaders(out);
m->mothurOut(list->getLabel()); m->mothurOutEndLine();
//for each bin in the list vector
+ vector<string> binLabels = list->getLabels();
for (int i = 0; i < list->getNumBins(); i++) {
if (m->control_pressed) { break; }
binnames = list->get(i);
if (sort == "otu") {
- out << i+1 << '\t' << binnames << endl;
+ out << binLabels[i] << '\t' << binnames << endl;
}else{ //sort = name
vector<string> names;
m->splitAtComma(binnames, names);
for (int j = 0; j < names.size(); j++) {
- out << names[j] << '\t' << i+1 << endl;
+ out << names[j] << '\t' << binLabels[i] << endl;
}
}
}
outputNames.push_back(matrixName); outputTypes["matrix"].push_back(matrixName);
findQ.printZMatrix(matrixName, thisGroups);
- findQ.printRelAbund(relabund, m->currentBinLabels);
+ findQ.printRelAbund(relabund, m->currentSharedBinLabels);
if(optimizegap != -1 && (numPartitions - minPartition) >= optimizegap && numPartitions >= minpartitions){
string tempDoneFile = m->getRootName(m->getSimpleName(sharedfile)) + toString(processID) + ".done.temp";
pDataArray->outputNames.push_back(pDataArray->matrix[i]);
findQ->printZMatrix(pDataArray->matrix[i], pDataArray->m->getGroups());
- findQ->printRelAbund(pDataArray->relabunds[i], pDataArray->m->currentBinLabels);
+ findQ->printRelAbund(pDataArray->relabunds[i], pDataArray->m->currentSharedBinLabels);
if(pDataArray->optimizegap != -1 && (numPartitions - pDataArray->minPartition) >= pDataArray->optimizegap && numPartitions >= pDataArray->minpartitions){ break; }
if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } return 0; }
//is this otu on the list
- if (labels.count(m->getSimpleLabel(m->currentBinLabels[i])) != 0) {
+ if (labels.count(m->getSimpleLabel(m->currentSharedBinLabels[i])) != 0) {
numSelected++; wroteSomething = true;
- newLabels.push_back(m->currentBinLabels[i]);
+ newLabels.push_back(m->currentSharedBinLabels[i]);
for (int j = 0; j < newLookup.size(); j++) { //add this OTU to the new lookup
newLookup[j]->push_back(lookup[j]->getAbundance(i), lookup[j]->getGroup());
}
for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; }
- m->currentBinLabels = newLabels;
+ m->currentSharedBinLabels = newLabels;
newLookup[0]->printHeaders(out);
newList.setLabel(list->getLabel());
int selectedCount = 0;
bool wroteSomething = false;
- string snumBins = toString(list->getNumBins());
+ vector<string> binLabels = list->getLabels();
+ vector<string> newLabels;
for (int i = 0; i < list->getNumBins(); i++) {
if (m->control_pressed) { delete list; return 0;}
- //create a label for this otu
- string otuLabel = "Otu";
- string sbinNumber = toString(i+1);
- if (sbinNumber.length() < snumBins.length()) {
- int diff = snumBins.length() - sbinNumber.length();
- for (int h = 0; h < diff; h++) { otuLabel += "0"; }
- }
- otuLabel += sbinNumber;
-
- if (labels.count(m->getSimpleLabel(otuLabel)) != 0) {
+ if (labels.count(m->getSimpleLabel(binLabels[i])) != 0) {
selectedCount++;
newList.push_back(list->get(i));
+ newLabels.push_back(binLabels[i]);
}
}
//print new listvector
if (newList.getNumBins() != 0) {
wroteSomething = true;
+ newList.setLabels(newLabels);
+ newList.printHeaders(out);
newList.print(out);
}
out.close();
//********************************************************************************************************************
//sorts lowest to highest
inline bool compareBin(repStruct left, repStruct right){
- return (left.bin < right.bin);
+ return (left.simpleBin < right.simpleBin);
}
//********************************************************************************************************************
//sorts lowest to highest
}
//for each bin in the list vector
+ vector<string> binLabels = processList->getLabels();
for (int i = 0; i < processList->size(); i++) {
if (m->control_pressed) {
out.close();
if (Groups.size() == 0) {
nameRep = findRep(namesInBin, "");
- newNamesOutput << i << '\t' << nameRep << '\t';
+ newNamesOutput << binLabels[i] << '\t' << nameRep << '\t';
//put rep at first position in names line
string outputString = nameRep + ",";
nameRep = findRep(NamesInGroup[Groups[j]], Groups[j]);
//output group rep and other members of this group
- (*(filehandles[Groups[j]])) << i << '\t' << nameRep << '\t';
+ (*(filehandles[Groups[j]])) << binLabels[i] << '\t' << nameRep << '\t';
//put rep at first position in names line
string outputString = nameRep + ",";
ifstream in;
m->openInputFile(filename, in);
- int i = 0;
string tempGroup = "";
in >> tempGroup; m->gobble(in);
int thistotal = 0;
while (!in.eof()) {
- string rep, binnames;
- in >> i >> rep >> binnames; m->gobble(in);
+ string rep, binnames, binLabel;
+ in >> binLabel >> rep >> binnames; m->gobble(in);
vector<string> names;
m->splitAtComma(binnames, names);
if (sequence != "not found") {
if (sorted == "") { //print them out
- rep = rep + "\t" + toString(i+1);
+ rep = rep + "\t" + binLabel;
rep = rep + "|" + toString(binsize);
if (group != "") {
rep = rep + "|" + group;
out << ">" << rep << endl;
out << sequence << endl;
}else { //save them
- repStruct newRep(rep, i+1, binsize, group);
+ int simpleLabel;
+ m->mothurConvert(m->getSimpleLabel(binLabel), simpleLabel);
+ repStruct newRep(rep, binLabel, simpleLabel, binsize, group);
reps.push_back(newRep);
}
}else {
//print them
for (int i = 0; i < reps.size(); i++) {
string sequence = fasta->getSequence(reps[i].name);
- string outputName = reps[i].name + "\t" + toString(reps[i].bin);
+ string outputName = reps[i].name + "\t" + reps[i].bin;
outputName = outputName + "|" + toString(reps[i].size);
if (reps[i].group != "") {
outputName = outputName + "|" + reps[i].group;
ifstream in;
m->openInputFile(filename, in);
- int i = 0;
string rep, binnames;
string tempGroup = "";
while (!in.eof()) {
if (m->control_pressed) { break; }
- in >> i >> rep >> binnames; m->gobble(in);
+ string binLabel;
+ in >> binLabel >> rep >> binnames; m->gobble(in);
if (countfile == "") { out2 << rep << '\t' << binnames << endl; }
else {
+ //One representative sequence of a bin: its name, the bin's label, the bin size and an optional group.
struct repStruct {
string name;
- int bin;
+ string bin; //bin label exactly as read from the input, printed back out unchanged
+ int simpleBin; //integer form of the bin label, the key compareBin orders by
int size;
string group;
- repStruct(){}
- repStruct(string n, int b, int s, string g) : name(n), bin(b), size(s), group(g) {}
+ //default construction zeroes the integer members so they never hold indeterminate values
+ repStruct() : simpleBin(0), size(0) {}
+ //initializers are listed in member declaration order, which is the order they actually run in
+ repStruct(string n, string b, int sb, int s, string g) : name(n), bin(b), simpleBin(sb), size(s), group(g) { }
~repStruct() {}
};
//**********************************************************************************************************************
int GetOtusCommand::readListGroup(){
try {
- string thisOutputDir = outputDir;
+ InputData* input = new InputData(listfile, "list");
+ ListVector* list = input->getListVector();
+ string lastLabel = list->getLabel();
+
+ //using first label seen if none is provided
+ if (label == "") { label = lastLabel; }
+
+ string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
- map<string, string> variables;
+ map<string, string> variables;
variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
variables["[tag]"] = label;
variables["[extension]"] = m->getExtension(listfile);
ofstream out;
m->openOutputFile(outputFileName, out);
-
- string GroupOutputDir = outputDir;
+
+ string GroupOutputDir = outputDir;
if (outputDir == "") { GroupOutputDir += m->hasPath(groupfile); }
variables["[filename]"] = GroupOutputDir + m->getRootName(m->getSimpleName(groupfile));
variables["[extension]"] = m->getExtension(groupfile);
ofstream outGroup;
m->openOutputFile(outputGroupFileName, outGroup);
-
- InputData* input = new InputData(listfile, "list");
- ListVector* list = input->getListVector();
- string lastLabel = list->getLabel();
-
- //using first label seen if none is provided
- if (label == "") { label = lastLabel; }
+
//if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
set<string> labels; labels.insert(label);
int numOtus = 0;
//for each bin
+ vector<string> binLabels = list->getLabels();
+ vector<string> newBinLabels;
for (int i = 0; i < list->getNumBins(); i++) {
if (m->control_pressed) { return 0; }
//if there are sequences from the groups we want in this bin add to new list, output to groupfile
if (keepBin) {
- newList.push_back(binnames);
+ newList.push_back(binnames);
+ newBinLabels.push_back(binLabels[i]);
outGroup << groupFileOutput;
numOtus++;
}
//print new listvector
if (newList.getNumBins() != 0) {
wroteSomething = true;
- newList.print(out);
+ newList.setLabels(newBinLabels);
+ newList.printHeaders(out);
+ newList.print(out);
}
m->mothurOut(newList.getLabel() + " - selected " + toString(numOtus) + " of the " + toString(list->getNumBins()) + " OTUs."); m->mothurOutEndLine();
if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
- if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
+ if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); }
getRelAbundance(lookup, out);
processedLabels.insert(lookup[0]->getLabel());
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
lookup = input->getSharedRAbundVectors(lastLabel);
m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
- if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
+ if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); }
getRelAbundance(lookup, out);
processedLabels.insert(lookup[0]->getLabel());
lookup = input->getSharedRAbundVectors(lastLabel);
m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
- if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
+ if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); }
getRelAbundance(lookup, out);
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
vector<string> GetSeqsCommand::setParameters(){
try {
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false,true); parameters.push_back(pfasta);
+ CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "FNGLT", "none","fastq",false,false,true); parameters.push_back(pfastq);
CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname);
CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false,true); parameters.push_back(pcount);
CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none","group",false,false,true); parameters.push_back(pgroup);
string GetSeqsCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, count, list, taxonomy, quality or alignreport file.\n";
+ helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, count, list, taxonomy, quality, fastq or alignreport file.\n";
helpString += "It outputs a file containing only the sequences in the .accnos file.\n";
- helpString += "The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups. You must provide accnos unless you have a valid current accnos file, and at least one of the other parameters.\n";
+ helpString += "The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport, fastq and dups. You must provide accnos unless you have a valid current accnos file, and at least one of the other parameters.\n";
helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=true. \n";
helpString += "The get.seqs command should be in the following format: get.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
helpString += "Example get.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
setParameters();
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
+ outputTypes["fastq"] = tempOutNames;
outputTypes["taxonomy"] = tempOutNames;
outputTypes["name"] = tempOutNames;
outputTypes["group"] = tempOutNames;
string pattern = "";
if (type == "fasta") { pattern = "[filename],pick,[extension]"; }
+ else if (type == "fastq") { pattern = "[filename],pick,[extension]"; }
else if (type == "taxonomy") { pattern = "[filename],pick,[extension]"; }
else if (type == "name") { pattern = "[filename],pick,[extension]"; }
else if (type == "group") { pattern = "[filename],pick,[extension]"; }
else if (type == "count") { pattern = "[filename],pick,[extension]"; }
- else if (type == "list") { pattern = "[filename],pick,[extension]"; }
+ else if (type == "list") { pattern = "[filename],[distance],pick,[extension]"; }
else if (type == "qfile") { pattern = "[filename],pick,[extension]"; }
else if (type == "accnosreport") { pattern = "[filename],pick.accnos.report"; }
else if (type == "alignreport") { pattern = "[filename],pick.align.report"; }
//initialize outputTypes
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
+ outputTypes["fastq"] = tempOutNames;
outputTypes["taxonomy"] = tempOutNames;
outputTypes["name"] = tempOutNames;
outputTypes["group"] = tempOutNames;
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["count"] = inputDir + it->second; }
}
+
+ it = parameters.find("fastq");
+ //user has given a fastq file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["fastq"] = inputDir + it->second; }
+ }
}
if (qualfile == "not open") { abort = true; }
else if (qualfile == "not found") { qualfile = ""; }
else { m->setQualFile(qualfile); }
+
+ fastqfile = validParameter.validFile(parameters, "fastq", true);
+ if (fastqfile == "not open") { abort = true; }
+ else if (fastqfile == "not found") { fastqfile = ""; }
accnosfile2 = validParameter.validFile(parameters, "accnos2", true);
if (accnosfile2 == "not open") { abort = true; }
string temp = validParameter.validFile(parameters, "dups", false); if (temp == "not found") { temp = "true"; usedDups = ""; }
dups = m->isTrue(temp);
- if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == "") && (countfile == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy, quality or listfile."); m->mothurOutEndLine(); abort = true; }
+ if ((fastqfile == "") && (fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == "") && (countfile == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy, quality, fastq or listfile."); m->mothurOutEndLine(); abort = true; }
if (countfile == "") {
if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
//read through the correct file and output lines you want to keep
if (namefile != "") { readName(); }
if (fastafile != "") { readFasta(); }
+ if (fastqfile != "") { readFastq(); }
if (groupfile != "") { readGroup(); }
if (countfile != "") { readCount(); }
if (alignfile != "") { readAlign(); }
exit(1);
}
}
+//**********************************************************************************************************************
+ //Copies each record of fastqfile whose sequence name is in the names set into a new
+ //"pick" fastq file, then registers that file under the "fastq" output type.
+ //
+ //A record is a header line starting with '@' plus the next three lines read with m->getline.
+ //The lookup name is the first whitespace-delimited token of the header with the leading '@'
+ //removed, after being passed through m->checkName.
+ //
+ //Returns 0 on completion. Also returns 0 if m->control_pressed is set, in which case the
+ //partially written output file is removed first.
+ int GetSeqsCommand::readFastq(){
+     try {
+         bool wroteSomething = false;
+         int selectedCount = 0;
+
+         ifstream in;
+         m->openInputFile(fastqfile, in);
+
+         //output goes next to the input file unless an output directory was given
+         string thisOutputDir = outputDir;
+         if (outputDir == "") {  thisOutputDir += m->hasPath(fastqfile);  }
+         map<string, string> variables;
+         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastqfile));
+         variables["[extension]"] = m->getExtension(fastqfile);
+         string outputFileName = getOutputFileName("fastq", variables);
+         ofstream out;
+         m->openOutputFile(outputFileName, out);
+
+         while(!in.eof()){
+
+             if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
+
+             //read sequence name
+             string input = m->getline(in); m->gobble(in);
+
+             //check for an empty line before indexing, so input[0] is never read from an empty string
+             if (!input.empty() && (input[0] == '@')) {
+                 string outputString = input + "\n";
+
+                 //get rest of lines
+                 outputString += m->getline(in) + "\n"; m->gobble(in);
+                 outputString += m->getline(in) + "\n"; m->gobble(in);
+                 outputString += m->getline(in) + "\n"; m->gobble(in);
+
+                 vector<string> splits = m->splitWhiteSpace(input);
+
+                 //only take the first token if the split actually produced one
+                 if (!splits.empty()) {
+                     string name = splits[0].substr(1); //drop the leading '@'
+                     m->checkName(name);
+
+                     if (names.count(name) != 0) {
+                         wroteSomething = true;
+                         selectedCount++;
+                         out << outputString;
+                     }
+                 }
+             }
+
+             m->gobble(in);
+         }
+         in.close();
+         out.close();
+
+
+         if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine(); }
+         outputNames.push_back(outputFileName); outputTypes["fastq"].push_back(outputFileName);
+
+         m->mothurOut("Selected " + toString(selectedCount) + " sequences from your fastq file."); m->mothurOutEndLine();
+
+         return 0;
+
+     }
+     catch(exception& e) {
+         m->errorOut(e, "GetSeqsCommand", "readFastq");
+         exit(1);
+     }
+ }
//**********************************************************************************************************************
int GetSeqsCommand::readFasta(){
map<string, string> variables;
variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
variables["[extension]"] = m->getExtension(listfile);
- string outputFileName = getOutputFileName("list", variables);
- ofstream out;
- m->openOutputFile(outputFileName, out);
ifstream in;
m->openInputFile(listfile, in);
while(!in.eof()){
selectedCount = 0;
-
- if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
//read in list vector
ListVector list(in);
//make a new list vector
ListVector newList;
newList.setLabel(list.getLabel());
+
+ variables["[distance]"] = list.getLabel();
+ string outputFileName = getOutputFileName("list", variables);
+
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+ outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
+
+ vector<string> binLabels = list.getLabels();
+ vector<string> newBinLabels;
+
+ if (m->control_pressed) { in.close(); out.close(); return 0; }
//for each bin
for (int i = 0; i < list.getNumBins(); i++) {
m->splitAtComma(binnames, bnames);
string newNames = "";
- for (int i = 0; i < bnames.size(); i++) {
- string name = bnames[i];
+ for (int j = 0; j < bnames.size(); j++) {
+ string name = bnames[j];
//if that name is in the .accnos file, add it
if (names.count(name) != 0) { newNames += name + ","; selectedCount++; if (m->debug) { sanity["list"].insert(name); } }
}
//if there are names in this bin add to new list
if (newNames != "") {
newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
- newList.push_back(newNames);
+ newList.push_back(newNames);
+ newBinLabels.push_back(binLabels[i]);
}
}
//print new listvector
if (newList.getNumBins() != 0) {
wroteSomething = true;
+ newList.setLabels(newBinLabels);
+ newList.printHeaders(out);
newList.print(out);
}
m->gobble(in);
+ out.close();
}
in.close();
- out.close();
+
if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine(); }
- outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
m->mothurOut("Selected " + toString(selectedCount) + " sequences from your list file."); m->mothurOutEndLine();
private:
set<string> names;
vector<string> outputNames;
- string accnosfile, accnosfile2, fastafile, namefile, countfile, groupfile, alignfile, listfile, taxfile, qualfile, outputDir;
+ string accnosfile, accnosfile2, fastafile, fastqfile, namefile, countfile, groupfile, alignfile, listfile, taxfile, qualfile, outputDir;
bool abort, dups;
map<string, string> uniqueMap;
//for debug
map<string, set<string> > sanity; //maps file type to names chosen for file. something like "fasta" -> vector<string>. If running in debug mode this is filled and we check to make sure all the files have the same names. If they don't we output the differences for the user.
int readFasta();
+ int readFastq();
int readName();
int readGroup();
int readCount();
int num = 0;
//go through each bin, find out if shared
+ vector<string> binLabels = shared->getLabels();
for (int i = 0; i < shared->getNumBins(); i++) {
if (m->control_pressed) { outNames.close(); m->mothurRemove(outputFileNames); return 0; }
//find group
string seqGroup = groupMap->getGroup(name);
if (output != "accnos") {
- namesOfSeqsInThisBin.push_back((name + "|" + seqGroup + "|" + toString(i+1)));
+ namesOfSeqsInThisBin.push_back((name + "|" + seqGroup + "|" + binLabels[i]));
}else { namesOfSeqsInThisBin.push_back(name); }
if (seqGroup == "not found") { m->mothurOut(name + " is not in your groupfile. Please correct."); m->mothurOutEndLine(); exit(1); }
for(int j = 0; j < lookup.size(); j++) {
string seqGroup = lookup[j]->getGroup();
- string name = m->currentBinLabels[i];
+ string name = m->currentSharedBinLabels[i];
if (lookup[j]->getAbundance(i) != 0) {
if (output != "accnos") {
}else{
m->mothurOut("Error: no list vector!"); m->mothurOutEndLine(); return 0;
}
-
+
+ list->printHeaders(listFile);
+
float previousDist = 0.00000;
float rndPreviousDist = 0.00000;
oldRAbund = *rabund;
}
//sort lookup so shared bins are on top
- vector<string> sortedLabels = m->currentBinLabels;
+ vector<string> sortedLabels = m->currentSharedBinLabels;
if (sorted != "none") { sortedLabels = sortSharedVectors(lookup); }
vector<vector<string> > scaleRelAbund;
map<int, int> place; //spot in lookup where you insert shared by, ie, 3 -> 2 if they are shared by 3 inset into location 2.
map<int, int>::iterator it;
- vector<string> sortedLabels = m->currentBinLabels;
+ vector<string> sortedLabels = m->currentSharedBinLabels;
/****************** find order of otus **********************/
if (sorted == "shared") {
int newAbund = looktemp[j]->getAbundance(i); // 1 -> 3
lookup[j]->set(place[i], newAbund, looktemp[j]->getGroup()); //binNumber, abundance, group
}
- sortedLabels[place[i]] = m->currentBinLabels[i];
+ sortedLabels[place[i]] = m->currentSharedBinLabels[i];
}
//delete looktemp -- Sarah look at - this is causing segmentation faults
}
//sort lookup so shared bins are on top
- vector<string> sortedLabels = m->currentBinLabels;
+ vector<string> sortedLabels = m->currentSharedBinLabels;
if (sorted != "none") { sortedLabels = sortSharedVectors(lookup); }
vector<vector<string> > scaleRelAbund;
map<int, int> place; //spot in lookup where you insert shared by, ie, 3 -> 2 if they are shared by 3 inset into location 2.
map<int, int>::iterator it;
- vector<string> sortedLabels = m->currentBinLabels;
+ vector<string> sortedLabels = m->currentSharedBinLabels;
/****************** find order of otus **********************/
if (sorted == "shared") {
for (int j = 0; j < looktemp.size(); j++) { // 3 -> 2
float newAbund = looktemp[j]->getAbundance(i); // 1 -> 3
lookup[j]->set(place[i], newAbund, looktemp[j]->getGroup()); //binNumber, abundance, group
- sortedLabels[place[i]] = m->currentBinLabels[i];
+ sortedLabels[place[i]] = m->currentSharedBinLabels[i];
}
}
if (m->control_pressed) { out.close(); return 0; }
- out << m->currentBinLabels[j] << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j] << '\t';
+ out << m->currentSharedBinLabels[j] << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j] << '\t';
if (pValues[j] > (1/(float)iters)) { out << pValues[j] << endl; }
else { out << "<" << (1/(float)iters) << endl; }
if (pValues[j] <= 0.05) {
- cout << m->currentBinLabels[j] << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j] << '\t';
+ cout << m->currentSharedBinLabels[j] << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j] << '\t';
string pValueString = "<" + toString((1/(float)iters));
if (pValues[j] > (1/(float)iters)) { pValueString = toString(pValues[j]); cout << pValues[j];}
else { cout << "<" << (1/(float)iters); }
- m->mothurOutJustToLog(m->currentBinLabels[j] + "\t" + indicatorGroups[j] + "\t" + toString(indicatorValues[j]) + "\t" + pValueString);
+ m->mothurOutJustToLog(m->currentSharedBinLabels[j] + "\t" + indicatorGroups[j] + "\t" + toString(indicatorValues[j]) + "\t" + pValueString);
m->mothurOutEndLine();
}
}
//print headings
out << "TreeNode\t";
- for (int i = 0; i < numBins; i++) { out << m->currentBinLabels[i] << "_IndGroups" << '\t' << m->currentBinLabels[i] << "_IndValue" << '\t' << "pValue" << '\t'; }
+ for (int i = 0; i < numBins; i++) { out << m->currentSharedBinLabels[i] << "_IndGroups" << '\t' << m->currentSharedBinLabels[i] << "_IndValue" << '\t' << "pValue" << '\t'; }
out << endl;
m->mothurOutEndLine(); m->mothurOut("Node\tSpecies\tIndicator_Groups\tIndicatorValue\tpValue\n");
}
if (pValues[j] <= 0.05) {
- cout << i+1 << '\t' << m->currentBinLabels[j] << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j] << '\t';
+ cout << i+1 << '\t' << m->currentSharedBinLabels[j] << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j] << '\t';
string pValueString = "<" + toString((1/(float)iters));
if (pValues[j] > (1/(float)iters)) { pValueString = toString(pValues[j]); cout << pValues[j];}
else { cout << "<" << (1/(float)iters); }
- m->mothurOutJustToLog(toString(i) + "\t" + m->currentBinLabels[j] + "\t" + indicatorGroups[j] + "\t" + toString(indicatorValues[j]) + "\t" + pValueString);
+ //log the same 1-based node number that is printed to the screen above
+ m->mothurOutJustToLog(toString(i+1) + "\t" + m->currentSharedBinLabels[j] + "\t" + indicatorGroups[j] + "\t" + toString(indicatorValues[j]) + "\t" + pValueString);
m->mothurOutEndLine();
}
}
double H = linear.calcKruskalWallis(values, pValue);
//output H and signifigance
- out << m->currentBinLabels[i] << '\t' << H << '\t' << pValue << endl;
+ out << m->currentSharedBinLabels[i] << '\t' << H << '\t' << pValue << endl;
}
out.close();
if (maxMean > logMaxMean) { logMaxMean = maxMean; }
logMaxMean = log10(logMaxMean);
- out << m->currentBinLabels[i] << '\t' << logMaxMean << '\t';
- if (m->debug) { temp = m->currentBinLabels[i] + '\t' + toString(logMaxMean) + '\t'; }
+ out << m->currentSharedBinLabels[i] << '\t' << logMaxMean << '\t';
+ if (m->debug) { temp = m->currentSharedBinLabels[i] + '\t' + toString(logMaxMean) + '\t'; }
map<int, double>::iterator it = sigLDA.find(i);
if (it != sigLDA.end()) {
for (map<int, double>::iterator it = bins.begin(); it != bins.end(); it++) {
if (m->control_pressed) { break; }
- cout << m->currentBinLabels[it->first] << " <- c(";
+ cout << m->currentSharedBinLabels[it->first] << " <- c(";
for (int h = 0; h < rand_s.size()-1; h++) { cout << (adjustedLookup[count][rand_s[h]]) << ", "; }
cout << (adjustedLookup[count][rand_s[rand_s.size()-1]]) << ")\n";
count++;
for (map<int, double>::iterator it = bins.begin(); it != bins.end(); it++) {
if (m->control_pressed) { break; }
- tempOutput += "\"" + m->currentBinLabels[it->first] + "\"=" + m->currentBinLabels[it->first] + ",";
+ tempOutput += "\"" + m->currentSharedBinLabels[it->first] + "\"=" + m->currentSharedBinLabels[it->first] + ",";
}
//tempOutput = tempOutput.substr(0, tempOutput.length()-1);
tempOutput += " class=treatments";
for (map<int, double>::iterator it = bins.begin(); it != bins.end(); it++) {
if (m->control_pressed) { break; }
- tempOutput += m->currentBinLabels[it->first] + "+";
+ tempOutput += m->currentSharedBinLabels[it->first] + "+";
}
tempOutput = tempOutput.substr(0, tempOutput.length()-1); //rip off extra plus sign
tempOutput += "), data = dat, tol = 1e-10))";
lookup.push_back(temp);
}
- m->currentBinLabels.clear();
+ m->currentSharedBinLabels.clear();
int count = 0;
while (!in.eof()) {
if (m->control_pressed) { return 0; }
lookup[i-1]->push_back(value, toString(i-1));
//cout << pieces[i] << '\t';
}
- m->currentBinLabels.push_back(toString(count));
+ m->currentSharedBinLabels.push_back(toString(count));
//m->currentBinLabels.push_back(pieces[0]);
//cout << line<< endl;
//cout << endl;
ofstream out;
m->openOutputFile(outputFileName, out);
- for (int i = 0; i < m->currentBinLabels.size(); i++) { out << m->currentBinLabels[i] << endl; }
+ for (int i = 0; i < m->currentSharedBinLabels.size(); i++) { out << m->currentSharedBinLabels[i] << endl; }
out.close();
ofstream out;
m->openOutputFile(outputFileName, out);
- for (int i = 0; i < m->currentBinLabels.size(); i++) { out << m->currentBinLabels[i] << endl; }
+ for (int i = 0; i < m->currentSharedBinLabels.size(); i++) { out << m->currentSharedBinLabels[i] << endl; }
out.close();
ofstream out;
m->openOutputFile(outputFileName, out);
- string snumBins = toString(list->getNumBins());
- for (int i = 0; i < list->getNumBins(); i++) {
- if (m->control_pressed) { break; }
-
- string otuLabel = "Otu";
- string sbinNumber = toString(i+1);
- if (sbinNumber.length() < snumBins.length()) {
- int diff = snumBins.length() - sbinNumber.length();
- for (int h = 0; h < diff; h++) { otuLabel += "0"; }
- }
- otuLabel += sbinNumber;
-
- out << otuLabel << endl;
- }
+ vector<string> binLabels = list->getLabels();
+ for (int i = 0; i < binLabels.size(); i++) { out << binLabels[i] << endl; }
out.close();
//**********************************************************************************************************************
vector<string> ListSeqsCommand::setParameters(){
try {
+ CommandParameter pfastq("fastq", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pfastq);
CommandParameter pfasta("fasta", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pfasta);
CommandParameter pname("name", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pname);
CommandParameter pcount("count", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pcount);
string ListSeqsCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The list.seqs command reads a fasta, name, group, count, list, taxonomy or alignreport file and outputs a .accnos file containing sequence names.\n";
- helpString += "The list.seqs command parameters are fasta, name, group, count, list, taxonomy and alignreport. You must provide one of these parameters.\n";
+ helpString += "The list.seqs command reads a fasta, name, group, count, list, taxonomy, fastq or alignreport file and outputs a .accnos file containing sequence names.\n";
+ helpString += "The list.seqs command parameters are fasta, name, group, count, list, taxonomy, fastq and alignreport. You must provide one of these parameters.\n";
helpString += "The list.seqs command should be in the following format: list.seqs(fasta=yourFasta).\n";
helpString += "Example list.seqs(fasta=amazon.fasta).\n";
helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["count"] = inputDir + it->second; }
}
+
+ it = parameters.find("fastq");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["fastq"] = inputDir + it->second; }
+ }
}
//check for required parameters
if (countfile == "not open") { abort = true; }
else if (countfile == "not found") { countfile = ""; }
else { m->setCountTableFile(countfile); }
+
+ fastqfile = validParameter.validFile(parameters, "fastq", true);
+ if (fastqfile == "not open") { abort = true; }
+ else if (fastqfile == "not found") { fastqfile = ""; }
- if ((countfile == "") && (fastafile == "") && (namefile == "") && (listfile == "") && (groupfile == "") && (alignfile == "") && (taxfile == "")) { m->mothurOut("You must provide a file."); m->mothurOutEndLine(); abort = true; }
+ if ((fastqfile == "") && (countfile == "") && (fastafile == "") && (namefile == "") && (listfile == "") && (groupfile == "") && (alignfile == "") && (taxfile == "")) { m->mothurOut("You must provide a file."); m->mothurOutEndLine(); abort = true; }
int okay = 1;
if (outputDir != "") { okay++; }
//read functions fill names vector
if (fastafile != "") { inputFileName = fastafile; readFasta(); }
+ else if (fastqfile != "") { inputFileName = fastqfile; readFastq(); }
else if (namefile != "") { inputFileName = namefile; readName(); }
else if (groupfile != "") { inputFileName = groupfile; readGroup(); }
else if (alignfile != "") { inputFileName = alignfile; readAlign(); }
exit(1);
}
}
+//**********************************************************************************************************************
+//Reads the fastq file named by fastqfile and appends the name of each record to names.
+//A record starts at a line whose first character is '@'. The name is the first whitespace-delimited
+//token of that line with the '@' removed, passed through m->checkName(). The three lines that follow
+//the header line are read and discarded. Lines that do not start with '@' are skipped.
+//Returns 0, also when the read is interrupted by m->control_pressed.
+int ListSeqsCommand::readFastq(){
+	try {
+		
+		ifstream in;
+		m->openInputFile(fastqfile, in);
+		
+		int count = 1;
+		
+		while(!in.eof()){
+			
+			if (m->control_pressed) { in.close(); return 0; }
+			
+			//read the (possible) header line of the next record
+			string line = m->getline(in); m->gobble(in);
+			
+			//kept separate from the lines skipped below so the debug message reports the sequence name
+			string name = line;
+			
+			if (!line.empty() && (line[0] == '@')) {
+				vector<string> splits = m->splitWhiteSpace(line);
+				name = splits[0];
+				name = name.substr(1); //drop the leading '@'
+				m->checkName(name);
+				names.push_back(name);
+				
+				//skip the remaining three lines of the record
+				for (int i = 0; i < 3; i++) { m->getline(in); m->gobble(in); }
+			}
+			
+			m->gobble(in);
+			if (m->debug) { count++; cout << "[DEBUG]: count = " + toString(count) + ", name = " + name + "\n"; }
+		}
+		in.close();
+		
+		return 0;
+		
+	}
+	catch(exception& e) {
+		m->errorOut(e, "ListSeqsCommand", "readFastq");
+		exit(1);
+	}
+}
//**********************************************************************************************************************
int ListSeqsCommand::readFasta(){
private:
vector<string> names, outputNames;
- string fastafile, namefile, groupfile, countfile, alignfile, inputFileName, outputDir, listfile, taxfile;
+ string fastafile, namefile, groupfile, countfile, alignfile, inputFileName, outputDir, listfile, taxfile, fastqfile;
bool abort;
int readFasta();
int readList();
int readTax();
int readCount();
-
+ int readFastq();
};
#endif
ListVector::ListVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) {
try {
int hold;
- f >> label >> hold;
+
+ //are we at the beginning of the file??
+ if (m->saveNextLabel == "") {
+ f >> label;
+
+ //is this a shared file that has headers
+ if (label == "label") {
+
+ //gets "numOtus"
+ f >> label; m->gobble(f);
+
+ //eat rest of line
+ label = m->getline(f); m->gobble(f);
+
+ //parse labels to save
+ istringstream iStringStream(label);
+ m->listBinLabelsInFile.clear();
+ while(!iStringStream.eof()){
+ if (m->control_pressed) { break; }
+ string temp;
+ iStringStream >> temp; m->gobble(iStringStream);
+
+ m->listBinLabelsInFile.push_back(temp);
+ }
+
+ f >> label >> hold;
+ }else {
+ //read in first row
+ f >> hold;
+
+ //make binlabels because we don't have any
+ string snumBins = toString(hold);
+ m->listBinLabelsInFile.clear();
+ for (int i = 0; i < hold; i++) {
+ //if there is a bin label use it otherwise make one
+ string binLabel = "Otu";
+ string sbinNumber = toString(i+1);
+ if (sbinNumber.length() < snumBins.length()) {
+ int diff = snumBins.length() - sbinNumber.length();
+ for (int h = 0; h < diff; h++) { binLabel += "0"; }
+ }
+ binLabel += sbinNumber;
+ m->listBinLabelsInFile.push_back(binLabel);
+ }
+ }
+ m->saveNextLabel = label;
+ }else {
+ f >> label >> hold;
+ m->saveNextLabel = label;
+ }
+ binLabels.assign(m->listBinLabelsInFile.begin(), m->listBinLabelsInFile.begin()+hold);
+
data.assign(hold, "");
string inputData = "";
set(i, inputData);
}
m->gobble(f);
+
+ if (f.eof()) { m->saveNextLabel = ""; }
}
catch(exception& e) {
m->errorOut(e, "ListVector", "ListVector");
string ListVector::get(int index){
return data[index];
}
+/***********************************************************************/
+//Replaces this vector's bin labels with a copy of the labels passed in.
+void ListVector::setLabels(vector<string> labels){
+	try {
+		binLabels.assign(labels.begin(), labels.end());
+	}
+	catch(exception& e) {
+		m->errorOut(e, "ListVector", "setLabels");
+		exit(1);
+	}
+}
+
+/***********************************************************************/
+//Returns the bin labels of this vector. When there are fewer labels than entries in data, the labels are first rebuilt in place: each existing label is regenerated from its m->getSimpleLabel() value, the missing ones are numbered from i+1, and all are zero-padded to the width of numBins behind an "Otu" prefix ("PhyloType" when m->sharedHeaderMode is "tax").
+//NOTE: the code below could potentially end up with duplicate bin label names. We don't currently use the labels in a way that would do that,
+//but if you had a list file that had been subsampled and then added to it, duplicate names would be possible.
+vector<string> ListVector::getLabels(){
+ try {
+
+ string tagHeader = "Otu"; //prefix used for every label built below
+ if (m->sharedHeaderMode == "tax") { tagHeader = "PhyloType"; }
+
+ if (binLabels.size() < data.size()) { //labels are only touched when some are missing
+ string snumBins = toString(numBins); //its length is the width the numeric part is padded to
+
+ for (int i = 0; i < numBins; i++) {
+ string binLabel = tagHeader;
+
+ if (i < binLabels.size()) { //label exists, check leading zeros length
+ string sbinNumber = m->getSimpleLabel(binLabels[i]); //presumably the label without its prefix and leading zeros - confirm against getSimpleLabel
+ if (sbinNumber.length() < snumBins.length()) {
+ int diff = snumBins.length() - sbinNumber.length();
+ for (int h = 0; h < diff; h++) { binLabel += "0"; }
+ }
+ binLabel += sbinNumber;
+ binLabels[i] = binLabel; //overwrite the stored label with the re-padded one
+ }else{ //no label stored for this bin, make one from its 1-based position
+ string sbinNumber = toString(i+1);
+ if (sbinNumber.length() < snumBins.length()) {
+ int diff = snumBins.length() - sbinNumber.length();
+ for (int h = 0; h < diff; h++) { binLabel += "0"; }
+ }
+ binLabel += sbinNumber;
+ binLabels.push_back(binLabel);
+ }
+ }
+ }
+ return binLabels;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ListVector", "getLabels");
+ exit(1);
+ }
+}
/***********************************************************************/
}
+/***********************************************************************/
+//Writes the header row of a list file to output: "label", "numOtus", then one tab-terminated
+//label per bin. A stored bin label is used when one exists for the bin; otherwise a label is
+//generated from the bin's 1-based position, zero-padded to the width of numBins and prefixed
+//with "PhyloType" when m->sharedHeaderMode is "tax" or "Otu" otherwise.
+//Sets m->printedListHeaders to true once the row has been written.
+void ListVector::printHeaders(ostream& output){
+	try {
+		//prefix for generated labels depends on the header mode
+		string prefix = "Otu";
+		if (m->sharedHeaderMode == "tax") { prefix = "PhyloType"; }
+		
+		string totalBins = toString(numBins);
+		
+		output << "label\tnumOtus\t";
+		for (int bin = 0; bin < numBins; bin++) {
+			string header;
+			if (bin < binLabels.size()) { header = binLabels[bin]; }
+			else {
+				//no stored label for this bin, build one
+				string binNumber = toString(bin+1);
+				header = prefix;
+				if (binNumber.length() < totalBins.length()) { header.append(totalBins.length() - binNumber.length(), '0'); }
+				header += binNumber;
+			}
+			output << header << '\t';
+		}
+		output << endl;
+		
+		m->printedListHeaders = true;
+	}
+	catch(exception& e) {
+		m->errorOut(e, "ListVector", "printHeaders");
+		exit(1);
+	}
+}
+
/***********************************************************************/
void ListVector::print(ostream& output){
ListVector(int);
// ListVector(const ListVector&);
ListVector(string, vector<string>);
- ListVector(const ListVector& lv) : DataVector(lv.label), data(lv.data), maxRank(lv.maxRank), numBins(lv.numBins), numSeqs(lv.numSeqs){};
+ ListVector(const ListVector& lv) : DataVector(lv.label), data(lv.data), maxRank(lv.maxRank), numBins(lv.numBins), numSeqs(lv.numSeqs), binLabels(lv.binLabels) {};
ListVector(ifstream&);
~ListVector(){};
void set(int, string);
string get(int);
+ vector<string> getLabels();
+ void setLabels(vector<string>);
void push_back(string);
void resize(int);
void clear();
int size();
void print(ostream&);
+ void printHeaders(ostream&);
RAbundVector getRAbundVector();
SAbundVector getSAbundVector();
int maxRank;
int numBins;
int numSeqs;
+ vector<string> binLabels;
};
#include "sharedrabundvector.h"
#include "inputdata.h"
#include "sharedutilities.h"
+#include "phylotree.h"
//taken from http://biom-format.org/documentation/biom_format.html
/* Minimal Sparse
try {
CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","biom",false,true,true); parameters.push_back(pshared);
CommandParameter pcontaxonomy("constaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pcontaxonomy);
- //CommandParameter preference("referencetax", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(preference);
+ CommandParameter preference("reftaxonomy", "InputTypes", "", "", "none", "none", "refPi","",false,false); parameters.push_back(preference);
CommandParameter pmetadata("metadata", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pmetadata);
CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
- //CommandParameter ppicrust("picrust", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppicrust);
+ CommandParameter ppicrust("picrust", "InputTypes", "", "", "none", "none", "refPi","shared",false,false); parameters.push_back(ppicrust);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
CommandParameter pmatrixtype("matrixtype", "Multiple", "sparse-dense", "sparse", "", "", "","",false,false); parameters.push_back(pmatrixtype);
string MakeBiomCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The make.biom command parameters are shared, contaxonomy, metadata, groups, matrixtype and label. shared is required, unless you have a valid current file.\n"; //, picrust and referencetax
+ //parameter names listed here must match the names registered in setParameters
+ helpString += "The make.biom command parameters are shared, constaxonomy, metadata, groups, matrixtype, picrust, reftaxonomy and label. shared is required, unless you have a valid current file.\n";
helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
helpString += "The matrixtype parameter allows you to select what type you would like to make. Choices are sparse and dense, default is sparse.\n";
helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile). Be SURE that the you are the constaxonomy file distance matches the shared file distance. ie, for *.0.03.cons.taxonomy set label=0.03. Mothur is smart enough to handle shared files that have been subsampled. It is used to assign taxonomy information to the metadata of rows.\n";
helpString += "The metadata parameter is used to provide experimental parameters to the columns. Things like 'sample1 gut human_gut'. \n";
- //helpString += "The picrust parameter is used to indicate the biom file is for input to picrust. NOTE: Picrust requires a greengenes taxonomy. \n";
- //helpString += "The referencetax parameter is used with the picrust parameter. Picrust requires the name of the reference taxonomy sequence to be in the biom file. \n";
+ helpString += "The picrust parameter is used to provide the greengenes OTU IDs map table. NOTE: Picrust requires a greengenes taxonomy. \n";
+ helpString += "The reftaxonomy parameter is used with the picrust parameter. Picrust requires the greengenes OTU IDs to be in the biom file. \n";
helpString += "The make.biom command should be in the following format: make.biom(shared=yourShared, groups=yourGroups, label=yourLabels).\n";
helpString += "Example make.biom(shared=abrecovery.an.shared, groups=A-B-C).\n";
helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
try {
string pattern = "";
- if (type == "biom") { pattern = "[filename],[distance],biom"; }
+ if (type == "biom") { pattern = "[filename],[distance],biom"; }
+ else if (type == "shared") { pattern = "[filename],[distance],biom_shared"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
return pattern;
setParameters();
vector<string> tempOutNames;
outputTypes["biom"] = tempOutNames;
+ outputTypes["shared"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "MakeBiomCommand", "MakeBiomCommand");
//initialize outputTypes
vector<string> tempOutNames;
outputTypes["biom"] = tempOutNames;
+ outputTypes["shared"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
if (path == "") { parameters["constaxonomy"] = inputDir + it->second; }
}
- it = parameters.find("referencetax");
+ it = parameters.find("reftaxonomy");
//user has given a template file
if(it != parameters.end()){
path = m->hasPath(it->second);
//if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["referencetax"] = inputDir + it->second; }
+ if (path == "") { parameters["reftaxonomy"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("picrust");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["picrust"] = inputDir + it->second; }
}
it = parameters.find("metadata");
if (contaxonomyfile == "not found") { contaxonomyfile = ""; }
else if (contaxonomyfile == "not open") { contaxonomyfile = ""; abort = true; }
- //referenceTax = validParameter.validFile(parameters, "referencetax", true);
- //if (referenceTax == "not found") { referenceTax = ""; }
- //else if (referenceTax == "not open") { referenceTax = ""; abort = true; }
+ referenceTax = validParameter.validFile(parameters, "reftaxonomy", true);
+ if (referenceTax == "not found") { referenceTax = ""; }
+ else if (referenceTax == "not open") { referenceTax = ""; abort = true; }
+
+ picrustOtuFile = validParameter.validFile(parameters, "picrust", true);
+ if (picrustOtuFile == "not found") { picrustOtuFile = ""; }
+ else if (picrustOtuFile == "not open") { picrustOtuFile = ""; abort = true; }
metadatafile = validParameter.validFile(parameters, "metadata", true);
if (metadatafile == "not found") { metadatafile = ""; }
if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
else { allLines = 1; }
}
-
- //string temp = validParameter.validFile(parameters, "picrust", false); if (temp == "not found"){ temp = "f"; }
- //picrust = m->isTrue(temp);
- //if (picrust && ((contaxonomyfile == "") || (referenceTax == ""))) {
- //m->mothurOut("[ERROR]: the picrust parameter requires a consensus taxonomy with greengenes taxonomy the reference."); m->mothurOutEndLine(); abort = true;
- //}
- picrust=false;
groups = validParameter.validFile(parameters, "groups", false);
if (groups == "not found") { groups = ""; }
m->setGroups(Groups);
}
+ if (picrustOtuFile != "") {
+ picrust=true;
+ if (contaxonomyfile == "") { m->mothurOut("[ERROR]: the constaxonomy parameter is required with the picrust parameter, aborting."); m->mothurOutEndLine(); abort = true; }
+ if (referenceTax == "") { m->mothurOut("[ERROR]: the reftaxonomy parameter is required with the picrust parameter, aborting."); m->mothurOutEndLine(); abort = true; }
+ }else { picrust=false; }
+
if ((contaxonomyfile != "") && (labels.size() > 1)) { m->mothurOut("[ERROR]: the contaxonomy parameter cannot be used with multiple labels."); m->mothurOutEndLine(); abort = true; }
format = validParameter.validFile(parameters, "matrixtype", false); if (format == "not found") { format = "sparse"; }
out << "{\n" + spaces + "\"id\":\"" + sharedfile + "-" + lookup[0]->getLabel() + "\",\n" + spaces + "\"format\": \"Biological Observation Matrix 0.9.1\",\n" + spaces + "\"format_url\": \"http://biom-format.org\",\n";
out << spaces + "\"type\": \"OTU table\",\n" + spaces + "\"generated_by\": \"" << mothurString << "\",\n" + spaces + "\"date\": \"" << dateString << "\",\n";
+
+ vector<string> metadata = getMetaData(lookup);
int numBins = lookup[0]->getNumBins();
- vector<string> picrustLabels;
- vector<string> metadata = getMetaData(lookup, picrustLabels);
if (m->control_pressed) { out.close(); return 0; }
string rowBack = "\", \"metadata\":";
for (int i = 0; i < numBins-1; i++) {
if (m->control_pressed) { out.close(); return 0; }
- if (!picrust) { out << rowFront << m->currentBinLabels[i] << rowBack << metadata[i] << "},\n"; }
- else { out << rowFront << picrustLabels[i] << rowBack << metadata[i] << "},\n"; }
+ out << rowFront << m->currentSharedBinLabels[i] << rowBack << metadata[i] << "},\n";
}
- if (!picrust) { out << rowFront << m->currentBinLabels[(numBins-1)] << rowBack << metadata[(numBins-1)] << "}\n" + spaces + "],\n"; }
- else { out << rowFront << picrustLabels[(numBins-1)] << rowBack << metadata[(numBins-1)] << "}\n" + spaces + "],\n"; }
+ out << rowFront << m->currentSharedBinLabels[(numBins-1)] << rowBack << metadata[(numBins-1)] << "}\n" + spaces + "],\n";
+
//get column info
/*"columns": [
{"id":"Sample1", "metadata":null},
}
}
//**********************************************************************************************************************
-vector<string> MakeBiomCommand::getMetaData(vector<SharedRAbundVector*>& lookup, vector<string>& picrustLabels){
+vector<string> MakeBiomCommand::getMetaData(vector<SharedRAbundVector*>& lookup){
try {
vector<string> metadata;
in.close();
//should the labels be Otu001 or PhyloType001
- string firstBin = m->currentBinLabels[0];
+ string firstBin = m->currentSharedBinLabels[0];
string binTag = "Otu";
if ((firstBin.find("Otu")) == string::npos) { binTag = "PhyloType"; }
}else { labelTaxMap[m->getSimpleLabel(otuLabels[i])] = taxs[i]; }
}
+ //merges OTUs classified to same gg otuid, sets otulabels to gg otuids, averages confidence scores of merged otus. overwritting of otulabels is fine because constaxonomy only allows for one label to be processed. If this assumption changes, could cause bug.
+ if (picrust) { getGreenGenesOTUIDs(lookup, labelTaxMap); }
//{"taxonomy":["k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria", "o__Enterobacteriales", "f__Enterobacteriaceae", "g__Escherichia", "s__"]}
if (m->control_pressed) { return metadata; }
- it = labelTaxMap.find(m->getSimpleLabel(m->currentBinLabels[i]));
+ it = labelTaxMap.find(m->getSimpleLabel(m->currentSharedBinLabels[i]));
- if (it == labelTaxMap.end()) { m->mothurOut("[ERROR]: can't find taxonomy information for " + m->currentBinLabels[i] + ".\n"); m->control_pressed = true; }
+ if (it == labelTaxMap.end()) { m->mothurOut("[ERROR]: can't find taxonomy information for " + m->currentSharedBinLabels[i] + ".\n"); m->control_pressed = true; }
else {
- if (picrust) {
- string temp = it->second; m->removeConfidences(temp);
- picrustLabels.push_back(temp);
- }
vector<string> bootstrapValues;
string data = "{\"taxonomy\":[";
exit(1);
}
+}
+//**********************************************************************************************************************
+//Collapses mothur OTUs onto greengenes OTU ids for picrust output.
+//  lookup      - shared abundance vectors, one per group; on success they are deleted and replaced by
+//                vectors holding one bin per greengenes OTU id (abundances of merged OTUs are summed).
+//  labelTaxMap - mothur OTU label -> consensus taxonomy; on success it is replaced by
+//                greengenes OTU id -> taxonomy whose confidence scores are averaged over the merged OTUs.
+//Also overwrites m->currentSharedBinLabels with the greengenes ids and writes the merged table to a new shared file.
+//Always returns 0; failures are reported through m->mothurOut and m->control_pressed, in which case
+//lookup, labelTaxMap and the bin labels are left untouched.
+int MakeBiomCommand::getGreenGenesOTUIDs(vector<SharedRAbundVector*>& lookup, map<string, string>& labelTaxMap){
+    try {
+        //read reftaxonomy
+        PhyloTree phylo(referenceTax);
+
+        //read otu map file
+        map<string, string> otuMap = readGGOtuMap(); //maps reference ID -> OTU ID
+
+        if (m->control_pressed) { return 0; }
+
+        map<string, vector<string> > ggOTUIDs; //gg OTU id -> mothur OTU labels assigned to it
+        //loop through otu taxonomies
+        for (map<string, string>::iterator it = labelTaxMap.begin(); it != labelTaxMap.end(); it++) { //maps label -> consensus taxonomy
+            if (m->control_pressed) { break; }
+
+            //get list of reference ids that map to this taxonomy
+            vector<string> referenceIds = phylo.getSeqs(it->second);
+
+            if (m->control_pressed) { break; }
+
+            //look for each one in otu map to find match
+            string otuID = "not found";
+            string referenceString = "";
+            for (int i = 0; i < referenceIds.size(); i++) {
+                referenceString += referenceIds[i] + " ";
+                map<string, string>::iterator itMap = otuMap.find(referenceIds[i]);
+                if (itMap != otuMap.end()) { //found it
+                    otuID = itMap->second;
+                    break; //stop looking
+                }
+            }
+
+            //if found, add otu to ggOTUID list
+            if (otuID != "not found") {
+                ggOTUIDs[otuID].push_back(it->first); //save mothur OTU label, creating the list on first use
+            }else { m->mothurOut("[ERROR]: could not find OTUId for " + it->second + ". Its reference sequences are " + referenceString + ".\n"); m->control_pressed = true; }
+
+        }
+
+        //an unmatched taxonomy or a user interrupt leaves ggOTUIDs incomplete.
+        //Stop here, before lookup is replaced and a partial shared file is written.
+        if (m->control_pressed) { return 0; }
+
+        vector<SharedRAbundVector*> newLookup;
+        for (int i = 0; i < lookup.size(); i++) {
+            SharedRAbundVector* temp = new SharedRAbundVector();
+            temp->setLabel(lookup[i]->getLabel());
+            temp->setGroup(lookup[i]->getGroup());
+            newLookup.push_back(temp);
+        }
+
+        //position of each mothur OTU inside the current shared vectors
+        map<string, int> labelIndex;
+        for (int i = 0; i < m->currentSharedBinLabels.size(); i++) { labelIndex[m->getSimpleLabel(m->currentSharedBinLabels[i])] = i; }
+
+        vector<string> newBinLabels;
+        map<string, string> newLabelTaxMap;
+        //loop through ggOTUID list combining mothur otus and adjusting labels
+        //ggOTUIDs = 16097 -> <OTU01, OTU10, OTU22>
+        for (map<string, vector<string> >::iterator itMap = ggOTUIDs.begin(); itMap != ggOTUIDs.end(); itMap++) {
+            if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
+
+            //the first merged otu supplies the taxon names and the number of levels
+            map<string, string>::iterator it = labelTaxMap.find(m->getSimpleLabel(itMap->second[0]));
+            if (it == labelTaxMap.end()) {
+                m->mothurOut("[ERROR]: can't find taxonomy information for " + itMap->second[0] + ".\n"); m->control_pressed = true;
+                for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; }
+                return 0;
+            }
+            vector<string> scores;
+            vector<string> taxonomies = parseTax(it->second, scores);
+
+            //merge/set OTU abundances
+            vector<int> abunds; abunds.resize(lookup.size(), 0);
+            string mergeString = "";
+            vector<float> boots; boots.resize(scores.size(), 0);
+            for (int j = 0; j < itMap->second.size(); j++) { //<OTU01, OTU10, OTU22>
+                string memberLabel = (itMap->second)[j];
+
+                //merge bootstrap scores, using THIS member's taxonomy so the average below is a real average
+                map<string, string>::iterator itMember = labelTaxMap.find(m->getSimpleLabel(memberLabel));
+                if (itMember != labelTaxMap.end()) {
+                    vector<string> memberScores;
+                    parseTax(itMember->second, memberScores);
+                    //a member may carry fewer levels than the first otu; only add the levels it has
+                    for (int i = 0; (i < boots.size()) && (i < memberScores.size()); i++) {
+                        float tempScore; m->mothurConvert(memberScores[i], tempScore);
+                        boots[i] += tempScore;
+                    }
+                }
+
+                //merge abunds
+                mergeString += memberLabel + " ";
+                for (int i = 0; i < lookup.size(); i++) {
+                    abunds[i] += lookup[i]->getAbundance(labelIndex[m->getSimpleLabel(memberLabel)]);
+                }
+            }
+
+            if (m->debug) { m->mothurOut("[DEBUG]: merging " + mergeString + " for ggOTUid = " + itMap->first + ".\n"); }
+
+            //average scores
+            for (int j = 0; j < boots.size(); j++) { boots[j] /= (float) itMap->second.size(); }
+
+            //assemble new taxonomy
+            string newTaxString = "";
+            for (int j = 0; j < boots.size(); j++) {
+                newTaxString += taxonomies[j] + "(" + toString(boots[j]) + ");";
+            }
+
+            //set new gg otu id to taxonomy. OTU01 -> k__Bacteria becomes 16097 -> k__Bacteria
+            newLabelTaxMap[itMap->first] = newTaxString;
+
+            //add merged otu to new lookup
+            for (int j = 0; j < abunds.size(); j++) { newLookup[j]->push_back(abunds[j], newLookup[j]->getGroup()); }
+
+            //saved otu label
+            newBinLabels.push_back(itMap->first);
+        }
+
+        //swap the merged data in for the caller
+        for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; }
+
+        lookup = newLookup;
+        m->currentSharedBinLabels = newBinLabels;
+        labelTaxMap = newLabelTaxMap;
+
+        //write the merged table out as a shared file
+        map<string, string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+        variables["[distance]"] = lookup[0]->getLabel();
+        string outputFileName = getOutputFileName("shared",variables);
+        ofstream out;
+        m->openOutputFile(outputFileName, out);
+        outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
+
+        lookup[0]->printHeaders(out);
+
+        for (int i = 0; i < lookup.size(); i++) {
+            out << lookup[i]->getLabel() << '\t' << lookup[i]->getGroup() << '\t';
+            lookup[i]->print(out);
+        }
+        out.close();
+
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MakeBiomCommand", "getGreenGenesOTUIDs");
+        exit(1);
+    }
+
+}
+//**********************************************************************************************************************
+//Reads picrustOtuFile and returns a map of reference sequence id -> greengenes OTU id.
+//For each non-blank line the second whitespace separated token is used as the OTU id, and every
+//token from the second onward (including the OTU id itself) is mapped to it. The first token is not stored.
+//Lines with fewer than two tokens are skipped. Reading stops early if m->control_pressed is set.
+map<string, string> MakeBiomCommand::readGGOtuMap(){
+    try {
+        map<string, string> otuMap;
+
+        ifstream in;
+        m->openInputFile(picrustOtuFile, in);
+
+        //map referenceIDs -> otuIDs
+        //lines look like:
+        //16097 671376 616121 533566 683683 4332909 4434717 772666 611808 695209
+        while(!in.eof()) {
+            if (m->control_pressed) { break; }
+
+            string line = m->getline(in); m->gobble(in);
+            vector<string> pieces = m->splitWhiteSpace(line);
+
+            //need at least two tokens: pieces[1] is read below, so a one-token line would index out of range
+            if (pieces.size() > 1) {
+                string otuID = pieces[1];
+                for (int i = 1; i < pieces.size(); i++) { otuMap[pieces[i]] = otuID; }
+            }
+        }
+        in.close();
+
+        return otuMap;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MakeBiomCommand", "readGGOtuMap");
+        exit(1);
+    }
+
}
//**********************************************************************************************************************
int MakeBiomCommand::getSampleMetaData(vector<SharedRAbundVector*>& lookup){
private:
- string sharedfile, contaxonomyfile, metadatafile, groups, outputDir, format, label, referenceTax;
+ string sharedfile, contaxonomyfile, metadatafile, groups, outputDir, format, label, referenceTax, picrustOtuFile;
vector<string> outputNames, Groups, sampleMetadata;
set<string> labels;
bool abort, allLines, picrust;
int getBiom(vector<SharedRAbundVector*>&);
- vector<string> getMetaData(vector<SharedRAbundVector*>&, vector<string>&);
+ vector<string> getMetaData(vector<SharedRAbundVector*>&);
vector<string> parseTax(string tax, vector<string>& scores);
int getSampleMetaData(vector<SharedRAbundVector*>&);
+ //for picrust
+ int getGreenGenesOTUIDs(vector<SharedRAbundVector*>&, map<string, string>&);
+ map<string, string> readGGOtuMap();
};
if (uniques.size() != 0) {
for (itUniques = uniques.begin(); itUniques != uniques.end(); itUniques++) {
+ if (m->control_pressed) { break; }
m->mothurOut("[WARNING]: did not find paired read for " + itUniques->first + ", ignoring.\n");
}
for (map<string, pairFastqRead>:: iterator it = pairUniques.begin(); it != pairUniques.end(); it++) {
+ if (m->control_pressed) { break; }
m->mothurOut("[WARNING]: did not find paired read for " + (it->first).substr(1) + ", ignoring.\n");
}
m->mothurOutEndLine();
vector<int> qualScores = convertQual(quality);
+ m->checkName(name);
read.name = name;
read.sequence = sequence;
read.scores = qualScores;
else { uniquePrimers.insert(tempPair); }
if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } }
-
primers[indexPrimer]=newPrimer; indexPrimer++;
primerNameVector.push_back(group);
}else if(type == "BARCODE"){
CYGWIN_BUILD ?= no
USECOMPRESSION ?= no
MOTHUR_FILES="\"Enter_your_default_path_here\""
-RELEASE_DATE = "\"10/01/2013\""
-VERSION = "\"1.32.0\""
+RELEASE_DATE = "\"10/16/2013\""
+VERSION = "\"1.32.1\""
FORTAN_COMPILER = gfortran
FORTRAN_FLAGS =
for (int i = 0; i < lookup[0]->getNumBins(); i++) { //process each otu
if (m->control_pressed) { break; }
- string nameOfOtu = m->currentBinLabels[i];
+ string nameOfOtu = m->currentSharedBinLabels[i];
if (constaxonomyfile != "") { //try to find the otuName in consTax to replace with consensus taxonomy
map<string, consTax2>::iterator it = consTax.find(nameOfOtu);
if (it != consTax.end()) {
//remove confidences and change ; to |
m->removeConfidences(nameOfOtu);
for (int j = 0; j < nameOfOtu.length()-1; j++) {
- if (nameOfOtu[j] == ';') { fixedName += "_" + m->currentBinLabels[i] + '|'; }
+ if (nameOfOtu[j] == ';') { fixedName += "_" + m->currentSharedBinLabels[i] + '|'; }
else { fixedName += nameOfOtu[j]; }
}
nameOfOtu = fixedName;
m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
- if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
+ if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); }
process(lookup, out);
processedLabels.insert(lookup[0]->getLabel());
lookup = input.getSharedRAbundVectors(lastLabel);
m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
- if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
+ if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); }
process(lookup, out);
processedLabels.insert(lookup[0]->getLabel());
m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
- if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
+ if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); }
process(lookup, out);
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
out << subset[subset.size()-1]->getGroup() << endl;
for (int i = 0; i < subset[0]->getNumBins(); i++) {
- out << m->currentBinLabels[i] << '\t';
+ out << m->currentSharedBinLabels[i] << '\t';
for (int j = 0; j < subset.size()-1; j++) {
out << subset[j]->getAbundance(i) << '\t';
}
m->openOutputFile(rabundFileName, rabundFile);
}
m->openOutputFile(listFileName, listFile);
+ list->printHeaders(listFile);
if (m->control_pressed) {
delete nameMap; delete read; delete list; delete rabund;
if (m->control_pressed) { out.close(); return 0; }
//if there are binlabels use them otherwise count.
- if (m->binLabelsInFile.size() == row) { out << m->binLabelsInFile[i] << '\t'; }
+ if (i < m->currentSharedBinLabels.size()) { out << m->currentSharedBinLabels[i] << '\t'; }
else { out << (i+1) << '\t'; }
out << C1[i][0] << '\t' << C1[i][1] << '\t' << C1[i][2] << '\t' << C2[i][0] << '\t' << C2[i][1] << '\t' << C2[i][2] << '\t' << pvalues[i] << '\t' << qvalues[i] << endl;
bool MothurOut::dirCheck(string& dirName){
try {
+ if (dirName == "") { return false; }
+
string tag = "";
#ifdef USE_MPI
int pid;
//test to make sure directory exists
dirName = getFullPathName(dirName);
- string outTemp = dirName + tag + "temp";
+ string outTemp = dirName + tag + "temp"+ toString(time(NULL));
ofstream out;
out.open(outTemp.c_str(), ios::trunc);
if(!out) {
exit(1);
}
}
+/***********************************************************************/
+// Opens fileName on fileHandle in binary mode, reporting problems through mothurOut.
+//   fileName   - file to open; it is expanded with getFullPathName first.
+//   fileHandle - stream that is opened with ios::binary.
+// Returns 1 if the stream could not be opened, 0 otherwise. A blank file only produces an
+// error message; the function still returns 0 in that case.
+// On the unix-like platforms listed below, and only when USE_COMPRESSION is defined, a name ending in
+// .gz or .bz2 is decompressed by a forked child (zcat / bzcat) into a named pipe, and the pipe is opened instead.
+int MothurOut::openInputFileBinary(string fileName, ifstream& fileHandle){
+ try {
+
+ //get full path name
+ string completeFileName = getFullPathName(fileName);
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#ifdef USE_COMPRESSION
+ // check for gzipped or bzipped file
+ if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
+ // NOTE(review): tmpnam only produces a name; another process could claim it before mkfifo runs - confirm this is acceptable
+ string tempName = string(tmpnam(0));
+ mkfifo(tempName.c_str(), 0666);
+ int fork_result = fork();
+ if (fork_result < 0) {
+ cerr << "Error forking.\n";
+ exit(1);
+ } else if (fork_result == 0) {
+ // child: stream the decompressed data into the pipe, remove the pipe, then exit
+ string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
+ cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
+ system(command.c_str());
+ cerr << "Done decompressing " << completeFileName << "\n";
+ mothurRemove(tempName);
+ exit(EXIT_SUCCESS);
+ } else {
+ // parent: read from the pipe instead of the compressed file
+ cerr << "waiting on child process " << fork_result << "\n";
+ completeFileName = tempName;
+ }
+ }
+#endif
+#endif
+
+ fileHandle.open(completeFileName.c_str(), ios::binary);
+ if(!fileHandle) {
+ mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
+ return 1;
+ }
+ else {
+ //check for blank file
+ // NOTE(review): gobble presumably skips leading whitespace; on binary data that could consume real leading bytes - verify
+ gobble(fileHandle);
+ if (fileHandle.eof()) { mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct."); mothurOutEndLine(); }
+
+ return 0;
+ }
+ }
+ catch(exception& e) {
+ errorOut(e, "MothurOut", "openInputFileBinary");
+ exit(1);
+ }
+}
+/***********************************************************************/
+// Quiet variant of openInputFileBinary: opens fileName on fileHandle in binary mode without
+// printing "could not open" or "is blank" messages (both are commented out below).
+//   fileName   - file to open; it is expanded with getFullPathName first.
+//   fileHandle - stream that is opened with ios::binary.
+//   noerror    - not read anywhere in the body; it only selects this overload.
+// Returns 1 if the stream could not be opened, 0 otherwise.
+// On the unix-like platforms listed below, and only when USE_COMPRESSION is defined, a name ending in
+// .gz or .bz2 is decompressed by a forked child (zcat / bzcat) into a named pipe, and the pipe is opened instead.
+int MothurOut::openInputFileBinary(string fileName, ifstream& fileHandle, string noerror){
+ try {
+
+ //get full path name
+ string completeFileName = getFullPathName(fileName);
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#ifdef USE_COMPRESSION
+ // check for gzipped or bzipped file
+ if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
+ // NOTE(review): tmpnam only produces a name; another process could claim it before mkfifo runs - confirm this is acceptable
+ string tempName = string(tmpnam(0));
+ mkfifo(tempName.c_str(), 0666);
+ int fork_result = fork();
+ if (fork_result < 0) {
+ cerr << "Error forking.\n";
+ exit(1);
+ } else if (fork_result == 0) {
+ // child: stream the decompressed data into the pipe, remove the pipe, then exit
+ string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
+ cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
+ system(command.c_str());
+ cerr << "Done decompressing " << completeFileName << "\n";
+ mothurRemove(tempName);
+ exit(EXIT_SUCCESS);
+ } else {
+ // parent: read from the pipe instead of the compressed file
+ cerr << "waiting on child process " << fork_result << "\n";
+ completeFileName = tempName;
+ }
+ }
+#endif
+#endif
+
+ fileHandle.open(completeFileName.c_str(), ios::binary);
+ if(!fileHandle) {
+ //mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
+ return 1;
+ }
+ else {
+ //check for blank file
+ // NOTE(review): gobble presumably skips leading whitespace; on binary data that could consume real leading bytes - verify
+ gobble(fileHandle);
+ //if (fileHandle.eof()) { mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct."); mothurOutEndLine(); }
+
+ return 0;
+ }
+ }
+ catch(exception& e) {
+ errorOut(e, "MothurOut", "openInputFileBinary - no error");
+ exit(1);
+ }
+}
+
/***********************************************************************/
int MothurOut::renameFile(string oldName, string newName){
exit(1);
}
}
+/**************************************************************************************************/
+//Appends the raw bytes of temp onto the end of filename.
+//  temp     - file to read; opened with the silent openInputFileBinary overload.
+//  filename - file to extend; opened with openOutputFileBinaryAppend.
+//Returns the result of opening temp: 0 if it was opened and copied, otherwise the non-zero
+//open status, in which case nothing is copied.
+int MothurOut::appendBinaryFiles(string temp, string filename) {
+    try{
+        ofstream output;
+        ifstream input;
+
+        //open output file in append mode
+        openOutputFileBinaryAppend(filename, output);
+        int ableToOpen = openInputFileBinary(temp, input, "no error");
+
+        if (ableToOpen == 0) { //you opened it
+
+            char buffer[4096];
+            //test the whole stream state, not just eof(): a read that fails without reaching the end
+            //never sets eofbit, so an eof-only test would loop forever
+            while (input.good()) {
+                input.read(buffer, sizeof(buffer));
+                //the final read is usually short; write only the bytes actually read
+                if (input.gcount() > 0) { output.write(buffer, input.gcount()); }
+            }
+            input.close();
+        }
+
+        output.close();
+
+        return ableToOpen;
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "appendBinaryFiles");
+        exit(1);
+    }
+}
+
/**************************************************************************************************/
int MothurOut::appendFilesWithoutHeaders(string temp, string filename) {
try{
}
}
/***********************************************************************/
+//Tries to locate filename by opening it, in order, at: the full path of the name given, inputDir,
+//the default path, and a directory derived from the stored executable path (argv).
+//filename is rewritten to each location as it is tried, so on success it names the file that opened.
+//Returns true as soon as one location opens, false when none does.
+bool MothurOut::checkLocations(string& filename, string inputDir){
+    try {
+        filename = getFullPathName(filename);
+
+        //attempt 1: the name as given
+        ifstream firstTry;
+        int openStatus = openInputFile(filename, firstTry, "noerror");
+        firstTry.close();
+
+        //attempt 2: the input directory, when one is set
+        if ((openStatus == 1) && (inputDir != "")) {
+            string candidate = inputDir + getSimpleName(filename);
+            mothurOut("Unable to open " + filename + ". Trying input directory " + candidate); mothurOutEndLine();
+            ifstream probe;
+            openStatus = openInputFile(candidate, probe, "noerror");
+            probe.close();
+            filename = candidate;
+        }
+
+        //attempt 3: the default path, when one is set
+        if ((openStatus == 1) && (getDefaultPath() != "")) {
+            string candidate = getDefaultPath() + getSimpleName(filename);
+            mothurOut("Unable to open " + filename + ". Trying default " + candidate); mothurOutEndLine();
+            ifstream probe;
+            openStatus = openInputFile(candidate, probe, "noerror");
+            probe.close();
+            filename = candidate;
+        }
+
+        //attempt 4: mothur's executable location
+        if (openStatus == 1) {
+            //cut the stored executable path just before its last 'm' (found case-insensitively)
+            string exeDir = argv;
+            string lowered = exeDir;
+            for (int pos = 0; pos < exeDir.length(); pos++) { lowered[pos] = tolower(exeDir[pos]); }
+            exeDir = exeDir.substr(0, (lowered.find_last_of('m')));
+
+            string candidate = getFullPathName(exeDir) + getSimpleName(filename);
+            mothurOut("Unable to open " + filename + ". Trying mothur's executable location " + candidate); mothurOutEndLine();
+            ifstream probe;
+            openStatus = openInputFile(candidate, probe, "noerror");
+            probe.close();
+            filename = candidate;
+        }
+
+        if (openStatus != 1) { return true; }
+
+        //every location failed
+        mothurOut("Unable to open " + filename + "."); mothurOutEndLine();
+        return false;
+    }
+    catch(exception& e) {
+        errorOut(e, "MothurOut", "checkLocations");
+        exit(1);
+    }
+}
+/***********************************************************************/
//This function parses the estimator options and puts them in a vector
void MothurOut::splitAtChar(string& estim, vector<string>& container, char symbol) {
vector<string> getAllGroups() { sort(namesOfGroups.begin(), namesOfGroups.end()); return namesOfGroups; }
vector<string> Treenames;
- //map<string, string> names;
- vector<string> binLabelsInFile;
- vector<string> currentBinLabels;
+ vector<string> sharedBinLabelsInFile;
+ vector<string> currentSharedBinLabels;
+ vector<string> listBinLabelsInFile;
string saveNextLabel, argv, sharedHeaderMode, groupMode;
- bool printedHeaders, commandInputsConvertError, changedSeqNames, modifyNames;
+ bool printedSharedHeaders, printedListHeaders, commandInputsConvertError, changedSeqNames, modifyNames;
//functions from mothur.h
//file operations
vector<unsigned long long> setFilePosFasta(string, int&);
string sortFile(string, string);
int appendFiles(string, string);
+ int appendBinaryFiles(string, string);
int appendFilesWithoutHeaders(string, string);
int renameFile(string, string); //oldname, newname
string getFullPathName(string);
int openOutputFileAppend(string, ofstream&);
int openOutputFileBinaryAppend(string, ofstream&);
int openInputFile(string, ifstream&);
- int openInputFile(string, ifstream&, string); //no error given
+ int openInputFileBinary(string, ifstream&);
+ int openInputFileBinary(string, ifstream&, string);
+ int openInputFile(string, ifstream&, string); //no error given
+
+ bool checkLocations(string&, string); //filename, inputDir. checks for file in ./, inputdir, default and mothur's exe location. Returns false if cant be found. If found completes name with location
string getline(ifstream&);
string getline(istringstream&);
void gobble(istream&);
counttablefile = "";
summaryfile = "";
gui = false;
- printedHeaders = false;
+ printedSharedHeaders = false;
+ printedListHeaders = false;
commandInputsConvertError = false;
mothurCalling = false;
debug = false;
outBest.setf(ios::fixed, ios::floatfield);
outBest.setf(ios::showpoint);
- outBest << '\t';
+ outBest << "group" << '\t';
for (int k = 0; k < bestConfig.size(); k++) { outBest << "axis" << (k+1) << '\t'; }
outBest << endl;
int NormalizeSharedCommand::normalize(vector<SharedRAbundVector*>& thisLookUp){
try {
//save mothurOut's binLabels to restore for next label
- vector<string> saveBinLabels = m->currentBinLabels;
+ vector<string> saveBinLabels = m->currentSharedBinLabels;
if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
out.close();
- m->currentBinLabels = saveBinLabels;
+ m->currentSharedBinLabels = saveBinLabels;
return 0;
}
try {
//save mothurOut's binLabels to restore for next label
- vector<string> saveBinLabels = m->currentBinLabels;
+ vector<string> saveBinLabels = m->currentSharedBinLabels;
map<string, string> variables;
variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile));
out.close();
- m->currentBinLabels = saveBinLabels;
+ m->currentSharedBinLabels = saveBinLabels;
return 0;
}
for (int h = 0; h < diff; h++) { binLabel += "0"; }
}
binLabel += sbinNumber;
- if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; }
+ if (i < m->currentSharedBinLabels.size()) { binLabel = m->currentSharedBinLabels[i]; }
newBinLabels.push_back(binLabel);
}
for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; }
thislookup = newLookup;
- m->currentBinLabels = newBinLabels;
+ m->currentSharedBinLabels = newBinLabels;
return 0;
for (int h = 0; h < diff; h++) { binLabel += "0"; }
}
binLabel += sbinNumber;
- if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; }
+ if (i < m->currentSharedBinLabels.size()) { binLabel = m->currentSharedBinLabels[i]; }
newBinLabels.push_back(binLabel);
}
for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; }
thislookup = newLookup;
- m->currentBinLabels = newBinLabels;
+ m->currentSharedBinLabels = newBinLabels;
return 0;
else if (method == "kendall") { coef = linear.calcKendall(xy[i], xy[k], sig); }
else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
- if (sig < cutoff) { out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl; }
+ if (sig < cutoff) { out << m->currentSharedBinLabels[i] << '\t' << m->currentSharedBinLabels[k] << '\t' << coef << '\t' << sig << endl; }
}
}
}else { //compare otus to metadata
else if (method == "kendall") { coef = linear.calcKendall(xy[i], metadata[k], sig); }
else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
- if (sig < cutoff) { out << m->binLabelsInFile[i] << '\t' << metadataLabels[k] << '\t' << coef << '\t' << sig << endl; }
+ if (sig < cutoff) { out << m->currentSharedBinLabels[i] << '\t' << metadataLabels[k] << '\t' << coef << '\t' << sig << endl; }
}
}
else if (method == "kendall") { coef = linear.calcKendall(xy[i], xy[k], sig); }
else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
- if (sig < cutoff) { out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl; }
+ if (sig < cutoff) { out << m->currentSharedBinLabels[i] << '\t' << m->currentSharedBinLabels[k] << '\t' << coef << '\t' << sig << endl; }
}
}
}else { //compare otus to metadata
else if (method == "kendall") { coef = linear.calcKendall(xy[i], metadata[k], sig); }
else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
- if (sig < cutoff) { out << m->binLabelsInFile[i] << '\t' << metadataLabels[k] << '\t' << coef << '\t' << sig << endl; }
+ if (sig < cutoff) { out << m->currentSharedBinLabels[i] << '\t' << metadataLabels[k] << '\t' << coef << '\t' << sig << endl; }
}
}
}
//map sequences to bin number in the "little" otu
- map<string, int> littleBins;
+ map<string, int> littleBins;
+ vector<string> binLabels0 = lists[0].getLabels();
for (int i = 0; i < lists[0].getNumBins(); i++) {
if (m->control_pressed) { return 0; }
-
string bin = lists[0].get(i);
vector<string> names; m->splitAtComma(bin, names);
for (int j = 0; j < names.size(); j++) { littleBins[names[j]] = i; }
m->openOutputFile(outputFileName, out);
//go through each bin in "big" otu and output the bins in "little" otu which created it
+ vector<string> binLabels1 = lists[1].getLabels();
for (int i = 0; i < lists[1].getNumBins(); i++) {
if (m->control_pressed) { outputTypes.clear(); out.close(); m->mothurRemove(outputFileName); return 0; }
string binnames = lists[1].get(i);
vector<string> names; m->splitAtComma(binnames, names);
-
//output column 1
if (output == "name") { out << binnames << '\t'; }
- else { out << (i+1) << '\t'; }
+ else { out << binLabels1[i] << '\t'; }
map<int, int> bins; //bin numbers in little that are in this bin in big
map<int, int>::iterator it;
string col2 = "";
for (it = bins.begin(); it != bins.end(); it++) {
if (output == "name") { col2 += lists[0].get(it->first) + "\t"; }
- else { col2 += toString(it->first) + "\t"; }
+ else { col2 += binLabels0[it->first] + "\t"; }
}
//output column 2
vector<string> ParseFastaQCommand::setParameters(){
try {
CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pfastq);
+ CommandParameter poligos("oligos", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(poligos);
+ CommandParameter pgroup("group", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(pgroup);
+ CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
+ CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);
+ CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
+ CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
+ CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "","fasta",false,false); parameters.push_back(pfasta);
CommandParameter pqual("qfile", "Boolean", "", "T", "", "", "","qfile",false,false); parameters.push_back(pqual);
CommandParameter ppacbio("pacbio", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppacbio);
try {
string helpString = "";
helpString += "The fastq.info command reads a fastq file and creates a fasta and quality file.\n";
- helpString += "The fastq.info command parameters are fastq, fasta, qfile and format; fastq is required.\n";
+ helpString += "The fastq.info command parameters are fastq, fasta, qfile, oligos, group and format; fastq is required.\n";
helpString += "The fastq.info command should be in the following format: fastq.info(fastaq=yourFastaQFile).\n";
+ helpString += "The oligos parameter allows you to provide an oligos file to split your fastq file into separate fastq files by barcode and primers. \n";
+ helpString += "The group parameter allows you to provide a group file to split your fastq file into separate fastq files by group. \n";
+ helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the reads. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
+ helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
+ helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
+ helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
+ helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=sanger.\n";
helpString += "The fasta parameter allows you to indicate whether you want a fasta file generated. Default=T.\n";
helpString += "The qfile parameter allows you to indicate whether you want a quality file generated. Default=T.\n";
string pattern = "";
if (type == "fasta") { pattern = "[filename],fasta"; }
- else if (type == "qfile") { pattern = "[filename],qual"; }
+ else if (type == "qfile") { pattern = "[filename],qual"; }
+ else if (type == "fastq") { pattern = "[filename],[group],fastq"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
return pattern;
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
outputTypes["qfile"] = tempOutNames;
+ outputTypes["fastq"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "ParseFastaQCommand", "ParseFastaQCommand");
//**********************************************************************************************************************
ParseFastaQCommand::ParseFastaQCommand(string option){
try {
- abort = false; calledHelp = false;
+ abort = false; calledHelp = false;
+ split = 1;
if(option == "help") { help(); abort = true; calledHelp = true; }
else if(option == "citation") { citation(); abort = true; calledHelp = true;}
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
outputTypes["qfile"] = tempOutNames;
+ outputTypes["fastq"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["fastq"] = inputDir + it->second; }
}
+
+ it = parameters.find("oligos");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["oligos"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("group");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["group"] = inputDir + it->second; }
+ }
}
//check for required parameters
fastaQFile = validParameter.validFile(parameters, "fastq", true);
if (fastaQFile == "not found") { m->mothurOut("fastq is a required parameter for the fastq.info command."); m->mothurOutEndLine(); abort = true; }
- else if (fastaQFile == "not open") { fastaQFile = ""; abort = true; }
+ else if (fastaQFile == "not open") { fastaQFile = ""; abort = true; }
+
+ oligosfile = validParameter.validFile(parameters, "oligos", true);
+ if (oligosfile == "not found") { oligosfile = ""; }
+ else if (oligosfile == "not open") { oligosfile = ""; abort = true; }
+ else { m->setOligosFile(oligosfile); split = 2; }
+
+ groupfile = validParameter.validFile(parameters, "group", true);
+ if (groupfile == "not found") { groupfile = ""; }
+ else if (groupfile == "not open") { groupfile = ""; abort = true; }
+ else { m->setGroupFile(groupfile); split = 2; }
+
+ if ((groupfile != "") && (oligosfile != "")) { m->mothurOut("You must enter ONLY ONE of the following: oligos or group."); m->mothurOutEndLine(); abort = true; }
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(fastaQFile); }
temp = validParameter.validFile(parameters, "pacbio", false); if(temp == "not found"){ temp = "F"; }
pacbio = m->isTrue(temp);
+ temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }
+ m->mothurConvert(temp, bdiffs);
+
+ temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
+ m->mothurConvert(temp, pdiffs);
+
+ temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
+ m->mothurConvert(temp, ldiffs);
+
+ temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }
+ m->mothurConvert(temp, sdiffs);
+
+ temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); }
+ m->mothurConvert(temp, tdiffs);
+
+ if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }
+
format = validParameter.validFile(parameters, "format", false); if (format == "not found"){ format = "sanger"; }
if (fasta) { m->openOutputFile(fastaFile, outFasta); outputNames.push_back(fastaFile); outputTypes["fasta"].push_back(fastaFile); }
if (qual) { m->openOutputFile(qualFile, outQual); outputNames.push_back(qualFile); outputTypes["qfile"].push_back(qualFile); }
+
+ TrimOligos* trimOligos = NULL;
+ int numBarcodes, numPrimers; numBarcodes = 0; numPrimers = 0;
+ if (oligosfile != "") {
+ readOligos(oligosfile);
+ numPrimers = primers.size(); numBarcodes = barcodes.size();
+ //find group read belongs to
+ if (pairedOligos) { trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, pairedPrimers, pairedBarcodes); numBarcodes = pairedBarcodes.size(); numPrimers = pairedPrimers.size(); }
+ else { trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer); }
+
+ }
+ else if (groupfile != "") { readGroup(groupfile); }
ifstream in;
m->openInputFile(fastaQFile, in);
convertTable.push_back(temp);
}
+
+ int count = 0;
while (!in.eof()) {
if (m->control_pressed) { break; }
-
- //read sequence name
- string name = m->getline(in); m->gobble(in);
- if (name == "") { m->mothurOut("[ERROR]: Blank fasta name."); m->mothurOutEndLine(); m->control_pressed = true; break; }
- else if (name[0] != '@') { m->mothurOut("[ERROR]: reading " + name + " expected a name with @ as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
- else {
- name = name.substr(1);
- m->checkName(name);
- }
-
- //read sequence
- string sequence = m->getline(in); m->gobble(in);
- if (sequence == "") { m->mothurOut("[ERROR]: missing sequence for " + name); m->mothurOutEndLine(); m->control_pressed = true; break; }
-
- //read sequence name
- string name2 = m->getline(in); m->gobble(in);
- if (name2 == "") { m->mothurOut("[ERROR]: Blank quality name."); m->mothurOutEndLine(); m->control_pressed = true; break; }
- else if (name2[0] != '+') { m->mothurOut("[ERROR]: reading " + name2 + " expected a name with + as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
- else {
- name2 = name2.substr(1);
- m->checkName(name2);
- }
-
- //read quality scores
- string quality = m->getline(in); m->gobble(in);
- if (quality == "") { m->mothurOut("[ERROR]: missing quality for " + name2); m->mothurOutEndLine(); m->control_pressed = true; break; }
-
- //sanity check sequence length and number of quality scores match
- if (name2 != "") { if (name != name2) { m->mothurOut("[ERROR]: names do not match. read " + name + " for fasta and " + name2 + " for quality."); m->mothurOutEndLine(); m->control_pressed = true; break; } }
- if (quality.length() != sequence.length()) { m->mothurOut("[ERROR]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; break; }
-
- vector<int> qualScores;
- if (qual) {
- qualScores = convertQual(quality);
- outQual << ">" << name << endl;
- for (int i = 0; i < qualScores.size(); i++) { outQual << qualScores[i] << " "; }
- outQual << endl;
- }
- if (m->control_pressed) { break; }
+ bool ignore;
+ fastqRead2 thisRead = readFastq(in, ignore);
- if (pacbio) {
- if (!qual) { qualScores = convertQual(quality); } //get scores if we didn't already
- for (int i = 0; i < qualScores.size(); i++) {
- if (qualScores[i] == 0){ sequence[i] = 'N'; }
+ if (!ignore) {
+ vector<int> qualScores;
+ if (qual) {
+ qualScores = convertQual(thisRead.quality);
+ outQual << ">" << thisRead.seq.getName() << endl;
+ for (int i = 0; i < qualScores.size(); i++) { outQual << qualScores[i] << " "; }
+ outQual << endl;
}
- }
-
- //print sequence info to files
- if (fasta) { outFasta << ">" << name << endl << sequence << endl; }
-
+
+ if (m->control_pressed) { break; }
+
+ if (pacbio) {
+ if (!qual) { qualScores = convertQual(thisRead.quality); } //convert if not done
+ string sequence = thisRead.seq.getAligned();
+ for (int i = 0; i < qualScores.size(); i++) {
+ if (qualScores[i] == 0){ sequence[i] = 'N'; }
+ }
+ thisRead.seq.setAligned(sequence);
+ }
+
+ //print sequence info to files
+ if (fasta) { thisRead.seq.printSequence(outFasta); }
+
+ if (split > 1) {
+ int barcodeIndex, primerIndex, trashCodeLength;
+ if (oligosfile != "") { trashCodeLength = findGroup(thisRead, barcodeIndex, primerIndex, trimOligos, numBarcodes, numPrimers); }
+ else if (groupfile != "") { trashCodeLength = findGroup(thisRead, barcodeIndex, primerIndex, "groupMode"); }
+ else { m->mothurOut("[ERROR]: uh oh, we shouldn't be here...\n"); }
+
+ if(trashCodeLength == 0){
+ ofstream out;
+ m->openOutputFileAppend(fastqFileNames[barcodeIndex][primerIndex], out);
+ out << thisRead.wholeRead;
+ out.close();
+ }else{
+ ofstream out;
+ m->openOutputFileAppend(noMatchFile, out);
+ out << thisRead.wholeRead;
+ out.close();
+ }
+ }
+ //report progress
+ if((count+1) % 10000 == 0){ m->mothurOut(toString(count+1)); m->mothurOutEndLine(); }
+ if(count > 100000){ break; }
+ count++;
+ }
}
in.close();
if (fasta) { outFasta.close(); }
if (qual) { outQual.close(); }
+
+ //report progress
+ if (!m->control_pressed) { if((count) % 10000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } }
+
+ if (split > 1) {
+
+ if (groupfile != "") { delete groupMap; }
+ else if (oligosfile != "") { delete trimOligos; }
+
+ map<string, string>::iterator it;
+ set<string> namesToRemove;
+ for(int i=0;i<fastqFileNames.size();i++){
+ for(int j=0;j<fastqFileNames[0].size();j++){
+ if (fastqFileNames[i][j] != "") {
+ if (namesToRemove.count(fastqFileNames[i][j]) == 0) {
+ if(m->isBlank(fastqFileNames[i][j])){
+ m->mothurRemove(fastqFileNames[i][j]);
+ namesToRemove.insert(fastqFileNames[i][j]);
+ }
+ }
+ }
+ }
+ }
+
+ //remove names for outputFileNames, just cleans up the output
+ for(int i = 0; i < outputNames.size(); i++) {
+ if (namesToRemove.count(outputNames[i]) != 0) {
+ outputNames.erase(outputNames.begin()+i);
+ i--;
+ }
+ }
+ if(m->isBlank(noMatchFile)){ m->mothurRemove(noMatchFile); }
+ else { outputNames.push_back(noMatchFile); outputTypes["fastq"].push_back(noMatchFile); }
+ }
if (m->control_pressed) { outputTypes.clear(); outputNames.clear(); m->mothurRemove(fastaFile); m->mothurRemove(qualFile); return 0; }
exit(1);
}
}
+//**********************************************************************************************************************
+//Reads the next four-line fastq record (name, sequence, "+name", quality) from in.
+//  in     - open fastq stream positioned at the start of a record.
+//  ignore - set to true, after a warning is printed, when the record is malformed.
+//All four lines are always consumed, even when an early line is bad, so the stream
+//stays aligned with the next record. When split > 1 the lines read are also saved,
+//newline terminated, in the wholeRead field of the returned fastqRead2.
+fastqRead2 ParseFastaQCommand::readFastq(ifstream& in, bool& ignore){
+    try {
+        ignore = false;
+        string wholeRead = "";
+
+        //read sequence name, only the first whitespace separated token is used
+        string line = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += line + "\n"; }
+        vector<string> pieces = m->splitWhiteSpace(line);
+        string name = ""; if (pieces.size() != 0) { name = pieces[0]; }
+        if (name == "") { m->mothurOut("[WARNING]: Blank fasta name, ignoring read."); m->mothurOutEndLine(); ignore=true; }
+        else if (name[0] != '@') { m->mothurOut("[WARNING]: reading " + name + " expected a name with @ as a leading character, ignoring read."); m->mothurOutEndLine(); ignore=true; }
+        else { name = name.substr(1); }
+
+        //read sequence
+        string sequence = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += sequence + "\n"; }
+        if (sequence == "") { m->mothurOut("[WARNING]: missing sequence for " + name + ", ignoring."); m->mothurOutEndLine(); ignore=true; }
+
+        //read quality name, a bare "+" inherits the sequence name
+        line = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += line + "\n"; }
+        pieces = m->splitWhiteSpace(line);
+        string name2 = ""; if (pieces.size() != 0) { name2 = pieces[0]; }
+        if (name2 == "") { m->mothurOut("[WARNING]: expected a name with + as a leading character, ignoring."); m->mothurOutEndLine(); ignore=true; }
+        else if (name2[0] != '+') { m->mothurOut("[WARNING]: reading " + name2 + " expected a name with + as a leading character, ignoring."); m->mothurOutEndLine(); ignore=true; }
+        else { name2 = name2.substr(1); if (name2 == "") { name2 = name; } }
+
+
+        //read quality scores
+        string quality = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += quality + "\n"; }
+        if (quality == "") { m->mothurOut("[WARNING]: missing quality for " + name2 + ", ignoring."); m->mothurOutEndLine(); ignore=true; }
+
+        //sanity check names agree and sequence length matches the number of quality characters
+        if (name2 != "") { if (name != name2) { m->mothurOut("[WARNING]: names do not match. read " + name + " for fasta and " + name2 + " for quality, ignoring."); m->mothurOutEndLine(); ignore=true; } }
+        if (quality.length() != sequence.length()) { m->mothurOut("[WARNING]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores, ignoring read."); m->mothurOutEndLine(); ignore=true; }
+
+        m->checkName(name);
+        Sequence seq(name, sequence);
+        fastqRead2 read(seq, quality, wholeRead);
+
+        if (m->debug) { m->mothurOut("[DEBUG]: " + read.seq.getName() + " " + read.seq.getAligned() + " " + quality + "\n"); }
+
+        return read;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ParseFastaQCommand", "readFastq");
+        exit(1);
+    }
+}
+
//**********************************************************************************************************************
vector<int> ParseFastaQCommand::convertQual(string qual) {
try {
}
}
//**********************************************************************************************************************
+//Classifies a read against the oligos file using trimOligos.
+//  thisRead    - the read to classify; the stripping is done on a local copy.
+//  barcode     - out: index of the matched barcode, 0 when no barcode step runs.
+//  primer      - out: index of the matched forward primer, 0 when no primer step runs.
+//  trimOligos  - matcher used for the linker/barcode/spacer/primer/reverse steps.
+//  numBarcodes - number of barcodes loaded; 0 skips the barcode step.
+//  numPrimers  - number of primers loaded; 0 skips the forward primer step.
+//Returns the number of failure codes collected, so 0 means the read was assigned.
+int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer, TrimOligos*& trimOligos, int numBarcodes, int numPrimers) {
+    try {
+        int success = 1;
+        string trashCode = "";
+        int currentSeqsDiffs = 0;
+
+        //give the out-parameters a defined value: a skipped step would otherwise
+        //leave them untouched, and the caller uses them to index fastqFileNames
+        barcode = 0;
+        primer = 0;
+
+        Sequence currSeq(thisRead.seq.getName(), thisRead.seq.getAligned());
+        QualityScores currQual; currQual.setScores(convertQual(thisRead.quality));
+
+        if(linker.size() != 0){
+            success = trimOligos->stripLinker(currSeq, currQual);
+            if(success > ldiffs)    {   trashCode += 'k';   }
+            else{ currentSeqsDiffs += success;  }
+        }
+
+        if(numBarcodes != 0){
+            success = trimOligos->stripBarcode(currSeq, currQual, barcode);
+            if(success > bdiffs)    {   trashCode += 'b';   }
+            else{ currentSeqsDiffs += success;  }
+        }
+
+        if(spacer.size() != 0){
+            success = trimOligos->stripSpacer(currSeq, currQual);
+            if(success > sdiffs)    {   trashCode += 's';   }
+            else{ currentSeqsDiffs += success;  }
+        }
+
+        if(numPrimers != 0){
+            success = trimOligos->stripForward(currSeq, currQual, primer, true);
+            if(success > pdiffs)    {   trashCode += 'f';   }
+            else{ currentSeqsDiffs += success;  }
+        }
+
+        //the differences accepted by the individual steps must also fit the total budget
+        if (currentSeqsDiffs > tdiffs)  {   trashCode += 't';   }
+
+        if(revPrimer.size() != 0){
+            success = trimOligos->stripReverse(currSeq, currQual);
+            if(!success)    {   trashCode += 'r';   }
+        }
+
+
+        return trashCode.length();
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ParseFastaQCommand", "findGroup");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+//Group-file flavour of findGroup: looks the read's name up in groupMap and maps the
+//group to its row in fastqFileNames through the barcodes table.
+//  barcode   - out: row index for the read's group, only written on success.
+//  primer    - out: always set to 0.
+//  groupMode - not read by this function.
+//Returns 0 when the read was assigned and 1 when it has to be scrapped.
+int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer, string groupMode) {
+    try {
+        primer = 0;
+        bool assigned = false;
+
+        string readGroup = groupMap->getGroup(thisRead.seq.getName());
+        if (readGroup != "not found") {
+            map<string, int>::iterator slot = barcodes.find(readGroup);
+            if (slot != barcodes.end()) {
+                barcode = slot->second;
+                assigned = true;
+            }
+        }
+
+        //scrap for group, either unknown read or group without an output file
+        string trashCode = "";
+        if (!assigned) { trashCode += "g"; }
+
+        return trashCode.length();
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ParseFastaQCommand", "findGroup");
+        exit(1);
+    }
+}
+//***************************************************************************************************************
+
+//Parses the oligos file and prepares one (empty) fastq output file per barcode/primer
+//combination plus the scrap file.
+//Recognised line types (case insensitive): FORWARD, PRIMER (paired), REVERSE, BARCODE
+//(single or paired), LINKER and SPACER; lines starting with '#' are skipped.
+//Fills primers, barcodes, pairedPrimers, pairedBarcodes, revPrimer, linker, spacer,
+//primerNameVector, barcodeNameVector, fastqFileNames, noMatchFile and pairedOligos.
+//Returns false, with control_pressed set, when paired and unpaired oligos are mixed;
+//returns true otherwise.
+bool ParseFastaQCommand::readOligos(string oligoFile){
+    try {
+        ifstream inOligos;
+        m->openInputFile(oligoFile, inOligos);
+
+        string type, oligo, roligo, group;
+        bool hasPrimer = false; bool hasPairedBarcodes = false; pairedOligos = false;
+
+        int indexPrimer = 0;
+        int indexBarcode = 0;
+        int indexPairedPrimer = 0;
+        int indexPairedBarcode = 0;
+        set<string> uniquePrimers;
+        set<string> uniqueBarcodes;
+
+        while(!inOligos.eof()){
+
+            inOligos >> type;
+
+            if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
+
+            if(type[0] == '#'){
+                while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // skip the rest of the comment line
+                m->gobble(inOligos);
+            }
+            else{
+                m->gobble(inOligos);
+                //make type case insensitive
+                for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
+
+                inOligos >> oligo;
+
+                if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
+
+                //upper case the oligo and treat RNA U as T
+                for(int i=0;i<oligo.length();i++){
+                    oligo[i] = toupper(oligo[i]);
+                    if(oligo[i] == 'U') { oligo[i] = 'T'; }
+                }
+
+                if(type == "FORWARD"){
+                    group = "";
+
+                    // get rest of line in case there is a primer name
+                    while (!inOligos.eof()) {
+                        char c = inOligos.get();
+                        if (c == 10 || c == 13 || c == -1){ break; }
+                        else if (c == 32 || c == 9){;} //space or tab
+                        else { group += c; }
+                    }
+
+                    //check for repeat primers
+                    map<string, int>::iterator itPrime = primers.find(oligo);
+                    if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
+
+                    if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); } }
+
+                    primers[oligo]=indexPrimer; indexPrimer++;
+                    primerNameVector.push_back(group);
+                }
+                else if (type == "PRIMER"){
+                    m->gobble(inOligos);
+
+                    inOligos >> roligo;
+
+                    for(int i=0;i<roligo.length();i++){
+                        roligo[i] = toupper(roligo[i]);
+                        if(roligo[i] == 'U') { roligo[i] = 'T'; }
+                    }
+                    roligo = reverseOligo(roligo);
+
+                    group = "";
+
+                    // get rest of line in case there is a primer name
+                    while (!inOligos.eof()) {
+                        char c = inOligos.get();
+                        if (c == 10 || c == 13 || c == -1){ break; }
+                        else if (c == 32 || c == 9){;} //space or tab
+                        else { group += c; }
+                    }
+
+                    oligosPair newPrimer(oligo, roligo);
+
+                    if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
+
+                    //check for repeat primer pairs
+                    string tempPair = oligo+roligo;
+                    if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine(); }
+                    else { uniquePrimers.insert(tempPair); }
+
+                    if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); } }
+
+                    pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
+                    primerNameVector.push_back(group);
+                    hasPrimer = true;
+                }
+                else if(type == "REVERSE"){
+                    //reverse primers are stored reverse complemented
+                    string oligoRC = reverseOligo(oligo);
+                    revPrimer.push_back(oligoRC);
+                }
+                else if(type == "BARCODE"){
+                    inOligos >> group;
+
+                    //barcode lines can look like   BARCODE   atgcatgc   groupName  - for 454 seqs
+                    //or                            BARCODE   atgcatgc   atgcatgc    groupName  - for illumina data that has forward and reverse info
+
+                    string temp = "";
+                    while (!inOligos.eof()) {
+                        char c = inOligos.get();
+                        if (c == 10 || c == 13 || c == -1){ break; }
+                        else if (c == 32 || c == 9){;} //space or tab
+                        else { temp += c; }
+                    }
+
+                    //then this is illumina data with 4 columns
+                    if (temp != "") {
+                        hasPairedBarcodes = true;
+                        string reverseBarcode = group; //third column is the reverse barcode, not the group
+                        group = temp;
+
+                        for(int i=0;i<reverseBarcode.length();i++){
+                            reverseBarcode[i] = toupper(reverseBarcode[i]);
+                            if(reverseBarcode[i] == 'U') { reverseBarcode[i] = 'T'; }
+                        }
+
+                        reverseBarcode = reverseOligo(reverseBarcode);
+                        oligosPair newPair(oligo, reverseBarcode);
+
+                        if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
+                        //check for repeat barcodes
+                        string tempPair = oligo+reverseBarcode;
+                        if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse + " is in your oligos file already, disregarding."); m->mothurOutEndLine(); }
+                        else { uniqueBarcodes.insert(tempPair); }
+
+                        pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
+                        barcodeNameVector.push_back(group);
+                    }else {
+                        //check for repeat barcodes
+                        map<string, int>::iterator itBar = barcodes.find(oligo);
+                        if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
+
+                        barcodes[oligo]=indexBarcode; indexBarcode++;
+                        barcodeNameVector.push_back(group);
+                    }
+                }else if(type == "LINKER"){
+                    linker.push_back(oligo);
+                }else if(type == "SPACER"){
+                    spacer.push_back(oligo);
+                }
+                else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, primer, reverse, barcode, linker and spacer. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
+            }
+            m->gobble(inOligos);
+        }
+        inOligos.close();
+
+        if (hasPairedBarcodes || hasPrimer) {
+            pairedOligos = true;
+            if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine(); return false; }
+        }
+
+        //add in potential combos, an unnamed placeholder keeps the file table at least 1 x 1
+        if(barcodeNameVector.size() == 0){
+            barcodes[""] = 0;
+            barcodeNameVector.push_back("");
+        }
+
+        if(primerNameVector.size() == 0){
+            primers[""] = 0;
+            primerNameVector.push_back("");
+        }
+
+        //fastqFileNames[barcode index][primer index], "" means no file for that combo
+        fastqFileNames.resize(barcodeNameVector.size());
+        for(int i=0;i<fastqFileNames.size();i++){
+            fastqFileNames[i].assign(primerNameVector.size(), "");
+        }
+
+
+        set<string> uniqueNames; //used to cleanup outputFileNames
+        if (pairedOligos) {
+            for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
+                for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
+
+                    string primerName = primerNameVector[itPrimer->first];
+                    string barcodeName = barcodeNameVector[itBar->first];
+
+                    if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+                    else {
+                        string comboGroupName = "";
+                        string fastqFileName = "";
+
+                        if(primerName == ""){
+                            comboGroupName = barcodeNameVector[itBar->first];
+                        }
+                        else{
+                            if(barcodeName == ""){
+                                comboGroupName = primerNameVector[itPrimer->first];
+                            }
+                            else{
+                                comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
+                            }
+                        }
+
+
+                        ofstream temp;
+                        map<string, string> variables;
+                        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
+                        variables["[group]"] = comboGroupName;
+                        fastqFileName = getOutputFileName("fastq", variables);
+                        if (uniqueNames.count(fastqFileName) == 0) {
+                            outputNames.push_back(fastqFileName);
+                            outputTypes["fastq"].push_back(fastqFileName);
+                            uniqueNames.insert(fastqFileName);
+                        }
+
+                        fastqFileNames[itBar->first][itPrimer->first] = fastqFileName;
+                        m->openOutputFile(fastqFileName, temp);  temp.close();
+
+                    }
+                }
+            }
+        }else {
+            for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
+                for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
+
+                    string primerName = primerNameVector[itPrimer->second];
+                    string barcodeName = barcodeNameVector[itBar->second];
+
+                    if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+                    else {
+                        string comboGroupName = "";
+                        string fastqFileName = "";
+
+                        if(primerName == ""){
+                            comboGroupName = barcodeNameVector[itBar->second];
+                        }
+                        else{
+                            if(barcodeName == ""){
+                                comboGroupName = primerNameVector[itPrimer->second];
+                            }
+                            else{
+                                comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
+                            }
+                        }
+
+
+                        ofstream temp;
+                        map<string, string> variables;
+                        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
+                        variables["[group]"] = comboGroupName;
+                        fastqFileName = getOutputFileName("fastq", variables);
+                        if (uniqueNames.count(fastqFileName) == 0) {
+                            outputNames.push_back(fastqFileName);
+                            outputTypes["fastq"].push_back(fastqFileName);
+                            uniqueNames.insert(fastqFileName);
+                        }
+
+                        fastqFileNames[itBar->second][itPrimer->second] = fastqFileName;
+                        m->openOutputFile(fastqFileName, temp);  temp.close();
+
+                    }
+                }
+            }
+        }
+
+        //reads that match nothing go to the scrap file, created empty here
+        ofstream temp;
+        map<string, string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
+        variables["[group]"] = "scrap";
+        noMatchFile = getOutputFileName("fastq", variables);
+        m->openOutputFile(noMatchFile, temp);  temp.close();
+
+        return true;
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ParseFastaQCommand", "readOligos");
+        exit(1);
+    }
+}
+//***************************************************************************************************************
+//Loads the group file into groupMap and creates one empty fastq output file per group.
+//Each group gets a one-column row in fastqFileNames and its row index is recorded in
+//barcodes. Any existing scrap file is removed. Always returns true.
+bool ParseFastaQCommand::readGroup(string groupfile){
+    try {
+        fastqFileNames.clear();
+
+        groupMap = new GroupMap();
+        groupMap->readMap(groupfile);
+
+        //plays the role of barcodeNameVector - there are no primer names in group mode
+        vector<string> groupNames = groupMap->getNamesOfGroups();
+
+        fastqFileNames.resize(groupNames.size());
+        for (int row = 0; row < fastqFileNames.size(); row++) {
+            map<string, string> nameParts;
+            nameParts["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
+            nameParts["[group]"] = groupNames[row];
+            string groupFastq = getOutputFileName("fastq",nameParts);
+            outputNames.push_back(groupFastq);
+            outputTypes["fastq"].push_back(groupFastq);
+
+            //open and close right away so the file exists and starts out empty
+            ofstream blank;
+            m->openOutputFileBinary(groupFastq, blank); blank.close();
+
+            fastqFileNames[row].push_back(groupFastq);
+            barcodes[groupNames[row]] = row;
+        }
+
+        map<string, string> scrapParts;
+        scrapParts["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
+        scrapParts["[group]"] = "scrap";
+        noMatchFile = getOutputFileName("fastq",scrapParts);
+        m->mothurRemove(noMatchFile);
+
+        return true;
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ParseFastaQCommand", "readGroup");
+        exit(1);
+    }
+}
+//********************************************************************/
+//Returns the reverse complement of oligo. Upper case IUPAC codes are complemented
+//(U is treated like T); any other character becomes 'N'.
+string ParseFastaQCommand::reverseOligo(string oligo){
+    try {
+        string complemented = "";
+
+        //walk from the last base to the first, appending each base's complement
+        for (string::reverse_iterator base = oligo.rbegin(); base != oligo.rend(); ++base) {
+            switch (*base) {
+                case 'A': complemented += 'T'; break;
+                case 'T':
+                case 'U': complemented += 'A'; break;
+
+                case 'G': complemented += 'C'; break;
+                case 'C': complemented += 'G'; break;
+
+                case 'R': complemented += 'Y'; break;
+                case 'Y': complemented += 'R'; break;
+
+                case 'M': complemented += 'K'; break;
+                case 'K': complemented += 'M'; break;
+
+                //W and S are their own complements
+                case 'W': complemented += 'W'; break;
+                case 'S': complemented += 'S'; break;
+
+                case 'B': complemented += 'V'; break;
+                case 'V': complemented += 'B'; break;
+
+                case 'D': complemented += 'H'; break;
+                case 'H': complemented += 'D'; break;
+
+                default:  complemented += 'N'; break;
+            }
+        }
+
+        return complemented;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ParseFastaQCommand", "reverseOligo");
+        exit(1);
+    }
+}
+
+
+//**********************************************************************************************************************
#include "command.hpp"
+#include "trimoligos.h"
+#include "sequence.hpp"
+#include "groupmap.h"
+
+//One fastq record: the quality line, the name/bases as a Sequence, and the text of
+//the whole record.
+struct fastqRead2 {
+    string quality;     //quality line exactly as given to the constructor
+    Sequence seq;       //name and bases of the read
+    string wholeRead;   //text of the complete record as given to the constructor
+
+    fastqRead2() {}
+    //members are initialised in declaration order, so the list is written in that order
+    fastqRead2(Sequence s, string q, string w) : quality(q), seq(s), wholeRead(w) {}
+    ~fastqRead2() {}
+};
+
class ParseFastaQCommand : public Command {
private:
vector<string> outputNames;
- string outputDir, fastaQFile, format;
- bool abort, fasta, qual, pacbio;
+ string outputDir, fastaQFile, format, oligosfile, groupfile;
+ bool abort, fasta, qual, pacbio, pairedOligos;
+ int pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, split;
+ GroupMap* groupMap;
+
+ //oligos file data structures
+ vector<string> linker, spacer, primerNameVector, barcodeNameVector, revPrimer;
+ map<string, int> barcodes;
+ map<string, int> primers;
+ map<int, oligosPair> pairedBarcodes;
+ map<int, oligosPair> pairedPrimers;
+ vector<vector<string> > fastqFileNames;
+ string noMatchFile;
vector<int> convertQual(string);
vector<char> convertTable;
+ bool readOligos(string oligosFile);
+ bool readGroup(string oligosFile);
+ string reverseOligo(string oligo);
+ fastqRead2 readFastq(ifstream&, bool&);
+ int findGroup(fastqRead2, int&, int&, TrimOligos*&, int, int);
+ int findGroup(fastqRead2, int&, int&, string);
+
};
#endif
try {
string pattern = "";
- if (type == "list") { pattern = "[filename],[group],list"; }
+ if (type == "list") { pattern = "[filename],[group],[distance],list"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
return pattern;
if (abort == true) { if (calledHelp) { return 0; } return 2; }
- //set fileroot
- map<string, string> variables;
- variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
-
- //fill filehandles with neccessary ofstreams
- int i;
- ofstream* temp;
- vector<string> gGroups;
- if (groupfile != "") { gGroups = groupMap->getNamesOfGroups(); }
- else { gGroups = ct.getNamesOfGroups(); }
-
- for (i=0; i<gGroups.size(); i++) {
- temp = new ofstream;
- filehandles[gGroups[i]] = temp;
-
- variables["[group]"] = gGroups[i];
- string filename = getOutputFileName("list",variables);
- outputNames.push_back(filename); outputTypes["list"].push_back(filename);
- m->openOutputFile(filename, *temp);
- }
-
//if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
set<string> processedLabels;
set<string> userLabels = labels;
if (m->control_pressed) {
delete list; if (groupfile != "") { delete groupMap; }
- for (i=0; i<gGroups.size(); i++) { (*(filehandles[gGroups[i]])).close(); delete filehandles[gGroups[i]]; }
- for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
- return 0;
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;
}
while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
if (m->control_pressed) {
delete list; if (groupfile != "") { delete groupMap; }
- for (i=0; i<gGroups.size(); i++) { (*(filehandles[gGroups[i]])).close(); delete filehandles[gGroups[i]]; }
for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
return 0;
}
if (m->control_pressed) {
if (groupfile != "") { delete groupMap; }
- for (i=0; i<gGroups.size(); i++) { (*(filehandles[gGroups[i]])).close(); delete filehandles[gGroups[i]]; }
for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
return 0;
}
if (m->control_pressed) {
if (groupfile != "") { delete groupMap; }
- for (i=0; i<gGroups.size(); i++) { (*(filehandles[gGroups[i]])).close(); delete filehandles[gGroups[i]]; }
for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
return 0;
}
delete list;
}
- for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
- (*(filehandles[it3->first])).close();
- delete it3->second;
- }
-
if (groupfile != "") { delete groupMap; }
if (m->control_pressed) {
/**********************************************************************************************************************/
int ParseListCommand::parse(ListVector* thisList) {
try {
+ map<string, ofstream*> filehandles;
+ map<string, ofstream*>::iterator it3;
+
+ //set fileroot
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
+ variables["[distance]"] = thisList->getLabel();
+
+ //fill filehandles with neccessary ofstreams
+ ofstream* temp;
+ vector<string> gGroups;
+ if (groupfile != "") { gGroups = groupMap->getNamesOfGroups(); }
+ else { gGroups = ct.getNamesOfGroups(); }
+
+ for (int i=0; i<gGroups.size(); i++) {
+ temp = new ofstream;
+ filehandles[gGroups[i]] = temp;
+
+ variables["[group]"] = gGroups[i];
+ string filename = getOutputFileName("list",variables);
+ m->openOutputFile(filename, *temp);
+ outputNames.push_back(filename); outputTypes["list"].push_back(filename);
+ }
+
map<string, string> groupVector;
+ map<string, string> groupLabels;
map<string, string>::iterator itGroup;
map<string, int> groupNumBins;
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
groupNumBins[it3->first] = 0;
groupVector[it3->first] = "";
+ groupLabels[it3->first] = "label\tnumOtus\t";
}
-
+ vector<string> binLabels = thisList->getLabels();
for (int i = 0; i < thisList->getNumBins(); i++) {
- if (m->control_pressed) { return 0; }
+ if (m->control_pressed) { break; }
map<string, string> groupBins;
string bin = list->get(i);
//print parsed bin info to files
for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
- groupVector[itGroup->first] += itGroup->second + '\t';
+ groupVector[itGroup->first] += itGroup->second + '\t';
+ groupLabels[itGroup->first] += binLabels[i] + '\t';
}
}
+ if (m->control_pressed) {
+ for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
+ (*(filehandles[it3->first])).close();
+ delete it3->second;
+ }
+ return 0;
+ }
+
//end list vector
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
+ (*(filehandles[it3->first])) << groupLabels[it3->first] << endl;
(*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl; // label numBins listvector for that group
+ (*(filehandles[it3->first])).close();
+ delete it3->second;
}
return 0;
set<string> labels;
bool abort, allLines;
vector<string> outputNames;
-
- map<string, ofstream*> filehandles;
- map<string, ofstream*>::iterator it3;
};
int readCount(set<string>);
bool readOligos();
bool readEcoli();
- int driverPcr(string, string, string, string, set<string>&, linePair, int&, int&, bool&);
+ int driverPcr(string, string, string, string, set<string>&, linePair, int&, bool&);
int createProcesses(string, string, string, set<string>&);
bool isAligned(string, map<int, int>&);
string reverseOligo(string);
nomatch = nm;
keepprimer = kp;
keepdots = kd;
+ end = en;
start = st;
- end = en;
length = l;
fstart = fst;
fend = fen;
set<int> lengths;
//pdiffs, bdiffs, primers, barcodes, revPrimers
map<string, int> faked;
- vector< set<int> > locations; //locations[0] = beginning locations, locations[1] = ending locations
- locations.resize(2);
+ set<int> locations; //locations = beginning locations
+
TrimOligos trim(pDataArray->pdiffs, 0, pDataArray->primers, faked, pDataArray->revPrimer);
for(int i = 0; i < pDataArray->fend; i++){ //end is the number of sequences to process
else {
currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd-1]+1));
if (pDataArray->fileAligned) {
- thisPStart = mapAligned[primerEnd-1]+1; //locations[0].insert(mapAligned[primerEnd-1]+1);
+ thisPStart = mapAligned[primerEnd-1]+1; //locations.insert(mapAligned[primerEnd-1]+1);
locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n";
}
}
else {
currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));
if (pDataArray->fileAligned) {
- thisPStart = mapAligned[primerStart]; //locations[0].insert(mapAligned[primerStart]);
+ thisPStart = mapAligned[primerStart]; //locations.insert(mapAligned[primerStart]);
locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n";
}
}
else {
currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart]));
if (pDataArray->fileAligned) {
- thisPEnd = mapAligned[primerStart]; //locations[1].insert(mapAligned[primerStart]);
+ thisPEnd = mapAligned[primerStart]; //locations.insert(mapAligned[primerStart]);
locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n";
}
else {
currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd-1]+1));
if (pDataArray->fileAligned) {
- thisPEnd = mapAligned[primerEnd-1]+1; //locations[1].insert(mapAligned[primerEnd-1]+1);
+ thisPEnd = mapAligned[primerEnd-1]+1; //locations.insert(mapAligned[primerEnd-1]+1);
locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n";
}
if(goodSeq == 1) {
currSeq.printSequence(goodFile);
if (locationsString != "") { locationsFile << locationsString; }
- if (thisPStart != -1) { locations[0].insert(thisPStart); }
- if (thisPEnd != -1) { locations[1].insert(thisPEnd); }
+ if (thisPStart != -1) { locations.insert(thisPStart); }
}
else {
pDataArray->badSeqNames.insert(currSeq.getName());
if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: fileAligned = " + toString(pDataArray->fileAligned) +'\n'); }
if (pDataArray->fileAligned && !pDataArray->keepdots) { //print out smallest start value and largest end value
- if ((locations[0].size() > 1) || (locations[1].size() > 1)) { pDataArray->adjustNeeded = true; }
- if (pDataArray->primers.size() != 0) { set<int>::iterator it = locations[0].begin(); pDataArray->pstart = *it; }
- if (pDataArray->revPrimer.size() != 0) { set<int>::reverse_iterator it2 = locations[1].rbegin(); pDataArray->pend = *it2; }
+ if (locations.size() > 1) { pDataArray->adjustNeeded = true; }
+ if (pDataArray->primers.size() != 0) { set<int>::iterator it = locations.begin(); pDataArray->pstart = *it; }
}
return 0;
exit(1);
}
}
+/**************************************************************************************************/
+
+// Looks up the sequence names stored at the tree node addressed by seqTaxonomy.
+// Starting at node 0, each taxon taken from seqTaxonomy selects a child of the current node;
+// once seqTaxonomy has been fully consumed, the accessions of the node reached are returned.
+// Returns an empty vector if control_pressed is set, if an empty taxon is encountered,
+// or if a taxon is not a child of the current node (in which case control_pressed is also set).
+vector<string> PhyloTree::getSeqs(string seqTaxonomy){
+ try {
+ string taxCopy = seqTaxonomy; //untouched copy, used only in the error messages below
+ vector<string> names;
+ map<string, int>::iterator childPointer;
+
+ int currentNode = 0;
+
+ //NOTE(review): presumably strips confidence scores from the taxonomy string in place - confirm against MothurOut
+ m->removeConfidences(seqTaxonomy);
+
+ string taxon;
+ while(seqTaxonomy != ""){
+
+ if (m->control_pressed) { return names; }
+
+ //NOTE(review): getNextTaxon appears to remove the leading taxon from seqTaxonomy, which is what lets this loop terminate - confirm
+ taxon = getNextTaxon(seqTaxonomy, "");
+
+ if (m->debug) { m->mothurOut(taxon +'\n'); }
+
+ if (taxon == "") { m->mothurOut(taxCopy + " has an error in the taxonomy. This may be due to a ;;"); m->mothurOutEndLine(); break; }
+
+ childPointer = tree[currentNode].children.find(taxon);
+
+ if(childPointer != tree[currentNode].children.end()){ //if the node already exists, move on
+ currentNode = childPointer->second;
+ }
+ else{ //otherwise, error this taxonomy is not in tree
+ m->mothurOut("[ERROR]: " + taxCopy + " is not in taxonomy tree, please correct."); m->mothurOutEndLine(); m->control_pressed = true; return names;
+ }
+
+ //last taxon consumed: the current node is the requested taxonomy, so take its accessions
+ if (seqTaxonomy == "") { names = tree[currentNode].accessions; }
+ }
+
+ return names;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PhyloTree", "getSeqs");
+ exit(1);
+ }
+}
/**************************************************************************************************/
string getName(int i);
int getGenusIndex(string seqName);
string getFullTaxonomy(string); //pass a sequence name return taxonomy
+ vector<string> getSeqs(string); //returns names of sequences in given taxonomy
int getMaxLevel() { return maxLevel; }
int getNumSeqs() { return numSeqs; }
}
//print listvector
- list.print(outList);
+ if (!m->printedListHeaders) { list.printHeaders(outList); }
+ list.print(outList);
+
if (countfile == "") {
//print rabund
list.getRAbundVector().print(outRabund);
process++;
}else if (pid == 0){
string locationsFile = toString(getpid()) + ".temp";
- num = driverPcr(filename, goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", locationsFile, badSeqNames, lines[process], pstart, pend, adjustNeeded);
+ num = driverPcr(filename, goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", locationsFile, badSeqNames, lines[process], pstart, adjustNeeded);
//pass numSeqs to parent
ofstream out;
string tempFile = filename + toString(getpid()) + ".num.temp";
m->openOutputFile(tempFile, out);
- out << pstart << '\t' << pend << '\t' << adjustNeeded << endl;
+ out << pstart << '\t' << adjustNeeded << endl;
out << num << '\t' << badSeqNames.size() << endl;
for (set<string>::iterator it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
out << (*it) << endl;
}
string locationsFile = toString(getpid()) + ".temp";
- num = driverPcr(filename, goodFileName, badFileName, locationsFile, badSeqNames, lines[0], pstart, pend, adjustNeeded);
+ num = driverPcr(filename, goodFileName, badFileName, locationsFile, badSeqNames, lines[0], pstart, adjustNeeded);
//force parent to wait until all the processes are done
for (int i=0;i<processIDS.size();i++) {
string tempFile = filename + toString(processIDS[i]) + ".num.temp";
m->openInputFile(tempFile, in);
int numBadNames = 0; string name = "";
- int tpstart = -1; int tpend = -1; bool tempAdjust = false;
+ int tpstart = -1; bool tempAdjust = false;
if (!in.eof()) {
- in >> tpstart >> tpend >> tempAdjust; m->gobble(in);
+ in >> tpstart >> tempAdjust; m->gobble(in);
if (tempAdjust) { adjustNeeded = true; }
if (tpstart != -1) {
if (tpstart != pstart) { adjustNeeded = true; }
if (tpstart < pstart) { pstart = tpstart; } //smallest start
}
- if (tpend != -1) {
- if (tpend != pend) { adjustNeeded = true; }
- if (tpend > pend) { pend = tpend; } //largest end
- }
int tempNum = 0; in >> tempNum >> numBadNames; num += tempNum; m->gobble(in);
}
for (int j = 0; j < numBadNames; j++) {
}
//do your part
- num = driverPcr(filename, (goodFileName+toString(processors-1)+".temp"), (badFileName+toString(processors-1)+".temp"), (locationsFile+toString(processors-1)+".temp"), badSeqNames, lines[processors-1], pstart, pend, adjustNeeded);
+ num = driverPcr(filename, (goodFileName+toString(processors-1)+".temp"), (badFileName+toString(processors-1)+".temp"), (locationsFile+toString(processors-1)+".temp"), badSeqNames, lines[processors-1], pstart, adjustNeeded);
processIDS.push_back(processors-1);
//Wait until all threads have terminated.
if (pDataArray[i]->pstart != pstart) { adjustNeeded = true; }
if (pDataArray[i]->pstart < pstart) { pstart = pDataArray[i]->pstart; }
} //smallest start
- if (pDataArray[i]->pend != -1) {
- if (pDataArray[i]->pend != pend) { adjustNeeded = true; }
- if (pDataArray[i]->pend > pend) { pend = pDataArray[i]->pend; }
- } //largest end
-
+
for (set<string>::iterator it = pDataArray[i]->badSeqNames.begin(); it != pDataArray[i]->badSeqNames.end(); it++) { badSeqNames.insert(*it); }
CloseHandle(hThreadArray[i]);
delete pDataArray[i];
}
#endif
- if (fileAligned && adjustNeeded) { adjustDots(goodFileName, locationsFile, pstart, pend); }
+
+
+
+ if (fileAligned) {
+ //find pend - pend is the biggest ending value, but we must account for when we adjust the start. That adjustment may make the "new" end larger than the largest end. So let's find out what that "new" end will be.
+ ifstream inLocations;
+ m->openInputFile(locationsFile, inLocations);
+
+ while(!inLocations.eof()) {
+
+ if (m->control_pressed) { break; }
+
+ string name = "";
+ int thisStart = -1; int thisEnd = -1;
+ //a start record is read only when forward primers exist, an end record only when reverse primers exist
+ if (primers.size() != 0) { inLocations >> name >> thisStart; m->gobble(inLocations); }
+ if (revPrimer.size() != 0) { inLocations >> name >> thisEnd; m->gobble(inLocations); }
+ else { pend = -1; break; } //no reverse primers: there are no end values to scan, so stop with pend = -1
+
+ //myDiff is how far this sequence's start lies from pstart
+ int myDiff = 0;
+ if (pstart != -1) {
+ if (thisStart != -1) {
+ if (thisStart != pstart) { myDiff += (thisStart - pstart); }
+ }
+ }
+
+ //this sequence's end, offset by the same amount as its start
+ int myEnd = thisEnd + myDiff;
+ //cout << name << '\t' << thisStart << '\t' << thisEnd << " diff = " << myDiff << '\t' << myEnd << endl;
+
+ //keep the largest offset end seen so far
+ if (thisEnd != -1) {
+ if (myEnd > pend) { pend = myEnd; }
+ }
+
+ }
+ inLocations.close();
+
+ adjustDots(goodFileName, locationsFile, pstart, pend);
+ }
return num;
}
//**********************************************************************************************************************
-int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta, string locationsName, set<string>& badSeqNames, linePair filePos, int& pstart, int& pend, bool& adjustNeeded){
+int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta, string locationsName, set<string>& badSeqNames, linePair filePos, int& pstart, bool& adjustNeeded){
try {
ofstream goodFile;
m->openOutputFile(goodFasta, goodFile);
bool done = false;
int count = 0;
set<int> lengths;
- vector< set<int> > locations; //locations[0] = beginning locations, locations[1] = ending locations
- locations.resize(2);
+ set<int> locations; //locations = beginning locations
//pdiffs, bdiffs, primers, barcodes, revPrimers
map<string, int> faked;
if(goodSeq == 1) {
currSeq.printSequence(goodFile);
if (m->debug) { m->mothurOut("[DEBUG]: " + locationsString + "\n"); }
+ if (thisPStart != -1) { locations.insert(thisPStart); }
if (locationsString != "") { locationsFile << locationsString; }
- if (thisPStart != -1) { locations[0].insert(thisPStart); }
- if (thisPEnd != -1) { locations[1].insert(thisPEnd); }
}
else {
badSeqNames.insert(currSeq.getName());
if (m->debug) { m->mothurOut("[DEBUG]: fileAligned = " + toString(fileAligned) +'\n'); }
if (fileAligned && !keepdots) { //print out smallest start value and largest end value
- if ((locations[0].size() > 1) || (locations[1].size() > 1)) { adjustNeeded = true; }
- if (primers.size() != 0) { set<int>::iterator it = locations[0].begin(); pstart = *it; }
- if (revPrimer.size() != 0) { set<int>::reverse_iterator it2 = locations[1].rbegin(); pend = *it2; }
+ if (locations.size() > 1) { adjustNeeded = true; }
+ if (primers.size() != 0) { set<int>::iterator it = locations.begin(); pstart = *it; }
}
return count;
set<int> lengths;
//cout << pstart << '\t' << pend << endl;
+ //if (pstart > pend) { //swap them
while(!inFasta.eof()) {
if(m->control_pressed) { break; }
int thisStart = -1; int thisEnd = -1;
if (primers.size() != 0) { inLocations >> name >> thisStart; m->gobble(inLocations); }
if (revPrimer.size() != 0) { inLocations >> name >> thisEnd; m->gobble(inLocations); }
+
+
//cout << seq.getName() << '\t' << thisStart << '\t' << thisEnd << '\t' << seq.getAligned().length() << endl;
//cout << seq.getName() << '\t' << pstart << '\t' << pend << endl;
//cout << "final lengths = \n";
//for (set<int>::iterator it = lengths.begin(); it != lengths.end(); it++) {
- // cout << *it << endl;
+ //cout << *it << endl;
+ // cout << lengths.count(*it) << endl;
// }
return 0;
m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run pre.cluster."); m->mothurOutEndLine();
}else {
+ //override the requested processor count and tell the user why
+ if (processors != 1) { m->mothurOut("When running without group information mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
if (namefile != "") { readNameFile(); }
//reads fasta file and return number of seqs
CommandParameter pmintm("mintm", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmintm);
CommandParameter pmaxtm("maxtm", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxtm);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pprocessors);
- CommandParameter potunumber("otunumber", "Number", "", "-1", "", "", "","",false,true,true); parameters.push_back(potunumber);
+ CommandParameter potunumber("otulabel", "String", "", "", "", "", "","",false,true,true); parameters.push_back(potunumber);
CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
CommandParameter pcutoff("cutoff", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pcutoff);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
try {
string helpString = "";
helpString += "The primer.design allows you to identify sequence fragments that are specific to particular OTUs.\n";
- helpString += "The primer.design command parameters are: list, fasta, name, count, otunumber, cutoff, length, pdiffs, mintm, maxtm, processors and label.\n";
+ helpString += "The primer.design command parameters are: list, fasta, name, count, otulabel, cutoff, length, pdiffs, mintm, maxtm, processors and label.\n";
helpString += "The list parameter allows you to provide a list file and is required.\n";
helpString += "The fasta parameter allows you to provide a fasta file and is required.\n";
helpString += "The name parameter allows you to provide a name file associated with your fasta file.\n";
helpString += "The count parameter allows you to provide a count file associated with your fasta file.\n";
helpString += "The label parameter is used to indicate the label you want to use from your list file.\n";
- helpString += "The otunumber parameter is used to indicate the otu you want to use from your list file. It is required.\n";
+ helpString += "The otulabel parameter is used to indicate the otu you want to use from your list file. It is required.\n";
helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
helpString += "The length parameter is used to indicate the length of the primer. The default is 18.\n";
helpString += "The mintm parameter is used to indicate minimum melting temperature.\n";
temp = validParameter.validFile(parameters, "maxtm", false); if (temp == "not found") { temp = "-1"; }
m->mothurConvert(temp, maxTM);
- temp = validParameter.validFile(parameters, "otunumber", false); if (temp == "not found") { temp = "-1"; }
- m->mothurConvert(temp, otunumber);
- if (otunumber < 1) { m->mothurOut("[ERROR]: You must provide an OTU number, aborting.\n"); abort = true; }
+ //normalize the "not found" sentinel (the same check used for the other parameters here) to "" so the required-parameter check below actually fires
+ otulabel = validParameter.validFile(parameters, "otulabel", false); if (otulabel == "not found") { otulabel = ""; }
+ if (otulabel == "") { m->mothurOut("[ERROR]: You must provide an OTU label, aborting.\n"); abort = true; }
temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
m->setProcessors(temp);
//reads list file and selects the label the users specified or the first label
getListVector();
- if (otunumber > list->getNumBins()) { m->mothurOut("[ERROR]: You selected an OTU number larger than the number of OTUs you have in your list file, quitting.\n"); return 0; }
+ vector<string> binLabels = list->getLabels();
+ int binIndex = findIndex(otulabel, binLabels);
+ //unknown OTU label: release the list like the other early exits in this function do, then quit
+ if (binIndex == -1) { m->mothurOut("[ERROR]: You selected an OTU label that is not in your list file, quitting.\n"); delete list; return 0; }
map<string, int> nameMap;
unsigned long int numSeqs; //used to sanity check the files. numSeqs = total seqs for namefile and uniques for count.
m->mothurOut("Done.\n\n");
- set<string> primers = getPrimer(conSeqs[otunumber-1]);
+ set<string> primers = getPrimer(conSeqs[binIndex]);
if (m->control_pressed) { delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
ofstream outSum;
m->openOutputFile(consSummaryFile, outSum);
- outSum << "PrimerOtu: " << otunumber << " Members: " << list->get(otunumber-1) << endl << "Primers\tminTm\tmaxTm" << endl;
+ outSum << "PrimerOtu: " << otulabel << " Members: " << list->get(binIndex) << endl << "Primers\tminTm\tmaxTm" << endl;
//find min and max melting points
vector<double> minTms;
outSum.close();
//check each otu's conseq for each primer in otunumber
- set<int> otuToRemove = createProcesses(consSummaryFile, minTms, maxTms, primers, conSeqs);
+ set<int> otuToRemove = createProcesses(consSummaryFile, minTms, maxTms, primers, conSeqs, binIndex);
if (m->control_pressed) { delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
mvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
mvariables["[extension]"] = m->getExtension(listfile);
string newListFile = getOutputFileName("list", mvariables);
- outputNames.push_back(newListFile); outputTypes["list"].push_back(newListFile);
- ofstream outList;
- m->openOutputFile(newListFile, outList);
+ ofstream outListTemp;
+ m->openOutputFile(newListFile+".temp", outListTemp);
- outList << list->getLabel() << '\t' << (list->getNumBins()-otuToRemove.size()) << '\t';
+ outListTemp << list->getLabel() << '\t' << (list->getNumBins()-otuToRemove.size()) << '\t';
+ string headers = "label\tnumOtus\t";
for (int j = 0; j < list->getNumBins(); j++) {
if (m->control_pressed) { break; }
//good otus
if (otuToRemove.count(j) == 0) {
string bin = list->get(j);
- if (bin != "") { outList << bin << '\t'; }
+ if (bin != "") { outListTemp << bin << '\t'; headers += binLabels[j] + '\t'; }
}
}
- outList << endl;
+ outListTemp << endl;
+ outListTemp.close();
+
+ ofstream outList;
+ m->openOutputFile(newListFile, outList);
+ outList << headers << endl;
outList.close();
+ m->appendFiles(newListFile+".temp", newListFile);
+ m->mothurRemove(newListFile+".temp");
+ outputNames.push_back(newListFile); outputTypes["list"].push_back(newListFile);
if (m->control_pressed) { delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
}
}
/**************************************************************************************************/
-set<int> PrimerDesignCommand::createProcesses(string newSummaryFile, vector<double>& minTms, vector<double>& maxTms, set<string>& primers, vector<Sequence>& conSeqs) {
+set<int> PrimerDesignCommand::createProcesses(string newSummaryFile, vector<double>& minTms, vector<double>& maxTms, set<string>& primers, vector<Sequence>& conSeqs, int binIndex) {
try {
vector<int> processIDS;
//clear old file because we append in driver
m->mothurRemove(newSummaryFile + toString(getpid()) + ".temp");
- otusToRemove = driver(newSummaryFile + toString(getpid()) + ".temp", minTms, maxTms, primers, conSeqs, lines[process].start, lines[process].end, numBinsProcessed);
+ otusToRemove = driver(newSummaryFile + toString(getpid()) + ".temp", minTms, maxTms, primers, conSeqs, lines[process].start, lines[process].end, numBinsProcessed, binIndex);
string tempFile = toString(getpid()) + ".otus2Remove.temp";
ofstream outTemp;
}
//do my part
- otusToRemove = driver(newSummaryFile, minTms, maxTms, primers, conSeqs, lines[0].start, lines[0].end, numBinsProcessed);
+ otusToRemove = driver(newSummaryFile, minTms, maxTms, primers, conSeqs, lines[0].start, lines[0].end, numBinsProcessed, binIndex);
//force parent to wait until all the processes are done
for (int i=0;i<processIDS.size();i++) {
string extension = toString(i) + ".temp";
m->mothurRemove(newSummaryFile+extension);
- primerDesignData* tempPrimer = new primerDesignData((newSummaryFile+extension), m, lines[i].start, lines[i].end, minTms, maxTms, primers, conSeqs, pdiffs, otunumber, length, i);
+ primerDesignData* tempPrimer = new primerDesignData((newSummaryFile+extension), m, lines[i].start, lines[i].end, minTms, maxTms, primers, conSeqs, pdiffs, binIndex, length, i);
pDataArray.push_back(tempPrimer);
processIDS.push_back(i);
//using the main process as a worker saves time and memory
- otusToRemove = driver(newSummaryFile, minTms, maxTms, primers, conSeqs, lines[0].start, lines[0].end, numBinsProcessed);
+ otusToRemove = driver(newSummaryFile, minTms, maxTms, primers, conSeqs, lines[0].start, lines[0].end, numBinsProcessed, binIndex);
//Wait until all threads have terminated.
WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
}
}
//**********************************************************************************************************************
-set<int> PrimerDesignCommand::driver(string summaryFileName, vector<double>& minTms, vector<double>& maxTms, set<string>& primers, vector<Sequence>& conSeqs, int start, int end, int& numBinsProcessed){
+set<int> PrimerDesignCommand::driver(string summaryFileName, vector<double>& minTms, vector<double>& maxTms, set<string>& primers, vector<Sequence>& conSeqs, int start, int end, int& numBinsProcessed, int binIndex){
try {
set<int> otuToRemove;
if (m->control_pressed) { break; }
- if (i != (otunumber-1)) {
+ if (i != (binIndex)) {
int primerIndex = 0;
for (set<string>::iterator it = primers.begin(); it != primers.end(); it++) {
vector<int> primerStarts;
}
}
//**********************************************************************************************************************
+// Returns the position of the first entry in binLabels that m->isLabelEquivalent reports as
+// matching binLabel, or -1 if no entry matches or control_pressed is set before a match is found.
+int PrimerDesignCommand::findIndex(string binLabel, vector<string> binLabels){
+ try {
+ int index = -1;
+ for (int i = 0; i < binLabels.size(); i++){
+ if (m->control_pressed) { return index; }
+ if (m->isLabelEquivalent(binLabel, binLabels[i])) { index = i; break; }
+ }
+ return index;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PrimerDesignCommand", "findIndex");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
};
bool abort, allLines, large;
- int cutoff, pdiffs, length, otunumber, processors, alignedLength;
- string outputDir, listfile, namefile, countfile, fastafile, label;
+ int cutoff, pdiffs, length, processors, alignedLength;
+ string outputDir, listfile, otulabel, namefile, countfile, fastafile, label;
double minTM, maxTM;
ListVector* list;
vector<string> outputNames;
bool findPrimer(string, string, vector<int>&, vector<int>&, vector<int>&);
int findMeltingPoint(string primer, double&, double&);
- set<int> createProcesses(string, vector<double>&, vector<double>&, set<string>&, vector<Sequence>&);
- set<int> driver(string, vector<double>&, vector<double>&, set<string>&, vector<Sequence>&, int, int, int&);
+ set<int> createProcesses(string, vector<double>&, vector<double>&, set<string>&, vector<Sequence>&, int);
+ set<int> driver(string, vector<double>&, vector<double>&, set<string>&, vector<Sequence>&, int, int, int&, int);
vector< vector< vector<unsigned int> > > driverGetCounts(map<string, int>&, unsigned long int&, vector<unsigned int>&, unsigned long long&, unsigned long long&);
vector<Sequence> createProcessesConSeqs(map<string, int>&, unsigned long int&);
+ int findIndex(string binLabel, vector<string> binLabels);
};
MothurOut* m;
int start;
int end;
- int pdiffs, threadID, otunumber, length;
+ int pdiffs, threadID, length, binIndex;
set<string> primers;
vector<double> minTms, maxTms;
set<int> otusToRemove;
maxTms = max;
primers = pri;
consSeqs = seqs;
- otunumber = otun;
+ binIndex = otun;
length = l;
threadID = tid;
numBinsProcessed = 0;
if (pDataArray->m->control_pressed) { break; }
- if (i != (pDataArray->otunumber-1)) {
+ if (i != (pDataArray->binIndex)) {
int primerIndex = 0;
for (set<string>::iterator it = pDataArray->primers.begin(); it != pDataArray->primers.end(); it++) {
vector<int> primerStarts;
if(seqName != sequence.getName()){
m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
- m->mothurOutEndLine();
+ m->mothurOutEndLine(); m->control_pressed = true;
}
int end;
m->openOutputFile(filename, out);
out <<"OTU\tMean decrease accuracy\n";
for (int i = 0; i < globalVariableRanks.size(); i++) {
- out << m->currentBinLabels[(int)globalVariableRanks[i].first] << '\t' << globalVariableImportanceList[globalVariableRanks[i].first] << endl;
+ out << m->currentSharedBinLabels[(int)globalVariableRanks[i].first] << '\t' << globalVariableImportanceList[globalVariableRanks[i].first] << endl;
}
out.close();
return 0;
else if (type == "count") { pattern = "[filename],pick,[extension]"; }
else if (type == "list") { pattern = "[filename],pick,[extension]"; }
else if (type == "shared") { pattern = "[filename],[tag],pick,[extension]"; }
- else if (type == "design") { pattern = "[filename],pick,[extension]"; }
+ else if (type == "design") { pattern = "[filename],[tag],pick,[extension]"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
return pattern;
m->setGroups(groupsToKeep);
m->clearAllGroups();
m->saveNextLabel = "";
- m->printedHeaders = false;
- m->currentBinLabels.clear();
- m->binLabelsInFile.clear();
+ m->printedSharedHeaders = false;
+ m->currentSharedBinLabels.clear();
+ m->sharedBinLabelsInFile.clear();
InputData input(sharedfile, "sharedfile");
lookup = input.getSharedRAbundVectors();
map<string, string> variables;
variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
variables["[extension]"] = m->getExtension(listfile);
- string outputFileName = getOutputFileName("list", variables);
-
-
- ofstream out;
- m->openOutputFile(outputFileName, out);
-
+
ifstream in;
m->openInputFile(listfile, in);
//read in list vector
ListVector list(in);
+
+ variables["[tag]"] = list.getLabel();
+ string outputFileName = getOutputFileName("list", variables);
+
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+ outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
+
+ vector<string> binLabels = list.getLabels();
+ vector<string> newBinLabels;
//make a new list vector
ListVector newList;
//if there are names in this bin add to new list
if (newNames != "") {
newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
- newList.push_back(newNames);
+ newList.push_back(newNames);
+ newBinLabels.push_back(binLabels[i]);
}
}
//print new listvector
if (newList.getNumBins() != 0) {
wroteSomething = true;
+ newList.setLabels(newBinLabels);
+ newList.printHeaders(out);
newList.print(out);
}
m->gobble(in);
+ out.close();
}
in.close();
- out.close();
- if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine(); }
- outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
+ if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine(); }
m->mothurOut("Removed " + toString(removedCount) + " sequences from your list file."); m->mothurOutEndLine();
return 0;
else if (type == "name") { pattern = "[filename],pick,[extension]"; }
else if (type == "group") { pattern = "[filename],pick,[extension]"; }
else if (type == "count") { pattern = "[filename],pick,[extension]"; }
- else if (type == "list") { pattern = "[filename],pick,[extension]-[filename],[distance],pick,[extension]"; }
+ else if (type == "list") { pattern = "[filename],[distance],pick,[extension]"; }
else if (type == "shared") { pattern = "[filename],[distance],pick,[extension]"; }
else if (type == "alignreport") { pattern = "[filename],pick.align.report"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
map<string, string> variables;
variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
variables["[extension]"] = m->getExtension(listfile);
- string outputFileName = getOutputFileName("list", variables);
- ofstream out;
- m->openOutputFile(outputFileName, out);
ifstream in;
m->openInputFile(listfile, in);
bool wroteSomething = false;
while(!in.eof()){
+
//read in list vector
ListVector list(in);
//make a new list vector
ListVector newList;
newList.setLabel(list.getLabel());
+
+ variables["[distance]"] = list.getLabel();
+ string outputFileName = getOutputFileName("list", variables);
+
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+ outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
+
+ if (m->control_pressed) { in.close(); out.close(); return 0; }
+
+ vector<string> binLabels = list.getLabels();
+ vector<string> newBinLabels;
//for each bin
for (int i = 0; i < list.getNumBins(); i++) {
//if there are names in this bin add to new list
if (newNames != "") {
newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
- newList.push_back(newNames);
+ newList.push_back(newNames);
+ newBinLabels.push_back(binLabels[i]);
}
}
//print new listvector
if (newList.getNumBins() != 0) {
wroteSomething = true;
+ newList.setLabels(newBinLabels);
+ newList.printHeaders(out);
newList.print(out);
}
m->gobble(in);
+ out.close();
}
in.close();
- out.close();
+
if (wroteSomething == false) { m->mothurOut("Your list file contains only sequences from " + taxons + "."); m->mothurOutEndLine(); }
- outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
return 0;
bool wroteSomething = false;
string snumBins = toString(list->getNumBins());
+ vector<string> binLabels = list->getLabels();
+ vector<string> newBinLabels;
for (int i = 0; i < list->getNumBins(); i++) {
if (m->control_pressed) { delete list; return 0;}
if (names.count(m->getSimpleLabel(otuLabel)) == 0) {
newList.push_back(list->get(i));
+ newBinLabels.push_back(binLabels[i]);
}else { removedCount++; }
}
//print new listvector
if (newList.getNumBins() != 0) {
wroteSomething = true;
+ newList.setLabels(newBinLabels);
+ newList.printHeaders(out);
newList.print(out);
}
out.close();
if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } return 0; }
//is this otu on the list
- if (names.count(m->getSimpleLabel(m->currentBinLabels[i])) == 0) {
+ if (names.count(m->getSimpleLabel(m->currentSharedBinLabels[i])) == 0) {
wroteSomething = true;
- newLabels.push_back(m->currentBinLabels[i]);
+ newLabels.push_back(m->currentSharedBinLabels[i]);
for (int j = 0; j < newLookup.size(); j++) { //add this OTU to the new lookup
newLookup[j]->push_back(lookup[j]->getAbundance(i), lookup[j]->getGroup());
}
for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; }
- m->currentBinLabels = newLabels;
+ m->currentSharedBinLabels = newLabels;
newLookup[0]->printHeaders(out);
if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } return 0; }
//is this otu on the list
- if (labels.count(m->getSimpleLabel(m->currentBinLabels[i])) == 0) {
+ if (labels.count(m->getSimpleLabel(m->currentSharedBinLabels[i])) == 0) {
wroteSomething = true;
- newLabels.push_back(m->currentBinLabels[i]);
+ newLabels.push_back(m->currentSharedBinLabels[i]);
for (int j = 0; j < newLookup.size(); j++) { //add this OTU to the new lookup
newLookup[j]->push_back(lookup[j]->getAbundance(i), lookup[j]->getGroup());
}
for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; }
- m->currentBinLabels = newLabels;
+ m->currentSharedBinLabels = newLabels;
newLookup[0]->printHeaders(out);
newList.setLabel(list->getLabel());
int removedCount = 0;
bool wroteSomething = false;
- string snumBins = toString(list->getNumBins());
+ vector<string> binLabels = list->getLabels();
+ vector<string> newLabels;
for (int i = 0; i < list->getNumBins(); i++) {
if (m->control_pressed) { delete list; return 0;}
- //create a label for this otu
- string otuLabel = "Otu";
- string sbinNumber = toString(i+1);
- if (sbinNumber.length() < snumBins.length()) {
- int diff = snumBins.length() - sbinNumber.length();
- for (int h = 0; h < diff; h++) { otuLabel += "0"; }
- }
- otuLabel += sbinNumber;
-
- if (labels.count(m->getSimpleLabel(otuLabel)) == 0) {
+ if (labels.count(m->getSimpleLabel(binLabels[i])) == 0) {
newList.push_back(list->get(i));
+ newLabels.push_back(binLabels[i]);
}else { removedCount++; }
}
//print new listvector
if (newList.getNumBins() != 0) {
wroteSomething = true;
+ newList.setLabels(newLabels);
+ newList.printHeaders(out);
newList.print(out);
}
out.close();
//**********************************************************************************************************************
int RemoveOtusCommand::readListGroup(){
try {
+ InputData* input = new InputData(listfile, "list");
+ ListVector* list = input->getListVector();
+ string lastLabel = list->getLabel();
+
+ //using first label seen if none is provided
+ if (label == "") { label = lastLabel; }
+
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
map<string, string> variables;
ofstream outGroup;
m->openOutputFile(outputGroupFileName, outGroup);
- InputData* input = new InputData(listfile, "list");
- ListVector* list = input->getListVector();
- string lastLabel = list->getLabel();
-
- //using first label seen if none is provided
- if (label == "") { label = lastLabel; }
-
//if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
set<string> labels; labels.insert(label);
set<string> processedLabels;
int numOtus = 0;
//for each bin
+ vector<string> binLabels = list->getLabels();
+ vector<string> newBinLabels;
for (int i = 0; i < list->getNumBins(); i++) {
if (m->control_pressed) { return 0; }
if (!removeBin) {
//if there are no sequences from the groups we want to remove in this bin add to new list, output to groupfile
- newList.push_back(binnames);
+ newList.push_back(binnames);
+ newBinLabels.push_back(binLabels[i]);
outGroup << groupFileOutput;
}else {
numOtus++;
//print new listvector
if (newList.getNumBins() != 0) {
wroteSomething = true;
- newList.print(out);
+ newList.setLabels(newBinLabels);
+ newList.printHeaders(out);
+ newList.print(out);
}
m->mothurOut(newList.getLabel() + " - removed " + toString(numOtus) + " of the " + toString(list->getNumBins()) + " OTUs."); m->mothurOutEndLine();
else if (type == "sabund") { pattern = "[filename],pick,[extension]"; }
else if (type == "group") { pattern = "[filename],pick,[extension]"; }
else if (type == "count") { pattern = "[filename],pick,[extension]"; }
- else if (type == "list") { pattern = "[filename],pick,[extension]"; }
+ else if (type == "list") { pattern = "[filename],[tag],pick,[extension]"; }
else if (type == "shared") { pattern = "[filename],[tag],pick,[extension]"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
//**********************************************************************************************************************
int RemoveRareCommand::processList(){
try {
- string thisOutputDir = outputDir;
- if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
- map<string, string> variables;
- variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
- variables["[extension]"] = m->getExtension(listfile);
- string outputFileName = getOutputFileName("list", variables);
- variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
- variables["[extension]"] = m->getExtension(groupfile);
- string outputGroupFileName = getOutputFileName("group", variables);
- variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
- variables["[extension]"] = m->getExtension(countfile);
- string outputCountFileName = getOutputFileName("count", variables);
-
- ofstream out, outGroup;
- m->openOutputFile(outputFileName, out);
-
- bool wroteSomething = false;
-
+
//you must provide a label because the names in the listfile need to be consistent
string thisLabel = "";
if (allLines) { m->mothurOut("For the listfile you must select one label, using first label in your listfile."); m->mothurOutEndLine(); }
list = input.getListVector(lastLabel);
}
}
+
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
+ map<string, string> variables;
+ variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
+ variables["[extension]"] = m->getExtension(listfile);
+ variables["[tag]"] = list->getLabel();
+ string outputFileName = getOutputFileName("list", variables);
+ variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
+ variables["[extension]"] = m->getExtension(groupfile);
+ string outputGroupFileName = getOutputFileName("group", variables);
+ variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
+ variables["[extension]"] = m->getExtension(countfile);
+ string outputCountFileName = getOutputFileName("count", variables);
+
+ ofstream out, outGroup;
+ m->openOutputFile(outputFileName, out);
+
+ bool wroteSomething = false;
+
//if groupfile is given then use it
GroupMap* groupMap;
}
- if (list != NULL) {
+ if (list != NULL) {
+
+ vector<string> binLabels = list->getLabels();
+ vector<string> newLabels;
+
//make a new list vector
ListVector newList;
newList.setLabel(list->getLabel());
if (binsize > nseqs) { //keep bin
newList.push_back(saveBinNames);
+ newLabels.push_back(binLabels[i]);
if (groupfile != "") { for(int k = 0; k < newGroupFile.size(); k++) { outGroup << newGroupFile[k] << endl; } }
else if (countfile != "") { for(int k = 0; k < newGroupFile.size(); k++) { ct.remove(newGroupFile[k]); } }
}else { if (countfile != "") { for(int k = 0; k < names.size(); k++) { ct.remove(names[k]); } } }
//print new listvector
if (newList.getNumBins() != 0) {
wroteSomething = true;
- newList.print(out);
+ newList.setLabels(newLabels);
+ newList.printHeaders(out);
+ newList.print(out);
}
}
//eliminates zero otus
if (allZero) { for (int j = 0; j < newRabunds.size(); j++) { newRabunds[j].pop_back(); } }
- else { headers.push_back(m->currentBinLabels[i]); }
+ else { headers.push_back(m->currentSharedBinLabels[i]); }
}
}else {
//for each otu
//eliminates otus below rare cutoff
if (totalAbund <= nseqs) { for (int j = 0; j < newRabunds.size(); j++) { newRabunds[j].pop_back(); } }
- else { headers.push_back(m->currentBinLabels[i]); }
+ else { headers.push_back(m->currentSharedBinLabels[i]); }
}
}
//**********************************************************************************************************************
vector<string> RemoveSeqsCommand::setParameters(){
try {
+ CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "FNGLT", "none","fastq",false,false,true); parameters.push_back(pfastq);
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false,true); parameters.push_back(pfasta);
CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname);
CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false,true); parameters.push_back(pcount);
string RemoveSeqsCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, count, list, taxonomy, quality or alignreport file.\n";
+ helpString += "The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, count, list, taxonomy, quality, fastq or alignreport file.\n";
helpString += "It outputs a file containing the sequences NOT in the .accnos file.\n";
- helpString += "The remove.seqs command parameters are accnos, fasta, name, group, count, list, taxonomy, qfile, alignreport and dups. You must provide accnos and at least one of the file parameters.\n";
+ helpString += "The remove.seqs command parameters are accnos, fasta, name, group, count, list, taxonomy, qfile, alignreport, fastq and dups. You must provide accnos and at least one of the file parameters.\n";
helpString += "The dups parameter allows you to remove the entire line from a name file if you remove any name from the line. default=true. \n";
helpString += "The remove.seqs command should be in the following format: remove.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
helpString += "Example remove.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
string pattern = "";
if (type == "fasta") { pattern = "[filename],pick,[extension]"; }
+ else if (type == "fastq") { pattern = "[filename],pick,[extension]"; }
else if (type == "taxonomy") { pattern = "[filename],pick,[extension]"; }
else if (type == "name") { pattern = "[filename],pick,[extension]"; }
else if (type == "group") { pattern = "[filename],pick,[extension]"; }
else if (type == "count") { pattern = "[filename],pick,[extension]"; }
- else if (type == "list") { pattern = "[filename],pick,[extension]"; }
+ else if (type == "list") { pattern = "[filename],[distance],pick,[extension]"; }
else if (type == "qfile") { pattern = "[filename],pick,[extension]"; }
else if (type == "alignreport") { pattern = "[filename],pick.align.report"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
setParameters();
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
+ outputTypes["fastq"] = tempOutNames;
outputTypes["taxonomy"] = tempOutNames;
outputTypes["name"] = tempOutNames;
outputTypes["group"] = tempOutNames;
//initialize outputTypes
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
+ outputTypes["fastq"] = tempOutNames;
outputTypes["taxonomy"] = tempOutNames;
outputTypes["name"] = tempOutNames;
outputTypes["group"] = tempOutNames;
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["count"] = inputDir + it->second; }
}
+
+ it = parameters.find("fastq");
+ //user has given a fastq file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["fastq"] = inputDir + it->second; }
+ }
}
if (qualfile == "not open") { abort = true; }
else if (qualfile == "not found") { qualfile = ""; }
else { m->setQualFile(qualfile); }
+
+ fastqfile = validParameter.validFile(parameters, "fastq", true);
+ if (fastqfile == "not open") { abort = true; }
+ else if (fastqfile == "not found") { fastqfile = ""; }
string usedDups = "true";
string temp = validParameter.validFile(parameters, "dups", false);
m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
}
- if ((countfile == "") && (fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "")) { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, quality, alignreport or list."); m->mothurOutEndLine(); abort = true; }
+ if ((fastqfile == "") && (countfile == "") && (fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "")) { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, quality, alignreport, fastq or list."); m->mothurOutEndLine(); abort = true; }
if (countfile == "") {
if ((fastafile != "") && (namefile == "")) {
//read through the correct file and output lines you want to keep
if (namefile != "") { readName(); }
if (fastafile != "") { readFasta(); }
+ if (fastqfile != "") { readFastq(); }
if (groupfile != "") { readGroup(); }
if (alignfile != "") { readAlign(); }
if (listfile != "") { readList(); }
}
}
//**********************************************************************************************************************
+// Copies the records of fastqfile into a "pick" output file, skipping every record whose
+// sequence name is present in the names set (presumably loaded from the .accnos file - confirm in caller).
+//
+// A record is taken to be four consecutive lines, the first of which starts with '@'.
+// The sequence name is the first whitespace-delimited token of that line without the leading '@',
+// after being passed through m->checkName().
+//
+// Side effects: creates the output file, appends its name to outputTypes["fastq"] and outputNames,
+// and reports the number of removed records through m->mothurOut().
+// Returns 0 in all non-exceptional cases; if m->control_pressed is set while reading, the partial
+// output file is removed before returning. Any std::exception is reported and the process exits.
+int RemoveSeqsCommand::readFastq(){
+    try {
+        bool wroteSomething = false;
+        int removedCount = 0;
+
+        ifstream in;
+        m->openInputFile(fastqfile, in);
+
+        //build the output file name from the input file's location, root name and extension
+        string thisOutputDir = outputDir;
+        if (outputDir == "") {  thisOutputDir += m->hasPath(fastqfile);  }
+        map<string, string> variables;
+        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastqfile));
+        variables["[extension]"] = m->getExtension(fastqfile);
+        string outputFileName = getOutputFileName("fastq", variables);
+        ofstream out;
+        m->openOutputFile(outputFileName, out);
+
+        while(!in.eof()){
+
+            if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
+
+            //read sequence name line
+            string input = m->getline(in); m->gobble(in);
+
+            //lines that do not start a record are skipped; the empty check keeps input[0] well defined
+            if (!input.empty() && input[0] == '@') {
+                string outputString = input + "\n";
+
+                //get the remaining three lines of the record
+                outputString += m->getline(in) + "\n"; m->gobble(in);
+                outputString += m->getline(in) + "\n"; m->gobble(in);
+                outputString += m->getline(in) + "\n"; m->gobble(in);
+
+                //name is the first token of the header line without the leading '@'
+                vector<string> splits = m->splitWhiteSpace(input);
+                string name = splits[0];
+                name = name.substr(1);
+                m->checkName(name);
+
+                //keep only the records whose name is not in the names set
+                if (names.count(name) == 0) {
+                    wroteSomething = true;
+                    out << outputString;
+                }else { removedCount++; }
+            }
+
+            m->gobble(in);
+        }
+        in.close();
+        out.close();
+
+        if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
+        //register the file under the fastq output type (it was previously filed under "fasta")
+        outputTypes["fastq"].push_back(outputFileName); outputNames.push_back(outputFileName);
+
+        m->mothurOut("Removed " + toString(removedCount) + " sequences from your fastq file."); m->mothurOutEndLine();
+
+        return 0;
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "RemoveSeqsCommand", "readFastq");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
int RemoveSeqsCommand::readQual(){
try {
string thisOutputDir = outputDir;
map<string, string> variables;
variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
variables["[extension]"] = m->getExtension(listfile);
- string outputFileName = getOutputFileName("list", variables);
- ofstream out;
- m->openOutputFile(outputFileName, out);
-
+
ifstream in;
m->openInputFile(listfile, in);
//make a new list vector
ListVector newList;
newList.setLabel(list.getLabel());
+
+ variables["[distance]"] = list.getLabel();
+ string outputFileName = getOutputFileName("list", variables);
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+ outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
+
+ vector<string> binLabels = list.getLabels();
+ vector<string> newBinLabels;
+
+ if (m->control_pressed) { in.close(); out.close(); return 0; }
+
//for each bin
for (int i = 0; i < list.getNumBins(); i++) {
if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
//parse out names that are in accnos file
- string binnames = list.get(i);
+ string bin = list.get(i);
+ vector<string> bnames;
+ m->splitAtComma(bin, bnames);
string newNames = "";
- while (binnames.find_first_of(',') != -1) {
- string name = binnames.substr(0,binnames.find_first_of(','));
- binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
-
- //if that name is in the .accnos file, add it
+ for (int j = 0; j < bnames.size(); j++) {
+ string name = bnames[j];
+ //if that name is not in the .accnos file, keep it
if (names.count(name) == 0) { newNames += name + ","; }
else { removedCount++; }
- }
-
- //get last name
- if (names.count(binnames) == 0) { newNames += binnames + ","; }
- else { removedCount++; }
+ }
//if there are names in this bin add to new list
if (newNames != "") {
newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
- newList.push_back(newNames);
+ newList.push_back(newNames);
+ newBinLabels.push_back(binLabels[i]);
}
}
//print new listvector
if (newList.getNumBins() != 0) {
wroteSomething = true;
+ newList.setLabels(newBinLabels);
+ newList.printHeaders(out);
newList.print(out);
+
}
m->gobble(in);
+ out.close();
}
in.close();
- out.close();
+
if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
- outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
m->mothurOut("Removed " + toString(removedCount) + " sequences from your list file."); m->mothurOutEndLine();
private:
set<string> names;
- string accnosfile, fastafile, namefile, groupfile, countfile, alignfile, listfile, taxfile, qualfile, outputDir;
+ string accnosfile, fastafile, fastqfile, namefile, groupfile, countfile, alignfile, listfile, taxfile, qualfile, outputDir;
bool abort, dups;
vector<string> outputNames;
map<string, string> uniqueMap;
int readFasta();
+ int readFastq();
int readName();
int readGroup();
int readCount();
while(!inputGroups.eof()){
if (m->control_pressed) { goodGroupOut.close(); inputGroups.close(); m->mothurRemove(goodGroupFile); return 0; }
- inputGroups >> seqName; m->gobble(inputGroups); inputGroups >> group;
+ inputGroups >> seqName; m->gobble(inputGroups); inputGroups >> group; m->gobble(inputGroups);
it = badSeqNames.find(seqName);
if(it != badSeqNames.end()){
else{
goodGroupOut << seqName << '\t' << group << endl;
}
- m->gobble(inputGroups);
}
if (m->control_pressed) { goodGroupOut.close(); inputGroups.close(); m->mothurRemove(goodGroupFile); return 0; }
//make a new list vector
ListVector newList;
newList.setLabel(list.getLabel());
+ vector<string> binLabels = list.getLabels();
+ vector<string> newLabels;
//for each bin
for (int i = 0; i < list.getNumBins(); i++) {
//if there are names in this bin add to new list
if (newNames != "") {
newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
- newList.push_back(newNames);
+ newList.push_back(newNames);
+ newLabels.push_back(binLabels[i]);
}
}
//print new listvector
if (newList.getNumBins() != 0) {
wroteSomething = true;
+ newList.setLabels(newLabels);
+ if (!m->printedListHeaders) { newList.printHeaders(out); }
newList.print(out);
}
#include "seqnoise.h"
#include "sequence.hpp"
+#include "listvector.hpp"
+#include "inputdata.h"
#define MIN_DELTA 1.0e-6
#define MIN_ITER 20
ifstream listFile;
m->openInputFile(listFileName, listFile);
- double threshold;
- int numOTUs;
- string line = "";
- bool adjustCutoff = true;
-
- if(listFile.peek() == 'u'){ m->getline(listFile); }
- while(listFile){
- listFile >> threshold;
-
- if(threshold < cutOff){
- line = m->getline(listFile); m->gobble(listFile);
- }
- else{
- adjustCutoff = false;
- listFile >> numOTUs;
- otuFreq.resize(numOTUs, 0);
-
- for(int i=0;i<numOTUs;i++){
-
- if (m->control_pressed) { return 0; }
-
- string otu;
- listFile >> otu;
-
- int count = 0;
-
- string number = "";
-
- for(int j=0;j<otu.size();j++){
- if(otu[j] != ','){
- number += otu[j];
- }
- else{
- int index = atoi(number.c_str());
- otuData[index] = i;
- count++;
- number = "";
- }
- }
-
- int index = atoi(number.c_str());
- otuData[index] = i;
- count++;
-
- otuFreq[i] = count;
- }
-
- otuBySeqLookUp.resize(numOTUs);
-
- int numSeqs = otuData.size();
-
- for(int i=0;i<numSeqs;i++){
- if (m->control_pressed) { return 0; }
- otuBySeqLookUp[otuData[i]].push_back(i);
- }
- for(int i=0;i<numOTUs;i++){
- if (m->control_pressed) { return 0; }
- for(int j=otuBySeqLookUp[i].size();j<numSeqs;j++){
- otuBySeqLookUp[i].push_back(0);
- }
- }
-
- break;
- }
+ bool adjustCutoff = true;
+ string lastLabel = "";
+
+ while(!listFile.eof()){
+
+ ListVector list(listFile); m->gobble(listFile); //10/18/13 - changed to reading with ListVector to accommodate changes to the listfile format, i.e. adding header labels.
+
+ string thisLabel = list.getLabel();
+ lastLabel = thisLabel;
+
+ if (thisLabel == "unique") {} //skip to next label in listfile
+ else {
+ double threshold;
+ m->mothurConvert(thisLabel, threshold);
+
+ if(threshold < cutOff){} //skip to next label in listfile
+ else{
+ adjustCutoff = false;
+ int numOTUs = list.getNumBins();
+ otuFreq.resize(numOTUs, 0);
+
+ for(int i=0;i<numOTUs;i++){
+
+ if (m->control_pressed) { return 0; }
+
+ string otu = list.get(i);
+ int count = 0;
+ string number = "";
+
+ for(int j=0;j<otu.size();j++){
+ if(otu[j] != ','){
+ number += otu[j];
+ }
+ else{
+ int index = atoi(number.c_str());
+ otuData[index] = i;
+ count++;
+ number = "";
+ }
+ }
+
+ int index = atoi(number.c_str());
+ otuData[index] = i;
+ count++;
+
+ otuFreq[i] = count;
+ }
+
+ otuBySeqLookUp.resize(numOTUs);
+
+ int numSeqs = otuData.size();
+
+ for(int i=0;i<numSeqs;i++){
+ if (m->control_pressed) { return 0; }
+ otuBySeqLookUp[otuData[i]].push_back(i);
+ }
+ for(int i=0;i<numOTUs;i++){
+ if (m->control_pressed) { return 0; }
+ for(int j=otuBySeqLookUp[i].size();j<numSeqs;j++){
+ otuBySeqLookUp[i].push_back(0);
+ }
+ }
+
+ break;
+ }
+ }
}
listFile.close();
//the listfile does not contain a threshold greater than the cutoff so use highest value
if (adjustCutoff) {
- istringstream iss (line,istringstream::in);
-
- iss >> numOTUs;
- otuFreq.resize(numOTUs, 0);
+
+ InputData input(listFileName, "list");
+ ListVector* list = input.getListVector(lastLabel);
+
+ int numOTUs = list->getNumBins();
+ otuFreq.resize(numOTUs, 0);
for(int i=0;i<numOTUs;i++){
if (m->control_pressed) { return 0; }
- string otu;
- iss >> otu;
+ string otu = list->get(i);
int count = 0;
-
string number = "";
for(int j=0;j<otu.size();j++){
}
}
+ delete list;
}
return 0;
}
}
- while (!fastaString.eof()) { char c = fastaString.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
+ //while (!fastaString.eof()) { char c = fastaString.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
+ comment = getCommentString(fastaString);
int numAmbig = 0;
sequence = getSequenceString(fastaString, numAmbig);
}
}
- while (!fastaString.eof()) { char c = fastaString.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
+ //while (!fastaString.eof()) { char c = fastaString.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
+ comment = getCommentString(fastaString);
int numAmbig = 0;
sequence = getSequenceString(fastaString, numAmbig);
}
}
- //read real sequence
- while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
+ //while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
+ comment = getCommentString(fastaFile);
int numAmbig = 0;
sequence = getSequenceString(fastaFile, numAmbig);
//read info after sequence name
while (!fastaFile.eof()) {
char c = fastaFile.get();
- if (c == 10 || c == 13 || c == -1){ break; }
+ if (c == 10 || c == 13 || c == -1){ break; }
extraInfo += c;
- }
+ }
+
+ comment = extraInfo;
int numAmbig = 0;
sequence = getSequenceString(fastaFile, numAmbig);
}
}
- //read real sequence
- while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
+ //while (!fastaFile.eof()) { char c = fastaFile.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
+ comment = getCommentString(fastaFile);
int numAmbig = 0;
sequence = getSequenceString(fastaFile, numAmbig);
string Sequence::getCommentString(ifstream& fastaFile) {
try {
char letter;
- string sequence = "";
+ string temp = "";
while(fastaFile){
letter=fastaFile.get();
- if((letter == '\r') || (letter == '\n')){
+ if((letter == '\r') || (letter == '\n') || letter == -1){
m->gobble(fastaFile); //in case its a \r\n situation
break;
- }
+ }else {
+ temp += letter;
+ }
}
- return sequence;
+ return temp;
}
catch(exception& e) {
m->errorOut(e, "Sequence", "getCommentString");
string Sequence::getCommentString(istringstream& fastaFile) {
try {
char letter;
- string sequence = "";
+ string temp = "";
while(fastaFile){
letter=fastaFile.get();
- if((letter == '\r') || (letter == '\n')){
+ if((letter == '\r') || (letter == '\n') || letter == -1){
m->gobble(fastaFile); //in case its a \r\n situation
break;
- }
+ }else {
+ temp += letter;
+ }
}
- return sequence;
+ return temp;
}
catch(exception& e) {
m->errorOut(e, "Sequence", "getCommentString");
unaligned = "";
aligned = "";
pairwise = "";
+ comment = "";
numBases = 0;
alignmentLength = 0;
void Sequence::printSequence(ostream& out){
- out << ">" << name << endl;
+ out << ">" << name << comment << endl;
if(isAligned){
out << aligned << endl;
}
string unaligned;
string aligned;
string pairwise;
+ string comment;
int numBases;
int alignmentLength;
bool isAligned;
else if (summaryfile == "not found") { summaryfile = ""; }
if (summaryfile != "") { m->setSummaryFile(summaryfile); }
-
- processors = validParameter.validFile(parameters, "processors", false);
- if (processors == "not found") { processors = "1"; }
- if (processors != "") { m->setProcessors(processors); }
+ string temp = validParameter.validFile(parameters, "processors", false);
+ if (temp == "not found"){ temp = m->getProcessors(); }
+ m->setProcessors(temp);
clearTypes = validParameter.validFile(parameters, "clear", false);
if (clearTypes == "not found") { clearTypes = ""; }
-/*
- * sffinfocommand.cpp
- * Mothur
- *
- * Created by westcott on 7/7/10.
- * Copyright 2010 Schloss Lab. All rights reserved.
- *
- */
-
-#include "sffinfocommand.h"
-#include "endiannessmacros.h"
-#include "trimoligos.h"
-#include "sequence.hpp"
-#include "qualityscores.h"
-
-//**********************************************************************************************************************
-vector<string> SffInfoCommand::setParameters(){
- try {
- CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(psff);
- CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(poligos);
- CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos);
- CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "","",false,false); parameters.push_back(psfftxt);
- CommandParameter pflow("flow", "Boolean", "", "T", "", "", "","flow",false,false); parameters.push_back(pflow);
- CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(ptrim);
- CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "","fasta",false,false); parameters.push_back(pfasta);
- CommandParameter pqfile("qfile", "Boolean", "", "T", "", "", "","qfile",false,false); parameters.push_back(pqfile);
- CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
- CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);
- CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
- CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
- CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
- CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
- CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
-
- vector<string> myArray;
- for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
- return myArray;
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "setParameters");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-string SffInfoCommand::getHelpString(){
- try {
- string helpString = "";
- helpString += "The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file.\n";
- helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, oligos, bdiffs, tdiffs, ldiffs, sdiffs, pdiffs and trim. sff is required. \n";
- helpString += "The sff parameter allows you to enter the sff file you would like to extract data from. You may enter multiple files by separating them by -'s.\n";
- helpString += "The fasta parameter allows you to indicate if you would like a fasta formatted file generated. Default=True. \n";
- helpString += "The qfile parameter allows you to indicate if you would like a quality file generated. Default=True. \n";
- helpString += "The oligos parameter allows you to provide an oligos file to split your sff file into separate sff files by barcode. \n";
- helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
- helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
- helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
- helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
- helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
- helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated. Default=True. \n";
- helpString += "The sfftxt parameter allows you to indicate if you would like a sff.txt file generated. Default=False. \n";
- helpString += "If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n";
- helpString += "The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values. Default=True. \n";
- helpString += "The accnos parameter allows you to provide a accnos file containing the names of the sequences you would like extracted. You may enter multiple files by separating them by -'s. \n";
- helpString += "Example sffinfo(sff=mySffFile.sff, trim=F).\n";
- helpString += "Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n";
- return helpString;
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "getHelpString");
- exit(1);
- }
-}
-
-//**********************************************************************************************************************
-string SffInfoCommand::getOutputPattern(string type) {
- try {
- string pattern = "";
-
- if (type == "fasta") { pattern = "[filename],fasta-[filename],[tag],fasta"; }
- else if (type == "flow") { pattern = "[filename],flow"; }
- else if (type == "sfftxt") { pattern = "[filename],sff.txt"; }
- else if (type == "sff") { pattern = "[filename],[group],sff"; }
- else if (type == "qfile") { pattern = "[filename],qual-[filename],[tag],qual"; }
- else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
-
- return pattern;
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "getOutputPattern");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-SffInfoCommand::SffInfoCommand(){
- try {
- abort = true; calledHelp = true;
- setParameters();
- vector<string> tempOutNames;
- outputTypes["fasta"] = tempOutNames;
- outputTypes["flow"] = tempOutNames;
- outputTypes["sfftxt"] = tempOutNames;
- outputTypes["qfile"] = tempOutNames;
- outputTypes["sff"] = tempOutNames;
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-
-SffInfoCommand::SffInfoCommand(string option) {
- try {
- abort = false; calledHelp = false;
- hasAccnos = false; hasOligos = false;
- split = 1;
-
- //allow user to run help
- if(option == "help") { help(); abort = true; calledHelp = true; }
- else if(option == "citation") { citation(); abort = true; calledHelp = true;}
-
- else {
- //valid paramters for this command
- vector<string> myArray = setParameters();
-
- OptionParser parser(option);
- map<string, string> parameters = parser.getParameters();
-
- ValidParameters validParameter;
- //check to make sure all parameters are valid for command
- for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
- if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
- }
-
- //initialize outputTypes
- vector<string> tempOutNames;
- outputTypes["fasta"] = tempOutNames;
- outputTypes["flow"] = tempOutNames;
- outputTypes["sfftxt"] = tempOutNames;
- outputTypes["qfile"] = tempOutNames;
- outputTypes["sff"] = tempOutNames;
-
- //if the user changes the output directory command factory will send this info to us in the output parameter
- outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
-
- //if the user changes the input directory command factory will send this info to us in the output parameter
- string inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; }
-
- sffFilename = validParameter.validFile(parameters, "sff", false);
- if (sffFilename == "not found") { sffFilename = ""; }
- else {
- m->splitAtDash(sffFilename, filenames);
-
- //go through files and make sure they are good, if not, then disregard them
- for (int i = 0; i < filenames.size(); i++) {
- bool ignore = false;
- if (filenames[i] == "current") {
- filenames[i] = m->getSFFFile();
- if (filenames[i] != "") { m->mothurOut("Using " + filenames[i] + " as input file for the sff parameter where you had given current."); m->mothurOutEndLine(); }
- else {
- m->mothurOut("You have no current sfffile, ignoring current."); m->mothurOutEndLine(); ignore=true;
- //erase from file list
- filenames.erase(filenames.begin()+i);
- i--;
- }
- }
-
- if (!ignore) {
- if (inputDir != "") {
- string path = m->hasPath(filenames[i]);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { filenames[i] = inputDir + filenames[i]; }
- }
-
- ifstream in;
- int ableToOpen = m->openInputFile(filenames[i], in, "noerror");
-
- //if you can't open it, try default location
- if (ableToOpen == 1) {
- if (m->getDefaultPath() != "") { //default path is set
- string tryPath = m->getDefaultPath() + m->getSimpleName(filenames[i]);
- m->mothurOut("Unable to open " + filenames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
- ifstream in2;
- ableToOpen = m->openInputFile(tryPath, in2, "noerror");
- in2.close();
- filenames[i] = tryPath;
- }
- }
-
- //if you can't open it, try default location
- if (ableToOpen == 1) {
- if (m->getOutputDir() != "") { //default path is set
- string tryPath = m->getOutputDir() + m->getSimpleName(filenames[i]);
- m->mothurOut("Unable to open " + filenames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
- ifstream in2;
- ableToOpen = m->openInputFile(tryPath, in2, "noerror");
- in2.close();
- filenames[i] = tryPath;
- }
- }
-
- in.close();
-
- if (ableToOpen == 1) {
- m->mothurOut("Unable to open " + filenames[i] + ". It will be disregarded."); m->mothurOutEndLine();
- //erase from file list
- filenames.erase(filenames.begin()+i);
- i--;
- }else { m->setSFFFile(filenames[i]); }
- }
- }
-
- //make sure there is at least one valid file left
- if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
- }
-
- accnosName = validParameter.validFile(parameters, "accnos", false);
- if (accnosName == "not found") { accnosName = ""; }
- else {
- hasAccnos = true;
- m->splitAtDash(accnosName, accnosFileNames);
-
- //go through files and make sure they are good, if not, then disregard them
- for (int i = 0; i < accnosFileNames.size(); i++) {
- bool ignore = false;
- if (accnosFileNames[i] == "current") {
- accnosFileNames[i] = m->getAccnosFile();
- if (accnosFileNames[i] != "") { m->mothurOut("Using " + accnosFileNames[i] + " as input file for the accnos parameter where you had given current."); m->mothurOutEndLine(); }
- else {
- m->mothurOut("You have no current accnosfile, ignoring current."); m->mothurOutEndLine(); ignore=true;
- //erase from file list
- accnosFileNames.erase(accnosFileNames.begin()+i);
- i--;
- }
- }
-
- if (!ignore) {
-
- if (inputDir != "") {
- string path = m->hasPath(accnosFileNames[i]);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { accnosFileNames[i] = inputDir + accnosFileNames[i]; }
- }
-
- ifstream in;
- int ableToOpen = m->openInputFile(accnosFileNames[i], in, "noerror");
-
- //if you can't open it, try default location
- if (ableToOpen == 1) {
- if (m->getDefaultPath() != "") { //default path is set
- string tryPath = m->getDefaultPath() + m->getSimpleName(accnosFileNames[i]);
- m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
- ifstream in2;
- ableToOpen = m->openInputFile(tryPath, in2, "noerror");
- in2.close();
- accnosFileNames[i] = tryPath;
- }
- }
- //if you can't open it, try default location
- if (ableToOpen == 1) {
- if (m->getOutputDir() != "") { //default path is set
- string tryPath = m->getOutputDir() + m->getSimpleName(accnosFileNames[i]);
- m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
- ifstream in2;
- ableToOpen = m->openInputFile(tryPath, in2, "noerror");
- in2.close();
- accnosFileNames[i] = tryPath;
- }
- }
- in.close();
-
- if (ableToOpen == 1) {
- m->mothurOut("Unable to open " + accnosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
- //erase from file list
- accnosFileNames.erase(accnosFileNames.begin()+i);
- i--;
- }
- }
- }
-
- //make sure there is at least one valid file left
- if (accnosFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
- }
-
- oligosfile = validParameter.validFile(parameters, "oligos", false);
- if (oligosfile == "not found") { oligosfile = ""; }
- else {
- hasOligos = true;
- m->splitAtDash(oligosfile, oligosFileNames);
-
- //go through files and make sure they are good, if not, then disregard them
- for (int i = 0; i < oligosFileNames.size(); i++) {
- bool ignore = false;
- if (oligosFileNames[i] == "current") {
- oligosFileNames[i] = m->getOligosFile();
- if (oligosFileNames[i] != "") { m->mothurOut("Using " + oligosFileNames[i] + " as input file for the accnos parameter where you had given current."); m->mothurOutEndLine(); }
- else {
- m->mothurOut("You have no current oligosfile, ignoring current."); m->mothurOutEndLine(); ignore=true;
- //erase from file list
- oligosFileNames.erase(oligosFileNames.begin()+i);
- i--;
- }
- }
-
- if (!ignore) {
-
- if (inputDir != "") {
- string path = m->hasPath(oligosFileNames[i]);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { oligosFileNames[i] = inputDir + oligosFileNames[i]; }
- }
-
- ifstream in;
- int ableToOpen = m->openInputFile(oligosFileNames[i], in, "noerror");
-
- //if you can't open it, try default location
- if (ableToOpen == 1) {
- if (m->getDefaultPath() != "") { //default path is set
- string tryPath = m->getDefaultPath() + m->getSimpleName(oligosFileNames[i]);
- m->mothurOut("Unable to open " + oligosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
- ifstream in2;
- ableToOpen = m->openInputFile(tryPath, in2, "noerror");
- in2.close();
- oligosFileNames[i] = tryPath;
- }
- }
- //if you can't open it, try default location
- if (ableToOpen == 1) {
- if (m->getOutputDir() != "") { //default path is set
- string tryPath = m->getOutputDir() + m->getSimpleName(oligosFileNames[i]);
- m->mothurOut("Unable to open " + oligosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
- ifstream in2;
- ableToOpen = m->openInputFile(tryPath, in2, "noerror");
- in2.close();
- oligosFileNames[i] = tryPath;
- }
- }
- in.close();
-
- if (ableToOpen == 1) {
- m->mothurOut("Unable to open " + oligosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
- //erase from file list
- oligosFileNames.erase(oligosFileNames.begin()+i);
- i--;
- }
- }
- }
-
- //make sure there is at least one valid file left
- if (oligosFileNames.size() == 0) { m->mothurOut("no valid oligos files."); m->mothurOutEndLine(); abort = true; }
- }
-
- if (hasOligos) {
- split = 2;
- if (oligosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a oligos file, you must have one for each sff file."); m->mothurOutEndLine(); }
- }
-
- if (hasAccnos) {
- if (accnosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a accnos file, you must have one for each sff file."); m->mothurOutEndLine(); }
- }
-
- string temp = validParameter.validFile(parameters, "qfile", false); if (temp == "not found"){ temp = "T"; }
- qual = m->isTrue(temp);
-
- temp = validParameter.validFile(parameters, "fasta", false); if (temp == "not found"){ temp = "T"; }
- fasta = m->isTrue(temp);
-
- temp = validParameter.validFile(parameters, "flow", false); if (temp == "not found"){ temp = "T"; }
- flow = m->isTrue(temp);
-
- temp = validParameter.validFile(parameters, "trim", false); if (temp == "not found"){ temp = "T"; }
- trim = m->isTrue(temp);
-
- temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }
- m->mothurConvert(temp, bdiffs);
-
- temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
- m->mothurConvert(temp, pdiffs);
-
- temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
- m->mothurConvert(temp, ldiffs);
-
- temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }
- m->mothurConvert(temp, sdiffs);
-
- temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); }
- m->mothurConvert(temp, tdiffs);
-
- if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }
-
- temp = validParameter.validFile(parameters, "sfftxt", false);
- if (temp == "not found") { temp = "F"; sfftxt = false; sfftxtFilename = ""; }
- else if (m->isTrue(temp)) { sfftxt = true; sfftxtFilename = ""; }
- else {
- //you are a filename
- if (inputDir != "") {
- map<string,string>::iterator it = parameters.find("sfftxt");
- //user has given a template file
- if(it != parameters.end()){
- string path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["sfftxt"] = inputDir + it->second; }
- }
- }
-
- sfftxtFilename = validParameter.validFile(parameters, "sfftxt", true);
- if (sfftxtFilename == "not found") { sfftxtFilename = ""; }
- else if (sfftxtFilename == "not open") { sfftxtFilename = ""; }
- }
-
- if ((sfftxtFilename == "") && (filenames.size() == 0)) {
- //if there is a current sff file, use it
- string filename = m->getSFFFile();
- if (filename != "") { filenames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the sff parameter."); m->mothurOutEndLine(); }
- else { m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true; }
- }
-
-
- }
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-int SffInfoCommand::execute(){
- try {
- if (abort == true) { if (calledHelp) { return 0; } return 2; }
-
- for (int s = 0; s < filenames.size(); s++) {
-
- if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
-
- int start = time(NULL);
-
- filenames[s] = m->getFullPathName(filenames[s]);
- m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();
-
- string accnos = "";
- if (hasAccnos) { accnos = accnosFileNames[s]; }
-
- string oligos = "";
- if (hasOligos) { oligos = oligosFileNames[s]; }
-
- int numReads = extractSffInfo(filenames[s], accnos, oligos);
-
- m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + ".");
- }
-
- if (sfftxtFilename != "") { parseSffTxt(); }
-
- if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
-
- //set fasta file as new current fastafile
- string current = "";
- itTypes = outputTypes.find("fasta");
- if (itTypes != outputTypes.end()) {
- if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
- }
-
- itTypes = outputTypes.find("qfile");
- if (itTypes != outputTypes.end()) {
- if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
- }
-
- itTypes = outputTypes.find("flow");
- if (itTypes != outputTypes.end()) {
- if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFlowFile(current); }
- }
-
- //report output filenames
- m->mothurOutEndLine();
- m->mothurOut("Output File Names: "); m->mothurOutEndLine();
- for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
- m->mothurOutEndLine();
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "execute");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){
- try {
- currentFileName = input;
- if (outputDir == "") { outputDir += m->hasPath(input); }
-
- if (accnos != "") { readAccnosFile(accnos); }
- else { seqNames.clear(); }
-
- if (oligos != "") { readOligos(oligos); split = 2; }
-
- ofstream outSfftxt, outFasta, outQual, outFlow;
- string outFastaFileName, outQualFileName;
- string rootName = outputDir + m->getRootName(m->getSimpleName(input));
- if(rootName.find_last_of(".") == rootName.npos){ rootName += "."; }
-
- map<string, string> variables;
- variables["[filename]"] = rootName;
- string sfftxtFileName = getOutputFileName("sfftxt",variables);
- string outFlowFileName = getOutputFileName("flow",variables);
- if (!trim) { variables["[tag]"] = "raw"; }
- outFastaFileName = getOutputFileName("fasta",variables);
- outQualFileName = getOutputFileName("qfile",variables);
-
- if (sfftxt) { m->openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint); outputNames.push_back(sfftxtFileName); outputTypes["sfftxt"].push_back(sfftxtFileName); }
- if (fasta) { m->openOutputFile(outFastaFileName, outFasta); outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }
- if (qual) { m->openOutputFile(outQualFileName, outQual); outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName); }
- if (flow) { m->openOutputFile(outFlowFileName, outFlow); outputNames.push_back(outFlowFileName); outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName); }
-
- ifstream in;
- in.open(input.c_str(), ios::binary);
-
- CommonHeader header;
- readCommonHeader(in, header);
-
- int count = 0;
- mycount = 0;
-
- //check magic number and version
- if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; }
- if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; }
-
- //print common header
- if (sfftxt) { printCommonHeader(outSfftxt, header); }
- if (flow) { outFlow << header.numFlowsPerRead << endl; }
-
- //read through the sff file
- while (!in.eof()) {
-
- bool print = true;
-
- //read data
- seqRead read; Header readheader;
- readSeqData(in, read, header.numFlowsPerRead, readheader);
- bool okay = sanityCheck(readheader, read);
- if (!okay) { break; }
-
- //if you have provided an accosfile and this seq is not in it, then dont print
- if (seqNames.size() != 0) { if (seqNames.count(readheader.name) == 0) { print = false; } }
-
- //print
- if (print) {
- if (sfftxt) { printHeader(outSfftxt, readheader); printSffTxtSeqData(outSfftxt, read, readheader); }
- if (fasta) { printFastaSeqData(outFasta, read, readheader); }
- if (qual) { printQualSeqData(outQual, read, readheader); }
- if (flow) { printFlowSeqData(outFlow, read, readheader); }
- }
-
- count++;
- mycount++;
-
- //report progress
- if((count+1) % 10000 == 0){ m->mothurOut(toString(count+1)); m->mothurOutEndLine(); }
-
- if (m->control_pressed) { count = 0; break; }
-
- if (count >= header.numReads) { break; }
- //if (count >= 100) { break; }
- }
-
- //report progress
- if (!m->control_pressed) { if((count) % 10000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } }
-
- in.close();
-
- if (sfftxt) { outSfftxt.close(); }
- if (fasta) { outFasta.close(); }
- if (qual) { outQual.close(); }
- if (flow) { outFlow.close(); }
-
- if (split > 1) {
- //create new common headers for each file with the correct number of reads
- adjustCommonHeader(header);
-
- map<string, string>::iterator it;
- set<string> namesToRemove;
- for(int i=0;i<filehandles.size();i++){
- for(int j=0;j<filehandles[0].size();j++){
- if (filehandles[i][j] != "") {
- if (namesToRemove.count(filehandles[i][j]) == 0) {
- if(m->isBlank(filehandles[i][j])){
- m->mothurRemove(filehandles[i][j]);
- m->mothurRemove(filehandlesHeaders[i][j]);
- namesToRemove.insert(filehandles[i][j]);
- }
- }
- }
- }
- }
-
- //append new header to reads
- for (int i = 0; i < filehandles.size(); i++) {
- for (int j = 0; j < filehandles[i].size(); j++) {
- m->appendFiles(filehandles[i][j], filehandlesHeaders[i][j]);
- m->renameFile(filehandlesHeaders[i][j], filehandles[i][j]);
- m->mothurRemove(filehandlesHeaders[i][j]);
- if (numSplitReads[i][j] == 0) { m->mothurRemove(filehandles[i][j]); }
- }
- }
-
- //remove names for outputFileNames, just cleans up the output
- for(int i = 0; i < outputNames.size(); i++) {
- if (namesToRemove.count(outputNames[i]) != 0) {
- outputNames.erase(outputNames.begin()+i);
- i--;
- }
- }
-
- if(m->isBlank(noMatchFile)){ m->mothurRemove(noMatchFile); }
- else { outputNames.push_back(noMatchFile); outputTypes["sff"].push_back(noMatchFile); }
- }
-
- return count;
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "extractSffInfo");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){
- try {
-
- if (!in.eof()) {
-
- //read magic number
- char buffer[4];
- in.read(buffer, 4);
- header.magicNumber = be_int4(*(unsigned int *)(&buffer));
-
- //read version
- char buffer9[4];
- in.read(buffer9, 4);
- header.version = "";
- for (int i = 0; i < 4; i++) { header.version += toString((int)(buffer9[i])); }
-
- //read offset
- char buffer2 [8];
- in.read(buffer2, 8);
- header.indexOffset = be_int8(*(unsigned long long *)(&buffer2));
-
- //read index length
- char buffer3 [4];
- in.read(buffer3, 4);
- header.indexLength = be_int4(*(unsigned int *)(&buffer3));
-
- //read num reads
- char buffer4 [4];
- in.read(buffer4, 4);
- header.numReads = be_int4(*(unsigned int *)(&buffer4));
-
- if (m->debug) { m->mothurOut("[DEBUG]: numReads = " + toString(header.numReads) + "\n"); }
-
- //read header length
- char buffer5 [2];
- in.read(buffer5, 2);
- header.headerLength = be_int2(*(unsigned short *)(&buffer5));
-
- //read key length
- char buffer6 [2];
- in.read(buffer6, 2);
- header.keyLength = be_int2(*(unsigned short *)(&buffer6));
-
- //read number of flow reads
- char buffer7 [2];
- in.read(buffer7, 2);
- header.numFlowsPerRead = be_int2(*(unsigned short *)(&buffer7));
-
- //read format code
- char buffer8 [1];
- in.read(buffer8, 1);
- header.flogramFormatCode = (int)(buffer8[0]);
-
- //read flow chars
- char* tempBuffer = new char[header.numFlowsPerRead];
- in.read(&(*tempBuffer), header.numFlowsPerRead);
- header.flowChars = tempBuffer;
- if (header.flowChars.length() > header.numFlowsPerRead) { header.flowChars = header.flowChars.substr(0, header.numFlowsPerRead); }
- delete[] tempBuffer;
-
- //read key
- char* tempBuffer2 = new char[header.keyLength];
- in.read(&(*tempBuffer2), header.keyLength);
- header.keySequence = tempBuffer2;
- if (header.keySequence.length() > header.keyLength) { header.keySequence = header.keySequence.substr(0, header.keyLength); }
- delete[] tempBuffer2;
-
- /* Pad to 8 chars */
- unsigned long long spotInFile = in.tellg();
- unsigned long long spot = (spotInFile + 7)& ~7; // ~ inverts
- in.seekg(spot);
-
- }else{
- m->mothurOut("Error reading sff common header."); m->mothurOutEndLine();
- }
-
- return 0;
-
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "readCommonHeader");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-int SffInfoCommand::adjustCommonHeader(CommonHeader header){
- try {
-
- char* mybuffer = new char[4];
- ifstream in;
- in.open(currentFileName.c_str(), ios::binary);
-
- //magic number
- in.read(mybuffer,4);
- for (int i = 0; i < filehandlesHeaders.size(); i++) {
- for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
- ofstream out;
- m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
- out.write(mybuffer, in.gcount());
- out.close();
- }
- }
- delete[] mybuffer;
-
- //version
- mybuffer = new char[4];
- in.read(mybuffer,4);
- for (int i = 0; i < filehandlesHeaders.size(); i++) {
- for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
- ofstream out;
- m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
- out.write(mybuffer, in.gcount());
- out.close();
- }
- }
- delete[] mybuffer;
-
- //offset
- mybuffer = new char[8];
- in.read(mybuffer,8);
- for (int i = 0; i < filehandlesHeaders.size(); i++) {
- for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
- unsigned long long offset = 0;
- char* thisbuffer = new char[8];
- thisbuffer[0] = (offset >> 56) & 0xFF;
- thisbuffer[1] = (offset >> 48) & 0xFF;
- thisbuffer[2] = (offset >> 40) & 0xFF;
- thisbuffer[3] = (offset >> 32) & 0xFF;
- thisbuffer[4] = (offset >> 24) & 0xFF;
- thisbuffer[5] = (offset >> 16) & 0xFF;
- thisbuffer[6] = (offset >> 8) & 0xFF;
- thisbuffer[7] = offset & 0xFF;
- ofstream out;
- m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
- out.write(thisbuffer, 8);
- out.close();
- }
- }
- delete[] mybuffer;
-
-
- //read index length
- mybuffer = new char[4];
- in.read(mybuffer,4);
- for (int i = 0; i < filehandlesHeaders.size(); i++) {
- for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
- ofstream out;
- m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
- unsigned int offset = 0;
- char* thisbuffer = new char[4];
- thisbuffer[0] = (offset >> 24) & 0xFF;
- thisbuffer[1] = (offset >> 16) & 0xFF;
- thisbuffer[2] = (offset >> 8) & 0xFF;
- thisbuffer[3] = offset & 0xFF;
- out.write(thisbuffer, 4);
- out.close();
- }
- }
- delete[] mybuffer;
-
- //change num reads
- mybuffer = new char[4];
- in.read(mybuffer,4);
- delete[] mybuffer;
- for (int i = 0; i < filehandlesHeaders.size(); i++) {
- for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
- ofstream out;
- m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
- //convert number of reads to 4 byte char*
- char* thisbuffer = new char[4];
- if ((m->findEdianness()) == "BIG_ENDIAN") {
- thisbuffer[0] = (numSplitReads[i][j] >> 24) & 0xFF;
- thisbuffer[1] = (numSplitReads[i][j] >> 16) & 0xFF;
- thisbuffer[2] = (numSplitReads[i][j] >> 8) & 0xFF;
- thisbuffer[3] = numSplitReads[i][j] & 0xFF;
- }else {
- thisbuffer[0] = numSplitReads[i][j] & 0xFF;
- thisbuffer[1] = (numSplitReads[i][j] >> 8) & 0xFF;
- thisbuffer[2] = (numSplitReads[i][j] >> 16) & 0xFF;
- thisbuffer[3] = (numSplitReads[i][j] >> 24) & 0xFF;
- }
- out.write(thisbuffer, 4);
- out.close();
- delete[] thisbuffer;
- }
- }
-
- //read header length
- mybuffer = new char[2];
- in.read(mybuffer,2);
- for (int i = 0; i < filehandlesHeaders.size(); i++) {
- for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
- ofstream out;
- m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
- out.write(mybuffer, in.gcount());
- out.close();
- }
- }
- delete[] mybuffer;
-
- //read key length
- mybuffer = new char[2];
- in.read(mybuffer,2);
- for (int i = 0; i < filehandlesHeaders.size(); i++) {
- for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
- ofstream out;
- m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
- out.write(mybuffer, in.gcount());
- out.close();
- }
- }
- delete[] mybuffer;
-
- //read number of flow reads
- mybuffer = new char[2];
- in.read(mybuffer,2);
- for (int i = 0; i < filehandlesHeaders.size(); i++) {
- for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
- ofstream out;
- m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
- out.write(mybuffer, in.gcount());
- out.close();
- }
- }
- delete[] mybuffer;
-
- //read format code
- mybuffer = new char[1];
- in.read(mybuffer,1);
- for (int i = 0; i < filehandlesHeaders.size(); i++) {
- for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
- ofstream out;
- m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
- out.write(mybuffer, in.gcount());
- out.close();
- }
- }
- delete[] mybuffer;
-
- //read flow chars
- mybuffer = new char[header.numFlowsPerRead];
- in.read(mybuffer,header.numFlowsPerRead);
- for (int i = 0; i < filehandlesHeaders.size(); i++) {
- for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
- ofstream out;
- m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
- out.write(mybuffer, in.gcount());
- out.close();
- }
- }
- delete[] mybuffer;
-
- //read key
- mybuffer = new char[header.keyLength];
- in.read(mybuffer,header.keyLength);
- for (int i = 0; i < filehandlesHeaders.size(); i++) {
- for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
- ofstream out;
- m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
- out.write(mybuffer, in.gcount());
- out.close();
- }
- }
- delete[] mybuffer;
-
-
- /* Pad to 8 chars */
- unsigned long long spotInFile = in.tellg();
- unsigned long long spot = (spotInFile + 7)& ~7; // ~ inverts
- in.seekg(spot);
-
- mybuffer = new char[spot-spotInFile];
- for (int i = 0; i < filehandlesHeaders.size(); i++) {
- for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
- ofstream out;
- m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
- out.write(mybuffer, spot-spotInFile);
- out.close();
- }
- }
- delete[] mybuffer;
- in.close();
- return 0;
-
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "adjustCommonHeader");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, Header& header){
- try {
- unsigned long long startSpotInFile = in.tellg();
- if (!in.eof()) {
-
- /*****************************************/
- //read header
-
- //read header length
- char buffer [2];
- in.read(buffer, 2);
- header.headerLength = be_int2(*(unsigned short *)(&buffer));
-
- //read name length
- char buffer2 [2];
- in.read(buffer2, 2);
- header.nameLength = be_int2(*(unsigned short *)(&buffer2));
-
- //read num bases
- char buffer3 [4];
- in.read(buffer3, 4);
- header.numBases = be_int4(*(unsigned int *)(&buffer3));
-
-
- //read clip qual left
- char buffer4 [2];
- in.read(buffer4, 2);
- header.clipQualLeft = be_int2(*(unsigned short *)(&buffer4));
- header.clipQualLeft = 5;
-
-
- //read clip qual right
- char buffer5 [2];
- in.read(buffer5, 2);
- header.clipQualRight = be_int2(*(unsigned short *)(&buffer5));
-
-
- //read clipAdapterLeft
- char buffer6 [2];
- in.read(buffer6, 2);
- header.clipAdapterLeft = be_int2(*(unsigned short *)(&buffer6));
-
-
- //read clipAdapterRight
- char buffer7 [2];
- in.read(buffer7, 2);
- header.clipAdapterRight = be_int2(*(unsigned short *)(&buffer7));
-
-
- //read name
- char* tempBuffer = new char[header.nameLength];
- in.read(&(*tempBuffer), header.nameLength);
- header.name = tempBuffer;
- if (header.name.length() > header.nameLength) { header.name = header.name.substr(0, header.nameLength); }
-
- delete[] tempBuffer;
-
- //extract info from name
- decodeName(header.timestamp, header.region, header.xy, header.name);
-
- /* Pad to 8 chars */
- unsigned long long spotInFile = in.tellg();
- unsigned long long spot = (spotInFile + 7)& ~7;
- in.seekg(spot);
-
- /*****************************************/
- //sequence read
-
- //read flowgram
- read.flowgram.resize(numFlowReads);
- for (int i = 0; i < numFlowReads; i++) {
- char buffer [2];
- in.read(buffer, 2);
- read.flowgram[i] = be_int2(*(unsigned short *)(&buffer));
- }
-
- //read flowIndex
- read.flowIndex.resize(header.numBases);
- for (int i = 0; i < header.numBases; i++) {
- char temp[1];
- in.read(temp, 1);
- read.flowIndex[i] = be_int1(*(unsigned char *)(&temp));
- }
-
- //read bases
- char* tempBuffer6 = new char[header.numBases];
- in.read(&(*tempBuffer6), header.numBases);
- read.bases = tempBuffer6;
- if (read.bases.length() > header.numBases) { read.bases = read.bases.substr(0, header.numBases); }
- delete[] tempBuffer6;
-
- //read qual scores
- read.qualScores.resize(header.numBases);
- for (int i = 0; i < header.numBases; i++) {
- char temp[1];
- in.read(temp, 1);
- read.qualScores[i] = be_int1(*(unsigned char *)(&temp));
- }
-
- /* Pad to 8 chars */
- spotInFile = in.tellg();
- spot = (spotInFile + 7)& ~7;
- in.seekg(spot);
-
- if (split > 1) {
- char * mybuffer;
- mybuffer = new char [spot-startSpotInFile];
- ifstream in2;
- in2.open(currentFileName.c_str(), ios::binary);
- in2.seekg(startSpotInFile);
- in2.read(mybuffer,spot-startSpotInFile);
- in2.close();
-
- int barcodeIndex, primerIndex;
- int trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex);
-
- if(trashCodeLength == 0){
- //cout << header.name << " length = " << spot << '\t' << startSpotInFile << '\t' << in2.gcount() << endl;
-
- ofstream out;
- m->openOutputFileBinaryAppend(filehandles[barcodeIndex][primerIndex], out);
- out.write(mybuffer, in2.gcount());
- out.close();
- numSplitReads[barcodeIndex][primerIndex]++;
- }
- else{
- ofstream out;
- m->openOutputFileBinaryAppend(noMatchFile, out);
- out.write(mybuffer, in2.gcount());
- out.close();
- }
- delete[] mybuffer;
- }
- }else{
- m->mothurOut("Error reading."); m->mothurOutEndLine();
- }
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "readSeqData");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer) {
- try {
- //find group read belongs to
- TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
-
- int success = 1;
- string trashCode = "";
- int currentSeqsDiffs = 0;
-
- string seq = read.bases;
-
- if (trim) {
- if(header.clipQualRight < header.clipQualLeft){
- if (header.clipQualRight == 0) { //don't trim right
- seq = seq.substr(header.clipQualLeft-1);
- }else {
- seq = "NNNN";
- }
- }
- else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
- seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));
- }
- else {
- seq = seq.substr(header.clipQualLeft-1);
- }
- }else{
- //if you wanted the sfftxt then you already converted the bases to the right case
- if (!sfftxt) {
- int endValue = header.clipQualRight;
- //make the bases you want to clip lowercase and the bases you want to keep upper case
- if(endValue == 0){ endValue = seq.length(); }
- for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]); }
- for (int i = (header.clipQualLeft-1); i < (endValue-1); i++) { seq[i] = toupper(seq[i]); }
- for (int i = (endValue-1); i < seq.length(); i++) { seq[i] = tolower(seq[i]); }
- }
- }
-
- Sequence currSeq(header.name, seq);
- QualityScores currQual;
-
- if(numLinkers != 0){
- success = trimOligos.stripLinker(currSeq, currQual);
- if(success > ldiffs) { trashCode += 'k'; }
- else{ currentSeqsDiffs += success; }
-
- }
-
- if(barcodes.size() != 0){
- success = trimOligos.stripBarcode(currSeq, currQual, barcode);
- if(success > bdiffs) { trashCode += 'b'; }
- else{ currentSeqsDiffs += success; }
- }
-
- if(numSpacers != 0){
- success = trimOligos.stripSpacer(currSeq, currQual);
- if(success > sdiffs) { trashCode += 's'; }
- else{ currentSeqsDiffs += success; }
-
- }
-
- if(numFPrimers != 0){
- success = trimOligos.stripForward(currSeq, currQual, primer, true);
- if(success > pdiffs) { trashCode += 'f'; }
- else{ currentSeqsDiffs += success; }
- }
-
- if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }
-
- if(revPrimer.size() != 0){
- success = trimOligos.stripReverse(currSeq, currQual);
- if(!success) { trashCode += 'r'; }
- }
-
-
- return trashCode.length();
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "findGroup");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-int SffInfoCommand::decodeName(string& timestamp, string& region, string& xy, string name) {
- try {
-
- if (name.length() >= 6) {
- string time = name.substr(0, 6);
- unsigned int timeNum = m->fromBase36(time);
-
- int q1 = timeNum / 60;
- int sec = timeNum - 60 * q1;
- int q2 = q1 / 60;
- int minute = q1 - 60 * q2;
- int q3 = q2 / 24;
- int hr = q2 - 24 * q3;
- int q4 = q3 / 32;
- int day = q3 - 32 * q4;
- int q5 = q4 / 13;
- int mon = q4 - 13 * q5;
- int year = 2000 + q5;
-
- timestamp = toString(year) + "_" + toString(mon) + "_" + toString(day) + "_" + toString(hr) + "_" + toString(minute) + "_" + toString(sec);
- }
-
- if (name.length() >= 9) {
- region = name.substr(7, 2);
-
- string xyNum = name.substr(9);
- unsigned int myXy = m->fromBase36(xyNum);
- int x = myXy >> 12;
- int y = myXy & 4095;
-
- xy = toString(x) + "_" + toString(y);
- }
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "decodeName");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader& header) {
- try {
-
- out << "Common Header:\nMagic Number: " << header.magicNumber << endl;
- out << "Version: " << header.version << endl;
- out << "Index Offset: " << header.indexOffset << endl;
- out << "Index Length: " << header.indexLength << endl;
- out << "Number of Reads: " << header.numReads << endl;
- out << "Header Length: " << header.headerLength << endl;
- out << "Key Length: " << header.keyLength << endl;
- out << "Number of Flows: " << header.numFlowsPerRead << endl;
- out << "Format Code: " << header.flogramFormatCode << endl;
- out << "Flow Chars: " << header.flowChars << endl;
- out << "Key Sequence: " << header.keySequence << endl << endl;
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "printCommonHeader");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-int SffInfoCommand::printHeader(ofstream& out, Header& header) {
- try {
-
- out << ">" << header.name << endl;
- out << "Run Prefix: " << header.timestamp << endl;
- out << "Region #: " << header.region << endl;
- out << "XY Location: " << header.xy << endl << endl;
-
- out << "Run Name: " << endl;
- out << "Analysis Name: " << endl;
- out << "Full Path: " << endl << endl;
-
- out << "Read Header Len: " << header.headerLength << endl;
- out << "Name Length: " << header.nameLength << endl;
- out << "# of Bases: " << header.numBases << endl;
- out << "Clip Qual Left: " << header.clipQualLeft << endl;
- out << "Clip Qual Right: " << header.clipQualRight << endl;
- out << "Clip Adap Left: " << header.clipAdapterLeft << endl;
- out << "Clip Adap Right: " << header.clipAdapterRight << endl << endl;
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "printHeader");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-bool SffInfoCommand::sanityCheck(Header& header, seqRead& read) {
- try {
- bool okay = true;
- string message = "[WARNING]: Your sff file may be corrupted! Sequence: " + header.name + "\n";
-
- if (header.clipQualLeft > read.bases.length()) {
- okay = false; message += "Clip Qual Left = " + toString(header.clipQualLeft) + ", but we only read " + toString(read.bases.length()) + " bases.\n";
- }
- if (header.clipQualRight > read.bases.length()) {
- okay = false; message += "Clip Qual Right = " + toString(header.clipQualRight) + ", but we only read " + toString(read.bases.length()) + " bases.\n";
- }
- if (header.clipQualLeft > read.qualScores.size()) {
- okay = false; message += "Clip Qual Left = " + toString(header.clipQualLeft) + ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n";
- }
- if (header.clipQualRight > read.qualScores.size()) {
- okay = false; message += "Clip Qual Right = " + toString(header.clipQualRight) + ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n";
- }
-
- if (okay == false) {
- m->mothurOut(message); m->mothurOutEndLine();
- }
-
- return okay;
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "sanityCheck");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& header) {
- try {
- out << "Flowgram: ";
- for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t'; }
-
- out << endl << "Flow Indexes: ";
- int sum = 0;
- for (int i = 0; i < read.flowIndex.size(); i++) { sum += read.flowIndex[i]; out << sum << '\t'; }
-
- //make the bases you want to clip lowercase and the bases you want to keep upper case
- int endValue = header.clipQualRight;
- if(endValue == 0){ endValue = read.bases.length(); }
- for (int i = 0; i < (header.clipQualLeft-1); i++) { read.bases[i] = tolower(read.bases[i]); }
- for (int i = (header.clipQualLeft-1); i < (endValue-1); i++) { read.bases[i] = toupper(read.bases[i]); }
- for (int i = (endValue-1); i < read.bases.length(); i++) { read.bases[i] = tolower(read.bases[i]); }
-
- out << endl << "Bases: " << read.bases << endl << "Quality Scores: ";
- for (int i = 0; i < read.qualScores.size(); i++) { out << read.qualScores[i] << '\t'; }
-
-
- out << endl << endl;
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "printSffTxtSeqData");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& header) {
- try {
- string seq = read.bases;
-
- if (trim) {
- if(header.clipQualRight < header.clipQualLeft){
- if (header.clipQualRight == 0) { //don't trim right
- seq = seq.substr(header.clipQualLeft-1);
- }else {
- seq = "NNNN";
- }
- }
- else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
- seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));
- }
- else {
- seq = seq.substr(header.clipQualLeft-1);
- }
- }else{
- //if you wanted the sfftxt then you already converted the bases to the right case
- if (!sfftxt) {
- int endValue = header.clipQualRight;
- //make the bases you want to clip lowercase and the bases you want to keep upper case
- if(endValue == 0){ endValue = seq.length(); }
- for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]); }
- for (int i = (header.clipQualLeft-1); i < (endValue-1); i++) { seq[i] = toupper(seq[i]); }
- for (int i = (endValue-1); i < seq.length(); i++) { seq[i] = tolower(seq[i]); }
- }
- }
-
- out << ">" << header.name << " xy=" << header.xy << endl;
- out << seq << endl;
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "printFastaSeqData");
- exit(1);
- }
-}
-
-//**********************************************************************************************************************
-int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& header) {
- try {
-
- if (trim) {
- if(header.clipQualRight < header.clipQualLeft){
- if (header.clipQualRight == 0) { //don't trim right
- out << ">" << header.name << " xy=" << header.xy << " length=" << (read.qualScores.size()-header.clipQualLeft) << endl;
- for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) { out << read.qualScores[i] << '\t'; }
- }else {
- out << ">" << header.name << " xy=" << header.xy << endl;
- out << "0\t0\t0\t0";
- }
- }
- else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
- out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;
- for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) { out << read.qualScores[i] << '\t'; }
- }
- else{
- out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;
- for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) { out << read.qualScores[i] << '\t'; }
- }
- }else{
- out << ">" << header.name << " xy=" << header.xy << " length=" << read.qualScores.size() << endl;
- for (int i = 0; i < read.qualScores.size(); i++) { out << read.qualScores[i] << '\t'; }
- }
-
- out << endl;
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "printQualSeqData");
- exit(1);
- }
-}
-
-//**********************************************************************************************************************
-int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) {
- try {
-
- int endValue = header.clipQualRight;
- if (header.clipQualRight == 0) {
- endValue = read.flowIndex.size();
- if (m->debug) { m->mothurOut("[DEBUG]: " + header.name + " has clipQualRight=0.\n"); }
- }
- if(endValue > header.clipQualLeft){
-
- int rightIndex = 0;
- for (int i = 0; i < endValue; i++) { rightIndex += read.flowIndex[i]; }
-
- out << header.name << ' ' << rightIndex;
- for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << ' ' << (read.flowgram[i]/(float)100); }
- out << endl;
- }
-
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "printFlowSeqData");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-int SffInfoCommand::readAccnosFile(string filename) {
- try {
- //remove old names
- seqNames.clear();
-
- ifstream in;
- m->openInputFile(filename, in);
- string name;
-
- while(!in.eof()){
- in >> name; m->gobble(in);
-
- seqNames.insert(name);
-
- if (m->control_pressed) { seqNames.clear(); break; }
- }
- in.close();
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "readAccnosFile");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-int SffInfoCommand::parseSffTxt() {
- try {
-
- ifstream inSFF;
- m->openInputFile(sfftxtFilename, inSFF);
-
- if (outputDir == "") { outputDir += m->hasPath(sfftxtFilename); }
-
- //output file names
- ofstream outFasta, outQual, outFlow;
- string outFastaFileName, outQualFileName;
- string fileRoot = m->getRootName(m->getSimpleName(sfftxtFilename));
- if (fileRoot.length() > 0) {
- //rip off last .
- fileRoot = fileRoot.substr(0, fileRoot.length()-1);
- fileRoot = m->getRootName(fileRoot);
- }
-
- map<string, string> variables;
- variables["[filename]"] = fileRoot;
- string sfftxtFileName = getOutputFileName("sfftxt",variables);
- string outFlowFileName = getOutputFileName("flow",variables);
- if (!trim) { variables["[tag]"] = "raw"; }
- outFastaFileName = getOutputFileName("fasta",variables);
- outQualFileName = getOutputFileName("qfile",variables);
-
- if (fasta) { m->openOutputFile(outFastaFileName, outFasta); outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }
- if (qual) { m->openOutputFile(outQualFileName, outQual); outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName); }
- if (flow) { m->openOutputFile(outFlowFileName, outFlow); outputNames.push_back(outFlowFileName); outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName); }
-
- //read common header
- string commonHeader = m->getline(inSFF);
- string magicNumber = m->getline(inSFF);
- string version = m->getline(inSFF);
- string indexOffset = m->getline(inSFF);
- string indexLength = m->getline(inSFF);
- int numReads = parseHeaderLineToInt(inSFF);
- string headerLength = m->getline(inSFF);
- string keyLength = m->getline(inSFF);
- int numFlows = parseHeaderLineToInt(inSFF);
- string flowgramCode = m->getline(inSFF);
- string flowChars = m->getline(inSFF);
- string keySequence = m->getline(inSFF);
- m->gobble(inSFF);
-
- string seqName;
-
- if (flow) { outFlow << numFlows << endl; }
-
- for(int i=0;i<numReads;i++){
-
- //sanity check
- if (inSFF.eof()) { m->mothurOut("[ERROR]: Expected " + toString(numReads) + " but reached end of file at " + toString(i+1) + "."); m->mothurOutEndLine(); break; }
-
- Header header;
-
- //parse read header
- inSFF >> seqName;
- seqName = seqName.substr(1);
- m->gobble(inSFF);
- header.name = seqName;
-
- string runPrefix = parseHeaderLineToString(inSFF); header.timestamp = runPrefix;
- string regionNumber = parseHeaderLineToString(inSFF); header.region = regionNumber;
- string xyLocation = parseHeaderLineToString(inSFF); header.xy = xyLocation;
- m->gobble(inSFF);
-
- string runName = parseHeaderLineToString(inSFF);
- string analysisName = parseHeaderLineToString(inSFF);
- string fullPath = parseHeaderLineToString(inSFF);
- m->gobble(inSFF);
-
- string readHeaderLen = parseHeaderLineToString(inSFF); convert(readHeaderLen, header.headerLength);
- string nameLength = parseHeaderLineToString(inSFF); convert(nameLength, header.nameLength);
- int numBases = parseHeaderLineToInt(inSFF); header.numBases = numBases;
- string clipQualLeft = parseHeaderLineToString(inSFF); convert(clipQualLeft, header.clipQualLeft);
- int clipQualRight = parseHeaderLineToInt(inSFF); header.clipQualRight = clipQualRight;
- string clipAdapLeft = parseHeaderLineToString(inSFF); convert(clipAdapLeft, header.clipAdapterLeft);
- string clipAdapRight = parseHeaderLineToString(inSFF); convert(clipAdapRight, header.clipAdapterRight);
- m->gobble(inSFF);
-
- seqRead read;
-
- //parse read
- vector<unsigned short> flowVector = parseHeaderLineToFloatVector(inSFF, numFlows); read.flowgram = flowVector;
- vector<unsigned int> flowIndices = parseHeaderLineToIntVector(inSFF, numBases);
-
- //adjust for print
- vector<unsigned int> flowIndicesAdjusted; flowIndicesAdjusted.push_back(flowIndices[0]);
- for (int j = 1; j < flowIndices.size(); j++) { flowIndicesAdjusted.push_back(flowIndices[j] - flowIndices[j-1]); }
- read.flowIndex = flowIndicesAdjusted;
-
- string bases = parseHeaderLineToString(inSFF); read.bases = bases;
- vector<unsigned int> qualityScores = parseHeaderLineToIntVector(inSFF, numBases); read.qualScores = qualityScores;
- m->gobble(inSFF);
-
- //if you have provided an accosfile and this seq is not in it, then dont print
- bool print = true;
- if (seqNames.size() != 0) { if (seqNames.count(header.name) == 0) { print = false; } }
-
- //print
- if (print) {
- if (fasta) { printFastaSeqData(outFasta, read, header); }
- if (qual) { printQualSeqData(outQual, read, header); }
- if (flow) { printFlowSeqData(outFlow, read, header); }
- }
-
- //report progress
- if((i+1) % 10000 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine(); }
-
- if (m->control_pressed) { break; }
- }
-
- //report progress
- if (!m->control_pressed) { if((numReads) % 10000 != 0){ m->mothurOut(toString(numReads)); m->mothurOutEndLine(); } }
-
- inSFF.close();
-
- if (fasta) { outFasta.close(); }
- if (qual) { outQual.close(); }
- if (flow) { outFlow.close(); }
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "parseSffTxt");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-
-int SffInfoCommand::parseHeaderLineToInt(ifstream& file){
- try {
- int number;
-
- while (!file.eof()) {
-
- char c = file.get();
- if (c == ':'){
- file >> number;
- break;
- }
-
- }
- m->gobble(file);
- return number;
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "parseHeaderLineToInt");
- exit(1);
- }
-
-}
-
-//**********************************************************************************************************************
-
-string SffInfoCommand::parseHeaderLineToString(ifstream& file){
- try {
- string text;
-
- while (!file.eof()) {
- char c = file.get();
-
- if (c == ':'){
- //m->gobble(file);
- //text = m->getline(file);
- file >> text;
- break;
- }
- }
- m->gobble(file);
-
- return text;
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "parseHeaderLineToString");
- exit(1);
- }
-}
-
-//**********************************************************************************************************************
-
-vector<unsigned short> SffInfoCommand::parseHeaderLineToFloatVector(ifstream& file, int length){
- try {
- vector<unsigned short> floatVector(length);
-
- while (!file.eof()) {
- char c = file.get();
- if (c == ':'){
- float temp;
- for(int i=0;i<length;i++){
- file >> temp;
- floatVector[i] = temp * 100;
- }
- break;
- }
- }
- m->gobble(file);
- return floatVector;
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "parseHeaderLineToFloatVector");
- exit(1);
- }
-}
-
-//**********************************************************************************************************************
-
-vector<unsigned int> SffInfoCommand::parseHeaderLineToIntVector(ifstream& file, int length){
- try {
- vector<unsigned int> intVector(length);
-
- while (!file.eof()) {
- char c = file.get();
- if (c == ':'){
- for(int i=0;i<length;i++){
- file >> intVector[i];
- }
- break;
- }
- }
- m->gobble(file);
- return intVector;
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "parseHeaderLineToIntVector");
- exit(1);
- }
-}
-//***************************************************************************************************************
-
-bool SffInfoCommand::readOligos(string oligoFile){
- try {
- filehandles.clear();
- numSplitReads.clear();
- filehandlesHeaders.clear();
-
- ifstream inOligos;
- m->openInputFile(oligoFile, inOligos);
-
- string type, oligo, group;
-
- int indexPrimer = 0;
- int indexBarcode = 0;
-
- while(!inOligos.eof()){
-
- inOligos >> type;
-
- if(type[0] == '#'){
- while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
- m->gobble(inOligos);
- }
- else{
- m->gobble(inOligos);
- //make type case insensitive
- for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
-
- inOligos >> oligo;
-
- for(int i=0;i<oligo.length();i++){
- oligo[i] = toupper(oligo[i]);
- if(oligo[i] == 'U') { oligo[i] = 'T'; }
- }
-
- if(type == "FORWARD"){
- group = "";
-
- // get rest of line in case there is a primer name
- while (!inOligos.eof()) {
- char c = inOligos.get();
- if (c == 10 || c == 13 || c == -1){ break; }
- else if (c == 32 || c == 9){;} //space or tab
- else { group += c; }
- }
-
- //check for repeat barcodes
- map<string, int>::iterator itPrime = primers.find(oligo);
- if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
-
- primers[oligo]=indexPrimer; indexPrimer++;
- primerNameVector.push_back(group);
- }else if(type == "REVERSE"){
- //Sequence oligoRC("reverse", oligo);
- //oligoRC.reverseComplement();
- string oligoRC = reverseOligo(oligo);
- revPrimer.push_back(oligoRC);
- }
- else if(type == "BARCODE"){
- inOligos >> group;
-
- //check for repeat barcodes
- map<string, int>::iterator itBar = barcodes.find(oligo);
- if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
-
- barcodes[oligo]=indexBarcode; indexBarcode++;
- barcodeNameVector.push_back(group);
- }else if(type == "LINKER"){
- linker.push_back(oligo);
- }else if(type == "SPACER"){
- spacer.push_back(oligo);
- }
- else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
- }
- m->gobble(inOligos);
- }
- inOligos.close();
-
- if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ split = 1; }
-
- //add in potential combos
- if(barcodeNameVector.size() == 0){
- barcodes[""] = 0;
- barcodeNameVector.push_back("");
- }
-
- if(primerNameVector.size() == 0){
- primers[""] = 0;
- primerNameVector.push_back("");
- }
-
- filehandles.resize(barcodeNameVector.size());
- for(int i=0;i<filehandles.size();i++){
- filehandles[i].assign(primerNameVector.size(), "");
- }
-
- if(split > 1){
- set<string> uniqueNames; //used to cleanup outputFileNames
- for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
- for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
-
- string primerName = primerNameVector[itPrimer->second];
- string barcodeName = barcodeNameVector[itBar->second];
-
- string comboGroupName = "";
- string fastaFileName = "";
- string qualFileName = "";
- string nameFileName = "";
-
- if(primerName == ""){
- comboGroupName = barcodeNameVector[itBar->second];
- }
- else{
- if(barcodeName == ""){
- comboGroupName = primerNameVector[itPrimer->second];
- }
- else{
- comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
- }
- }
-
- ofstream temp;
- map<string, string> variables;
- variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));
- variables["[group]"] = comboGroupName;
- string thisFilename = getOutputFileName("sff",variables);
- if (uniqueNames.count(thisFilename) == 0) {
- outputNames.push_back(thisFilename);
- outputTypes["sff"].push_back(thisFilename);
- uniqueNames.insert(thisFilename);
- }
-
- filehandles[itBar->second][itPrimer->second] = thisFilename;
- temp.open(thisFilename.c_str(), ios::binary); temp.close();
- }
- }
- }
- numFPrimers = primers.size();
- numLinkers = linker.size();
- numSpacers = spacer.size();
- map<string, string> variables;
- variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));
- variables["[group]"] = "scrap";
- noMatchFile = getOutputFileName("sff",variables);
- m->mothurRemove(noMatchFile);
-
- bool allBlank = true;
- for (int i = 0; i < barcodeNameVector.size(); i++) {
- if (barcodeNameVector[i] != "") {
- allBlank = false;
- break;
- }
- }
- for (int i = 0; i < primerNameVector.size(); i++) {
- if (primerNameVector[i] != "") {
- allBlank = false;
- break;
- }
- }
-
- filehandlesHeaders.resize(filehandles.size());
- numSplitReads.resize(filehandles.size());
- for (int i = 0; i < filehandles.size(); i++) {
- numSplitReads[i].resize(filehandles[i].size(), 0);
- for (int j = 0; j < filehandles[i].size(); j++) {
- filehandlesHeaders[i].push_back(filehandles[i][j]+"headers");
- }
- }
-
- if (allBlank) {
- m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a split the sff file."); m->mothurOutEndLine();
- split = 1;
- return false;
- }
-
- return true;
-
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "readOligos");
- exit(1);
- }
-}
-//********************************************************************/
-string SffInfoCommand::reverseOligo(string oligo){
- try {
- string reverse = "";
-
- for(int i=oligo.length()-1;i>=0;i--){
-
- if(oligo[i] == 'A') { reverse += 'T'; }
- else if(oligo[i] == 'T'){ reverse += 'A'; }
- else if(oligo[i] == 'U'){ reverse += 'A'; }
-
- else if(oligo[i] == 'G'){ reverse += 'C'; }
- else if(oligo[i] == 'C'){ reverse += 'G'; }
-
- else if(oligo[i] == 'R'){ reverse += 'Y'; }
- else if(oligo[i] == 'Y'){ reverse += 'R'; }
-
- else if(oligo[i] == 'M'){ reverse += 'K'; }
- else if(oligo[i] == 'K'){ reverse += 'M'; }
-
- else if(oligo[i] == 'W'){ reverse += 'W'; }
- else if(oligo[i] == 'S'){ reverse += 'S'; }
-
- else if(oligo[i] == 'B'){ reverse += 'V'; }
- else if(oligo[i] == 'V'){ reverse += 'B'; }
-
- else if(oligo[i] == 'D'){ reverse += 'H'; }
- else if(oligo[i] == 'H'){ reverse += 'D'; }
-
- else { reverse += 'N'; }
- }
-
-
- return reverse;
- }
- catch(exception& e) {
- m->errorOut(e, "SffInfoCommand", "reverseOligo");
- exit(1);
- }
-}
-
-//**********************************************************************************************************************
-
-
-
-
+/*\r
+ * sffinfocommand.cpp\r
+ * Mothur\r
+ *\r
+ * Created by westcott on 7/7/10.\r
+ * Copyright 2010 Schloss Lab. All rights reserved.\r
+ *\r
+ */\r
+\r
+#include "sffinfocommand.h"\r
+#include "endiannessmacros.h"\r
+#include "trimoligos.h"\r
+#include "sequence.hpp"\r
+#include "qualityscores.h"\r
+\r
+//**********************************************************************************************************************\r
+//Registers the parameters accepted by sffinfo and returns the name of every entry held in parameters.\r
+vector<string> SffInfoCommand::setParameters(){\r
+    try {\r
+        //input files\r
+        parameters.push_back(CommandParameter("sff", "InputTypes", "", "", "none", "none", "none","",false,false,true));\r
+        parameters.push_back(CommandParameter("oligos", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false));\r
+        parameters.push_back(CommandParameter("group", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false));\r
+        parameters.push_back(CommandParameter("accnos", "InputTypes", "", "", "none", "none", "none","",false,false));\r
+        \r
+        //output selection\r
+        parameters.push_back(CommandParameter("sfftxt", "String", "", "", "", "", "","",false,false));\r
+        parameters.push_back(CommandParameter("flow", "Boolean", "", "T", "", "", "","flow",false,false));\r
+        parameters.push_back(CommandParameter("trim", "Boolean", "", "T", "", "", "","",false,false));\r
+        parameters.push_back(CommandParameter("fasta", "Boolean", "", "T", "", "", "","fasta",false,false));\r
+        parameters.push_back(CommandParameter("qfile", "Boolean", "", "T", "", "", "","qfile",false,false));\r
+        \r
+        //allowed differences\r
+        parameters.push_back(CommandParameter("pdiffs", "Number", "", "0", "", "", "","",false,false));\r
+        parameters.push_back(CommandParameter("bdiffs", "Number", "", "0", "", "", "","",false,false));\r
+        parameters.push_back(CommandParameter("ldiffs", "Number", "", "0", "", "", "","",false,false));\r
+        parameters.push_back(CommandParameter("sdiffs", "Number", "", "0", "", "", "","",false,false));\r
+        parameters.push_back(CommandParameter("tdiffs", "Number", "", "0", "", "", "","",false,false));\r
+        \r
+        //directories\r
+        parameters.push_back(CommandParameter("inputdir", "String", "", "", "", "", "","",false,false));\r
+        parameters.push_back(CommandParameter("outputdir", "String", "", "", "", "", "","",false,false));\r
+        \r
+        //collect the names in the order they are stored\r
+        vector<string> paramNames;\r
+        for (int idx = 0; idx < parameters.size(); idx++) {\r
+            paramNames.push_back(parameters[idx].name);\r
+        }\r
+        return paramNames;\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "setParameters");\r
+        exit(1);\r
+    }\r
+}\r
+//**********************************************************************************************************************\r
+//Returns the multi-line help text shown for the sffinfo command.\r
+string SffInfoCommand::getHelpString(){\r
+    try {\r
+        //adjacent literals are joined by the compiler into one string, one help line per literal\r
+        string helpText =\r
+            "The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file.\n"\r
+            "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, oligos, group, bdiffs, tdiffs, ldiffs, sdiffs, pdiffs and trim. sff is required. \n"\r
+            "The sff parameter allows you to enter the sff file you would like to extract data from. You may enter multiple files by separating them by -'s.\n"\r
+            "The fasta parameter allows you to indicate if you would like a fasta formatted file generated. Default=True. \n"\r
+            "The qfile parameter allows you to indicate if you would like a quality file generated. Default=True. \n"\r
+            "The oligos parameter allows you to provide an oligos file to split your sff file into separate sff files by barcode. \n"\r
+            "The group parameter allows you to provide a group file to split your sff file into separate sff files by group. \n"\r
+            "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n"\r
+            "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n"\r
+            "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n"\r
+            "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n"\r
+            "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n"\r
+            "The flow parameter allows you to indicate if you would like a flowgram file generated. Default=True. \n"\r
+            "The sfftxt parameter allows you to indicate if you would like a sff.txt file generated. Default=False. \n"\r
+            "If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n"\r
+            "The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values. Default=True. \n"\r
+            "The accnos parameter allows you to provide a accnos file containing the names of the sequences you would like extracted. You may enter multiple files by separating them by -'s. \n"\r
+            "Example sffinfo(sff=mySffFile.sff, trim=F).\n"\r
+            "Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n";\r
+        return helpText;\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "getHelpString");\r
+        exit(1);\r
+    }\r
+}\r
+\r
+\r
+//**********************************************************************************************************************\r
+string SffInfoCommand::getOutputPattern(string type) {\r
+ try {\r
+ string pattern = "";\r
+ \r
+ if (type == "fasta") { pattern = "[filename],fasta-[filename],[tag],fasta"; }\r
+ else if (type == "flow") { pattern = "[filename],flow"; }\r
+ else if (type == "sfftxt") { pattern = "[filename],sff.txt"; }\r
+ else if (type == "sff") { pattern = "[filename],[group],sff"; }\r
+ else if (type == "qfile") { pattern = "[filename],qual-[filename],[tag],qual"; }\r
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }\r
+ \r
+ return pattern;\r
+ }\r
+ catch(exception& e) {\r
+ m->errorOut(e, "SffInfoCommand", "getOutputPattern");\r
+ exit(1);\r
+ }\r
+}\r
+//**********************************************************************************************************************\r
+//Default constructor: marks the command as aborted with help requested, registers the\r
+//parameters and creates an empty output list for every output type.\r
+SffInfoCommand::SffInfoCommand(){\r
+    try {\r
+        abort = true;\r
+        calledHelp = true;\r
+        setParameters();\r
+        \r
+        static const char* const typeNames[] = { "fasta", "flow", "sfftxt", "qfile", "sff" };\r
+        const int numTypes = sizeof(typeNames) / sizeof(typeNames[0]);\r
+        for (int idx = 0; idx < numTypes; idx++) {\r
+            outputTypes[typeNames[idx]] = vector<string>();\r
+        }\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "SffInfoCommand");\r
+        exit(1);\r
+    }\r
+}\r
+//**********************************************************************************************************************\r
+\r
+//Parsing constructor. Handles the "help" and "citation" requests; otherwise validates the\r
+//option string, resolves the sff, accnos, oligos and group file lists, and reads the boolean\r
+//and numeric settings. abort is set when a parameter is invalid, when a supplied file list\r
+//ends up empty, when the list sizes disagree, or when no sff/sfftxt input can be found.\r
+SffInfoCommand::SffInfoCommand(string option) {\r
+ try {\r
+ abort = false; calledHelp = false; \r
+ hasAccnos = false; hasOligos = false; hasGroup = false;\r
+ //split is raised to 2 further down when an oligos or group file is supplied\r
+ split = 1;\r
+ \r
+ //allow user to run help\r
+ if(option == "help") { help(); abort = true; calledHelp = true; }\r
+ else if(option == "citation") { citation(); abort = true; calledHelp = true;}\r
+ \r
+ else {\r
+ //valid parameters for this command\r
+ vector<string> myArray = setParameters();\r
+ \r
+ OptionParser parser(option);\r
+ map<string, string> parameters = parser.getParameters();\r
+ \r
+ ValidParameters validParameter;\r
+ //check to make sure all parameters are valid for command\r
+ for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { \r
+ if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }\r
+ }\r
+ \r
+ //initialize outputTypes\r
+ vector<string> tempOutNames;\r
+ outputTypes["fasta"] = tempOutNames;\r
+ outputTypes["flow"] = tempOutNames;\r
+ outputTypes["sfftxt"] = tempOutNames;\r
+ outputTypes["qfile"] = tempOutNames;\r
+ outputTypes["sff"] = tempOutNames;\r
+ \r
+ //outputdir is optional; "not found" is mapped to an empty string\r
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }\r
+ \r
+ //inputdir is optional; "not found" is mapped to an empty string\r
+ string inputDir = validParameter.validFile(parameters, "inputdir", false); if (inputDir == "not found"){ inputDir = ""; }\r
+\r
+ //---- sff: dash-separated list of sff files ----\r
+ sffFilename = validParameter.validFile(parameters, "sff", false);\r
+ if (sffFilename == "not found") { sffFilename = ""; }\r
+ else { \r
+ m->splitAtDash(sffFilename, filenames);\r
+ \r
+ //go through files and make sure they are good, if not, then disregard them\r
+ for (int i = 0; i < filenames.size(); i++) {\r
+ bool ignore = false;\r
+ if (filenames[i] == "current") { \r
+ filenames[i] = m->getSFFFile(); \r
+ if (filenames[i] != "") { m->mothurOut("Using " + filenames[i] + " as input file for the sff parameter where you had given current."); m->mothurOutEndLine(); }\r
+ else { \r
+ m->mothurOut("You have no current sfffile, ignoring current."); m->mothurOutEndLine(); ignore=true; \r
+ //erase from file list\r
+ filenames.erase(filenames.begin()+i);\r
+ //step back so the entry that moved into slot i is examined on the next pass\r
+ i--;\r
+ }\r
+ }\r
+ \r
+ if (!ignore) {\r
+ if (inputDir != "") {\r
+ string path = m->hasPath(filenames[i]);\r
+ //if the user has not given a path then, add inputdir. else leave path alone.\r
+ if (path == "") { filenames[i] = inputDir + filenames[i]; }\r
+ }\r
+ \r
+ ifstream in;\r
+ int ableToOpen = m->openInputFile(filenames[i], in, "noerror");\r
+ \r
+ //if you can't open it, try default location\r
+ if (ableToOpen == 1) {\r
+ if (m->getDefaultPath() != "") { //default path is set\r
+ string tryPath = m->getDefaultPath() + m->getSimpleName(filenames[i]);\r
+ m->mothurOut("Unable to open " + filenames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();\r
+ ifstream in2;\r
+ ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
+ in2.close();\r
+ filenames[i] = tryPath;\r
+ }\r
+ }\r
+ \r
+ //if you still can't open it, try the output directory\r
+ if (ableToOpen == 1) {\r
+ if (m->getOutputDir() != "") { //output directory is set\r
+ string tryPath = m->getOutputDir() + m->getSimpleName(filenames[i]);\r
+ m->mothurOut("Unable to open " + filenames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();\r
+ ifstream in2;\r
+ ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
+ in2.close();\r
+ filenames[i] = tryPath;\r
+ }\r
+ }\r
+ \r
+ in.close();\r
+ \r
+ if (ableToOpen == 1) { \r
+ m->mothurOut("Unable to open " + filenames[i] + ". It will be disregarded."); m->mothurOutEndLine();\r
+ //erase from file list\r
+ filenames.erase(filenames.begin()+i);\r
+ i--;\r
+ }else { m->setSFFFile(filenames[i]); }\r
+ }\r
+ }\r
+ \r
+ //make sure there is at least one valid file left\r
+ if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }\r
+ }\r
+ \r
+ //---- accnos: dash-separated list, resolved with the same steps as the sff list ----\r
+ accnosName = validParameter.validFile(parameters, "accnos", false);\r
+ if (accnosName == "not found") { accnosName = ""; }\r
+ else { \r
+ hasAccnos = true;\r
+ m->splitAtDash(accnosName, accnosFileNames);\r
+ \r
+ //go through files and make sure they are good, if not, then disregard them\r
+ for (int i = 0; i < accnosFileNames.size(); i++) {\r
+ bool ignore = false;\r
+ if (accnosFileNames[i] == "current") { \r
+ accnosFileNames[i] = m->getAccnosFile(); \r
+ if (accnosFileNames[i] != "") { m->mothurOut("Using " + accnosFileNames[i] + " as input file for the accnos parameter where you had given current."); m->mothurOutEndLine(); }\r
+ else { \r
+ m->mothurOut("You have no current accnosfile, ignoring current."); m->mothurOutEndLine(); ignore=true; \r
+ //erase from file list\r
+ accnosFileNames.erase(accnosFileNames.begin()+i);\r
+ i--;\r
+ }\r
+ }\r
+ \r
+ if (!ignore) {\r
+ \r
+ if (inputDir != "") {\r
+ string path = m->hasPath(accnosFileNames[i]);\r
+ //if the user has not given a path then, add inputdir. else leave path alone.\r
+ if (path == "") { accnosFileNames[i] = inputDir + accnosFileNames[i]; }\r
+ }\r
+ \r
+ ifstream in;\r
+ int ableToOpen = m->openInputFile(accnosFileNames[i], in, "noerror");\r
+ \r
+ //if you can't open it, try default location\r
+ if (ableToOpen == 1) {\r
+ if (m->getDefaultPath() != "") { //default path is set\r
+ string tryPath = m->getDefaultPath() + m->getSimpleName(accnosFileNames[i]);\r
+ m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();\r
+ ifstream in2;\r
+ ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
+ in2.close();\r
+ accnosFileNames[i] = tryPath;\r
+ }\r
+ }\r
+ //if you still can't open it, try the output directory\r
+ if (ableToOpen == 1) {\r
+ if (m->getOutputDir() != "") { //output directory is set\r
+ string tryPath = m->getOutputDir() + m->getSimpleName(accnosFileNames[i]);\r
+ m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();\r
+ ifstream in2;\r
+ ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
+ in2.close();\r
+ accnosFileNames[i] = tryPath;\r
+ }\r
+ }\r
+ in.close();\r
+ \r
+ if (ableToOpen == 1) { \r
+ m->mothurOut("Unable to open " + accnosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();\r
+ //erase from file list\r
+ accnosFileNames.erase(accnosFileNames.begin()+i);\r
+ i--;\r
+ }\r
+ }\r
+ }\r
+ \r
+ //make sure there is at least one valid file left\r
+ if (accnosFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }\r
+ }\r
+ \r
+ //---- oligos: dash-separated list, resolved with the same steps as the sff list ----\r
+ oligosfile = validParameter.validFile(parameters, "oligos", false);\r
+ if (oligosfile == "not found") { oligosfile = ""; }\r
+ else { \r
+ hasOligos = true;\r
+ m->splitAtDash(oligosfile, oligosFileNames);\r
+ \r
+ //go through files and make sure they are good, if not, then disregard them\r
+ for (int i = 0; i < oligosFileNames.size(); i++) {\r
+ bool ignore = false;\r
+ if (oligosFileNames[i] == "current") { \r
+ oligosFileNames[i] = m->getOligosFile(); \r
+ if (oligosFileNames[i] != "") { m->mothurOut("Using " + oligosFileNames[i] + " as input file for the oligos parameter where you had given current."); m->mothurOutEndLine(); }\r
+ else { \r
+ m->mothurOut("You have no current oligosfile, ignoring current."); m->mothurOutEndLine(); ignore=true; \r
+ //erase from file list\r
+ oligosFileNames.erase(oligosFileNames.begin()+i);\r
+ i--;\r
+ }\r
+ }\r
+ \r
+ if (!ignore) {\r
+ \r
+ if (inputDir != "") {\r
+ string path = m->hasPath(oligosFileNames[i]);\r
+ //if the user has not given a path then, add inputdir. else leave path alone.\r
+ if (path == "") { oligosFileNames[i] = inputDir + oligosFileNames[i]; }\r
+ }\r
+ \r
+ ifstream in;\r
+ int ableToOpen = m->openInputFile(oligosFileNames[i], in, "noerror");\r
+ \r
+ //if you can't open it, try default location\r
+ if (ableToOpen == 1) {\r
+ if (m->getDefaultPath() != "") { //default path is set\r
+ string tryPath = m->getDefaultPath() + m->getSimpleName(oligosFileNames[i]);\r
+ m->mothurOut("Unable to open " + oligosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();\r
+ ifstream in2;\r
+ ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
+ in2.close();\r
+ oligosFileNames[i] = tryPath;\r
+ }\r
+ }\r
+ //if you still can't open it, try the output directory\r
+ if (ableToOpen == 1) {\r
+ if (m->getOutputDir() != "") { //output directory is set\r
+ string tryPath = m->getOutputDir() + m->getSimpleName(oligosFileNames[i]);\r
+ m->mothurOut("Unable to open " + oligosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();\r
+ ifstream in2;\r
+ ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
+ in2.close();\r
+ oligosFileNames[i] = tryPath;\r
+ }\r
+ }\r
+ in.close();\r
+ \r
+ if (ableToOpen == 1) { \r
+ m->mothurOut("Unable to open " + oligosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();\r
+ //erase from file list\r
+ oligosFileNames.erase(oligosFileNames.begin()+i);\r
+ i--;\r
+ }\r
+ }\r
+ }\r
+ \r
+ //make sure there is at least one valid file left\r
+ if (oligosFileNames.size() == 0) { m->mothurOut("no valid oligos files."); m->mothurOutEndLine(); abort = true; }\r
+ }\r
+ \r
+ //---- group: dash-separated list, resolved with the same steps as the sff list ----\r
+ groupfile = validParameter.validFile(parameters, "group", false);\r
+ if (groupfile == "not found") { groupfile = ""; }\r
+ else {\r
+ hasGroup = true;\r
+ m->splitAtDash(groupfile, groupFileNames);\r
+ \r
+ //go through files and make sure they are good, if not, then disregard them\r
+ for (int i = 0; i < groupFileNames.size(); i++) {\r
+ bool ignore = false;\r
+ if (groupFileNames[i] == "current") {\r
+ groupFileNames[i] = m->getGroupFile();\r
+ if (groupFileNames[i] != "") { m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }\r
+ else {\r
+ m->mothurOut("You have no current group file, ignoring current."); m->mothurOutEndLine(); ignore=true;\r
+ //erase from file list\r
+ groupFileNames.erase(groupFileNames.begin()+i);\r
+ i--;\r
+ }\r
+ }\r
+ \r
+ if (!ignore) {\r
+ \r
+ if (inputDir != "") {\r
+ string path = m->hasPath(groupFileNames[i]);\r
+ //if the user has not given a path then, add inputdir. else leave path alone.\r
+ if (path == "") { groupFileNames[i] = inputDir + groupFileNames[i]; }\r
+ }\r
+ \r
+ ifstream in;\r
+ int ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");\r
+ \r
+ //if you can't open it, try default location\r
+ if (ableToOpen == 1) {\r
+ if (m->getDefaultPath() != "") { //default path is set\r
+ string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);\r
+ m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();\r
+ ifstream in2;\r
+ ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
+ in2.close();\r
+ groupFileNames[i] = tryPath;\r
+ }\r
+ }\r
+ //if you still can't open it, try the output directory\r
+ if (ableToOpen == 1) {\r
+ if (m->getOutputDir() != "") { //output directory is set\r
+ string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);\r
+ m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();\r
+ ifstream in2;\r
+ ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
+ in2.close();\r
+ groupFileNames[i] = tryPath;\r
+ }\r
+ }\r
+ in.close();\r
+ \r
+ if (ableToOpen == 1) {\r
+ m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();\r
+ //erase from file list\r
+ groupFileNames.erase(groupFileNames.begin()+i);\r
+ i--;\r
+ }\r
+ }\r
+ }\r
+ \r
+ //make sure there is at least one valid file left\r
+ if (groupFileNames.size() == 0) { m->mothurOut("no valid group files."); m->mothurOutEndLine(); abort = true; }\r
+ }\r
+\r
+ //---- cross checks: every auxiliary list must have one entry per sff file ----\r
+ //NOTE(review): these comparisons run before the current-sff fallback at the end of this\r
+ //constructor, so filenames may still be empty here - confirm that is intended\r
+ if (hasGroup) {\r
+ split = 2;\r
+ if (groupFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a group file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
+ }\r
+ \r
+ if (hasOligos) {\r
+ split = 2;\r
+ if (oligosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide an oligos file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
+ }\r
+ \r
+ if (hasGroup && hasOligos) { m->mothurOut("You must enter ONLY ONE of the following: oligos or group."); m->mothurOutEndLine(); abort = true;}\r
+ \r
+ if (hasAccnos) {\r
+ if (accnosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a accnos file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
+ }\r
+ \r
+ //---- boolean switches, each defaulting to "T" when absent ----\r
+ string temp = validParameter.validFile(parameters, "qfile", false); if (temp == "not found"){ temp = "T"; }\r
+ qual = m->isTrue(temp); \r
+ \r
+ temp = validParameter.validFile(parameters, "fasta", false); if (temp == "not found"){ temp = "T"; }\r
+ fasta = m->isTrue(temp); \r
+ \r
+ temp = validParameter.validFile(parameters, "flow", false); if (temp == "not found"){ temp = "T"; }\r
+ flow = m->isTrue(temp); \r
+ \r
+ temp = validParameter.validFile(parameters, "trim", false); if (temp == "not found"){ temp = "T"; }\r
+ trim = m->isTrue(temp); \r
+ \r
+ //---- allowed differences, each defaulting to "0" when absent ----\r
+ temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found") { temp = "0"; }\r
+ m->mothurConvert(temp, bdiffs);\r
+ \r
+ temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }\r
+ m->mothurConvert(temp, pdiffs);\r
+ \r
+ temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }\r
+ m->mothurConvert(temp, ldiffs);\r
+ \r
+ temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }\r
+ m->mothurConvert(temp, sdiffs);\r
+ \r
+ //tdiffs defaults to the sum of the four individual limits\r
+ temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); }\r
+ m->mothurConvert(temp, tdiffs);\r
+ \r
+ //a tdiffs of 0 (including one given explicitly) is also replaced by the sum\r
+ if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }\r
+ \r
+ //---- sfftxt: absent -> off, a true value -> generate a sff.txt, anything else -> a file name to parse ----\r
+ temp = validParameter.validFile(parameters, "sfftxt", false); \r
+ if (temp == "not found") { temp = "F"; sfftxt = false; sfftxtFilename = ""; }\r
+ else if (m->isTrue(temp)) { sfftxt = true; sfftxtFilename = ""; }\r
+ else {\r
+ //you are a filename\r
+ if (inputDir != "") {\r
+ map<string,string>::iterator it = parameters.find("sfftxt");\r
+ //user has given a sfftxt file\r
+ if(it != parameters.end()){ \r
+ string path = m->hasPath(it->second);\r
+ //if the user has not given a path then, add inputdir. else leave path alone.\r
+ if (path == "") { parameters["sfftxt"] = inputDir + it->second; }\r
+ }\r
+ }\r
+ \r
+ sfftxtFilename = validParameter.validFile(parameters, "sfftxt", true);\r
+ if (sfftxtFilename == "not found") { sfftxtFilename = ""; }\r
+ else if (sfftxtFilename == "not open") { sfftxtFilename = ""; }\r
+ }\r
+ \r
+ //neither a sfftxt file to parse nor any sff file: fall back to the current sff file or give up\r
+ if ((sfftxtFilename == "") && (filenames.size() == 0)) { \r
+ //if there is a current sff file, use it\r
+ string filename = m->getSFFFile(); \r
+ if (filename != "") { filenames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the sff parameter."); m->mothurOutEndLine(); }\r
+ else { m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true; }\r
+ }\r
+ \r
+ \r
+ }\r
+ }\r
+ catch(exception& e) {\r
+ m->errorOut(e, "SffInfoCommand", "SffInfoCommand");\r
+ exit(1);\r
+ }\r
+}\r
+//**********************************************************************************************************************\r
+//Runs the command: extracts every sff file in filenames, parses the sfftxt file when one was\r
+//named, records the first fasta/qfile/flow output as the current file of its type and lists\r
+//the output file names.\r
+//Returns 2 when the constructor aborted for a reason other than help/citation, otherwise 0\r
+//(including when the run is interrupted, in which case the output files are removed first).\r
+int SffInfoCommand::execute(){\r
+    try {\r
+        if (abort == true) { if (calledHelp) { return 0; }  return 2; }\r
+        \r
+        for (int s = 0; s < filenames.size(); s++) {\r
+            \r
+            if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }\r
+            \r
+            int start = time(NULL);\r
+            \r
+            filenames[s] = m->getFullPathName(filenames[s]);\r
+            m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();\r
+            \r
+            //the accnos/oligos/group lists are parallel to filenames\r
+            string accnos = "";\r
+            if (hasAccnos) { accnos = accnosFileNames[s]; }\r
+            \r
+            //a group file is handed to extractSffInfo through the same argument as an oligos file\r
+            string oligos = "";\r
+            if (hasOligos) { oligos = oligosFileNames[s]; }\r
+            if (hasGroup) { oligos = groupFileNames[s]; }\r
+            \r
+            int numReads = extractSffInfo(filenames[s], accnos, oligos);\r
+            \r
+            //end the line so the message for the next file does not run on from this one\r
+            m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + "."); m->mothurOutEndLine();\r
+        }\r
+        \r
+        if (sfftxtFilename != "") {  parseSffTxt(); }\r
+        \r
+        if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }\r
+        \r
+        //set fasta file as new current fastafile\r
+        string current = "";\r
+        itTypes = outputTypes.find("fasta");\r
+        if (itTypes != outputTypes.end()) {\r
+            if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }\r
+        }\r
+        \r
+        //set qual file as new current qualfile\r
+        itTypes = outputTypes.find("qfile");\r
+        if (itTypes != outputTypes.end()) {\r
+            if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }\r
+        }\r
+        \r
+        //set flow file as new current flowfile\r
+        itTypes = outputTypes.find("flow");\r
+        if (itTypes != outputTypes.end()) {\r
+            if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFlowFile(current); }\r
+        }\r
+        \r
+        //report output filenames\r
+        m->mothurOutEndLine();\r
+        m->mothurOut("Output File Names: "); m->mothurOutEndLine();\r
+        for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }\r
+        m->mothurOutEndLine();\r
+        \r
+        return 0;\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "execute");\r
+        exit(1);\r
+    }\r
+}\r
+//**********************************************************************************************************************\r
+//Extracts one sff file: writes the requested sfftxt/fasta/qual/flow outputs and, when an\r
+//oligos or group file was supplied, finishes the per-group sff files.\r
+//  input  - sff file to read\r
+//  accnos - accnos file naming the reads to print, or "" to print every read\r
+//  oligos - oligos or group file used for splitting, or ""\r
+//Returns the number of reads processed; 0 when the header is rejected or the run is interrupted.\r
+int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){\r
+    try {\r
+        currentFileName = input;\r
+        //with no output directory given, write next to the input file\r
+        if (outputDir == "") {  outputDir += m->hasPath(input); }\r
+        \r
+        if (accnos != "") {  readAccnosFile(accnos); }\r
+        else { seqNames.clear(); }\r
+        \r
+        //NOTE(review): readOligos appears to reset split to 1 when the oligos file names no\r
+        //groups; the assignment here overrides that - confirm this is intended\r
+        if (hasOligos) { readOligos(oligos); split = 2; }\r
+        if (hasGroup) { readGroup(oligos); split = 2; }\r
+        \r
+        ofstream outSfftxt, outFasta, outQual, outFlow;\r
+        string outFastaFileName, outQualFileName;\r
+        string rootName = outputDir + m->getRootName(m->getSimpleName(input));\r
+        if(rootName.find_last_of(".") == rootName.npos){ rootName += "."; }\r
+        \r
+        map<string, string> variables;\r
+        variables["[filename]"] = rootName;\r
+        string sfftxtFileName = getOutputFileName("sfftxt",variables);\r
+        string outFlowFileName = getOutputFileName("flow",variables);\r
+        //untrimmed fasta/qual outputs carry a "raw" tag in their names\r
+        if (!trim) { variables["[tag]"] = "raw"; }\r
+        outFastaFileName = getOutputFileName("fasta",variables);\r
+        outQualFileName = getOutputFileName("qfile",variables);\r
+        \r
+        if (sfftxt) { m->openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint); outputNames.push_back(sfftxtFileName); outputTypes["sfftxt"].push_back(sfftxtFileName); }\r
+        if (fasta) { m->openOutputFile(outFastaFileName, outFasta); outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }\r
+        if (qual) { m->openOutputFile(outQualFileName, outQual); outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName); }\r
+        if (flow) { m->openOutputFile(outFlowFileName, outFlow); outputNames.push_back(outFlowFileName); outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName); }\r
+        \r
+        ifstream in;\r
+        m->openInputFileBinary(input, in);\r
+        \r
+        CommonHeader header;\r
+        readCommonHeader(in, header);\r
+        \r
+        int count = 0;\r
+        \r
+        //check magic number and version\r
+        if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; }\r
+        if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; }\r
+        \r
+        //print common header\r
+        if (sfftxt) { printCommonHeader(outSfftxt, header); }\r
+        if (flow) { outFlow << header.numFlowsPerRead << endl; }\r
+        \r
+        //read through the sff file\r
+        while (!in.eof()) {\r
+            \r
+            bool print = true;\r
+            \r
+            //read data\r
+            seqRead read; Header readheader;\r
+            readSeqData(in, read, header.numFlowsPerRead, readheader);\r
+            \r
+            bool okay = sanityCheck(readheader, read);\r
+            if (!okay) { break; }\r
+            \r
+            //if you have provided an accnos file and this seq is not in it, then dont print\r
+            if (seqNames.size() != 0) { if (seqNames.count(readheader.name) == 0) { print = false; } }\r
+            \r
+            //print\r
+            if (print) {\r
+                if (sfftxt) { printHeader(outSfftxt, readheader); printSffTxtSeqData(outSfftxt, read, readheader); }\r
+                if (fasta) { printFastaSeqData(outFasta, read, readheader); }\r
+                if (qual) { printQualSeqData(outQual, read, readheader); }\r
+                if (flow) { printFlowSeqData(outFlow, read, readheader); }\r
+            }\r
+            \r
+            count++;\r
+            \r
+            //report progress; count already includes this read, so test it directly\r
+            if(count % 10000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }\r
+            \r
+            if (m->control_pressed) { count = 0; break; }\r
+            \r
+            if (count >= header.numReads) { break; }\r
+        }\r
+        \r
+        //report the final total unless the loop just printed it\r
+        if (!m->control_pressed) { if((count) % 10000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } }\r
+        \r
+        in.close();\r
+        \r
+        if (sfftxt) { outSfftxt.close(); }\r
+        if (fasta) { outFasta.close(); }\r
+        if (qual) { outQual.close(); }\r
+        if (flow) { outFlow.close(); }\r
+        \r
+        if (split > 1) {\r
+            //create new common headers for each file with the correct number of reads\r
+            adjustCommonHeader(header);\r
+            \r
+            if (hasGroup) { delete groupMap; }\r
+            \r
+            //remove split files that received no data, along with their header files\r
+            set<string> namesToRemove;\r
+            for(int i=0;i<filehandles.size();i++){\r
+                //bound j by this row's own size rather than row 0's\r
+                for(int j=0;j<filehandles[i].size();j++){\r
+                    if (filehandles[i][j] != "") {\r
+                        if (namesToRemove.count(filehandles[i][j]) == 0) {\r
+                            if(m->isBlank(filehandles[i][j])){\r
+                                m->mothurRemove(filehandles[i][j]);\r
+                                m->mothurRemove(filehandlesHeaders[i][j]);\r
+                                namesToRemove.insert(filehandles[i][j]);\r
+                            }\r
+                        }\r
+                    }\r
+                }\r
+            }\r
+            \r
+            //append the reads to the new header, then move the result back to the split file name\r
+            for (int i = 0; i < filehandles.size(); i++) {\r
+                for (int j = 0; j < filehandles[i].size(); j++) {\r
+                    m->appendBinaryFiles(filehandles[i][j], filehandlesHeaders[i][j]);\r
+                    m->renameFile(filehandlesHeaders[i][j], filehandles[i][j]);\r
+                    m->mothurRemove(filehandlesHeaders[i][j]);\r
+                    if (numSplitReads[i][j] == 0) { m->mothurRemove(filehandles[i][j]); }\r
+                }\r
+            }\r
+            \r
+            //remove names for outputFileNames, just cleans up the output\r
+            for(int i = 0; i < outputNames.size(); i++) {\r
+                if (namesToRemove.count(outputNames[i]) != 0) {\r
+                    outputNames.erase(outputNames.begin()+i);\r
+                    //step back so the entry that moved into slot i is examined next\r
+                    i--;\r
+                }\r
+            }\r
+            \r
+            //keep the no-match file only when it has content\r
+            if(m->isBlank(noMatchFile)){ m->mothurRemove(noMatchFile); }\r
+            else { outputNames.push_back(noMatchFile); outputTypes["sff"].push_back(noMatchFile); }\r
+        }\r
+        \r
+        return count;\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "extractSffInfo");\r
+        exit(1);\r
+    }\r
+}\r
+//**********************************************************************************************************************\r
+// Reads the sff common header from the current position of `in` into `header`.\r
+//\r
+// Fields are consumed in file order: magic number (4 bytes), version (4), index offset (8),\r
+// index length (4), number of reads (4), header length (2), key length (2), flows per read (2),\r
+// flowgram format code (1), then numFlowsPerRead flow characters and keyLength key bases.\r
+// Afterwards the stream is moved forward to the next multiple of 8 bytes.\r
+//\r
+// in     - binary input stream positioned at the start of the common header\r
+// header - receives the decoded fields\r
+// Returns 0 in every case; when the stream is already at eof only an error line is logged.\r
+int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){\r
+    try {\r
+\r
+        if (!in.eof()) {\r
+\r
+            //read magic number\r
+            char buffer[4];\r
+            in.read(buffer, 4);\r
+            header.magicNumber = be_int4(*(unsigned int *)(&buffer));\r
+\r
+            //read version - stored as the decimal value of each of the 4 bytes, concatenated\r
+            char buffer9[4];\r
+            in.read(buffer9, 4);\r
+            header.version = "";\r
+            for (int i = 0; i < 4; i++) { header.version += toString((int)(buffer9[i])); }\r
+\r
+            //read offset\r
+            char buffer2 [8];\r
+            in.read(buffer2, 8);\r
+            header.indexOffset = be_int8(*(unsigned long long *)(&buffer2));\r
+\r
+            //read index length\r
+            char buffer3 [4];\r
+            in.read(buffer3, 4);\r
+            header.indexLength = be_int4(*(unsigned int *)(&buffer3));\r
+\r
+            //read num reads\r
+            char buffer4 [4];\r
+            in.read(buffer4, 4);\r
+            header.numReads = be_int4(*(unsigned int *)(&buffer4));\r
+\r
+            if (m->debug) { m->mothurOut("[DEBUG]: numReads = " + toString(header.numReads) + "\n"); }\r
+\r
+            //read header length\r
+            char buffer5 [2];\r
+            in.read(buffer5, 2);\r
+            header.headerLength = be_int2(*(unsigned short *)(&buffer5));\r
+\r
+            //read key length\r
+            char buffer6 [2];\r
+            in.read(buffer6, 2);\r
+            header.keyLength = be_int2(*(unsigned short *)(&buffer6));\r
+\r
+            //read number of flow reads\r
+            char buffer7 [2];\r
+            in.read(buffer7, 2);\r
+            header.numFlowsPerRead = be_int2(*(unsigned short *)(&buffer7));\r
+\r
+            //read format code\r
+            char buffer8 [1];\r
+            in.read(buffer8, 1);\r
+            header.flogramFormatCode = (int)(buffer8[0]);\r
+\r
+            //read flow chars\r
+            //the buffer is one byte longer than the field and zero filled, so the char* -> string\r
+            //conversion always finds a terminator inside the allocation, even after a short read\r
+            char* tempBuffer = new char[header.numFlowsPerRead + 1]();\r
+            in.read(tempBuffer, header.numFlowsPerRead);\r
+            header.flowChars = tempBuffer;\r
+            delete[] tempBuffer;\r
+\r
+            //read key - same terminated-buffer approach as the flow chars\r
+            char* tempBuffer2 = new char[header.keyLength + 1]();\r
+            in.read(tempBuffer2, header.keyLength);\r
+            header.keySequence = tempBuffer2;\r
+            delete[] tempBuffer2;\r
+\r
+            /* Pad to 8 chars */\r
+            unsigned long long spotInFile = in.tellg();\r
+            unsigned long long spot = (spotInFile + 7)& ~7;  // round up to a multiple of 8\r
+            in.seekg(spot);\r
+\r
+        }else{\r
+            m->mothurOut("Error reading sff common header."); m->mothurOutEndLine();\r
+        }\r
+\r
+        return 0;\r
+\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "readCommonHeader");\r
+        exit(1);\r
+    }\r
+}\r
+//**********************************************************************************************************************\r
+// Writes a fresh common header for every split output and for the no-match output.\r
+//\r
+// The header of currentFileName is re-read field by field. Magic number, version, header length,\r
+// key length, flows per read, format code, flow chars and key are copied through unchanged; the\r
+// index offset and index length are written as zeros; the read count is replaced by\r
+// numSplitReads[i][j] (or numNoMatch) for the file being written. The header is then padded with\r
+// zero bytes up to the next multiple of 8.\r
+//\r
+// Each assembled header is appended to filehandlesHeaders[i][j]. The no-match header goes to the\r
+// temporary file "tempNoMatchHeader", which is then combined with noMatchFile via\r
+// appendBinaryFiles / renameFile.\r
+//\r
+// header - the already parsed common header; only numFlowsPerRead and keyLength are used here\r
+// Returns 0.\r
+int SffInfoCommand::adjustCommonHeader(CommonHeader header){\r
+    try {\r
+        string endian = m->findEdianness();\r
+        const bool bigEndianHost = (endian == "BIG_ENDIAN");\r
+\r
+        ifstream in;\r
+        m->openInputFileBinary(currentFileName, in);\r
+\r
+        ofstream outNoMatchHeader;\r
+        string tempNoHeader = "tempNoMatchHeader";\r
+        m->openOutputFileBinary(tempNoHeader, outNoMatchHeader);\r
+\r
+        //everything that precedes the read count\r
+        //only as many bytes as were actually read (gcount) are carried over\r
+        string beforeCount;\r
+        char fixedField[8];\r
+        in.read(fixedField, 4); beforeCount.append(fixedField, in.gcount());  //magic number\r
+        in.read(fixedField, 4); beforeCount.append(fixedField, in.gcount());  //version\r
+        in.read(fixedField, 8); beforeCount.append(8, '\0');                  //offset, written as 0\r
+        in.read(fixedField, 4); beforeCount.append(4, '\0');                  //index length, written as 0\r
+\r
+        //original num reads - consumed and discarded, each output gets its own count\r
+        in.read(fixedField, 4);\r
+\r
+        //everything that follows the read count\r
+        string afterCount;\r
+        in.read(fixedField, 2); afterCount.append(fixedField, in.gcount());   //header length\r
+        in.read(fixedField, 2); afterCount.append(fixedField, in.gcount());   //key length\r
+        in.read(fixedField, 2); afterCount.append(fixedField, in.gcount());   //number of flow reads\r
+        in.read(fixedField, 1); afterCount.append(fixedField, in.gcount());   //format code\r
+\r
+        //flow chars (one spare byte keeps &buffer[0] valid when the field is empty)\r
+        vector<char> flowBuffer(header.numFlowsPerRead + 1);\r
+        in.read(&flowBuffer[0], header.numFlowsPerRead);\r
+        afterCount.append(&flowBuffer[0], in.gcount());\r
+\r
+        //key\r
+        vector<char> keyBuffer(header.keyLength + 1);\r
+        in.read(&keyBuffer[0], header.keyLength);\r
+        afterCount.append(&keyBuffer[0], in.gcount());\r
+\r
+        /* Pad to 8 chars */\r
+        //the padding is explicit zero bytes; previously an uninitialized buffer was written here\r
+        unsigned long long spotInFile = in.tellg();\r
+        unsigned long long spot = (spotInFile + 7)& ~7; // ~ inverts\r
+        afterCount.append(spot-spotInFile, '\0');\r
+        in.close();\r
+\r
+        //NOTE(review): the count is written in the host's byte order, exactly as before. The reader\r
+        //side decodes this field with be_int4 - confirm that this round-trips on all platforms.\r
+        for (int i = 0; i < filehandlesHeaders.size(); i++) {\r
+            for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
+                char countBytes[4];\r
+                for (int b = 0; b < 4; b++) {\r
+                    int shift = bigEndianHost ? (24 - 8*b) : (8*b);\r
+                    countBytes[b] = (numSplitReads[i][j] >> shift) & 0xFF;\r
+                }\r
+\r
+                //one open per file instead of one per header field\r
+                ofstream out;\r
+                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+                out.write(beforeCount.data(), beforeCount.length());\r
+                out.write(countBytes, 4);\r
+                out.write(afterCount.data(), afterCount.length());\r
+                out.close();\r
+            }\r
+        }\r
+\r
+        //same layout for the reads that matched no group\r
+        char noMatchBytes[4];\r
+        for (int b = 0; b < 4; b++) {\r
+            int shift = bigEndianHost ? (24 - 8*b) : (8*b);\r
+            noMatchBytes[b] = (numNoMatch >> shift) & 0xFF;\r
+        }\r
+        outNoMatchHeader.write(beforeCount.data(), beforeCount.length());\r
+        outNoMatchHeader.write(noMatchBytes, 4);\r
+        outNoMatchHeader.write(afterCount.data(), afterCount.length());\r
+        outNoMatchHeader.close();\r
+\r
+        //put the new header in front of the no-match reads and move the result into place\r
+        m->appendBinaryFiles(noMatchFile, tempNoHeader);\r
+        m->renameFile(tempNoHeader, noMatchFile);\r
+        m->mothurRemove(tempNoHeader);\r
+\r
+        return 0;\r
+\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "adjustCommonHeader");\r
+        exit(1);\r
+    }\r
+}\r
+//**********************************************************************************************************************\r
+// Reads one read (read header + flowgram, flow indexes, bases, quality scores) from `in`.\r
+//\r
+// When split > 1 the raw bytes of the read are additionally copied from currentFileName into the\r
+// output file selected by findGroup, or into noMatchFile when the read could not be assigned.\r
+//\r
+// in           - binary sff stream positioned at the start of a read header\r
+// read         - receives flowgram, flowIndex, bases and qualScores\r
+// numFlowReads - number of flowgram values stored per read\r
+// header       - receives the read header fields\r
+// Returns true when the stream is at eof after the read was processed, false otherwise.\r
+bool SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, Header& header){\r
+    try {\r
+        unsigned long long startSpotInFile = in.tellg();\r
+        if (!in.eof()) {\r
+\r
+            /*****************************************/\r
+            //read header\r
+\r
+            //read header length\r
+            char buffer [2];\r
+            in.read(buffer, 2);\r
+            header.headerLength = be_int2(*(unsigned short *)(&buffer));\r
+\r
+            //read name length\r
+            char buffer2 [2];\r
+            in.read(buffer2, 2);\r
+            header.nameLength = be_int2(*(unsigned short *)(&buffer2));\r
+\r
+            //read num bases\r
+            char buffer3 [4];\r
+            in.read(buffer3, 4);\r
+            header.numBases = be_int4(*(unsigned int *)(&buffer3));\r
+\r
+            //read clip qual left\r
+            char buffer4 [2];\r
+            in.read(buffer4, 2);\r
+            header.clipQualLeft = be_int2(*(unsigned short *)(&buffer4));\r
+            //NOTE(review): the value read from the file is overridden with a fixed 5 - kept as is,\r
+            //confirm this is intentional\r
+            header.clipQualLeft = 5;\r
+\r
+            //read clip qual right\r
+            char buffer5 [2];\r
+            in.read(buffer5, 2);\r
+            header.clipQualRight = be_int2(*(unsigned short *)(&buffer5));\r
+\r
+            //read clipAdapterLeft\r
+            char buffer6 [2];\r
+            in.read(buffer6, 2);\r
+            header.clipAdapterLeft = be_int2(*(unsigned short *)(&buffer6));\r
+\r
+            //read clipAdapterRight\r
+            char buffer7 [2];\r
+            in.read(buffer7, 2);\r
+            header.clipAdapterRight = be_int2(*(unsigned short *)(&buffer7));\r
+\r
+            //read name\r
+            //zero filled buffer with one spare byte: the char* -> string conversion can never run\r
+            //past the allocation, even if fewer than nameLength bytes were read\r
+            char* tempBuffer = new char[header.nameLength + 1]();\r
+            in.read(tempBuffer, header.nameLength);\r
+            header.name = tempBuffer;\r
+            delete[] tempBuffer;\r
+\r
+            //extract info from name\r
+            decodeName(header.timestamp, header.region, header.xy, header.name);\r
+\r
+            /* Pad to 8 chars */\r
+            unsigned long long spotInFile = in.tellg();\r
+            unsigned long long spot = (spotInFile + 7)& ~7;\r
+            in.seekg(spot);\r
+\r
+            /*****************************************/\r
+            //sequence read\r
+\r
+            //read flowgram\r
+            read.flowgram.resize(numFlowReads);\r
+            for (int i = 0; i < numFlowReads; i++) {\r
+                char flowBytes [2];\r
+                in.read(flowBytes, 2);\r
+                read.flowgram[i] = be_int2(*(unsigned short *)(&flowBytes));\r
+            }\r
+\r
+            //read flowIndex\r
+            read.flowIndex.resize(header.numBases);\r
+            for (int i = 0; i < header.numBases; i++) {\r
+                char temp[1];\r
+                in.read(temp, 1);\r
+                read.flowIndex[i] = be_int1(*(unsigned char *)(&temp));\r
+            }\r
+\r
+            //read bases - terminated buffer, see the name field above\r
+            char* tempBuffer6 = new char[size_t(header.numBases) + 1]();\r
+            in.read(tempBuffer6, header.numBases);\r
+            read.bases = tempBuffer6;\r
+            delete[] tempBuffer6;\r
+\r
+            //read qual scores\r
+            read.qualScores.resize(header.numBases);\r
+            for (int i = 0; i < header.numBases; i++) {\r
+                char temp[1];\r
+                in.read(temp, 1);\r
+                read.qualScores[i] = be_int1(*(unsigned char *)(&temp));\r
+            }\r
+\r
+            /* Pad to 8 chars */\r
+            spotInFile = in.tellg();\r
+            spot = (spotInFile + 7)& ~7;\r
+            in.seekg(spot);\r
+\r
+            if (split > 1) {\r
+\r
+                //defaults: the findGroup overloads only assign the indexes they resolve, so start\r
+                //from 0; and a read that is never classified is treated as a no-match instead of\r
+                //indexing filehandles with indeterminate values\r
+                int barcodeIndex = 0;\r
+                int primerIndex = 0;\r
+                int trashCodeLength = 1;\r
+\r
+                if (hasOligos)      {  trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex);                }\r
+                else if (hasGroup)  {  trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex, "groupMode");   }\r
+                else {  m->mothurOut("[ERROR]: uh oh, we shouldn't be here...\n"); }\r
+\r
+                //re-read the raw bytes of this read (header through padding) so they can be copied\r
+                char * mybuffer;\r
+                mybuffer = new char [spot-startSpotInFile];\r
+\r
+                ifstream in2;\r
+                m->openInputFileBinary(currentFileName, in2);\r
+                in2.seekg(startSpotInFile);\r
+                in2.read(mybuffer,spot-startSpotInFile);\r
+\r
+                if(trashCodeLength == 0){\r
+                    ofstream out;\r
+                    m->openOutputFileBinaryAppend(filehandles[barcodeIndex][primerIndex], out);\r
+                    out.write(mybuffer, in2.gcount());\r
+                    out.close();\r
+                    numSplitReads[barcodeIndex][primerIndex]++;\r
+                }\r
+                else{\r
+                    ofstream out;\r
+                    m->openOutputFileBinaryAppend(noMatchFile, out);\r
+                    out.write(mybuffer, in2.gcount());\r
+                    out.close();\r
+                    numNoMatch++;\r
+                }\r
+                delete[] mybuffer;\r
+                in2.close();\r
+            }\r
+\r
+        }else{\r
+            m->mothurOut("Error reading."); m->mothurOutEndLine();\r
+        }\r
+\r
+        if (in.eof()) {  return true; }\r
+\r
+        return false;\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "readSeqData");\r
+        exit(1);\r
+    }\r
+}\r
+//**********************************************************************************************************************\r
+// Oligo based group lookup for one read.\r
+//\r
+// The bases are clipped (trim) or re-cased (no trim) with the read's clipQualLeft/clipQualRight,\r
+// then linker, barcode, spacer, forward primer and reverse primer are stripped in that order\r
+// for whichever of them are configured.\r
+//\r
+// barcode / primer - set by stripBarcode / stripForward when those steps run\r
+// Returns the number of failed checks; 0 means the read was matched.\r
+int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer) {\r
+    try {\r
+        //find group read belongs to\r
+        TrimOligos oligoTrimmer(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);\r
+\r
+        string bases = read.bases;\r
+\r
+        if (!trim) {\r
+            //if you wanted the sfftxt then you already converted the bases to the right case\r
+            if (!sfftxt) {\r
+                //lower case marks what would be clipped, upper case what would be kept\r
+                int keepEnd = header.clipQualRight;\r
+                if (keepEnd == 0) { keepEnd = bases.length(); }\r
+                for (int pos = 0; pos < (header.clipQualLeft-1); pos++) { bases[pos] = tolower(bases[pos]); }\r
+                for (int pos = (header.clipQualLeft-1); pos < (keepEnd-1); pos++) { bases[pos] = toupper(bases[pos]); }\r
+                for (int pos = (keepEnd-1); pos < bases.length(); pos++) { bases[pos] = tolower(bases[pos]); }\r
+            }\r
+        }\r
+        else if ((header.clipQualRight != 0) && (header.clipQualRight < header.clipQualLeft)) {\r
+            //right clip lies before the left clip\r
+            bases = "NNNN";\r
+        }\r
+        else if ((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)) {\r
+            bases = bases.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));\r
+        }\r
+        else {\r
+            //clipQualRight of 0: don't trim right\r
+            bases = bases.substr(header.clipQualLeft-1);\r
+        }\r
+\r
+        Sequence currSeq(header.name, bases);\r
+        QualityScores currQual;\r
+\r
+        int failures = 0;     //one per check that failed\r
+        int totalDiffs = 0;   //differences accumulated over the checks that passed\r
+        int diffs = 1;\r
+\r
+        if (numLinkers != 0) {\r
+            diffs = oligoTrimmer.stripLinker(currSeq, currQual);\r
+            if (diffs <= ldiffs) { totalDiffs += diffs; }\r
+            else { failures++; }\r
+        }\r
+\r
+        if (barcodes.size() != 0) {\r
+            diffs = oligoTrimmer.stripBarcode(currSeq, currQual, barcode);\r
+            if (diffs <= bdiffs) { totalDiffs += diffs; }\r
+            else { failures++; }\r
+        }\r
+\r
+        if (numSpacers != 0) {\r
+            diffs = oligoTrimmer.stripSpacer(currSeq, currQual);\r
+            if (diffs <= sdiffs) { totalDiffs += diffs; }\r
+            else { failures++; }\r
+        }\r
+\r
+        if (numFPrimers != 0) {\r
+            diffs = oligoTrimmer.stripForward(currSeq, currQual, primer, true);\r
+            if (diffs <= pdiffs) { totalDiffs += diffs; }\r
+            else { failures++; }\r
+        }\r
+\r
+        if (totalDiffs > tdiffs) { failures++; }\r
+\r
+        if (revPrimer.size() != 0) {\r
+            diffs = oligoTrimmer.stripReverse(currSeq, currQual);\r
+            if (!diffs) { failures++; }\r
+        }\r
+\r
+        return failures;\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "findGroup");\r
+        exit(1);\r
+    }\r
+}\r
+//**********************************************************************************************************************\r
+// Group-file based lookup for one read.\r
+//\r
+// The read name is resolved through groupMap and the resulting group is looked up in barcodes.\r
+// primer is always set to 0; barcode is only assigned when the group is found in barcodes.\r
+// The groupMode argument is not read, it only selects this overload.\r
+// Returns 0 when the read was assigned, 1 otherwise.\r
+int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer, string groupMode) {\r
+    try {\r
+        primer = 0;\r
+\r
+        string group = groupMap->getGroup(header.name);\r
+        if (group != "not found") {\r
+            //find file group\r
+            map<string, int>::iterator pos = barcodes.find(group);\r
+            if (pos != barcodes.end()) {\r
+                barcode = pos->second;\r
+                return 0;\r
+            }\r
+        }\r
+\r
+        //scrap for group: name unknown to the group map, or group without an output slot\r
+        return 1;\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "findGroup");\r
+        exit(1);\r
+    }\r
+}\r
+//**********************************************************************************************************************\r
+// Derives timestamp, region and xy strings from a read name.\r
+//\r
+// timestamp - from the first 6 characters (base 36), written as year_mon_day_hr_minute_sec;\r
+//             only set when the name has at least 6 characters\r
+// region    - characters 7 and 8 of the name (0-based); only set when the name has at least 9\r
+// xy        - remainder of the name from position 9 (base 36), split into x_y with y being the\r
+//             low 12 bits; only set when the name has at least 9 characters\r
+// Returns 0.\r
+int SffInfoCommand::decodeName(string& timestamp, string& region, string& xy, string name) {\r
+    try {\r
+\r
+        if (name.length() >= 6) {\r
+            unsigned int timeNum = m->fromBase36(name.substr(0, 6));\r
+\r
+            //peel the fields off from least to most significant: 60 s, 60 min, 24 h, 32 days, 13 months\r
+            int minutesTotal = timeNum / 60;\r
+            int sec = timeNum % 60;\r
+            int hoursTotal = minutesTotal / 60;\r
+            int minute = minutesTotal % 60;\r
+            int daysTotal = hoursTotal / 24;\r
+            int hr = hoursTotal % 24;\r
+            int monthsTotal = daysTotal / 32;\r
+            int day = daysTotal % 32;\r
+            int mon = monthsTotal % 13;\r
+            int year = 2000 + (monthsTotal / 13);\r
+\r
+            timestamp = toString(year) + "_" + toString(mon) + "_" + toString(day) + "_" + toString(hr) + "_" + toString(minute) + "_" + toString(sec);\r
+        }\r
+\r
+        if (name.length() >= 9) {\r
+            region = name.substr(7, 2);\r
+\r
+            unsigned int myXy = m->fromBase36(name.substr(9));\r
+            int x = myXy >> 12;\r
+            int y = myXy & 4095;\r
+\r
+            xy = toString(x) + "_" + toString(y);\r
+        }\r
+\r
+        return 0;\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "decodeName");\r
+        exit(1);\r
+    }\r
+}\r
+//**********************************************************************************************************************\r
+// Writes the common header to `out` as labelled text, one field per line, followed by a blank line.\r
+// Returns 0.\r
+int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader& header) {\r
+    try {\r
+\r
+        out << "Common Header:\nMagic Number: " << header.magicNumber << endl\r
+            << "Version: " << header.version << endl\r
+            << "Index Offset: " << header.indexOffset << endl\r
+            << "Index Length: " << header.indexLength << endl\r
+            << "Number of Reads: " << header.numReads << endl\r
+            << "Header Length: " << header.headerLength << endl\r
+            << "Key Length: " << header.keyLength << endl\r
+            << "Number of Flows: " << header.numFlowsPerRead << endl\r
+            << "Format Code: " << header.flogramFormatCode << endl\r
+            << "Flow Chars: " << header.flowChars << endl\r
+            << "Key Sequence: " << header.keySequence << endl\r
+            << endl;\r
+\r
+        return 0;\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "printCommonHeader");\r
+        exit(1);\r
+    }\r
+}\r
+//**********************************************************************************************************************\r
+// Writes one read header to `out` as labelled text in three groups separated by blank lines:\r
+// name/prefix/region/location, the run/analysis/path labels (printed without values), and the\r
+// length and clip fields.\r
+// Returns 0.\r
+int SffInfoCommand::printHeader(ofstream& out, Header& header) {\r
+    try {\r
+\r
+        //identification\r
+        out << ">" << header.name << endl\r
+            << "Run Prefix: " << header.timestamp << endl\r
+            << "Region #: " << header.region << endl\r
+            << "XY Location: " << header.xy << endl\r
+            << endl;\r
+\r
+        //labels only, no values are available for these\r
+        out << "Run Name: " << endl\r
+            << "Analysis Name: " << endl\r
+            << "Full Path: " << endl\r
+            << endl;\r
+\r
+        //lengths and clip points\r
+        out << "Read Header Len: " << header.headerLength << endl\r
+            << "Name Length: " << header.nameLength << endl\r
+            << "# of Bases: " << header.numBases << endl\r
+            << "Clip Qual Left: " << header.clipQualLeft << endl\r
+            << "Clip Qual Right: " << header.clipQualRight << endl\r
+            << "Clip Adap Left: " << header.clipAdapterLeft << endl\r
+            << "Clip Adap Right: " << header.clipAdapterRight << endl\r
+            << endl;\r
+\r
+        return 0;\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "printHeader");\r
+        exit(1);\r
+    }\r
+}\r
+//**********************************************************************************************************************\r
+// Checks that the quality clip points of a read do not exceed what was actually read.\r
+//\r
+// clipQualLeft and clipQualRight are each compared against the number of bases and the number\r
+// of quality scores. Every violation adds a line to a warning that is logged once at the end.\r
+// Returns true when all four comparisons pass, false otherwise.\r
+bool SffInfoCommand::sanityCheck(Header& header, seqRead& read) {\r
+    try {\r
+        string message = "[WARNING]: Your sff file may be corrupted! Sequence: " + header.name + "\n";\r
+        bool okay = true;\r
+\r
+        //shared endings of the warning lines\r
+        const string basesRead = ", but we only read " + toString(read.bases.length()) + " bases.\n";\r
+        const string scoresRead = ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n";\r
+\r
+        if (header.clipQualLeft > read.bases.length()) {\r
+            message += "Clip Qual Left = " + toString(header.clipQualLeft) + basesRead;\r
+            okay = false;\r
+        }\r
+        if (header.clipQualRight > read.bases.length()) {\r
+            message += "Clip Qual Right = " + toString(header.clipQualRight) + basesRead;\r
+            okay = false;\r
+        }\r
+        if (header.clipQualLeft > read.qualScores.size()) {\r
+            message += "Clip Qual Left = " + toString(header.clipQualLeft) + scoresRead;\r
+            okay = false;\r
+        }\r
+        if (header.clipQualRight > read.qualScores.size()) {\r
+            message += "Clip Qual Right = " + toString(header.clipQualRight) + scoresRead;\r
+            okay = false;\r
+        }\r
+\r
+        if (!okay) {\r
+            m->mothurOut(message);\r
+            m->mothurOutEndLine();\r
+        }\r
+\r
+        return okay;\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "sanityCheck");\r
+        exit(1);\r
+    }\r
+}\r
+//**********************************************************************************************************************\r
+// Writes the data section of one read in sff.txt layout: flowgram, cumulative flow indexes,\r
+// bases and quality scores, followed by a blank line.\r
+//\r
+// read.bases is modified in place: positions outside the quality clip window become lower case,\r
+// positions inside become upper case.\r
+// Returns 0.\r
+int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& header) {\r
+    try {\r
+        //flow values are stored as hundredths\r
+        out << "Flowgram: ";\r
+        for (int f = 0; f < read.flowgram.size(); f++) {\r
+            out << setprecision(2) << (read.flowgram[f]/(float)100) << '\t';\r
+        }\r
+\r
+        //the stored indexes are increments, the running total is what gets printed\r
+        out << endl << "Flow Indexes: ";\r
+        int runningIndex = 0;\r
+        for (int f = 0; f < read.flowIndex.size(); f++) {\r
+            runningIndex += read.flowIndex[f];\r
+            out << runningIndex << '\t';\r
+        }\r
+\r
+        //make the bases you want to clip lowercase and the bases you want to keep upper case\r
+        int keepEnd = header.clipQualRight;\r
+        if (keepEnd == 0) { keepEnd = read.bases.length(); }\r
+        for (int pos = 0; pos < (header.clipQualLeft-1); pos++) { read.bases[pos] = tolower(read.bases[pos]); }\r
+        for (int pos = (header.clipQualLeft-1); pos < (keepEnd-1); pos++) { read.bases[pos] = toupper(read.bases[pos]); }\r
+        for (int pos = (keepEnd-1); pos < read.bases.length(); pos++) { read.bases[pos] = tolower(read.bases[pos]); }\r
+\r
+        out << endl << "Bases: " << read.bases << endl << "Quality Scores: ";\r
+        for (int q = 0; q < read.qualScores.size(); q++) {\r
+            out << read.qualScores[q] << '\t';\r
+        }\r
+\r
+        out << endl << endl;\r
+\r
+        return 0;\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "printSffTxtSeqData");\r
+        exit(1);\r
+    }\r
+}\r
+//**********************************************************************************************************************\r
+// Writes one read as a fasta record: ">name xy=<xy>" followed by the bases.\r
+//\r
+// With trim the bases are cut to the quality clip window ("NNNN" when a non-zero right clip lies\r
+// before the left clip). Without trim the full sequence is written; unless sfftxt output already\r
+// re-cased the bases, clipped positions are lower-cased and kept positions upper-cased here.\r
+// Returns 0.\r
+int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& header) {\r
+    try {\r
+        string bases = read.bases;\r
+\r
+        if (!trim) {\r
+            //if you wanted the sfftxt then you already converted the bases to the right case\r
+            if (!sfftxt) {\r
+                //make the bases you want to clip lowercase and the bases you want to keep upper case\r
+                int keepEnd = header.clipQualRight;\r
+                if (keepEnd == 0) { keepEnd = bases.length(); }\r
+                for (int pos = 0; pos < (header.clipQualLeft-1); pos++) { bases[pos] = tolower(bases[pos]); }\r
+                for (int pos = (header.clipQualLeft-1); pos < (keepEnd-1); pos++) { bases[pos] = toupper(bases[pos]); }\r
+                for (int pos = (keepEnd-1); pos < bases.length(); pos++) { bases[pos] = tolower(bases[pos]); }\r
+            }\r
+        }\r
+        else if ((header.clipQualRight != 0) && (header.clipQualRight < header.clipQualLeft)) {\r
+            bases = "NNNN";\r
+        }\r
+        else if ((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)) {\r
+            bases = bases.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));\r
+        }\r
+        else {\r
+            //clipQualRight of 0: don't trim right\r
+            bases = bases.substr(header.clipQualLeft-1);\r
+        }\r
+\r
+        out << ">" << header.name << " xy=" << header.xy << endl << bases << endl;\r
+\r
+        return 0;\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "printFastaSeqData");\r
+        exit(1);\r
+    }\r
+}\r
+\r
+//**********************************************************************************************************************\r
+// Writes one read as a qual record: a ">name xy=<xy> length=<n>" line followed by the\r
+// tab separated quality scores.\r
+//\r
+// With trim only the scores inside the quality clip window are written ("0 0 0 0" without a\r
+// length label when a non-zero right clip lies before the left clip). Without trim all scores\r
+// are written.\r
+// Returns 0.\r
+int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& header) {\r
+    try {\r
+\r
+        if (trim) {\r
+            if(header.clipQualRight < header.clipQualLeft){\r
+                if (header.clipQualRight == 0) { //don't trim right\r
+                    //scores clipQualLeft-1 .. end are written, that is size-(clipQualLeft-1) values;\r
+                    //the label used to be one short of what the loop below prints\r
+                    out << ">" << header.name << " xy=" << header.xy << " length=" << (read.qualScores.size()-header.clipQualLeft+1) << endl;\r
+                    for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t'; }\r
+                }else {\r
+                    out << ">" << header.name << " xy=" << header.xy << endl;\r
+                    out << "0\t0\t0\t0";\r
+                }\r
+            }\r
+            else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){\r
+                //scores clipQualLeft-1 .. clipQualRight-2, that is clipQualRight-clipQualLeft values\r
+                out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;\r
+                for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) {   out << read.qualScores[i] << '\t'; }\r
+            }\r
+            else{\r
+                out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;\r
+                for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t'; }\r
+            }\r
+        }else{\r
+            out << ">" << header.name << " xy=" << header.xy << " length=" << read.qualScores.size() << endl;\r
+            for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t'; }\r
+        }\r
+\r
+        out << endl;\r
+\r
+        return 0;\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "printQualSeqData");\r
+        exit(1);\r
+    }\r
+}\r
+\r
+//**********************************************************************************************************************\r
+// Writes one read as a line of the flow file: name, the sum of the first endValue flow index\r
+// increments, then every flowgram value divided by 100.\r
+//\r
+// endValue is clipQualRight, or the number of flow indexes when clipQualRight is 0.\r
+// Nothing is written when endValue is not greater than clipQualLeft.\r
+// Returns 0.\r
+int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) {\r
+    try {\r
+\r
+        int endValue = header.clipQualRight;\r
+        if (header.clipQualRight == 0) {\r
+            endValue = read.flowIndex.size();\r
+            if (m->debug) { m->mothurOut("[DEBUG]: " + header.name + " has clipQualRight=0.\n"); }\r
+        }\r
+\r
+        //empty clip window, skip this read\r
+        if (endValue <= header.clipQualLeft) { return 0; }\r
+\r
+        int rightIndex = 0;\r
+        for (int pos = 0; pos < endValue; pos++) { rightIndex += read.flowIndex[pos]; }\r
+\r
+        out << header.name << ' ' << rightIndex;\r
+        for (int f = 0; f < read.flowgram.size(); f++) {\r
+            out << setprecision(2) << ' ' << (read.flowgram[f]/(float)100);\r
+        }\r
+        out << endl;\r
+\r
+        return 0;\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "printFlowSeqData");\r
+        exit(1);\r
+    }\r
+}\r
+//**********************************************************************************************************************\r
+// Replaces seqNames with the whitespace separated names found in `filename`.\r
+//\r
+// filename - accnos file to read\r
+// If control_pressed is set while reading, seqNames is cleared and reading stops.\r
+// Returns 0.\r
+int SffInfoCommand::readAccnosFile(string filename) {\r
+    try {\r
+        //remove old names\r
+        seqNames.clear();\r
+\r
+        ifstream in;\r
+        m->openInputFile(filename, in);\r
+        string name;\r
+\r
+        while(!in.eof()){\r
+            //only keep the name when the extraction really produced one; a blank file or trailing\r
+            //whitespace would otherwise add an empty name to the set\r
+            if (in >> name) { seqNames.insert(name); }\r
+            m->gobble(in);\r
+\r
+            if (m->control_pressed) { seqNames.clear(); break; }\r
+        }\r
+        in.close();\r
+\r
+        return 0;\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "readAccnosFile");\r
+        exit(1);\r
+    }\r
+}\r
+//**********************************************************************************************************************\r
+int SffInfoCommand::parseSffTxt() {\r
+ try {\r
+ \r
+ ifstream inSFF;\r
+ m->openInputFile(sfftxtFilename, inSFF);\r
+ \r
+ if (outputDir == "") { outputDir += m->hasPath(sfftxtFilename); }\r
+ \r
+ //output file names\r
+ ofstream outFasta, outQual, outFlow;\r
+ string outFastaFileName, outQualFileName;\r
+ string fileRoot = m->getRootName(m->getSimpleName(sfftxtFilename));\r
+ if (fileRoot.length() > 0) {\r
+ //rip off last .\r
+ fileRoot = fileRoot.substr(0, fileRoot.length()-1);\r
+ fileRoot = m->getRootName(fileRoot);\r
+ }\r
+ \r
+ map<string, string> variables; \r
+ variables["[filename]"] = fileRoot;\r
+ string sfftxtFileName = getOutputFileName("sfftxt",variables);\r
+ string outFlowFileName = getOutputFileName("flow",variables);\r
+ if (!trim) { variables["[tag]"] = "raw"; }\r
+ outFastaFileName = getOutputFileName("fasta",variables);\r
+ outQualFileName = getOutputFileName("qfile",variables);\r
+ \r
+ if (fasta) { m->openOutputFile(outFastaFileName, outFasta); outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }\r
+ if (qual) { m->openOutputFile(outQualFileName, outQual); outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName); }\r
+ if (flow) { m->openOutputFile(outFlowFileName, outFlow); outputNames.push_back(outFlowFileName); outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName); }\r
+ \r
+ //read common header\r
+ string commonHeader = m->getline(inSFF);\r
+ string magicNumber = m->getline(inSFF); \r
+ string version = m->getline(inSFF);\r
+ string indexOffset = m->getline(inSFF);\r
+ string indexLength = m->getline(inSFF);\r
+ int numReads = parseHeaderLineToInt(inSFF);\r
+ string headerLength = m->getline(inSFF);\r
+ string keyLength = m->getline(inSFF);\r
+ int numFlows = parseHeaderLineToInt(inSFF);\r
+ string flowgramCode = m->getline(inSFF);\r
+ string flowChars = m->getline(inSFF);\r
+ string keySequence = m->getline(inSFF);\r
+ m->gobble(inSFF);\r
+ \r
+ string seqName;\r
+ \r
+ if (flow) { outFlow << numFlows << endl; }\r
+ \r
+ for(int i=0;i<numReads;i++){\r
+ \r
+ //sanity check\r
+ if (inSFF.eof()) { m->mothurOut("[ERROR]: Expected " + toString(numReads) + " but reached end of file at " + toString(i+1) + "."); m->mothurOutEndLine(); break; }\r
+ \r
+ Header header;\r
+ \r
+ //parse read header\r
+ inSFF >> seqName;\r
+ seqName = seqName.substr(1);\r
+ m->gobble(inSFF);\r
+ header.name = seqName;\r
+ \r
+ string runPrefix = parseHeaderLineToString(inSFF); header.timestamp = runPrefix;\r
+ string regionNumber = parseHeaderLineToString(inSFF); header.region = regionNumber;\r
+ string xyLocation = parseHeaderLineToString(inSFF); header.xy = xyLocation;\r
+ m->gobble(inSFF);\r
+ \r
+ string runName = parseHeaderLineToString(inSFF);\r
+ string analysisName = parseHeaderLineToString(inSFF);\r
+ string fullPath = parseHeaderLineToString(inSFF);\r
+ m->gobble(inSFF);\r
+ \r
+ string readHeaderLen = parseHeaderLineToString(inSFF); convert(readHeaderLen, header.headerLength);\r
+ string nameLength = parseHeaderLineToString(inSFF); convert(nameLength, header.nameLength);\r
+ int numBases = parseHeaderLineToInt(inSFF); header.numBases = numBases;\r
+ string clipQualLeft = parseHeaderLineToString(inSFF); convert(clipQualLeft, header.clipQualLeft);\r
+ int clipQualRight = parseHeaderLineToInt(inSFF); header.clipQualRight = clipQualRight;\r
+ string clipAdapLeft = parseHeaderLineToString(inSFF); convert(clipAdapLeft, header.clipAdapterLeft);\r
+ string clipAdapRight = parseHeaderLineToString(inSFF); convert(clipAdapRight, header.clipAdapterRight);\r
+ m->gobble(inSFF);\r
+ \r
+ seqRead read;\r
+ \r
+ //parse read\r
+ vector<unsigned short> flowVector = parseHeaderLineToFloatVector(inSFF, numFlows); read.flowgram = flowVector;\r
+ vector<unsigned int> flowIndices = parseHeaderLineToIntVector(inSFF, numBases); \r
+ \r
+ //adjust for print\r
+ vector<unsigned int> flowIndicesAdjusted; flowIndicesAdjusted.push_back(flowIndices[0]);\r
+ for (int j = 1; j < flowIndices.size(); j++) { flowIndicesAdjusted.push_back(flowIndices[j] - flowIndices[j-1]); }\r
+ read.flowIndex = flowIndicesAdjusted;\r
+ \r
+ string bases = parseHeaderLineToString(inSFF); read.bases = bases;\r
+ vector<unsigned int> qualityScores = parseHeaderLineToIntVector(inSFF, numBases); read.qualScores = qualityScores;\r
+ m->gobble(inSFF);\r
+ \r
+ //if you have provided an accosfile and this seq is not in it, then dont print\r
+ bool print = true;\r
+ if (seqNames.size() != 0) { if (seqNames.count(header.name) == 0) { print = false; } }\r
+ \r
+ //print \r
+ if (print) {\r
+ if (fasta) { printFastaSeqData(outFasta, read, header); }\r
+ if (qual) { printQualSeqData(outQual, read, header); }\r
+ if (flow) { printFlowSeqData(outFlow, read, header); }\r
+ }\r
+ \r
+ //report progress\r
+ if((i+1) % 10000 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine(); }\r
+ \r
+ if (m->control_pressed) { break; }\r
+ }\r
+ \r
+ //report progress\r
+ if (!m->control_pressed) { if((numReads) % 10000 != 0){ m->mothurOut(toString(numReads)); m->mothurOutEndLine(); } }\r
+ \r
+ inSFF.close();\r
+ \r
+ if (fasta) { outFasta.close(); }\r
+ if (qual) { outQual.close(); }\r
+ if (flow) { outFlow.close(); }\r
+ \r
+ return 0;\r
+ }\r
+ catch(exception& e) {\r
+ m->errorOut(e, "SffInfoCommand", "parseSffTxt");\r
+ exit(1);\r
+ }\r
+}\r
+//**********************************************************************************************************************\r
+\r
+//Advances the stream past the next ':' and returns the integer that follows it.\r
+//If the end of the file is reached before a ':' is seen, 0 is returned (the value used to be\r
+//left uninitialized in that case). m->gobble is called on the stream before returning.\r
+int SffInfoCommand::parseHeaderLineToInt(ifstream& file){\r
+    try {\r
+        int number = 0; //defined result even when no ':' is left in the file\r
+\r
+        while (!file.eof()) {\r
+\r
+            char c = file.get();\r
+            if (c == ':'){\r
+                file >> number;\r
+                break;\r
+            }\r
+\r
+        }\r
+        m->gobble(file);\r
+        return number;\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "parseHeaderLineToInt");\r
+        exit(1);\r
+    }\r
+\r
+}\r
+\r
+//**********************************************************************************************************************\r
+\r
+//Advances the stream past the next ':' and returns the whitespace-delimited token that follows it.\r
+//If the end of the file is reached without seeing a ':', an empty string is returned.\r
+//m->gobble is called on the stream before returning in either case.\r
+string SffInfoCommand::parseHeaderLineToString(ifstream& file){\r
+    try {\r
+        string token;\r
+        bool sawColon = false;\r
+\r
+        while (!sawColon && !file.eof()) {\r
+            const char next = file.get();\r
+            if (next == ':') {\r
+                //only a single token is taken, not the remainder of the line\r
+                file >> token;\r
+                sawColon = true;\r
+            }\r
+        }\r
+\r
+        m->gobble(file);\r
+        return token;\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "parseHeaderLineToString");\r
+        exit(1);\r
+    }\r
+}\r
+\r
+//**********************************************************************************************************************\r
+\r
+//Advances the stream past the next ':' and reads 'length' floating point values after it.\r
+//Each value is multiplied by 100 and stored as an unsigned short.\r
+//The returned vector always has 'length' entries; they stay 0 when no ':' is found.\r
+//m->gobble is called on the stream before returning.\r
+vector<unsigned short> SffInfoCommand::parseHeaderLineToFloatVector(ifstream& file, int length){\r
+    try {\r
+        vector<unsigned short> scaled(length);\r
+        bool sawColon = false;\r
+\r
+        while (!sawColon && !file.eof()) {\r
+            const char next = file.get();\r
+            if (next != ':') { continue; }\r
+\r
+            sawColon = true;\r
+            float value;\r
+            for (int idx = 0; idx < length; idx++) {\r
+                file >> value;\r
+                scaled[idx] = value * 100;\r
+            }\r
+        }\r
+\r
+        m->gobble(file);\r
+        return scaled;\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "parseHeaderLineToFloatVector");\r
+        exit(1);\r
+    }\r
+}\r
+\r
+//**********************************************************************************************************************\r
+\r
+//Advances the stream past the next ':' and reads 'length' unsigned integers after it.\r
+//The returned vector always has 'length' entries; they stay 0 when no ':' is found.\r
+//m->gobble is called on the stream before returning.\r
+vector<unsigned int> SffInfoCommand::parseHeaderLineToIntVector(ifstream& file, int length){\r
+    try {\r
+        vector<unsigned int> values(length);\r
+        bool sawColon = false;\r
+\r
+        while (!sawColon && !file.eof()) {\r
+            const char next = file.get();\r
+            if (next != ':') { continue; }\r
+\r
+            sawColon = true;\r
+            for (int idx = 0; idx < length; idx++) {\r
+                file >> values[idx];\r
+            }\r
+        }\r
+\r
+        m->gobble(file);\r
+        return values;\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "parseHeaderLineToIntVector");\r
+        exit(1);\r
+    }\r
+}\r
+//***************************************************************************************************************\r
+\r
+//Reads an oligos file and prepares the per-group output bookkeeping used when splitting an sff file.\r
+//\r
+//Each non-comment line starts with a type (compared case-insensitively) followed by an oligo, which is\r
+//upper-cased with U converted to T:\r
+//  FORWARD - stored in primers; the rest of the line (spaces and tabs removed) is the primer name\r
+//  REVERSE - reverse complemented and appended to revPrimer\r
+//  BARCODE - stored in barcodes; the next token is the group name\r
+//  LINKER / SPACER - appended to linker / spacer\r
+//Lines whose type starts with '#' are skipped, any other type produces a warning.\r
+//\r
+//When split > 1, one output file name is built per barcode/primer combination and the file is opened\r
+//and closed immediately. The scrap file name is stored in noMatchFile and passed to m->mothurRemove.\r
+//\r
+//Returns false (and sets split to 1) when no barcode or primer carries a group name, true otherwise.\r
+bool SffInfoCommand::readOligos(string oligoFile){\r
+    try {\r
+        filehandles.clear();\r
+        numSplitReads.clear();\r
+        filehandlesHeaders.clear();\r
+\r
+        ifstream inOligos;\r
+        m->openInputFile(oligoFile, inOligos);\r
+\r
+        string type, oligo, group;\r
+\r
+        int indexPrimer = 0;\r
+        int indexBarcode = 0;\r
+\r
+        while(!inOligos.eof()){\r
+\r
+            inOligos >> type;\r
+\r
+            if(type[0] == '#'){\r
+                while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // skip the rest of the comment line\r
+                m->gobble(inOligos);\r
+            }\r
+            else{\r
+                m->gobble(inOligos);\r
+                //make type case insensitive\r
+                for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }\r
+\r
+                inOligos >> oligo;\r
+\r
+                //normalize the oligo: upper case, RNA U becomes T\r
+                for(int i=0;i<oligo.length();i++){\r
+                    oligo[i] = toupper(oligo[i]);\r
+                    if(oligo[i] == 'U') { oligo[i] = 'T'; }\r
+                }\r
+\r
+                if(type == "FORWARD"){\r
+                    group = "";\r
+\r
+                    // get rest of line in case there is a primer name\r
+                    while (!inOligos.eof()) {\r
+                        char c = inOligos.get();\r
+                        if (c == 10 || c == 13 || c == -1){ break; }\r
+                        else if (c == 32 || c == 9){;} //space or tab\r
+                        else { group += c; }\r
+                    }\r
+\r
+                    //check for repeat primers (reported, but the later entry still replaces the earlier index)\r
+                    map<string, int>::iterator itPrime = primers.find(oligo);\r
+                    if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }\r
+\r
+                    primers[oligo]=indexPrimer; indexPrimer++;\r
+                    primerNameVector.push_back(group);\r
+\r
+                }else if(type == "REVERSE"){\r
+                    //reverse primers are stored reverse complemented\r
+                    string oligoRC = reverseOligo(oligo);\r
+                    revPrimer.push_back(oligoRC);\r
+                }\r
+                else if(type == "BARCODE"){\r
+                    inOligos >> group;\r
+\r
+                    //check for repeat barcodes (reported, but the later entry still replaces the earlier index)\r
+                    map<string, int>::iterator itBar = barcodes.find(oligo);\r
+                    if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }\r
+\r
+                    barcodes[oligo]=indexBarcode; indexBarcode++;\r
+                    barcodeNameVector.push_back(group);\r
+                }else if(type == "LINKER"){\r
+                    linker.push_back(oligo);\r
+                }else if(type == "SPACER"){\r
+                    spacer.push_back(oligo);\r
+                }\r
+                else{ m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }\r
+            }\r
+            m->gobble(inOligos);\r
+        }\r
+        inOligos.close();\r
+\r
+        //nothing to split on: no barcodes and either no forward primers at all or an unnamed first primer.\r
+        //The empty() test must come first, primerNameVector[0] is not valid on an empty vector.\r
+        if(barcodeNameVector.size() == 0 && (primerNameVector.empty() || primerNameVector[0] == "")){ split = 1; }\r
+\r
+        //add in potential combos\r
+        if(barcodeNameVector.size() == 0){\r
+            barcodes[""] = 0;\r
+            barcodeNameVector.push_back("");\r
+        }\r
+\r
+        if(primerNameVector.size() == 0){\r
+            primers[""] = 0;\r
+            primerNameVector.push_back("");\r
+        }\r
+\r
+        //filehandles is indexed [barcode][primer]\r
+        filehandles.resize(barcodeNameVector.size());\r
+        for(int i=0;i<filehandles.size();i++){\r
+            filehandles[i].assign(primerNameVector.size(), "");\r
+        }\r
+\r
+        if(split > 1){\r
+            set<string> uniqueNames; //used to cleanup outputFileNames\r
+            for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){\r
+                for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){\r
+\r
+                    string primerName = primerNameVector[itPrimer->second];\r
+                    string barcodeName = barcodeNameVector[itBar->second];\r
+\r
+                    string comboGroupName = "";\r
+                    string fastaFileName = "";\r
+                    string qualFileName = "";\r
+                    string nameFileName = "";\r
+\r
+                    //group name is barcode, primer, or barcode.primer depending on which names are present\r
+                    if(primerName == ""){\r
+                        comboGroupName = barcodeNameVector[itBar->second];\r
+                    }\r
+                    else{\r
+                        if(barcodeName == ""){\r
+                            comboGroupName = primerNameVector[itPrimer->second];\r
+                        }\r
+                        else{\r
+                            comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];\r
+                        }\r
+                    }\r
+\r
+                    ofstream temp;\r
+                    map<string, string> variables;\r
+                    variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
+                    variables["[group]"] = comboGroupName;\r
+                    string thisFilename = getOutputFileName("sff",variables);\r
+                    if (uniqueNames.count(thisFilename) == 0) {\r
+                        outputNames.push_back(thisFilename);\r
+                        outputTypes["sff"].push_back(thisFilename);\r
+                        uniqueNames.insert(thisFilename);\r
+                    }\r
+\r
+                    filehandles[itBar->second][itPrimer->second] = thisFilename;\r
+                    temp.open(thisFilename.c_str(), ios::binary); temp.close();\r
+                }\r
+            }\r
+        }\r
+        numFPrimers = primers.size();\r
+        numLinkers = linker.size();\r
+        numSpacers = spacer.size();\r
+        map<string, string> variables;\r
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
+        variables["[group]"] = "scrap";\r
+        noMatchFile = getOutputFileName("sff",variables);\r
+        m->mothurRemove(noMatchFile);\r
+        numNoMatch = 0;\r
+\r
+\r
+        //allBlank stays true only if every barcode name and every primer name is empty\r
+        bool allBlank = true;\r
+        for (int i = 0; i < barcodeNameVector.size(); i++) {\r
+            if (barcodeNameVector[i] != "") {\r
+                allBlank = false;\r
+                break;\r
+            }\r
+        }\r
+        for (int i = 0; i < primerNameVector.size(); i++) {\r
+            if (primerNameVector[i] != "") {\r
+                allBlank = false;\r
+                break;\r
+            }\r
+        }\r
+\r
+        //header file names mirror filehandles with "headers" appended, read counts start at 0\r
+        filehandlesHeaders.resize(filehandles.size());\r
+        numSplitReads.resize(filehandles.size());\r
+        for (int i = 0; i < filehandles.size(); i++) {\r
+            numSplitReads[i].resize(filehandles[i].size(), 0);\r
+            for (int j = 0; j < filehandles[i].size(); j++) {\r
+                filehandlesHeaders[i].push_back(filehandles[i][j]+"headers");\r
+            }\r
+        }\r
+\r
+        if (allBlank) {\r
+            m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a split the sff file."); m->mothurOutEndLine();\r
+            split = 1;\r
+            return false;\r
+        }\r
+\r
+        return true;\r
+\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "readOligos");\r
+        exit(1);\r
+    }\r
+}\r
+//***************************************************************************************************************\r
+\r
+//Prepares the per-group output bookkeeping from a group file (passed in oligoFile).\r
+//\r
+//The file is loaded into a newly allocated GroupMap stored in groupMap. For every group name an\r
+//sff output file name is built, registered in outputNames / outputTypes["sff"], opened and closed\r
+//through m->openOutputFileBinary, and recorded in filehandles; barcodes maps the group name to its index.\r
+//A matching "headers" file is opened and closed the same way for each entry and recorded in\r
+//filehandlesHeaders. The scrap file name is stored in noMatchFile and passed to m->mothurRemove.\r
+//Always returns true.\r
+bool SffInfoCommand::readGroup(string oligoFile){\r
+    try {\r
+        filehandles.clear();\r
+        numSplitReads.clear();\r
+        filehandlesHeaders.clear();\r
+        barcodes.clear();\r
+\r
+        groupMap = new GroupMap();\r
+        groupMap->readMap(oligoFile);\r
+\r
+        //group names play the role of barcode names here, there are no primer names\r
+        vector<string> groupNames = groupMap->getNamesOfGroups();\r
+\r
+        //exactly one output file per group\r
+        filehandles.resize(groupNames.size());\r
+        for (int g = 0; g < filehandles.size(); g++) {\r
+            map<string, string> nameParts;\r
+            nameParts["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
+            nameParts["[group]"] = groupNames[g];\r
+            string groupSff = getOutputFileName("sff",nameParts);\r
+            outputNames.push_back(groupSff);\r
+            outputTypes["sff"].push_back(groupSff);\r
+\r
+            ofstream created;\r
+            m->openOutputFileBinary(groupSff, created);\r
+            created.close();\r
+\r
+            filehandles[g].push_back(groupSff);\r
+            barcodes[groupNames[g]] = g;\r
+        }\r
+\r
+        //reads that match no group go to the scrap file\r
+        map<string, string> scrapParts;\r
+        scrapParts["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
+        scrapParts["[group]"] = "scrap";\r
+        noMatchFile = getOutputFileName("sff",scrapParts);\r
+        m->mothurRemove(noMatchFile);\r
+        numNoMatch = 0;\r
+\r
+        //one header file and one read counter per output file\r
+        filehandlesHeaders.resize(groupNames.size());\r
+        numSplitReads.resize(filehandles.size());\r
+        for (int g = 0; g < filehandles.size(); g++) {\r
+            numSplitReads[g].resize(filehandles[g].size(), 0);\r
+            for (int f = 0; f < filehandles[g].size(); f++) {\r
+                ofstream created;\r
+                string headerName = filehandles[g][f]+"headers";\r
+                m->openOutputFileBinary(headerName, created);\r
+                created.close();\r
+                filehandlesHeaders[g].push_back(headerName);\r
+            }\r
+        }\r
+\r
+        return true;\r
+\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "readGroup");\r
+        exit(1);\r
+    }\r
+}\r
+\r
+//********************************************************************/\r
+//Returns the reverse complement of oligo.\r
+//The string is walked from its last character to its first; A/T, U->A, G/C and the ambiguity\r
+//codes R/Y, M/K, B/V, D/H are swapped, W and S map to themselves, and every other character\r
+//(including lower case letters) becomes N.\r
+string SffInfoCommand::reverseOligo(string oligo){\r
+    try {\r
+        string complemented = "";\r
+\r
+        for (string::reverse_iterator it = oligo.rbegin(); it != oligo.rend(); ++it) {\r
+            char partner;\r
+            switch (*it) {\r
+                case 'A': partner = 'T'; break;\r
+                case 'T': partner = 'A'; break;\r
+                case 'U': partner = 'A'; break;\r
+\r
+                case 'G': partner = 'C'; break;\r
+                case 'C': partner = 'G'; break;\r
+\r
+                case 'R': partner = 'Y'; break;\r
+                case 'Y': partner = 'R'; break;\r
+\r
+                case 'M': partner = 'K'; break;\r
+                case 'K': partner = 'M'; break;\r
+\r
+                case 'W': partner = 'W'; break;\r
+                case 'S': partner = 'S'; break;\r
+\r
+                case 'B': partner = 'V'; break;\r
+                case 'V': partner = 'B'; break;\r
+\r
+                case 'D': partner = 'H'; break;\r
+                case 'H': partner = 'D'; break;\r
+\r
+                default:  partner = 'N'; break;\r
+            }\r
+            complemented += partner;\r
+        }\r
+\r
+        return complemented;\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "reverseOligo");\r
+        exit(1);\r
+    }\r
+}\r
+\r
+//**********************************************************************************************************************\r
+\r
+\r
+ \r
+ \r
*/
#include "command.hpp"
-
+#include "groupmap.h"
/**********************************************************/
struct CommonHeader {
void help() { m->mothurOut(getHelpString()); }
private:
- string sffFilename, sfftxtFilename, outputDir, accnosName, currentFileName, oligosfile, noMatchFile;
- vector<string> filenames, outputNames, accnosFileNames, oligosFileNames;
- bool abort, fasta, qual, trim, flow, sfftxt, hasAccnos, hasOligos;
- int mycount, split, numFPrimers, numLinkers, numSpacers, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs;
+ string sffFilename, sfftxtFilename, outputDir, accnosName, currentFileName, oligosfile, noMatchFile, groupfile;
+ vector<string> filenames, outputNames, accnosFileNames, oligosFileNames, groupFileNames;
+ bool abort, fasta, qual, trim, flow, sfftxt, hasAccnos, hasOligos, hasGroup;
+ int mycount, split, numFPrimers, numLinkers, numSpacers, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, numNoMatch;
set<string> seqNames;
map<string, int> barcodes;
map<string, int> primers;
+ GroupMap* groupMap;
vector<string> linker, spacer, primerNameVector, barcodeNameVector, revPrimer;
vector<vector<int> > numSplitReads;
- vector<vector<string> > filehandles, filehandlesHeaders;
+ vector<vector<string> > filehandles;
+ vector<vector<string> > filehandlesHeaders;
//extract sff file functions
int extractSffInfo(string, string, string);
int readCommonHeader(ifstream&, CommonHeader&);
- //int readHeader(ifstream&, Header&);
- int readSeqData(ifstream&, seqRead&, int, Header&);
+ int readHeader(ifstream&, Header&);
+ bool readSeqData(ifstream&, seqRead&, int, Header&);
int decodeName(string&, string&, string&, string);
bool readOligos(string oligosFile);
+ bool readGroup(string oligosFile);
int printCommonHeader(ofstream&, CommonHeader&);
int printHeader(ofstream&, Header&);
bool sanityCheck(Header&, seqRead&);
int adjustCommonHeader(CommonHeader);
int findGroup(Header header, seqRead read, int& barcode, int& primer);
+ int findGroup(Header header, seqRead read, int& barcode, int& primer, string);
string reverseOligo(string oligo);
//parsesfftxt file functions
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
//if the user changes the input directory command factory will send this info to us in the output parameter
- string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ inputDir = validParameter.validFile(parameters, "inputdir", false);
if (inputDir == "not found"){ inputDir = ""; }
else {
string path;
path += "lookupFiles\\";
#endif
lookupFileName = m->getFullPathName(path) + "LookUp_Titanium.pat";
-
- int ableToOpen;
- ifstream in;
- ableToOpen = m->openInputFile(lookupFileName, in, "noerror");
- in.close();
-
- //if you can't open it, try input location
- if (ableToOpen == 1) {
- if (inputDir != "") { //default path is set
- string tryPath = inputDir + m->getSimpleName(lookupFileName);
- m->mothurOut("Unable to open " + lookupFileName + ". Trying input directory " + tryPath); m->mothurOutEndLine();
- ifstream in2;
- ableToOpen = m->openInputFile(tryPath, in2, "noerror");
- in2.close();
- lookupFileName = tryPath;
- }
- }
-
- //if you can't open it, try default location
- if (ableToOpen == 1) {
- if (m->getDefaultPath() != "") { //default path is set
- string tryPath = m->getDefaultPath() + m->getSimpleName(lookupFileName);
- m->mothurOut("Unable to open " + lookupFileName + ". Trying default " + tryPath); m->mothurOutEndLine();
- ifstream in2;
- ableToOpen = m->openInputFile(tryPath, in2, "noerror");
- in2.close();
- lookupFileName = tryPath;
- }
- }
-
- //if you can't open it its not in current working directory or inputDir, try mothur excutable location
- if (ableToOpen == 1) {
- string exepath = m->argv;
- string tempPath = exepath;
- for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
- exepath = exepath.substr(0, (tempPath.find_last_of('m')));
-
- string tryPath = m->getFullPathName(exepath) + m->getSimpleName(lookupFileName);
- m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
- ifstream in2;
- ableToOpen = m->openInputFile(tryPath, in2, "noerror");
- in2.close();
- lookupFileName = tryPath;
- }
-
- if (ableToOpen == 1) { m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true; }
- }
- else if(temp == "not open") {
+ bool ableToOpen = m->checkLocations(lookupFileName, inputDir);
+ if (!ableToOpen) { abort=true; }
+ }else if(temp == "not open") {
lookupFileName = validParameter.validFile(parameters, "lookup", false);
vector<string> sffFiles, oligosFiles;
readFile(sffFiles, oligosFiles);
- outputDir = m->hasPath(filename);
- string fileroot = outputDir + m->getRootName(m->getSimpleName(filename));
+ string thisOutputDir = outputDir;
+ if (thisOutputDir == "") { thisOutputDir = m->hasPath(filename); }
+ string fileroot = thisOutputDir + m->getRootName(m->getSimpleName(filename));
map<string, string> variables;
variables["[filename]"] = fileroot;
string fasta = getOutputFileName("fasta",variables);
if (makeGroup) { outputNames.push_back(group); outputTypes["group"].push_back(group); m->setGroupFile(group); }
}
+ m->setProcessors(toString(processors));
+
//report output filenames
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
in >> sff;
- sff = m->getFullPathName(sff);
-
//ignore file pairing
if(sff[0] == '#'){ while (!in.eof()) { char c = in.get(); if (c == 10 || c == 13){ break; } } m->gobble(in); }
else { //check for oligos file
+ bool ableToOpenSff = m->checkLocations(sff, inputDir);
+
oligos = "";
// get rest of line in case there is a oligos filename
if (c == 10 || c == 13 || c == -1){ break; }
else if (c == 32 || c == 9){;} //space or tab
else { oligos += c; }
- }
- sffFiles.push_back(sff);
- if (oligos != "") { oligos = m->getFullPathName(oligos); allBlank = false; }
- if (oligos == "") { allFull = false; }
- oligosFiles.push_back(oligos); //will push a blank if there is not an oligos for this sff file
+ }
+
+ if (ableToOpenSff) {
+ sffFiles.push_back(sff);
+ if (oligos != "") {
+ bool ableToOpenOligos = m->checkLocations(oligos, inputDir);
+ if (ableToOpenOligos) { allBlank = false; }
+ else { m->mothurOut("Can not find " + oligos + ". Ignoring.\n"); oligos = ""; }
+ }
+ if (oligos == "") { allFull = false; }
+ oligosFiles.push_back(oligos); //will push a blank if there is not an oligos for this sff file
+ }else { m->mothurOut("Can not find " + sff + ". Ignoring.\n"); }
}
m->gobble(in);
}
m->mothurOut("\n>>>>>\tProcessing " + sff + " (file " + toString(s+1) + " of " + toString(sffFiles.size()) + ")\t<<<<<\n");
//run sff.info
+ string redirects = "";
+ if (inputDir != "") { redirects += ", inputdir=" + inputDir; }
+ if (outputDir != "") { redirects += ", outputdir=" + outputDir; }
string inputString = "sff=" + sff + ", flow=T";
if (trim) { inputString += ", trim=T"; }
+ if (redirects != "") { inputString += redirects; }
m->mothurOut("/******************************************/"); m->mothurOutEndLine();
m->mothurOut("Running command: sffinfo(" + inputString + ")"); m->mothurOutEndLine();
m->mothurCalling = true;
m->mothurCalling = false;
m->mothurOutEndLine();
+ redirects = "";
+ if (outputDir != "") { redirects += ", outputdir=" + outputDir; }
+
//run summary.seqs on the fasta file
string fastaFile = "";
map<string, vector<string> >::iterator it = filenames.find("fasta");
else { m->mothurOut("[ERROR]: sffinfo did not create a fasta file, quitting.\n"); m->control_pressed = true; break; }
inputString = "fasta=" + fastaFile + ", processors=1";
+ if (redirects != "") { inputString += redirects; }
m->mothurOutEndLine();
m->mothurOut("Running command: summary.seqs(" + inputString + ")"); m->mothurOutEndLine();
m->mothurCalling = true;
inputString += ", maxhomop=" + toString(maxHomoP) + ", maxflows=" + toString(maxFlows) + ", minflows=" + toString(minFlows);
inputString += ", pdiffs=" + toString(pdiffs) + ", bdiffs=" + toString(bdiffs) + ", ldiffs=" + toString(ldiffs) + ", sdiffs=" + toString(sdiffs);
inputString += ", tdiffs=" + toString(tdiffs) + ", signal=" + toString(signal) + ", noise=" + toString(noise) + ", order=" + flowOrder + ", processors=1";
-
+ if (redirects != "") { inputString += redirects; }
m->mothurOutEndLine();
m->mothurOut("Running command: trim.flows(" + inputString + ")"); m->mothurOutEndLine();
m->mothurCalling = true;
inputString += ", sigma=" +toString(sigma);
inputString += ", mindelta=" + toString(minDelta);
inputString += ", order=" + flowOrder + ", processors=1";
-
+ if (redirects != "") { inputString += redirects; }
//run shhh.flows
m->mothurOutEndLine();
m->mothurOut("Running command: shhh.flows(" + inputString + ")"); m->mothurOutEndLine();
if (keepFirst != 0) { inputString += ", keepfirst=" + toString(keepFirst); }
if (removeLast != 0) { inputString += ", removelast=" + toString(removeLast); }
inputString += ", processors=1";
-
+ if (redirects != "") { inputString += redirects; }
//run trim.seqs
m->mothurOutEndLine();
m->mothurOut("Running command: trim.seqs(" + inputString + ")"); m->mothurOutEndLine();
}
inputString = "fasta=" + fastaFile + ", processors=1, name=" + nameFile;
+ if (redirects != "") { inputString += redirects; }
m->mothurOutEndLine();
m->mothurOut("Running command: summary.seqs(" + inputString + ")"); m->mothurOutEndLine();
m->mothurCalling = true;
m->appendFiles(nameFile, name);
if (makeGroup) { m->appendFiles(groupFile, group); }
}
- count++;
+
for (it = filenames.begin(); it != filenames.end(); it++) {
for (int i = 0; i < (it->second).size(); i++) {
outputNames.push_back((it->second)[i]); outputTypes[it->first].push_back((it->second)[i]);
}
}
+ count++;
}
return count;
int end;
linePair(int i, int j) : start(i), end(j) {}
};
-
+
+ string inputDir;
string filename, outputDir, flowOrder, lookupFileName, minDelta;
vector<string> outputNames;
bool abort, trim, large, flip, allFiles, keepforward, append, makeGroup;
try {
string pattern = "";
- if (type == "shared") { pattern = "[filename],shared"; }
+ if (type == "shared") { pattern = "[filename],shared-[filename],[distance],shared"; }
else if (type == "rabund") { pattern = "[filename],[group],rabund"; }
else if (type == "group") { pattern = "[filename],[group],groups"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
try {
if (abort == true) { if (calledHelp) { return 0; } return 2; }
-
- //getting output filename
- string filename = "";
- if (listfile != "") { filename = listfile; }
- else { filename = biomfile; }
-
- if (outputDir == "") { outputDir += m->hasPath(filename); }
-
- map<string, string> variables;
- variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(filename));
- filename = getOutputFileName("shared",variables);
- outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
-
- if (listfile != "") { createSharedFromListGroup(filename); }
- else { createSharedFromBiom(filename); }
+
+ if (listfile != "") { createSharedFromListGroup(); }
+ else { createSharedFromBiom(); }
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } }
}
}
//**********************************************************************************************************************
-int SharedCommand::createSharedFromBiom(string filename) {
+int SharedCommand::createSharedFromBiom() {
try {
+ //getting output filename
+ string filename = biomfile;
+ if (outputDir == "") { outputDir += m->hasPath(filename); }
+
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(filename));
+ filename = getOutputFileName("shared",variables);
+ outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
+
ofstream out;
m->openOutputFile(filename, out);
if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a data provided.\n"); }
else {
string thisLine = it->second;
- m->currentBinLabels = otuNames;
+ m->currentSharedBinLabels = otuNames;
//read data
vector<SharedRAbundVector*> lookup = readData(matrixFormat, thisLine, matrixElementType, groupNames, otuNames.size());
for (int h = 0; h < diff; h++) { binLabel += "0"; }
}
binLabel += sbinNumber;
- if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; }
+ if (i < m->currentSharedBinLabels.size()) { binLabel = m->currentSharedBinLabels[i]; }
newBinLabels.push_back(binLabel);
}
for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; }
thislookup = newLookup;
- m->currentBinLabels = newBinLabels;
+ m->currentSharedBinLabels = newBinLabels;
return 0;
}
}
//**********************************************************************************************************************
-int SharedCommand::createSharedFromListGroup(string filename) {
+int SharedCommand::createSharedFromListGroup() {
try {
- ofstream out;
- m->openOutputFile(filename, out);
GroupMap* groupMap = NULL;
CountTable* countTable = NULL;
m->setGroups(Groups);
}else { pickedGroups = true; }
+
+ ofstream out;
+ string filename = "";
+ if (!pickedGroups) {
+ string filename = listfile;
+ if (outputDir == "") { outputDir += m->hasPath(filename); }
+
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(filename));
+ filename = getOutputFileName("shared",variables);
+ outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
+ m->openOutputFile(filename, out);
+ }
+
//fill filehandles with neccessary ofstreams
int i;
ofstream* temp;
if (m->control_pressed) {
delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
- out.close(); m->mothurRemove(filename);
+ out.close(); if (!pickedGroups) { m->mothurRemove(filename); }
for (int i=0; i<Groups.size(); i++) { variables["[group]"] = Groups[i];
string rabundFIleName = getOutputFileName("rabund",variables);
m->mothurRemove(rabundFIleName); }
if ((!pickedGroups) && (SharedList->getNumSeqs() != numGroupNames)) { //if the user has not specified any groups and their files don't match exit with error
m->mothurOut("Your group file contains " + toString(numGroupNames) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine(); m->control_pressed = true;
- out.close(); m->mothurRemove(filename); //remove blank shared file you made
+ out.close(); if (!pickedGroups) { m->mothurRemove(filename); } //remove blank shared file you made
//delete memory
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
if ((pickedGroups) && (m->groupMode == "group")) { //make new group file
string groups = "";
if (m->getNumGroups() < 4) {
- for (int i = 0; i < m->getNumGroups(); i++) {
+ for (int i = 0; i < m->getNumGroups()-1; i++) {
groups += (m->getGroups())[i] + ".";
}
+ groups+=(m->getGroups())[m->getNumGroups()-1];
}else { groups = "merge"; }
map<string, string> variables;
variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
if (m->control_pressed) {
delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
- out.close(); m->mothurRemove(filename);
+ if (!pickedGroups) { out.close(); m->mothurRemove(filename); }
for (int i=0; i<Groups.size(); i++) { variables["[group]"] = Groups[i];
string rabundFIleName = getOutputFileName("rabund",variables);
m->mothurRemove(rabundFIleName); }
lookup = SharedList->getSharedRAbundVector();
m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
- if (pickedGroups) { //check for otus with no seqs in them
- eliminateZeroOTUS(lookup);
- }
if (m->control_pressed) {
delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
- out.close(); m->mothurRemove(filename);
+ if (!pickedGroups) { out.close(); m->mothurRemove(filename); }
for (int i=0; i<Groups.size(); i++) { variables["[group]"] = Groups[i];
string rabundFIleName = getOutputFileName("rabund",variables);
m->mothurRemove(rabundFIleName); }
return 0;
}
- if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
- printSharedData(lookup, out); //prints info to the .shared file
+ //if picked groups must split the shared file by label
+ if (pickedGroups) {
+ string filename = listfile;
+ if (outputDir == "") { outputDir += m->hasPath(filename); }
+
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(filename));
+ variables["[distance]"] = lookup[0]->getLabel();
+ filename = getOutputFileName("shared",variables);
+ outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
+ ofstream out2;
+ m->openOutputFile(filename, out2);
+
+ vector<string> savedLabels = m->currentSharedBinLabels;
+ eliminateZeroOTUS(lookup);
+ lookup[0]->printHeaders(out2);
+ printSharedData(lookup, out2);
+ out2.close();
+ m->currentSharedBinLabels = savedLabels; //restore old labels
+
+ }else {
+ if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); }
+ printSharedData(lookup, out); //prints info to the .shared file
+ }
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
processedLabels.insert(SharedList->getLabel());
lookup = SharedList->getSharedRAbundVector();
m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
- if (pickedGroups) { //check for otus with no seqs in them
- eliminateZeroOTUS(lookup);
- }
-
if (m->control_pressed) {
delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
- out.close(); m->mothurRemove(filename);
+ if (!pickedGroups) { out.close(); m->mothurRemove(filename); }
for (int i=0; i<Groups.size(); i++) { variables["[group]"] = Groups[i];
string rabundFIleName = getOutputFileName("rabund",variables);
m->mothurRemove(rabundFIleName); }
return 0;
}
- if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
- printSharedData(lookup, out); //prints info to the .shared file
+ //if picked groups must split the shared file by label
+ if (pickedGroups) {
+ string filename = listfile;
+ if (outputDir == "") { outputDir += m->hasPath(filename); }
+
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(filename));
+ variables["[distance]"] = lookup[0]->getLabel();
+ filename = getOutputFileName("shared",variables);
+ outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
+ ofstream out2;
+ m->openOutputFile(filename, out2);
+
+ vector<string> savedLabels = m->currentSharedBinLabels;
+ eliminateZeroOTUS(lookup);
+ lookup[0]->printHeaders(out2);
+ printSharedData(lookup, out2);
+ out2.close();
+ m->currentSharedBinLabels = savedLabels; //restore old labels
+
+ }else {
+ if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); }
+ printSharedData(lookup, out); //prints info to the .shared file
+ }
+
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
processedLabels.insert(SharedList->getLabel());
lookup = SharedList->getSharedRAbundVector();
m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
- if (pickedGroups) { //check for otus with no seqs in them
- eliminateZeroOTUS(lookup);
- }
if (m->control_pressed) {
if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
- out.close(); m->mothurRemove(filename);
+ if (!pickedGroups) { out.close(); m->mothurRemove(filename); }
for (int i=0; i<Groups.size(); i++) { variables["[group]"] = Groups[i];
string rabundFIleName = getOutputFileName("rabund",variables);
m->mothurRemove(rabundFIleName); }
return 0;
}
- if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
- printSharedData(lookup, out); //prints info to the .shared file
+ //if picked groups must split the shared file by label
+ if (pickedGroups) {
+ string filename = listfile;
+ if (outputDir == "") { outputDir += m->hasPath(filename); }
+
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(filename));
+ variables["[distance]"] = lookup[0]->getLabel();
+ filename = getOutputFileName("shared",variables);
+ outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
+ ofstream out2;
+ m->openOutputFile(filename, out2);
+
+ vector<string> savedLabels = m->currentSharedBinLabels;
+ eliminateZeroOTUS(lookup);
+ lookup[0]->printHeaders(out2);
+ printSharedData(lookup, out2);
+ out2.close();
+ m->currentSharedBinLabels = savedLabels; //restore old labels
+
+ }else {
+ if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); }
+ printSharedData(lookup, out); //prints info to the .shared file
+ }
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
delete SharedList;
}
- out.close();
+ if (!pickedGroups) { out.close(); }
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
delete it3->second;
if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
if (m->control_pressed) {
- m->mothurRemove(filename);
+ if (!pickedGroups) { m->mothurRemove(filename); }
for (int i=0; i<Groups.size(); i++) { variables["[group]"] = Groups[i];
string rabundFIleName = getOutputFileName("rabund",variables);
m->mothurRemove(rabundFIleName); }
bool isValidGroup(string, vector<string>);
int eliminateZeroOTUS(vector<SharedRAbundVector*>&);
int ListGroupSameSeqs(vector<string>&, SharedListVector*);
- int createSharedFromListGroup(string);
- int createSharedFromBiom(string);
+ int createSharedFromListGroup();
+ int createSharedFromBiom();
string getTag(string&);
vector<string> readRows(string, int&);
int getDims(string, int&, int&);
countTable->readTable(m->getCountTableFile(), true, false);
}
- int hold;
- string inputData;
- f >> label >> hold;
-
- data.assign(hold, "");
+ int hold;
+
+ //are we at the beginning of the file??
+ if (m->saveNextLabel == "") {
+ f >> label;
+
+ //is this a shared file that has headers
+ if (label == "label") {
+
+ //gets "numOtus"
+ f >> label; m->gobble(f);
+
+ //eat rest of line
+ label = m->getline(f); m->gobble(f);
+
+ //parse labels to save
+ istringstream iStringStream(label);
+ m->listBinLabelsInFile.clear();
+ while(!iStringStream.eof()){
+ if (m->control_pressed) { break; }
+ string temp;
+ iStringStream >> temp; m->gobble(iStringStream);
+
+ m->listBinLabelsInFile.push_back(temp);
+ }
+
+ f >> label >> hold;
+ }else {
+ //read in first row
+ f >> hold;
+
+ //make binlabels because we don't have any
+ string snumBins = toString(hold);
+ m->listBinLabelsInFile.clear();
+ for (int i = 0; i < hold; i++) {
+ //if there is a bin label use it otherwise make one
+ string binLabel = "Otu";
+ string sbinNumber = toString(i+1);
+ if (sbinNumber.length() < snumBins.length()) {
+ int diff = snumBins.length() - sbinNumber.length();
+ for (int h = 0; h < diff; h++) { binLabel += "0"; }
+ }
+ binLabel += sbinNumber;
+ m->listBinLabelsInFile.push_back(binLabel);
+ }
+ }
+ m->saveNextLabel = label;
+ }else {
+ f >> label >> hold;
+ m->saveNextLabel = label;
+ }
+
+ binLabels.assign(m->listBinLabelsInFile.begin(), m->listBinLabelsInFile.begin()+hold);
+ data.assign(hold, "");
+ string inputData = "";
+
for(int i=0;i<hold;i++){
f >> inputData;
set(i, inputData);
}
+ m->gobble(f);
+
+ if (f.eof()) { m->saveNextLabel = ""; }
}
catch(exception& e) {
string SharedListVector::get(int index){
return data[index];
}
+/***********************************************************************/
+
+// Replaces the stored bin labels with a copy of the supplied list.
+void SharedListVector::setLabels(vector<string> labels){
+    try {
+        binLabels.assign(labels.begin(), labels.end());
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SharedListVector", "setLabels");
+        exit(1);
+    }
+}
+/***********************************************************************/
+//could potentially end up with duplicate binlabel names with code below.
+//we don't currently use them in a way that would do that.
+//if you had a listfile that had been subsampled and then added to it, dup names would be possible.
+// Returns the bin labels, first filling them in when fewer labels than bins are stored.
+// When filling in, every label is rebuilt as <prefix><zero padding><number> so that all
+// numbers are at least as wide as the decimal form of numBins.
+vector<string> SharedListVector::getLabels(){
+    try {
+        //label prefix: "PhyloType" when the shared header mode is "tax", otherwise "Otu"
+        const string prefix = (m->sharedHeaderMode == "tax") ? "PhyloType" : "Otu";
+
+        if (binLabels.size() < data.size()) {
+            const string::size_type width = toString(numBins).length();
+
+            for (int bin = 0; bin < numBins; bin++) {
+                //checked on every pass because binLabels grows as missing labels are appended
+                const bool hasLabel = (bin < binLabels.size());
+
+                //an existing label contributes m->getSimpleLabel() of itself; a missing one uses its 1-based position
+                const string number = hasLabel ? m->getSimpleLabel(binLabels[bin]) : toString(bin+1);
+
+                string padded = prefix;
+                if (number.length() < width) { padded.append(width - number.length(), '0'); }
+                padded += number;
+
+                if (hasLabel)   { binLabels[bin] = padded;      }
+                else            { binLabels.push_back(padded);  }
+            }
+        }
+
+        return binLabels;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SharedListVector", "getLabels");
+        exit(1);
+    }
+}
/***********************************************************************/
void SharedListVector::push_back(string seqNames){
/***********************************************************************/
SharedRAbundVector SharedListVector::getSharedRAbundVector(string groupName) {
try {
+ m->currentSharedBinLabels = binLabels;
+
SharedRAbundVector rav(data.size());
for(int i=0;i<numBins;i++){
/***********************************************************************/
vector<SharedRAbundVector*> SharedListVector::getSharedRAbundVector() {
try {
+ m->currentSharedBinLabels = binLabels;
+
SharedUtil* util;
util = new SharedUtil();
vector<SharedRAbundVector*> lookup; //contains just the groups the user selected
SharedListVector();
SharedListVector(int);
SharedListVector(ifstream&);
- SharedListVector(const SharedListVector& lv) : DataVector(lv.label), data(lv.data), maxRank(lv.maxRank), numBins(lv.numBins), numSeqs(lv.numSeqs){ groupmap = NULL; countTable = NULL; };
+ SharedListVector(const SharedListVector& lv) : DataVector(lv.label), data(lv.data), maxRank(lv.maxRank), numBins(lv.numBins), numSeqs(lv.numSeqs), binLabels(lv.binLabels) { groupmap = NULL; countTable = NULL; };
~SharedListVector(){ if (groupmap != NULL) { delete groupmap; } if (countTable != NULL) { delete countTable; } };
int getNumBins() { return numBins; }
void set(int, string);
string get(int);
+ vector<string> getLabels();
+ void setLabels(vector<string>);
void push_back(string);
void resize(int);
void clear();
int maxRank;
int numBins;
int numSeqs;
+ vector<string> binLabels;
};
//parse labels to save
istringstream iStringStream(label);
- m->binLabelsInFile.clear();
+ m->sharedBinLabelsInFile.clear();
while(!iStringStream.eof()){
if (m->control_pressed) { break; }
string temp;
iStringStream >> temp; m->gobble(iStringStream);
- m->binLabelsInFile.push_back(temp);
+ m->sharedBinLabelsInFile.push_back(temp);
}
f >> label;
}else { label = m->saveNextLabel; }
//reset labels, currentLabels may have gotten changed as otus were eliminated because of group choices or sampling
- m->currentBinLabels = m->binLabelsInFile;
+ m->currentSharedBinLabels = m->sharedBinLabelsInFile;
//read in first row since you know there is at least 1 group.
f >> groupN >> num;
//parse labels to save
istringstream iStringStream(label);
- m->binLabelsInFile.clear();
+ m->sharedBinLabelsInFile.clear();
while(!iStringStream.eof()){
if (m->control_pressed) { break; }
string temp;
iStringStream >> temp; m->gobble(iStringStream);
- m->binLabelsInFile.push_back(temp);
+ m->sharedBinLabelsInFile.push_back(temp);
}
f >> label >> groupN >> num;
//make binlabels because we don't have any
string snumBins = toString(num);
- m->binLabelsInFile.clear();
+ m->sharedBinLabelsInFile.clear();
for (int i = 0; i < num; i++) {
//if there is a bin label use it otherwise make one
string binLabel = "Otu";
for (int h = 0; h < diff; h++) { binLabel += "0"; }
}
binLabel += sbinNumber;
- m->binLabelsInFile.push_back(binLabel);
+ m->sharedBinLabelsInFile.push_back(binLabel);
}
}
}else {
}
//reset labels, currentLabels may have gotten changed as otus were eliminated because of group choices or sampling
- m->currentBinLabels = m->binLabelsInFile;
+ m->currentSharedBinLabels = m->sharedBinLabelsInFile;
holdLabel = label;
for (int h = 0; h < diff; h++) { binLabel += "0"; }
}
binLabel += sbinNumber;
- if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; }
+ if (i < m->currentSharedBinLabels.size()) { binLabel = m->currentSharedBinLabels[i]; }
output << binLabel << '\t';
}
for (int h = 0; h < diff; h++) { binLabel += "0"; }
}
binLabel += sbinNumber;
- if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; }
+ if (i < m->currentSharedBinLabels.size()) { binLabel = m->currentSharedBinLabels[i]; }
output << binLabel << '\t';
}
output << endl;
}
- m->printedHeaders = true;
+ m->printedSharedHeaders = true;
}
catch(exception& e) {
m->errorOut(e, "SharedRAbundVector", "printHeaders");
for (int h = 0; h < diff; h++) { binLabel += "0"; }
}
binLabel += sbinNumber;
- if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; }
+ if (i < m->currentSharedBinLabels.size()) { binLabel = m->currentSharedBinLabels[i]; }
newBinLabels.push_back(binLabel);
}
for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; }
thislookup = newLookup;
- m->currentBinLabels = newBinLabels;
+ m->currentSharedBinLabels = newBinLabels;
return 0;
//parse labels to save
istringstream iStringStream(label);
- m->binLabelsInFile.clear();
+ m->sharedBinLabelsInFile.clear();
while(!iStringStream.eof()){
if (m->control_pressed) { break; }
string temp;
iStringStream >> temp; m->gobble(iStringStream);
- m->binLabelsInFile.push_back(temp);
+ m->sharedBinLabelsInFile.push_back(temp);
}
f >> label >> groupN >> num;
//make binlabels because we don't have any
string snumBins = toString(num);
- m->binLabelsInFile.clear();
+ m->sharedBinLabelsInFile.clear();
for (int i = 0; i < num; i++) {
//if there is a bin label use it otherwise make one
string binLabel = "Otu";
for (int h = 0; h < diff; h++) { binLabel += "0"; }
}
binLabel += sbinNumber;
- m->binLabelsInFile.push_back(binLabel);
+ m->sharedBinLabelsInFile.push_back(binLabel);
}
}
}else {
}
//reset labels, currentLabels may have gotten changed as otus were eliminated because of group choices or sampling
- m->currentBinLabels = m->binLabelsInFile;
+ m->currentSharedBinLabels = m->sharedBinLabelsInFile;
holdLabel = label;
for (int h = 0; h < diff; h++) { binLabel += "0"; }
}
binLabel += sbinNumber;
- if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; }
+ if (i < m->currentSharedBinLabels.size()) { binLabel = m->currentSharedBinLabels[i]; }
output << binLabel << '\t';
}
for (int h = 0; h < diff; h++) { binLabel += "0"; }
}
binLabel += sbinNumber;
- if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; }
+ if (i < m->currentSharedBinLabels.size()) { binLabel = m->currentSharedBinLabels[i]; }
output << binLabel << '\t';
}
output << endl;
}
- m->printedHeaders = true;
+ m->printedSharedHeaders = true;
}
catch(exception& e) {
m->errorOut(e, "SharedRAbundVector", "printHeaders");
for (int h = 0; h < diff; h++) { binLabel += "0"; }
}
binLabel += sbinNumber;
- if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; }
+ if (i < m->currentSharedBinLabels.size()) { binLabel = m->currentSharedBinLabels[i]; }
newBinLabels.push_back(binLabel);
}
for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; }
thislookup = newLookup;
- m->currentBinLabels = newBinLabels;
+ m->currentSharedBinLabels = newBinLabels;
return 0;
}
//they are shared
- if (sharedByAll == true) { observed++; labels.push_back(m->currentBinLabels[i]); }
+ if (sharedByAll == true) { observed++; labels.push_back(m->currentSharedBinLabels[i]); }
}
data[0] = observed;
cout.setf(ios::showpoint);
vector<vector<float> > sharedVector;
- vector<string> otuNames = m->currentBinLabels;
+ vector<string> otuNames = m->currentSharedBinLabels;
//fill sharedVector to pass to CalcSparcc
for (int i = 0; i < lookup.size(); i++) {
variables["[tag]"] = tag;
variables["[tag2]"] = "rare";
string rare = getOutputFileName("list",variables);
- m->openOutputFile(rare, rout);
+ m->openOutputFile(rare+".temp", rout);
outputNames.push_back(rare); outputTypes["list"].push_back(rare);
variables["[tag2]"] = "abund";
string abund = getOutputFileName("list",variables);
- m->openOutputFile(abund, aout);
+ m->openOutputFile(abund+".temp", aout);
outputNames.push_back(abund); outputTypes["list"].push_back(abund);
if (rareNames.size() != 0) { rout << thisList->getLabel() << '\t' << numRareBins << '\t'; }
if (abundNames.size() != 0) { aout << thisList->getLabel() << '\t' << numAbundBins << '\t'; }
-
+
+ vector<string> binLabels = thisList->getLabels();
+ string rareHeader = "label\tnumOtus\t"; string abundHeader = "label\tnumOtus\t";
for (int i = 0; i < thisList->getNumBins(); i++) {
if (m->control_pressed) { break; }
for (int j = 0; j < names.size(); j++) { size += ct.getNumSeqs(names[j]); }
}
- if (size <= cutoff) { rout << bin << '\t'; }
- else { aout << bin << '\t'; }
+ if (size <= cutoff) { rout << bin << '\t'; rareHeader += binLabels[i] + '\t'; }
+ else { aout << bin << '\t'; abundHeader += binLabels[i] + '\t'; }
}
if (rareNames.size() != 0) { rout << endl; }
rout.close();
aout.close();
+
+ //add headers
+ ofstream r;
+ m->openOutputFile(rare, r);
+ r << rareHeader << endl;
+ r.close();
+ m->appendFiles(rare+".temp", rare);
+ m->mothurRemove(rare+".temp");
+
+ ofstream a;
+ m->openOutputFile(abund, a);
+ a << abundHeader << endl;
+ a.close();
+ m->appendFiles(abund+".temp", abund);
+ m->mothurRemove(abund+".temp");
}else{ //parse names by abundance and group
string fileroot = outputDir + m->getRootName(m->getSimpleName(listfile));
}
map<string, string> groupVector;
+ map<string, string> groupLabels;
map<string, string>::iterator itGroup;
map<string, int> groupNumBins;
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
groupNumBins[it3->first] = 0;
groupVector[it3->first] = "";
+ groupLabels[it3->first] = "label\tnumOtus\t";
}
-
+ vector<string> binLabels = thisList->getLabels();
for (int i = 0; i < thisList->getNumBins(); i++) {
if (m->control_pressed) { break; }
for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
- groupVector[itGroup->first] += itGroup->second + '\t';
+ groupVector[itGroup->first] += itGroup->second + '\t';
+ groupLabels[itGroup->first] += binLabels[i] + '\t';
}
}
//end list vector
for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
+ (*(filehandles[it3->first])) << groupLabels[it3->first] << endl;
(*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl; // label numBins listvector for that group
(*(filehandles[it3->first])).close();
delete it3->second;
--- /dev/null
+//
+// sracommand.cpp
+// Mothur
+//
+// Created by SarahsWork on 10/28/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "sracommand.h"
+#include "sffinfocommand.h"
+#include "parsefastaqcommand.h"
+
+//**********************************************************************************************************************
+// Registers every parameter the sra command accepts (appending each to the
+// parameters member in a fixed order) and returns the parameter names.
+vector<string> SRACommand::setParameters(){
+    try {
+        //input files
+        CommandParameter sffParam("sff", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","sra",false,false);
+        parameters.push_back(sffParam);
+        CommandParameter groupParam("group", "InputTypes", "", "", "groupOligos", "none", "none","sra",false,false);
+        parameters.push_back(groupParam);
+        CommandParameter oligosParam("oligos", "InputTypes", "", "", "groupOligos", "none", "none","sra",false,false);
+        parameters.push_back(oligosParam);
+        CommandParameter fileParam("file", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","sra",false,false);
+        parameters.push_back(fileParam);
+        CommandParameter fastqParam("fastq", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","sra",false,false);
+        parameters.push_back(fastqParam);
+
+        //choose only one multiple options
+        CommandParameter platformParam("platform", "Multiple", "454-???-???", "454", "", "", "","",false,false);
+        parameters.push_back(platformParam);
+
+        //allowed differences, each defaulting to "0"
+        CommandParameter pdiffsParam("pdiffs", "Number", "", "0", "", "", "","",false,false);
+        parameters.push_back(pdiffsParam);
+        CommandParameter bdiffsParam("bdiffs", "Number", "", "0", "", "", "","",false,false);
+        parameters.push_back(bdiffsParam);
+        CommandParameter ldiffsParam("ldiffs", "Number", "", "0", "", "", "","",false,false);
+        parameters.push_back(ldiffsParam);
+        CommandParameter sdiffsParam("sdiffs", "Number", "", "0", "", "", "","",false,false);
+        parameters.push_back(sdiffsParam);
+        CommandParameter tdiffsParam("tdiffs", "Number", "", "0", "", "", "","",false,false);
+        parameters.push_back(tdiffsParam);
+
+        //every command must have inputdir and outputdir.  This allows mothur users to redirect input and output files.
+        CommandParameter inputdirParam("inputdir", "String", "", "", "", "", "","",false,false);
+        parameters.push_back(inputdirParam);
+        CommandParameter outputdirParam("outputdir", "String", "", "", "", "", "","",false,false);
+        parameters.push_back(outputdirParam);
+
+        //collect the names in registration order
+        vector<string> names;
+        for (size_t idx = 0; idx < parameters.size(); ++idx) {
+            names.push_back(parameters[idx].name);
+        }
+        return names;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SRACommand", "setParameters");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+// Builds the help text printed for the sra command.
+string SRACommand::getHelpString(){
+    try {
+        string helpString = "";
+        helpString += "The sra command creates a sequence read archive from sff or fastq files.\n";
+        //list the parameter names exactly as setParameters registers them
+        helpString += "The sra command parameters are: sff, fastq, file, oligos, group, platform, pdiffs, bdiffs, ldiffs, sdiffs and tdiffs.\n";
+        //TODO: this sentence is unfinished and names a parameter ("sffiles") that setParameters does not register; left unchanged until its intended wording is known
+        helpString += "The sffiles parameter is used to provide a file containing a \n";
+        helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
+        helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
+        helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
+        helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
+        helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
+
+        //name the command itself rather than the "new" placeholder left over from the command template
+        helpString += "The sra command should be in the following format: \n";
+        helpString += "sra(...)\n";
+        return helpString;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SRACommand", "getHelpString");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+// Returns the output file name pattern for the given output type.
+// Only "sra" is defined; any other type is reported, m->control_pressed is set,
+// and an empty pattern is returned.
+string SRACommand::getOutputPattern(string type) {
+    try {
+        if (type == "sra") { return "[filename],sra"; }
+
+        m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SRACommand", "getOutputPattern");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+// Default constructor: sets abort and calledHelp (so execute() returns 0 without
+// doing any work), registers the parameters and creates an empty "sra" output list.
+SRACommand::SRACommand(){
+    try {
+        calledHelp = true;
+        abort = true;
+
+        setParameters();
+
+        outputTypes["sra"] = vector<string>();
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SRACommand", "SRACommand");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+// Parses the option string for the sra command.
+// "help" and "citation" print their text and mark the command as aborted. Otherwise the
+// parameters are validated, file parameters without a path get inputdir prepended, and
+// the file names, platform and *diffs members are filled in. abort is set on any error.
+SRACommand::SRACommand(string option)  {
+    try {
+        abort = false; calledHelp = false;
+
+        //allow user to run help
+        if(option == "help") { help(); abort = true; calledHelp = true; }
+        else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+
+        else {
+            //valid parameters for this command
+            vector<string> myArray = setParameters();
+
+            OptionParser parser(option);
+            map<string,string> parameters = parser.getParameters();
+
+            ValidParameters validParameter;
+            map<string,string>::iterator it;
+            //check to make sure all parameters are valid for command
+            for (it = parameters.begin(); it != parameters.end(); it++) {
+                if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+            }
+
+            //if the user changes the input directory command factory will send this info to us in the output parameter
+            string inputDir = validParameter.validFile(parameters, "inputdir", false);
+            if (inputDir == "not found"){ inputDir = ""; }
+            else {
+                //all file parameters get the same treatment, so handle them in one loop
+                const string fileParameters[] = { "sff", "fastq", "file", "group", "oligos" };
+                const int numFileParameters = sizeof(fileParameters) / sizeof(fileParameters[0]);
+
+                for (int i = 0; i < numFileParameters; i++) {
+                    it = parameters.find(fileParameters[i]);
+                    //user has given this file
+                    if (it != parameters.end()) {
+                        //if the user has not given a path then, add inputdir. else leave path alone.
+                        if (m->hasPath(it->second) == "") { it->second = inputDir + it->second; }
+                    }
+                }
+            }
+
+            //check for parameters
+            fastqfile = validParameter.validFile(parameters, "fastq", true);
+            if (fastqfile == "not open") { fastqfile = "";  abort = true; }
+            else if (fastqfile == "not found") { fastqfile = ""; }
+
+            sfffile = validParameter.validFile(parameters, "sff", true);
+            if (sfffile == "not open") {  sfffile = "";  abort = true; }
+            else if (sfffile == "not found") { sfffile = ""; }
+
+            //validated once only; a second identical lookup added nothing
+            file = validParameter.validFile(parameters, "file", true);
+            if (file == "not open") {  file = "";  abort = true; }
+            else if (file == "not found") { file = ""; }
+
+            groupfile = validParameter.validFile(parameters, "group", true);
+            if (groupfile == "not open") { groupfile = ""; abort = true; }
+            else if (groupfile == "not found") { groupfile = ""; }
+            else {  m->setGroupFile(groupfile); }
+
+            oligosfile = validParameter.validFile(parameters, "oligos", true);
+            if (oligosfile == "not found")      { oligosfile = ""; }
+            //clear the sentinel like the other file parameters do, so "not open" is never treated as a file name below
+            else if(oligosfile == "not open")   { oligosfile = ""; abort = true; }
+            else { m->setOligosFile(oligosfile); }
+
+            //at least one of the three inputs is required; all three must be checked, including file
+            if ((fastqfile == "") && (sfffile == "") && (file == "")) {
+                m->mothurOut("[ERROR]: You must provide a file, sff file or fastq file before you can use the sra command."); m->mothurOutEndLine(); abort = true;
+            }
+
+            if ((groupfile != "") && (oligosfile != "")) {
+                m->mothurOut("[ERROR]: You may not use a group file and an oligos file, only one."); m->mothurOutEndLine(); abort = true;
+            }
+
+            //a fastq or sff input needs an oligos or group file; fall back to the current ones when neither was given
+            if ((fastqfile != "") || (sfffile != "")) {
+                if ((groupfile == "") && (oligosfile == "")) {
+                    oligosfile = m->getOligosFile();
+                    if (oligosfile != "") {  m->mothurOut("Using " + oligosfile + " as input file for the oligos parameter."); m->mothurOutEndLine(); }
+                    else {
+                        groupfile = m->getGroupFile();
+                        if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
+                        else {
+                            m->mothurOut("[ERROR]: You must provide groupfile or oligos file if splitting a fastq or sff file."); m->mothurOutEndLine(); abort = true;
+                        }
+                    }
+                }
+            }
+
+            //use only one Multiple type
+            platform = validParameter.validFile(parameters, "platform", false);
+            if (platform == "not found") { platform = "454"; }
+
+            //NOTE(review): the "????" entries look like placeholders for platforms still to be named
+            if ((platform == "454") || (platform == "????") || (platform == "????") || (platform == "????")) { }
+            else { m->mothurOut("Not a valid platform option.  Valid platform options are 454, ...."); m->mothurOutEndLine(); abort = true; }
+
+
+            string temp = validParameter.validFile(parameters, "bdiffs", false);    if (temp == "not found"){ temp = "0"; }
+            m->mothurConvert(temp, bdiffs);
+
+            temp = validParameter.validFile(parameters, "pdiffs", false);    if (temp == "not found"){ temp = "0"; }
+            m->mothurConvert(temp, pdiffs);
+
+            temp = validParameter.validFile(parameters, "ldiffs", false);    if (temp == "not found") { temp = "0"; }
+            m->mothurConvert(temp, ldiffs);
+
+            temp = validParameter.validFile(parameters, "sdiffs", false);    if (temp == "not found") { temp = "0"; }
+            m->mothurConvert(temp, sdiffs);
+
+            //tdiffs defaults to the sum of the individual limits
+            temp = validParameter.validFile(parameters, "tdiffs", false);    if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
+            m->mothurConvert(temp, tdiffs);
+
+            if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }
+
+        }
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SRACommand", "SRACommand");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+// Runs the sra command.
+// Returns 0 when only help/citation was requested, 2 when construction flagged an error,
+// and 0 after processing the input and listing the output file names.
+int SRACommand::execute(){
+    try {
+
+        if (abort == true) { if (calledHelp) { return 0; }  return 2; }
+
+        //parse files; only the first input that was supplied is used, in the order file, sff, fastq
+        vector<string> filesBySample;
+
+        if (file != "")             {   readFile(filesBySample);        }
+        else if (sfffile != "")     {   parseSffFile(filesBySample);    }
+        else if (fastqfile != "")   {   parseFastqFile(filesBySample);  }
+
+        //NOTE(review): filesBySample is not used after this point
+
+        //output files created by command
+        m->mothurOutEndLine();
+        m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+        for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();  }
+        m->mothurOutEndLine();
+        return 0;
+
+    }
+    catch(exception& e) {
+        //report this function's own name so the error points at execute, not the constructor
+        m->errorOut(e, "SRACommand", "execute");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+//Placeholder for handling the "file" parameter.
+//Currently performs no work: files is left untouched and 0 is always returned.
+int SRACommand::readFile(vector<string>& files){
+    const int status = 0;
+    try {
+        //not implemented yet
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SRACommand", "readFile");
+        exit(1);
+    }
+    return status;
+}
+//**********************************************************************************************************************
+//Splits sfffile into per-sample sff files by running the sffinfo command.
+//Samples are assigned from groupfile when it is set, otherwise from oligosfile;
+//in the oligos case every non-zero pdiffs/bdiffs/ldiffs/sdiffs/tdiffs value is
+//forwarded to sffinfo.
+//files is replaced with the "sff" outputs reported by sffinfo. If sffinfo reports
+//no "sff" outputs, m->control_pressed is set to signal the failure.
+//Always returns 0.
+int SRACommand::parseSffFile(vector<string>& files){
+    try {
+        //run sffinfo to parse sff file into individual sampled sff files
+        string commandString = "sff=" + sfffile;
+        if (groupfile != "") { commandString += ", group=" + groupfile; }
+        else if (oligosfile != "") {
+            commandString += ", oligos=" + oligosfile;
+            //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs
+            if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); }
+            if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); }
+            if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
+            if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
+            if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
+        }
+        m->mothurOutEndLine();
+        m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+        m->mothurOut("Running command: sffinfo(" + commandString + ")"); m->mothurOutEndLine();
+        m->mothurCalling = true;
+
+        Command* sffinfoCommand = new SffInfoCommand(commandString);
+        sffinfoCommand->execute();
+
+        //collect the per-sample sff files produced by sffinfo
+        map<string, vector<string> > filenames = sffinfoCommand->getOutputFiles();
+        map<string, vector<string> >::iterator it = filenames.find("sff");
+        if (it != filenames.end()) { files = it->second; }
+        else { m->control_pressed = true; } // error in sffinfo
+
+        delete sffinfoCommand;
+        m->mothurCalling = false;
+        m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+
+        return 0;
+    }
+    catch(exception& e) {
+        //report the function that actually failed
+        m->errorOut(e, "SRACommand", "parseSffFile");
+        exit(1);
+    }
+}
+
+//**********************************************************************************************************************
+//Splits fastqfile into per-sample fastq files by running the fastq.info command.
+//Samples are assigned from groupfile when it is set, otherwise from oligosfile;
+//in the oligos case every non-zero pdiffs/bdiffs/ldiffs/sdiffs/tdiffs value is
+//forwarded to fastq.info.
+//files is replaced with the "fastq" outputs reported by fastq.info. If fastq.info
+//reports no "fastq" outputs, m->control_pressed is set to signal the failure.
+//Always returns 0.
+int SRACommand::parseFastqFile(vector<string>& files){
+    try {
+
+        //run fastq.info to parse fastq file into individual sampled fastq files
+        string commandString = "fastq=" + fastqfile;
+        if (groupfile != "") { commandString += ", group=" + groupfile; }
+        else if (oligosfile != "") {
+            commandString += ", oligos=" + oligosfile;
+            //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs
+            if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); }
+            if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); }
+            if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
+            if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
+            if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
+        }
+        m->mothurOutEndLine();
+        m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+        m->mothurOut("Running command: fastq.info(" + commandString + ")"); m->mothurOutEndLine();
+        m->mothurCalling = true;
+
+        Command* fastqinfoCommand = new ParseFastaQCommand(commandString);
+        fastqinfoCommand->execute();
+
+        //collect the per-sample fastq files produced by fastq.info
+        map<string, vector<string> > filenames = fastqinfoCommand->getOutputFiles();
+        map<string, vector<string> >::iterator it = filenames.find("fastq");
+        if (it != filenames.end()) { files = it->second; }
+        else { m->control_pressed = true; } // error in fastq.info
+
+        delete fastqinfoCommand;
+        m->mothurCalling = false;
+        m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+
+        return 0;
+    }
+    catch(exception& e) {
+        //report the function that actually failed
+        m->errorOut(e, "SRACommand", "parseFastqFile");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+
+
--- /dev/null
+//
+// sracommand.h
+// Mothur
+//
+// Created by SarahsWork on 10/28/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_sracommand_h
+#define Mothur_sracommand_h
+
+#include "command.hpp"
+
+
+/**************************************************************************************************/
+
+//The mothur "sra" command (described by getDescription() as creating a
+//Sequence Read Archive / SRA).
+class SRACommand : public Command {
+public:
+    SRACommand(string);
+    SRACommand();
+    ~SRACommand(){}
+
+    //command metadata
+    vector<string> setParameters();
+    string getCommandName()
+    {
+        return "sra";
+    }
+    string getCommandCategory()
+    {
+        return "Sequence Processing";
+    }
+
+    string getOutputPattern(string);
+
+    string getHelpString();
+    string getCitation()
+    {
+        return "http://www.mothur.org/wiki/sra";
+    }
+    string getDescription()
+    {
+        return "create a Sequence Read Archive / SRA";
+    }
+
+    //entry points
+    int execute();
+    void help()
+    {
+        m->mothurOut(getHelpString());
+    }
+
+private:
+    //when true, execute() returns without doing any work
+    bool abort;
+
+    //mismatch settings; non-zero values are forwarded to the sffinfo / fastq.info
+    //sub-commands when an oligos file is used
+    int tdiffs;
+    int bdiffs;
+    int pdiffs;
+    int sdiffs;
+    int ldiffs;
+
+    //input files and options supplied by the user
+    string sfffile;
+    string fastqfile;
+    string platform;
+    string outputDir;
+    string groupfile;
+    string file;
+    string oligosfile;
+
+    //names printed by execute() under "Output File Names"
+    vector<string> outputNames;
+
+    //helpers used by execute(); each receives the per-sample file list by reference
+    int readFile(vector<string>&);
+    int parseSffFile(vector<string>&);
+    int parseFastqFile(vector<string>&);
+
+};
+
+/**************************************************************************************************/
+
+
+
+#endif
try {
//save mothurOut's binLabels to restore for next label
- vector<string> saveBinLabels = m->currentBinLabels;
+ vector<string> saveBinLabels = m->currentSharedBinLabels;
int numBins = thislookup[0]->getNumBins();
for (int i = 0; i < thislookup.size(); i++) {
for (int j = 0; j < size; j++) {
- if (m->control_pressed) { return m->currentBinLabels; }
+ if (m->control_pressed) { return m->currentSharedBinLabels; }
int bin = order.get(j);
//subsampling may have created some otus with no sequences in them
eliminateZeroOTUS(thislookup);
- if (m->control_pressed) { return m->currentBinLabels; }
+ if (m->control_pressed) { return m->currentSharedBinLabels; }
//save mothurOut's binLabels to restore for next label
- vector<string> subsampleBinLabels = m->currentBinLabels;
- m->currentBinLabels = saveBinLabels;
+ vector<string> subsampleBinLabels = m->currentSharedBinLabels;
+ m->currentSharedBinLabels = saveBinLabels;
return subsampleBinLabels;
for (int h = 0; h < diff; h++) { binLabel += "0"; }
}
binLabel += sbinNumber;
- if (i < m->currentBinLabels.size()) { binLabel = m->currentBinLabels[i]; }
+ if (i < m->currentSharedBinLabels.size()) { binLabel = m->currentSharedBinLabels[i]; }
newBinLabels.push_back(binLabel);
}
thislookup.clear();
thislookup = newLookup;
- m->currentBinLabels = newBinLabels;
+ m->currentSharedBinLabels = newBinLabels;
return 0;
else if (type == "name") { pattern = "[filename],subsample,[extension]"; }
else if (type == "group") { pattern = "[filename],subsample,[extension]"; }
else if (type == "count") { pattern = "[filename],subsample,[extension]"; }
- else if (type == "list") { pattern = "[filename],subsample,[extension]"; }
+ else if (type == "list") { pattern = "[filename],[distance],subsample,[extension]"; }
else if (type == "taxonomy") { pattern = "[filename],subsample,[extension]"; }
else if (type == "shared") { pattern = "[filename],[distance],subsample,[extension]"; }
else if (type == "rabund") { pattern = "[filename],subsample,[extension]"; }
try {
//save mothurOut's binLabels to restore for next label
- vector<string> saveBinLabels = m->currentBinLabels;
+ vector<string> saveBinLabels = m->currentSharedBinLabels;
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(sharedfile); }
m->openOutputFile(outputFileName, out);
outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
- m->currentBinLabels = subsampledLabels;
+ m->currentSharedBinLabels = subsampledLabels;
thislookup[0]->printHeaders(out);
//save mothurOut's binLabels to restore for next label
- m->currentBinLabels = saveBinLabels;
+ m->currentSharedBinLabels = saveBinLabels;
return 0;
if (namefile != "") { m->readNames(namefile, nameMap); }
- string thisOutputDir = outputDir;
- if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
- map<string, string> variables;
- variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
- variables["[extension]"] = m->getExtension(listfile);
- string outputFileName = getOutputFileName("list", variables);
- ofstream out;
- m->openOutputFile(outputFileName, out);
- outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
-
InputData* input = new InputData(listfile, "list");
ListVector* list = input->getListVector();
string lastLabel = list->getLabel();
//file mismatch quit
if (list->getNumSeqs() != groupMap.getNumSeqs()) {
m->mothurOut("[ERROR]: your list file contains " + toString(list->getNumSeqs()) + " sequences, and your groupfile contains " + toString(groupMap.getNumSeqs()) + ", please correct.");
- m->mothurOutEndLine(); delete list; delete input; out.close(); outGroup.close(); return 0;
+ m->mothurOutEndLine(); delete list; delete input; outGroup.close(); return 0;
}
}else if (countfile != "") {
if (ct.hasGroupInfo()) {
//as long as you are not at the end of the file or done wih the lines you want
while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
- if (m->control_pressed) { delete list; delete input; out.close(); return 0; }
+ if (m->control_pressed) { delete list; delete input; return 0; }
if(allLines == 1 || labels.count(list->getLabel()) == 1){
m->mothurOut(list->getLabel()); m->mothurOutEndLine();
- processList(list, out, subset);
+ processList(list, subset);
processedLabels.insert(list->getLabel());
userLabels.erase(list->getLabel());
list = input->getListVector(lastLabel);
m->mothurOut(list->getLabel()); m->mothurOutEndLine();
- processList(list, out, subset);
+ processList(list, subset);
processedLabels.insert(list->getLabel());
userLabels.erase(list->getLabel());
}
- if (m->control_pressed) { if (list != NULL) { delete list; } delete input; out.close(); return 0; }
+ if (m->control_pressed) { if (list != NULL) { delete list; } delete input; return 0; }
//output error messages about any remaining user labels
set<string>::iterator it;
m->mothurOut(list->getLabel()); m->mothurOutEndLine();
- processList(list, out, subset);
+ processList(list, subset);
delete list; list = NULL;
}
- out.close();
if (list != NULL) { delete list; }
delete input;
}
}
//**********************************************************************************************************************
-int SubSampleCommand::processList(ListVector*& list, ofstream& out, set<string>& subset) {
+int SubSampleCommand::processList(ListVector*& list, set<string>& subset) {
try {
-
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
+ map<string, string> variables;
+ variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
+ variables["[extension]"] = m->getExtension(listfile);
+ variables["[distance]"] = list->getLabel();
+ string outputFileName = getOutputFileName("list", variables);
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+ outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
+
int numBins = list->getNumBins();
ListVector* temp = new ListVector();
temp->setLabel(list->getLabel());
+ vector<string> binLabels = list->getLabels();
+ vector<string> newLabels;
for (int i = 0; i < numBins; i++) {
if (m->control_pressed) { break; }
if (newNames != "") {
newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
temp->push_back(newNames);
+ newLabels.push_back(binLabels[i]);
}
}
+ temp->setLabels(newLabels);
delete list;
list = temp;
- if (m->control_pressed) { return 0; }
+ if (m->control_pressed) { out.close(); return 0; }
+ list->printHeaders(out);
list->print(out);
+ out.close();
return 0;
int processShared(vector<SharedRAbundVector*>&);
int processRabund(RAbundVector*&, ofstream&);
int processSabund(SAbundVector*&, ofstream&);
- int processList(ListVector*&, ofstream&, set<string>&);
+ int processList(ListVector*&, set<string>&);
int getNames();
int readNames();
int getTax(set<string>&);
string temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; }
all = m->isTrue(temp);
- temp = validParameter.validFile(parameters, "distance", false); if (temp == "not found") { temp = "false"; }
- createPhylip = m->isTrue(temp);
-
temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
m->mothurConvert(temp, iters);
if (subsample == false) { iters = 0; }
+ temp = validParameter.validFile(parameters, "distance", false); if (temp == "not found") { temp = "false"; }
+ createPhylip = m->isTrue(temp);
+ if (subsample) { createPhylip = true; }
+
temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
m->setProcessors(temp);
m->mothurConvert(temp, processors);
variables["[tag]"] = toString(i+1);
string wFileName = getOutputFileName("weighted", variables);
output = new ColumnFile(wFileName, itersString);
- outputNames.push_back(wFileName); outputTypes["wweighted"].push_back(wFileName);
+ outputNames.push_back(wFileName); outputTypes["weighted"].push_back(wFileName);
}
userData = weighted.getValues(T[i], processors, outputDir); //userData[0] = weightedscore
if (vCalcs[i]->getName() == "sharedsobs") {
singleCalc = new Sobs();
if (sharedOtus && (labels.size() != 0)) {
- string filenameShared = outputDir + m->getRootName(m->getSimpleName(inputfile)) + lookup[0]->getLabel() + "." + vCalcs[i]->getName() + ".sharedotus";
+ string groupsTag = "";
+ for (int h = 0; h < lookup.size()-1; h++) { groupsTag += lookup[h]->getGroup() + "-"; } groupsTag += lookup[lookup.size()-1]->getGroup();
+ string filenameShared = outputDir + m->getRootName(m->getSimpleName(inputfile)) + lookup[0]->getLabel() + "." + vCalcs[i]->getName() + "." + groupsTag + ".sharedotus";
outputNames.push_back(filenameShared);
ofstream outShared;
ofstream outShared;
if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
- string filenameShared = outputDir + m->getRootName(m->getSimpleName(inputfile)) + lookup[0]->getLabel() + "." + vCalcs[i]->getName() + ".sharedotus";
+ string groupsTag = "";
+ for (int h = 0; h < lookup.size()-1; h++) { groupsTag += lookup[h]->getGroup() + "-"; } groupsTag += lookup[lookup.size()-1]->getGroup();
+ string filenameShared = outputDir + m->getRootName(m->getSimpleName(inputfile)) + lookup[0]->getLabel() + "." + vCalcs[i]->getName() + "." + groupsTag + ".sharedotus";
outputNames.push_back(filenameShared);
ofstream outShared;
if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
- string filenameShared = outputDir + m->getRootName(m->getSimpleName(inputfile)) + lookup[0]->getLabel() + "." + vCalcs[i]->getName() + ".sharedotus";
+ string groupsTag = "";
+ for (int h = 0; h < lookup.size()-1; h++) { groupsTag += lookup[h]->getGroup() + "-"; } groupsTag += lookup[lookup.size()-1]->getGroup();
+ string filenameShared = outputDir + m->getRootName(m->getSimpleName(inputfile)) + lookup[0]->getLabel() + "." + vCalcs[i]->getName() + "." + groupsTag + ".sharedotus";
outputNames.push_back(filenameShared);
CommandParameter pnseqs("nseqs", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pnseqs);
CommandParameter psharedotus("sharedotus", "Boolean", "", "t", "", "", "","",false,false); parameters.push_back(psharedotus);
CommandParameter pfontsize("fontsize", "Number", "", "24", "", "", "","",false,false); parameters.push_back(pfontsize);
- CommandParameter ppermute("permute", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppermute);
- CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+ CommandParameter ppermute("permute", "Multiple", "1-2-3-4", "4", "", "", "","",false,false); parameters.push_back(ppermute); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
vector<string> myArray;
helpString += "The default value for calc is sobs if you have only read a list file or if you have selected only one group, and sharedsobs if you have multiple groups.\n";
helpString += "The default available estimators for calc are sobs, chao and ace if you have only read a list file, and sharedsobs, sharedchao and sharedace if you have read a shared file.\n";
helpString += "The nseqs parameter will output the number of sequences represented by the otus in the picture, default=F.\n";
- helpString += "If you have more than 4 groups, the permute parameter will find all possible combos of 4 of your groups and create pictures for them, default=F.\n";
+ helpString += "If you have more than 4 groups, you can use the permute parameter to set the number of groups you would like mothur to divide the samples into to draw the venn diagrams for all possible combos. Default=4.\n";
helpString += "The only estimators available four 4 groups are sharedsobs and sharedchao.\n";
helpString += "The sharedotus parameter can be used with the sharedsobs calculator to get the names of the OTUs in each section of the venn diagram. Default=t.\n";
helpString += "The venn command outputs a .svg file for each calculator you specify at each distance you choose.\n";
temp = validParameter.validFile(parameters, "nseqs", false); if (temp == "not found"){ temp = "f"; }
nseqs = m->isTrue(temp);
- temp = validParameter.validFile(parameters, "permute", false); if (temp == "not found"){ temp = "f"; }
- perm = m->isTrue(temp);
+ temp = validParameter.validFile(parameters, "permute", false); if (temp == "not found"){ temp = "4"; }
+ m->mothurConvert(temp, perm);
+ if ((perm == 1) || (perm == 2) || (perm == 3) || (perm == 4)) { }
+ else { m->mothurOut("[ERROR]: Not a valid permute value. Valid values are 1, 2, 3, and 4."); m->mothurOutEndLine(); abort = true; }
temp = validParameter.validFile(parameters, "sharedotus", false); if (temp == "not found"){ temp = "t"; }
sharedOtus = m->isTrue(temp);
lookup = input->getSharedRAbundVectors();
lastLabel = lookup[0]->getLabel();
- if ((lookup.size() > 4) && (perm)) { combosOfFour = findCombinations(lookup.size()); }
+ if ((lookup.size() > 4)) { combos = findCombinations(lookup.size()); }
}else if (format == "list") {
sabund = input->getSAbundVector();
lastLabel = sabund->getLabel();
processedLabels.insert(lookup[0]->getLabel());
userLabels.erase(lookup[0]->getLabel());
- if ((lookup.size() > 4) && (!perm)){
- m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile. If you set perm=t, I will find all possible combos of 4 groups."); m->mothurOutEndLine();
- for (int i = lookup.size(); i > 4; i--) { lookup.pop_back(); } //no memmory leak because pop_back calls destructor
- vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
- for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
- }else if ((lookup.size() > 4) && (perm)) {
+ if (lookup.size() > 4) {
set< set<int> >::iterator it3;
set<int>::iterator it2;
- for (it3 = combosOfFour.begin(); it3 != combosOfFour.end(); it3++) {
+ for (it3 = combos.begin(); it3 != combos.end(); it3++) {
set<int> poss = *it3;
vector<SharedRAbundVector*> subset;
processedLabels.insert(lookup[0]->getLabel());
userLabels.erase(lookup[0]->getLabel());
- if ((lookup.size() > 4) && (!perm)){
- m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile. If you set perm=t, I will find all possible combos of 4 groups."); m->mothurOutEndLine();
- for (int i = lookup.size(); i > 4; i--) { lookup.pop_back(); } //no memmory leak because pop_back calls destructor
-
- vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
- for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
-
- }else if ((lookup.size() > 4) && (perm)) {
+ if (lookup.size() > 4) {
set< set<int> >::iterator it3;
set<int>::iterator it2;
- for (it3 = combosOfFour.begin(); it3 != combosOfFour.end(); it3++) {
+ for (it3 = combos.begin(); it3 != combos.end(); it3++) {
set<int> poss = *it3;
vector<SharedRAbundVector*> subset;
processedLabels.insert(lookup[0]->getLabel());
userLabels.erase(lookup[0]->getLabel());
- if ((lookup.size() > 4) && (!perm)){
- m->mothurOut("Error: Too many groups chosen. You may use up to 4 groups with the venn command. I will use the first four groups in your groupfile. If you set perm=t, I will find all possible combos of 4 groups."); m->mothurOutEndLine();
- for (int i = lookup.size(); i > 4; i--) { lookup.pop_back(); } //no memmory leak because pop_back calls destructor
-
- vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
- for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]); outputTypes["svg"].push_back(outfilenames[i]); } }
-
- }else if ((lookup.size() > 4) && (perm)) {
+ if (lookup.size() > 4) {
set< set<int> >::iterator it3;
set<int>::iterator it2;
- for (it3 = combosOfFour.begin(); it3 != combosOfFour.end(); it3++) {
+ for (it3 = combos.begin(); it3 != combos.end(); it3++) {
set<int> poss = *it3;
vector<SharedRAbundVector*> subset;
}
}
//**********************************************************************************************************************
-//returns a vector of sets containing the 4 group combinations
+//returns a vector of sets containing the group combinations
set< set<int> > VennCommand::findCombinations(int lookupSize){
try {
set< set<int> > combos;
for (int i = 0; i < lookupSize; i++) { possibles.insert(i); }
getCombos(possibles, combos);
-
+
return combos;
}
}
}
//**********************************************************************************************************************
-//recusively finds combos of 4
+//recursively finds combos of length perm
int VennCommand::getCombos(set<int> possibles, set< set<int> >& combos){
try {
- if (possibles.size() == 4) { //done
+ if (possibles.size() == perm) { //done
if (combos.count(possibles) == 0) { //no dups
combos.insert(possibles);
}
Venn* venn;
vector<Calculator*> vennCalculators;
vector<SharedRAbundVector*> lookup;
- set< set<int> > combosOfFour;
+ set< set<int> > combos;
SAbundVector* sabund;
- int abund, fontsize;
+ int abund, fontsize, perm;
- bool abort, allLines, nseqs, perm, sharedOtus;
+ bool abort, allLines, nseqs, sharedOtus;
set<string> labels; //holds labels to be used
string format, groups, calc, label, outputDir, sharedfile, listfile, inputfile;
vector<string> Estimators, Groups, outputNames;