.DS_Store
*.zip
*.pbxproj
+*.xcuserdata
\ No newline at end of file
A7876A26152A017C00A0AE86 /* subsample.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7876A25152A017C00A0AE86 /* subsample.cpp */; };
A79234D713C74BF6002B08E2 /* mothurfisher.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A79234D613C74BF6002B08E2 /* mothurfisher.cpp */; };
A795840D13F13CD900F201D5 /* countgroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A795840C13F13CD900F201D5 /* countgroupscommand.cpp */; };
+ A799314B16CBD0CD0017E888 /* mergetaxsummarycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A799314A16CBD0CD0017E888 /* mergetaxsummarycommand.cpp */; };
A799F5B91309A3E000AEEFA0 /* makefastqcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */; };
A79EEF8616971D4A0006DEC1 /* filtersharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A79EEF8516971D4A0006DEC1 /* filtersharedcommand.cpp */; };
A7A0671A1562946F0095C8C5 /* listotulabelscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A067191562946F0095C8C5 /* listotulabelscommand.cpp */; };
outputFiles = (
"$(TARGET_BUILD_DIR)/$(INPUT_FILE_BASE).o",
);
- script = "/usr/local/gfortran/bin/gfortran -g -m64 -c ${PROJECT_DIR}/${INPUT_FILE_NAME} -o ${TARGET_BUILD_DIR}/${INPUT_FILE_BASE}.o";
+ script = "/usr/local/bin/gfortran -g -m64 -c ${PROJECT_DIR}/${INPUT_FILE_NAME} -o ${TARGET_BUILD_DIR}/${INPUT_FILE_BASE}.o";
};
/* End PBXBuildRule section */
A79234D613C74BF6002B08E2 /* mothurfisher.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mothurfisher.cpp; sourceTree = "<group>"; };
A795840B13F13CD900F201D5 /* countgroupscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = countgroupscommand.h; sourceTree = "<group>"; };
A795840C13F13CD900F201D5 /* countgroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = countgroupscommand.cpp; sourceTree = "<group>"; };
+ A799314816CBD0BC0017E888 /* mergetaxsummarycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mergetaxsummarycommand.h; sourceTree = "<group>"; };
+ A799314A16CBD0CD0017E888 /* mergetaxsummarycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mergetaxsummarycommand.cpp; sourceTree = "<group>"; };
A799F5B71309A3E000AEEFA0 /* makefastqcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makefastqcommand.h; sourceTree = "<group>"; };
A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makefastqcommand.cpp; sourceTree = "<group>"; };
A79EEF8516971D4A0006DEC1 /* filtersharedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = filtersharedcommand.cpp; sourceTree = "<group>"; };
A7E9B75312D37EC400DA6239 /* mergefilecommand.cpp */,
A71FE12A12EDF72400963CA7 /* mergegroupscommand.h */,
A71FE12B12EDF72400963CA7 /* mergegroupscommand.cpp */,
+ A799314816CBD0BC0017E888 /* mergetaxsummarycommand.h */,
+ A799314A16CBD0CD0017E888 /* mergetaxsummarycommand.cpp */,
A7E9B75812D37EC400DA6239 /* metastatscommand.h */,
A7E9B75712D37EC400DA6239 /* metastatscommand.cpp */,
A7E9B75A12D37EC400DA6239 /* mgclustercommand.h */,
A74C06E916A9C0A9008390A3 /* primerdesigncommand.cpp in Sources */,
A7128B1D16B7002A00723BE4 /* getdistscommand.cpp in Sources */,
A7B0231516B8244C006BA09E /* removedistscommand.cpp in Sources */,
+ A799314B16CBD0CD0017E888 /* mergetaxsummarycommand.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
GCC_MODEL_TUNING = "";
GCC_OPTIMIZATION_LEVEL = 3;
GCC_PREPROCESSOR_DEFINITIONS = (
- "VERSION=\"\\\"1.28.0\\\"\"",
- "RELEASE_DATE=\"\\\"11/2/2012\\\"\"",
+ "VERSION=\"\\\"1.29.2\\\"\"",
+ "RELEASE_DATE=\"\\\"2/12/2013\\\"\"",
);
GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
if ((processToAssign-1) == 1) { m->mothurOut(distName[i].begin()->first + "\n"); }
}
- //not lets reverse the order of ever other process, so we balance big files running with little ones
+ //now lets reverse the order of ever other process, so we balance big files running with little ones
for (int i = 0; i < processors; i++) {
//cout << i << endl;
int remainder = ((i+1) % processors);
helpString += "The collect.single command parameters are list, sabund, rabund, shared, label, freq, calc and abund. list, sabund, rabund or shared is required unless you have a valid current file. \n";
helpString += "The collect.single command should be in the following format: \n";
helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
- helpString += "collect.single(label=yourLabel, iters=yourIters, freq=yourFreq, calc=yourEstimators).\n";
- helpString += "Example collect(label=unique-.01-.03, iters=10000, freq=10, calc=sobs-chao-ace-jack).\n";
+ helpString += "collect.single(label=yourLabel, freq=yourFreq, calc=yourEstimators).\n";
+ helpString += "Example collect(label=unique-.01-.03, freq=10, calc=sobs-chao-ace-jack).\n";
helpString += "The default values for freq is 100, and calc are sobs-chao-ace-jack-shannon-npshannon-simpson.\n";
helpString += validCalculator.printCalc("single");
helpString += "The label parameter is used to analyze specific labels in your input.\n";
#include "primerdesigncommand.h"
#include "getdistscommand.h"
#include "removedistscommand.h"
+#include "mergetaxsummarycommand.h"
/*******************************************************/
commands["primer.design"] = "primer.design";
commands["get.dists"] = "get.dists";
commands["remove.dists"] = "remove.dists";
+ commands["merge.taxsummary"] = "merge.taxsummary";
}
else if(commandName == "primer.design") { command = new PrimerDesignCommand(optionString); }
else if(commandName == "get.dists") { command = new GetDistsCommand(optionString); }
else if(commandName == "remove.dists") { command = new RemoveDistsCommand(optionString); }
+ else if(commandName == "merge.taxsummary") { command = new MergeTaxSummaryCommand(optionString); }
else { command = new NoCommand(optionString); }
return command;
else if(commandName == "primer.design") { pipecommand = new PrimerDesignCommand(optionString); }
else if(commandName == "get.dists") { pipecommand = new GetDistsCommand(optionString); }
else if(commandName == "remove.dists") { pipecommand = new RemoveDistsCommand(optionString); }
+ else if(commandName == "merge.taxsummary") { pipecommand = new MergeTaxSummaryCommand(optionString); }
else { pipecommand = new NoCommand(optionString); }
return pipecommand;
else if(commandName == "primer.design") { shellcommand = new PrimerDesignCommand(); }
else if(commandName == "get.dists") { shellcommand = new GetDistsCommand(); }
else if(commandName == "remove.dists") { shellcommand = new RemoveDistsCommand(); }
+ else if(commandName == "merge.taxsummary") { shellcommand = new MergeTaxSummaryCommand(); }
else { shellcommand = new NoCommand(); }
return shellcommand;
translateFlow();
m->gobble(flowFile);
}
-
+
if(flowFile){ return 1; }
else { return 0; }
}
void FlowData::updateEndFlow(){
try{
+ if (baseFlow.length() > 4) { return; }
+
//int currLength = 0;
float maxIntensity = (float) maxHomoP + 0.49;
int deadSpot = 0;
-
+
while(deadSpot < endFlow){
int signal = 0;
int noise = 0;
- for(int i=0;i<4;i++){
+ for(int i=0;i<baseFlow.length();i++){
float intensity = flowData[i + deadSpot];
if(intensity > signalIntensity){
signal++;
break;
}
- deadSpot += 4;
+ deadSpot += baseFlow.length();
}
endFlow = deadSpot;
sequence = "";
for(int i=0;i<endFlow;i++){
int intensity = (int)(flowData[i] + 0.5);
- char base = baseFlow[i % 4];
+ char base = baseFlow[i % baseFlow.length()];
for(int j=0;j<intensity;j++){
sequence += base;
}
}
-
+
if(sequence.size() > 4){
sequence = sequence.substr(4);
}
CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "","",false,false); parameters.push_back(palign);
CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
+ CommandParameter ptrimoverlap("trimoverlap", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ptrimoverlap);
CommandParameter pmatch("match", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pmatch);
CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pmismatch);
CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapopen);
helpString += "The insert parameter allows you to set a quality scores threshold. In the case where we are trying to decide whether to keep a base or remove it because the base is compared to a gap in the other fragment, if the base has a quality score below the threshold we eliminate it. Default=25.\n";
helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
+ helpString += "The trimoverlap parameter allows you to trim the sequences to only the overlapping section. The default is F.\n";
helpString += "The make.contigs command should be in the following format: \n";
helpString += "make.contigs(ffastq=yourForwardFastqFile, rfastq=yourReverseFastqFile, align=yourAlignmentMethod) \n";
helpString += "Note: No spaces between parameter labels (i.e. ffastq), '=' and parameters (i.e.yourForwardFastqFile).\n";
temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; }
allFiles = m->isTrue(temp);
+
+ temp = validParameter.validFile(parameters, "trimoverlap", false); if (temp == "not found") { temp = "F"; }
+ trimOverlap = m->isTrue(temp);
align = validParameter.validFile(parameters, "align", false); if (align == "not found"){ align = "needleman"; }
if ((align != "needleman") && (align != "gotoh")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman or gotoh. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; }
}
- contigsData* tempcontig = new contigsData(files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, barcodes, primers, tempFASTAFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createGroup, allFiles, h);
+ contigsData* tempcontig = new contigsData(files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, barcodes, primers, tempFASTAFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createGroup, allFiles, trimOverlap, h);
pDataArray.push_back(tempcontig);
hThreadArray[h] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);
// if (num < 5) { cout << fSeq.getStartPos() << '\t' << fSeq.getEndPos() << '\t' << rSeq.getStartPos() << '\t' << rSeq.getEndPos() << endl; }
int overlapStart = fSeq.getStartPos();
int seq2Start = rSeq.getStartPos();
+
//bigger of the 2 starting positions is the location of the overlapping start
if (overlapStart < seq2Start) { //seq2 starts later so take from 0 to seq2Start from seq1
overlapStart = seq2Start;
for (int i = overlapEnd; i < length; i++) { contig += seq1[i]; }
}
+ if (trimOverlap) { contig = contig.substr(overlapStart-1, oend-oStart); if (contig.length() == 0) { trashCode += "l"; } }
+
if(trashCode.length() == 0){
bool ignore = false;
void help() { m->mothurOut(getHelpString()); }
private:
- bool abort, allFiles, createGroup;
+ bool abort, allFiles, createGroup, trimOverlap;
string outputDir, ffastqfile, rfastqfile, align, oligosfile, rfastafile, ffastafile, rqualfile, fqualfile, file, format;
float match, misMatch, gapOpen, gapExtend;
int processors, longestBase, insert, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, deltaq;
MothurOut* m;
float match, misMatch, gapOpen, gapExtend;
int count, insert, threadID, pdiffs, bdiffs, tdiffs, deltaq;
- bool allFiles, createGroup, done;
+ bool allFiles, createGroup, done, trimOverlap;
map<string, int> groupCounts;
map<string, string> groupMap;
vector<string> primerNameVector;
map<int, oligosPair> primers;
contigsData(){}
- contigsData(vector<string> f, string of, string osf, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, int delt, map<int, oligosPair> br, map<int, oligosPair> pr, vector<vector<string> > ffn, vector<string>bnv, vector<string> pnv, int pdf, int bdf, int tdf, bool cg, bool all, int tid) {
+ contigsData(vector<string> f, string of, string osf, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, int delt, map<int, oligosPair> br, map<int, oligosPair> pr, vector<vector<string> > ffn, vector<string>bnv, vector<string> pnv, int pdf, int bdf, int tdf, bool cg, bool all, bool to, int tid) {
files = f;
outputFasta = of;
outputMisMatches = om;
bdiffs = bdf;
tdiffs = tdf;
allFiles = all;
+ trimOverlap = to;
createGroup = cg;
threadID = tid;
deltaq = delt;
for (int i = overlapEnd; i < length; i++) { contig += seq1[i]; }
}
+ if (pDataArray->trimOverlap) { contig = contig.substr(overlapStart-1, oend-oStart); if (contig.length() == 0) { trashCode += "l"; } }
+
if(trashCode.length() == 0){
bool ignore = false;
if (pDataArray->createGroup) {
CYGWIN_BUILD ?= no
USECOMPRESSION ?= no
MOTHUR_FILES="\"Enter_your_default_path_here\""
-RELEASE_DATE = "\"1/23/2013\""
-VERSION = "\"1.29.1\""
+RELEASE_DATE = "\"2/12/2013\""
+VERSION = "\"1.29.2\""
FORTAN_COMPILER = gfortran
FORTRAN_FLAGS =
CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson", "jclass-thetayc", "", "", "","",true,false,true); parameters.push_back(pcalc);
- CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "","",false,false); parameters.push_back(poutput);
+ CommandParameter poutput("output", "Multiple", "lt-square-column", "lt", "", "", "","",false,false); parameters.push_back(poutput);
CommandParameter pmode("mode", "Multiple", "average-median", "average", "", "", "","",false,false); parameters.push_back(pmode);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
helpString += "The dist.shared command should be in the following format: dist.shared(groups=yourGroups, calc=yourCalcs, label=yourLabels).\n";
- helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n";
+ helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, column and square. The default is lt.\n";
helpString += "The mode parameter allows you to specify if you want the average or the median values reported when subsampling. Options are average, and median. The default is average.\n";
helpString += "Example dist.shared(groups=A-B-C, calc=jabund-sorabund).\n";
helpString += "The default value for groups is all the groups in your groupfile.\n";
}
output = validParameter.validFile(parameters, "output", false); if(output == "not found"){ output = "lt"; }
- if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
+ if ((output != "lt") && (output != "square") && (output != "column")) { m->mothurOut(output + " is not a valid output form. Options are lt, column and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
mode = validParameter.validFile(parameters, "mode", false); if(mode == "not found"){ mode = "average"; }
if ((mode != "average") && (mode != "median")) { m->mothurOut(mode + " is not a valid mode. Options are average and medina. I will use average."); m->mothurOutEndLine(); output = "average"; }
try {
out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
-
- //output num seqs
- out << simMatrix.size() << endl;
-
+
if (output == "lt") {
+ out << simMatrix.size() << endl;
for (int b = 0; b < simMatrix.size(); b++) {
out << lookup[b]->getGroup() << '\t';
for (int n = 0; n < b; n++) {
}
out << endl;
}
+ }else if (output == "column") {
+ for (int b = 0; b < simMatrix.size(); b++) {
+ for (int n = 0; n < b; n++) {
+ out << lookup[b]->getGroup() << '\t' << lookup[n]->getGroup() << '\t' << simMatrix[b][n] << endl;
+ }
+ }
}else{
+ out << simMatrix.size() << endl;
for (int b = 0; b < simMatrix.size(); b++) {
out << lookup[b]->getGroup() << '\t';
for (int n = 0; n < simMatrix[b].size(); n++) {
--- /dev/null
+//
+// mergetaxsummarycommand.cpp
+// Mothur
+//
+// Created by Sarah Westcott on 2/13/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "mergetaxsummarycommand.h"
+
+
+//**********************************************************************************************************************
+vector<string> MergeTaxSummaryCommand::setParameters(){
+ try {
+ CommandParameter pinput("input", "String", "", "", "", "", "","",false,true,true); parameters.push_back(pinput);
+ CommandParameter poutput("output", "String", "", "", "", "", "","",false,true,true); parameters.push_back(poutput);
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+
+ vector<string> myArray;
+ for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MergeTaxSummaryCommand", "setParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string MergeTaxSummaryCommand::getHelpString(){
+ try {
+ string helpString = "";
+ helpString += "The merge.taxsummary command takes a list of tax.summary files separated by dashes and merges them into one file.";
+ helpString += "The merge.taxsummary command parameters are input and output.";
+ helpString += "Example merge.taxsummary(input=small.tax.summary-large.tax.summary, output=all.tax.summary).";
+ helpString += "Note: No spaces between parameter labels (i.e. output), '=' and parameters (i.e.yourOutputFileName).\n";
+ return helpString;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MergeTaxSummaryCommand", "getHelpString");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+MergeTaxSummaryCommand::MergeTaxSummaryCommand(){
+ try {
+ abort = true; calledHelp = true;
+ setParameters();
+ vector<string> tempOutNames;
+ outputTypes["taxsummary"] = tempOutNames;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MergeTaxSummaryCommand", "MergeTaxSummaryCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+MergeTaxSummaryCommand::MergeTaxSummaryCommand(string option) {
+ try {
+ abort = false; calledHelp = false;
+
+ if(option == "help") { help(); abort = true; calledHelp = true; }
+ else if(option == "citation") { citation(); abort = true; calledHelp = true; }
+ else {
+ vector<string> myArray = setParameters();
+
+ OptionParser parser(option);
+ map<string,string> parameters = parser.getParameters();
+
+ ValidParameters validParameter;
+
+ //check to make sure all parameters are valid for command
+ for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
+ if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
+ }
+
+ //initialize outputTypes
+ vector<string> tempOutNames;
+ outputTypes["taxsummary"] = tempOutNames;
+
+ //if the user changes the input directory command factory will send this info to us in the output parameter
+ string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ if (inputDir == "not found"){ inputDir = ""; }
+
+ string fileList = validParameter.validFile(parameters, "input", false);
+ if(fileList == "not found") { m->mothurOut("you must enter two or more file names"); m->mothurOutEndLine(); abort=true; }
+ else{ m->splitAtDash(fileList, fileNames); }
+
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ string outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found") { outputDir = ""; }
+
+
+ numInputFiles = fileNames.size();
+ ifstream testFile;
+ if(numInputFiles == 0){
+ m->mothurOut("you must enter two or more file names and you entered " + toString(fileNames.size()) + " file names"); m->mothurOutEndLine();
+ abort=true;
+ }
+ else{
+ for(int i=0;i<numInputFiles;i++){
+ if (inputDir != "") {
+ string path = m->hasPath(fileNames[i]);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { fileNames[i] = inputDir + fileNames[i]; }
+ }
+
+ int ableToOpen;
+ ifstream in;
+ ableToOpen = m->openInputFile(fileNames[i], in, "noerror");
+ in.close();
+
+ //if you can't open it, try default location
+ if (ableToOpen == 1) {
+ if (m->getDefaultPath() != "") { //default path is set
+ string tryPath = m->getDefaultPath() + m->getSimpleName(fileNames[i]);
+ m->mothurOut("Unable to open " + fileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
+ ifstream in2;
+ ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+ in2.close();
+ fileNames[i] = tryPath;
+ }
+ }
+
+ //if you can't open it, try output location
+ if (ableToOpen == 1) {
+ if (m->getOutputDir() != "") { //default path is set
+ string tryPath = m->getOutputDir() + m->getSimpleName(fileNames[i]);
+ m->mothurOut("Unable to open " + fileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
+ ifstream in2;
+ ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+ in2.close();
+ fileNames[i] = tryPath;
+ }
+ }
+
+
+
+ if (ableToOpen == 1) {
+ m->mothurOut("Unable to open " + fileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
+ //erase from file list
+ fileNames.erase(fileNames.begin()+i);
+ i--;
+ }
+ }
+ }
+
+ outputFileName = validParameter.validFile(parameters, "output", false);
+ if (outputFileName == "not found") { m->mothurOut("you must enter an output file name"); m->mothurOutEndLine(); abort=true; }
+ else if (outputDir != "") { outputFileName = outputDir + m->getSimpleName(outputFileName); }
+
+ }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MergeTaxSummaryCommand", "MergeTaxSummaryCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+int MergeTaxSummaryCommand::execute(){
+ try {
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
+ outputFileName = m->getFullPathName(outputFileName);
+ m->mothurRemove(outputFileName);
+
+ vector<rawTaxNode> tree;
+ tree.push_back(rawTaxNode("Root"));
+ tree[0].rank = "0";
+ bool hasGroups = true;
+ set<string> groups;
+
+ for (int i = 0; i < fileNames.size(); i++) {
+
+ ifstream in;
+ m->openInputFile(fileNames[i], in);
+ string temp = m->getline(in);
+ vector<string> headers = m->splitWhiteSpace(temp);
+
+ vector<string> thisFilesGroups;
+ if (headers.size() == 5) { hasGroups = false; }
+ else { for (int j = 5; j < headers.size(); j++) { groups.insert(headers[j]); thisFilesGroups.push_back(headers[j]); } }
+
+ int level, daugterLevels, total;
+ string rankId, tax;
+ map<int, int> levelToCurrentNode;
+ levelToCurrentNode[0] = 0;
+ while (!in.eof()) {
+
+ if (m->control_pressed) { return 0; }
+
+ in >> level >> rankId >> tax >> daugterLevels >> total; m->gobble(in);
+ map<string, int> groupCounts;
+ if (thisFilesGroups.size() != 0) {
+ for (int j = 0; j < thisFilesGroups.size(); j++) {
+ int tempNum; in >> tempNum; m->gobble(in);
+ groupCounts[thisFilesGroups[j]] = tempNum;
+ }
+ }
+
+ if (level == 0) {}
+ else {
+ map<int, int>::iterator itParent = levelToCurrentNode.find(level-1);
+ int parent = 0;
+ if (itParent == levelToCurrentNode.end()) { m->mothurOut("[ERROR]: situation I didnt expect.\n"); }
+ else { parent = itParent->second; }
+
+ levelToCurrentNode[level] = addTaxToTree(tree, level, parent, tax, total, groupCounts);
+ }
+ }
+ in.close();
+ }
+
+ if (!hasGroups && (groups.size() != 0)) { groups.clear(); m->mothurOut("[WARNING]: not all files contain group breakdown, ignoring group counts.\n"); }
+
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+ print(out, tree, groups);
+
+ if (m->control_pressed) { m->mothurRemove(outputFileName); return 0; }
+
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["taxsummary"].push_back(outputFileName);
+ m->mothurOutEndLine();
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MergeTaxSummaryCommand", "execute");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+
+int MergeTaxSummaryCommand::addTaxToTree(vector<rawTaxNode>& tree, int level, int currentNode, string taxon, int total, map<string, int> groups){
+ try {
+ map<string, int>::iterator childPointer;
+
+ childPointer = tree[currentNode].children.find(taxon);
+ int nodeToIncrement = 0;
+
+ if(childPointer != tree[currentNode].children.end()){ //if the node already exists, increment counts
+ nodeToIncrement = childPointer->second;
+ tree[nodeToIncrement].total += total;
+
+ for (map<string, int>::iterator itGroups = groups.begin(); itGroups != groups.end(); itGroups++) {
+ map<string, int>::iterator it = tree[nodeToIncrement].groupCount.find(itGroups->first);
+ if (it == tree[nodeToIncrement].groupCount.end()) { tree[nodeToIncrement].groupCount[itGroups->first] = itGroups->second; }
+ else { it->second += itGroups->second; }
+ }
+ }
+ else{ //otherwise, create it
+ tree.push_back(rawTaxNode(taxon));
+ tree[currentNode].children[taxon] = tree.size()-1;
+ tree[tree.size()-1].parent = currentNode;
+ nodeToIncrement = tree.size()-1;
+ tree[nodeToIncrement].total = total;
+ tree[nodeToIncrement].level = level;
+ for (map<string, int>::iterator itGroups = groups.begin(); itGroups != groups.end(); itGroups++) {
+ tree[nodeToIncrement].groupCount[itGroups->first] = itGroups->second;
+ }
+ }
+
+ return nodeToIncrement;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MergeTaxSummaryCommand", "addSeqToTree");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+
+int MergeTaxSummaryCommand::assignRank(int index, vector<rawTaxNode>& tree){
+ try {
+ map<string,int>::iterator it;
+ int counter = 1;
+
+ for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
+ if (m->control_pressed) { return 0; }
+ tree[it->second].rank = tree[index].rank + '.' + toString(counter);
+ counter++;
+
+ assignRank(it->second, tree);
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MergeTaxSummaryCommand", "assignRank");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+
+int MergeTaxSummaryCommand::print(ofstream& out, vector<rawTaxNode>& tree, set<string> groups){
+ try {
+
+ assignRank(0, tree);
+ vector<string> mGroups;
+ //print labels
+ out << "taxlevel\t rankID\t taxon\t daughterlevels\t total\t";
+ for (set<string>::iterator it = groups.begin(); it != groups.end(); it++) { out << (*it) << '\t'; }
+ out << endl;
+
+ for (set<string>::iterator it2 = groups.begin(); it2 != groups.end(); it2++) { tree[0].groupCount[*it2] = 0; }
+
+ map<string,int>::iterator it;
+ for(it=tree[0].children.begin();it!=tree[0].children.end();it++){
+ tree[0].total += tree[it->second].total;
+ for (set<string>::iterator it2 = groups.begin(); it2 != groups.end(); it2++) {
+ map<string, int>:: iterator itGroups = tree[it->second].groupCount.find(*it2);
+ if (itGroups != tree[it->second].groupCount.end()) {
+ tree[0].groupCount[*it2] += itGroups->second;
+ }
+ }
+ }
+
+
+ //print root
+ out << tree[0].level << "\t" << tree[0].rank << "\t" << tree[0].name << "\t" << tree[0].children.size() << "\t" << tree[0].total << "\t";
+
+ for (set<string>::iterator it = groups.begin(); it != groups.end(); it++) {
+ map<string, int>:: iterator itGroups = tree[0].groupCount.find(*it);
+ int num = 0;
+ if (itGroups != tree[0].groupCount.end()) { num = itGroups->second; }
+ out << num << '\t';
+ }
+ out << endl;
+
+ //print rest
+ print(0, out, tree, groups);
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MergeTaxSummaryCommand", "print");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+int MergeTaxSummaryCommand::print(int i, ofstream& out, vector<rawTaxNode>& tree, set<string> groups){
+ try {
+ map<string,int>::iterator it;
+ for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
+
+ //print root
+ out << tree[it->second].level << "\t" << tree[it->second].rank << "\t" << tree[it->second].name << "\t" << tree[it->second].children.size() << "\t" << tree[it->second].total << "\t";
+
+ for (set<string>::iterator it2 = groups.begin(); it2 != groups.end(); it2++) {
+ map<string, int>:: iterator itGroups = tree[it->second].groupCount.find(*it2);
+ int num = 0;
+ if (itGroups != tree[it->second].groupCount.end()) { num = itGroups->second; }
+ out << num << '\t';
+ }
+ out << endl;
+
+ print(it->second, out, tree, groups);
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MergeTaxSummaryCommand", "print");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+
--- /dev/null
+//
+// mergetaxsummarycommand.h
+// Mothur
+//
+// Created by Sarah Westcott on 2/13/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_mergetaxsummarycommand_h
+#define Mothur_mergetaxsummarycommand_h
+
+#include "mothur.h"
+#include "command.hpp"
+#include "phylosummary.h"
+
+class MergeTaxSummaryCommand : public Command {
+public:
+ MergeTaxSummaryCommand(string);
+ MergeTaxSummaryCommand();
+ ~MergeTaxSummaryCommand(){}
+
+ vector<string> setParameters();
+ string getCommandName() { return "merge.taxsummary"; }
+ string getCommandCategory() { return "Phylotype Analysis"; }
+ string getHelpString();
+ string getOutputPattern(string){ return ""; }
+ string getCitation() { return "http://www.mothur.org/wiki/Merge.taxsummary"; }
+ string getDescription() { return "merges tax summary files creating one file"; }
+
+
+ int execute();
+ void help() { m->mothurOut(getHelpString()); }
+
+private:
+ vector<string> fileNames, outputNames;
+ string outputFileName;
+ int numInputFiles;
+ bool abort;
+
+ int addTaxToTree(vector<rawTaxNode>&, int, int, string, int, map<string, int>);
+ int assignRank(int index, vector<rawTaxNode>& tree);
+ int print(ofstream& out, vector<rawTaxNode>& tree, set<string> groups);
+ int print(int, ofstream& out, vector<rawTaxNode>& tree, set<string> groups);
+};
+
+
+#endif
return (left.index > right.index);
}
//********************************************************************************************************************
-//sorts highest to lowest
inline bool compareSpearman(spearmanRank left, spearmanRank right){
return (left.score < right.score);
}
}
return false;
}
-//********************************************************************************************************************
-//sorts lowest to highest
-inline bool compareSpearmanReverse(spearmanRank left, spearmanRank right){
- return (left.score < right.score);
-}
+
/************************************************************/
//sorts lowest to highest
inline bool compareDistLinePairs(distlinePair left, distlinePair right){
if (object == "cluster"){
mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. There are two common causes for this, file size and format.\n\nFile Size:\nThe cluster command loads your distance matrix into RAM, and your distance file is most likely too large to fit in RAM. There are two options to help with this. The first is to use a cutoff. By using a cutoff mothur will only load distances that are below the cutoff. If that is still not enough, there is a command called cluster.split, http://www.mothur.org/wiki/cluster.split which divides the distance matrix, and clusters the smaller pieces separately. You may also be able to reduce the size of the original distance matrix by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. \n\nWrong Format:\nThis error can be caused by trying to read a column formatted distance matrix using the phylip parameter. By default, the dist.seqs command generates a column formatted distance matrix. To make a phylip formatted matrix set the dist.seqs command parameter output to lt. \n\nIf you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
}else {
- mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. This is most commonly caused by trying to process a dataset too large, or a file format issue. If you are running our 32bit version, your memory usage is limited to 4G. If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue. Also, you may be able to reduce the size of your dataset by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
+ mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. This is most commonly caused by trying to process a dataset too large, using multiple processors, or a file format issue. If you are running our 32bit version, your memory usage is limited to 4G. If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue. If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Also, you may be able to reduce the size of your dataset by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
}
}
}
}
for (int i = index; i >= 0; i--) {
- newFileName = dirs[i] + "\\\\" + newFileName;
+ newFileName = dirs[i] + "\\" + newFileName;
}
return newFileName;
string individual = "";
int estimLength = estim.size();
bool prevEscape = false;
+ /*
for(int i=0;i<estimLength;i++){
if(prevEscape){
individual += estim[i];
}
}
}
- container.insert(individual);
+ */
+
+ for(int i=0;i<estimLength;i++){
+ if(estim[i] == '-'){
+ if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
+ else {
+ container.insert(individual);
+ individual = "";
+ }
+ }else if(estim[i] == '\\'){
+ if (i < estimLength-1) {
+ if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
+ else { individual += estim[i]; prevEscape = false; } //if no, add in
+ }else { individual += estim[i]; }
+ }else {
+ individual += estim[i];
+ }
+ }
+ container.insert(individual);
}
catch(exception& e) {
int lineNum;
int estimLength = estim.size();
bool prevEscape = false;
+ /*
for(int i=0;i<estimLength;i++){
if(prevEscape){
individual += estim[i];
prevEscape = false;
}
}
- }
+ }*/
+
+ for(int i=0;i<estimLength;i++){
+ if(estim[i] == '-'){
+ if (prevEscape) { individual += estim[i]; prevEscape = false; } //add in dash because it was escaped.
+ else {
+ convert(individual, lineNum); //convert the string to int
+ container.insert(lineNum);
+ individual = "";
+ }
+ }else if(estim[i] == '\\'){
+ if (i < estimLength-1) {
+ if (estim[i+1] == '-') { prevEscape=true; } //are you a backslash before a dash, if yes ignore
+ else { individual += estim[i]; prevEscape = false; } //if no, add in
+ }else { individual += estim[i]; }
+ }else {
+ individual += estim[i];
+ }
+ }
+
convert(individual, lineNum); //convert the string to int
container.insert(lineNum);
}
// Copyright (c) 2012 Schloss Lab. All rights reserved.
//
-
+//
#include "newcommandtemplate.h"
//**********************************************************************************************************************
it->second = m->getDesignFile();
}else if (it->first == "sff") {
it->second = m->getSFFFile();
+ }else if (it->first == "flow") {
+ it->second = m->getFlowFile();
}else if (it->first == "oligos") {
it->second = m->getOligosFile();
}else if (it->first == "accnos") {
string name = m->getline(in); m->gobble(in);
if (name == "") { m->mothurOut("[ERROR]: Blank fasta name."); m->mothurOutEndLine(); m->control_pressed = true; break; }
else if (name[0] != '@') { m->mothurOut("[ERROR]: reading " + name + " expected a name with @ as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
- else { name = name.substr(1); }
+ else {
+ name = name.substr(1);
+ for (int i = 0; i < name.length(); i++) { if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; } }
+ }
//read sequence
string sequence = m->getline(in); m->gobble(in);
string name2 = m->getline(in); m->gobble(in);
if (name2 == "") { m->mothurOut("[ERROR]: Blank quality name."); m->mothurOutEndLine(); m->control_pressed = true; break; }
else if (name2[0] != '+') { m->mothurOut("[ERROR]: reading " + name2 + " expected a name with + as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
- else { name2 = name2.substr(1); }
+ else {
+ name2 = name2.substr(1);
+ for (int i = 0; i < name2.length(); i++) { if (name2[i] == ':') { name2[i] = '_'; m->changedSeqNames = true; } }
+ }
//read quality scores
string quality = m->getline(in); m->gobble(in);
bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
bool abort, keepprimer, keepdots;
string fastafile, oligosfile, taxfile, groupfile, namefile, countfile, ecolifile, outputDir, nomatch;
- int start, end, processors, length;
+ int start, end, processors, length, pdiffs;
vector<string> revPrimer, outputNames;
- vector<string> primers;
+ map<string, int> primers;
int writeAccnos(set<string>);
int readName(set<string>&);
bool readEcoli();
int driverPcr(string, string, string, set<string>&, linePair);
int createProcesses(string, string, string, set<string>&);
- bool findForward(Sequence&, int&, int&);
- bool findReverse(Sequence&, int&, int&);
bool isAligned(string, map<int, int>&);
- bool compareDNASeq(string, string);
string reverseOligo(string);
};
string goodFasta, badFasta, oligosfile, ecolifile, nomatch;
unsigned long long fstart;
unsigned long long fend;
- int count, start, end, length;
+ int count, start, end, length, pdiffs;
MothurOut* m;
- vector<string> primers;
+ map<string, int> primers;
vector<string> revPrimer;
set<string> badSeqNames;
bool keepprimer, keepdots;
pcrData(){}
- pcrData(string f, string gf, string bfn, MothurOut* mout, string ol, string ec, vector<string> pr, vector<string> rpr, string nm, bool kp, bool kd, int st, int en, int l, unsigned long long fst, unsigned long long fen) {
+ pcrData(string f, string gf, string bfn, MothurOut* mout, string ol, string ec, map<string, int> pr, vector<string> rpr, string nm, bool kp, bool kd, int st, int en, int l, int pd, unsigned long long fst, unsigned long long fen) {
filename = f;
goodFasta = gf;
badFasta = bfn;
length = l;
fstart = fst;
fend = fen;
+ pdiffs = pd;
count = 0;
}
};
}
set<int> lengths;
+ //pdiffs, bdiffs, primers, barcodes, revPrimers
+ map<string, int> faked;
+ TrimOligos trim(pDataArray->pdiffs, 0, pDataArray->primers, faked, pDataArray->revPrimer);
for(int i = 0; i < pDataArray->fend; i++){ //end is the number of sequences to process
pDataArray->count++;
//process primers
if (pDataArray->primers.size() != 0) {
int primerStart = 0; int primerEnd = 0;
- //bool good = findForward(currSeq, primerStart, primerEnd);
- ///////////////////////////////////////////////////////////////
- bool good = false;
- string rawSequence = currSeq.getUnaligned();
-
- for(int j=0;j<pDataArray->primers.size();j++){
- string oligo = pDataArray->primers[j];
-
- if (pDataArray->m->control_pressed) { primerStart = 0; primerEnd = 0; good = false; break; }
-
- if(rawSequence.length() < oligo.length()) { break; }
-
- //search for primer
- int olength = oligo.length();
- for (int l = 0; l < rawSequence.length()-olength; l++){
- if (pDataArray->m->control_pressed) { primerStart = 0; primerEnd = 0; good = false; break; }
- string rawChunk = rawSequence.substr(l, olength);
- //compareDNASeq(oligo, rawChunk)
- ////////////////////////////////////////////////////////
- bool success = 1;
- for(int k=0;k<olength;k++){
-
- if(oligo[k] != rawChunk[k]){
- if(oligo[k] == 'A' || oligo[k] == 'T' || oligo[k] == 'G' || oligo[k] == 'C') { success = 0; }
- else if((oligo[k] == 'N' || oligo[k] == 'I') && (rawChunk[k] == 'N')) { success = 0; }
- else if(oligo[k] == 'R' && (rawChunk[k] != 'A' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'Y' && (rawChunk[k] != 'C' && rawChunk[k] != 'T')) { success = 0; }
- else if(oligo[k] == 'M' && (rawChunk[k] != 'C' && rawChunk[k] != 'A')) { success = 0; }
- else if(oligo[k] == 'K' && (rawChunk[k] != 'T' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'W' && (rawChunk[k] != 'T' && rawChunk[k] != 'A')) { success = 0; }
- else if(oligo[k] == 'S' && (rawChunk[k] != 'C' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'B' && (rawChunk[k] != 'C' && rawChunk[k] != 'T' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'D' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'H' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'C')) { success = 0; }
- else if(oligo[k] == 'V' && (rawChunk[k] != 'A' && rawChunk[k] != 'C' && rawChunk[k] != 'G')) { success = 0; }
-
- if(success == 0) { break; }
- }
- else{
- success = 1;
- }
- }
-
- ////////////////////////////////////////////////////////////////////
- if(success) {
- primerStart = j;
- primerEnd = primerStart + olength;
- good = true; break;
- }
- }
- if (good) { break; }
- }
-
- if (!good) { primerStart = 0; primerEnd = 0; }
- ///////////////////////////////////////////////////////////////
-
+ bool good = trim.findForward(currSeq, primerStart, primerEnd);
if(!good){ if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "f"; }
else{
if (pDataArray->keepdots) { currSeq.filterToPos(mapAligned[primerStart]); }
else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart])); }
}
+ ///////////////////////////////////////////////////////////////
+ mapAligned.clear();
+ string seq = currSeq.getAligned();
+ int countBases = 0;
+ for (int k = 0; k < seq.length(); k++) {
+ if (!isalpha(seq[k])) { ; }
+ else { mapAligned[countBases] = k; countBases++; }
+ }
+ ///////////////////////////////////////////////////////////////
}else {
if (!pDataArray->keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); }
else { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); }
//process reverse primers
if (pDataArray->revPrimer.size() != 0) {
int primerStart = 0; int primerEnd = 0;
- bool good = false;
- //findReverse(currSeq, primerStart, primerEnd);
- ///////////////////////////////////////////////////////////////
- string rawSequence = currSeq.getUnaligned();
-
- for(int j=0;j<pDataArray->revPrimer.size();j++){
- string oligo = pDataArray->revPrimer[j];
- if (pDataArray->m->control_pressed) { primerStart = 0; primerEnd = 0; good = false; break; }
- if(rawSequence.length() < oligo.length()) { break; }
-
- //search for primer
- int olength = oligo.length();
- for (int l = rawSequence.length()-olength; l >= 0; l--){
-
- string rawChunk = rawSequence.substr(l, olength);
- //compareDNASeq(oligo, rawChunk)
- ////////////////////////////////////////////////////////
- bool success = 1;
- for(int k=0;k<olength;k++){
-
- if(oligo[k] != rawChunk[k]){
- if(oligo[k] == 'A' || oligo[k] == 'T' || oligo[k] == 'G' || oligo[k] == 'C') { success = 0; }
- else if((oligo[k] == 'N' || oligo[k] == 'I') && (rawChunk[k] == 'N')) { success = 0; }
- else if(oligo[k] == 'R' && (rawChunk[k] != 'A' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'Y' && (rawChunk[k] != 'C' && rawChunk[k] != 'T')) { success = 0; }
- else if(oligo[k] == 'M' && (rawChunk[k] != 'C' && rawChunk[k] != 'A')) { success = 0; }
- else if(oligo[k] == 'K' && (rawChunk[k] != 'T' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'W' && (rawChunk[k] != 'T' && rawChunk[k] != 'A')) { success = 0; }
- else if(oligo[k] == 'S' && (rawChunk[k] != 'C' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'B' && (rawChunk[k] != 'C' && rawChunk[k] != 'T' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'D' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'G')) { success = 0; }
- else if(oligo[k] == 'H' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'C')) { success = 0; }
- else if(oligo[k] == 'V' && (rawChunk[k] != 'A' && rawChunk[k] != 'C' && rawChunk[k] != 'G')) { success = 0; }
-
- if(success == 0) { break; }
- }
- else{
- success = 1;
- }
- }
-
- ////////////////////////////////////////////////////////////////////
- if(success) {
- primerStart = j;
- primerEnd = primerStart + olength;
- good = true; break;
- }
- }
- if (good) { break; }
- }
-
- if (!good) { primerStart = 0; primerEnd = 0; }
-
- ///////////////////////////////////////////////////////////////
+ bool good = trim.findReverse(currSeq, primerStart, primerEnd);
+
if(!good){ if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "r"; }
else{
//are you aligned
CommandParameter pstart("start", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pstart);
CommandParameter pend("end", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pend);
CommandParameter pnomatch("nomatch", "Multiple", "reject-keep", "reject", "", "", "","",false,false); parameters.push_back(pnomatch);
+ CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
+
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
CommandParameter pkeepprimer("keepprimer", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepprimer);
CommandParameter pkeepdots("keepdots", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pkeepdots);
try {
string helpString = "";
helpString += "The pcr.seqs command reads a fasta file.\n";
- helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, count, taxonomy, ecoli, start, end, nomatch, processors, keepprimer and keepdots.\n";
+ helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, count, taxonomy, ecoli, start, end, nomatch, pdiffs, processors, keepprimer and keepdots.\n";
helpString += "The ecoli parameter is used to provide a fasta file containing a single reference sequence (e.g. for e. coli) this must be aligned. Mothur will trim to the start and end positions of the reference sequence.\n";
helpString += "The start parameter allows you to provide a starting position to trim to.\n";
helpString += "The end parameter allows you to provide a ending position to trim from.\n";
helpString += "The processors parameter allows you to use multiple processors.\n";
helpString += "The keepprimer parameter allows you to keep the primer, default=false.\n";
helpString += "The keepdots parameter allows you to keep the leading and trailing .'s, default=true.\n";
+ helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Pcr.seqs .\n";
return helpString;
temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
m->setProcessors(temp);
m->mothurConvert(temp, processors);
+
+ temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
+ m->mothurConvert(temp, pdiffs);
nomatch = validParameter.validFile(parameters, "nomatch", false); if (nomatch == "not found") { nomatch = "reject"; }
length = 0;
- if(oligosfile != ""){ readOligos(); } if (m->control_pressed) { return 0; }
+ if(oligosfile != ""){ readOligos(); if (m->debug) { m->mothurOut("[DEBUG]: read oligos file. numprimers = " + toString(primers.size()) + ", revprimers = " + toString(revPrimer.size()) + ".\n"); } } if (m->control_pressed) { return 0; }
if(ecolifile != "") { readEcoli(); } if (m->control_pressed) { return 0; }
vector<unsigned long long> positions;
if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
// Allocate memory for thread data.
- pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, lines[i].start, lines[i].end);
+ pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, pdiffs, lines[i].start, lines[i].end);
pDataArray.push_back(tempPcr);
//default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
int count = 0;
set<int> lengths;
+ //pdiffs, bdiffs, primers, barcodes, revPrimers
+ map<string, int> faked;
+ TrimOligos trim(pdiffs, 0, primers, faked, revPrimer);
+
while (!done) {
if (m->control_pressed) { break; }
string trashCode = "";
if (currSeq.getName() != "") {
+ if (m->debug) { m->mothurOut("[DEBUG]: seq name = " + currSeq.getName() + ".\n"); }
+
bool goodSeq = true;
if (oligosfile != "") {
map<int, int> mapAligned;
//process primers
if (primers.size() != 0) {
int primerStart = 0; int primerEnd = 0;
- bool good = findForward(currSeq, primerStart, primerEnd);
+ bool good = trim.findForward(currSeq, primerStart, primerEnd);
if(!good){ if (nomatch == "reject") { goodSeq = false; } trashCode += "f"; }
else{
}
else {
if (keepdots) { currSeq.filterToPos(mapAligned[primerStart]); }
- else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart])); }
+ else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart])); }
}
+ isAligned(currSeq.getAligned(), mapAligned);
}else {
if (!keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); }
else { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); }
//process reverse primers
if (revPrimer.size() != 0) {
int primerStart = 0; int primerEnd = 0;
- bool good = findReverse(currSeq, primerStart, primerEnd);
+ bool good = trim.findReverse(currSeq, primerStart, primerEnd);
if(!good){ if (nomatch == "reject") { goodSeq = false; } trashCode += "r"; }
else{
- //are you aligned
+ //are you aligned
if (aligned) {
if (!keepprimer) {
if (keepdots) { currSeq.filterFromPos(mapAligned[primerStart]); }
}
}
//********************************************************************/
-bool PcrSeqsCommand::findForward(Sequence& seq, int& primerStart, int& primerEnd){
- try {
-
- string rawSequence = seq.getUnaligned();
-
- for(int j=0;j<primers.size();j++){
- string oligo = primers[j];
-
- if(rawSequence.length() < oligo.length()) { break; }
-
- //search for primer
- int olength = oligo.length();
- for (int j = 0; j < rawSequence.length()-olength; j++){
- if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
- string rawChunk = rawSequence.substr(j, olength);
- if(compareDNASeq(oligo, rawChunk)) {
- primerStart = j;
- primerEnd = primerStart + olength;
- return true;
- }
-
- }
- }
-
- primerStart = 0; primerEnd = 0;
- return false;
-
- }
- catch(exception& e) {
- m->errorOut(e, "TrimOligos", "stripForward");
- exit(1);
- }
-}
-//******************************************************************/
-bool PcrSeqsCommand::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
- try {
-
- string rawSequence = seq.getUnaligned();
-
- for(int i=0;i<revPrimer.size();i++){
- string oligo = revPrimer[i];
- if(rawSequence.length() < oligo.length()) { break; }
-
- //search for primer
- int olength = oligo.length();
- for (int j = rawSequence.length()-olength; j >= 0; j--){
- if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
- string rawChunk = rawSequence.substr(j, olength);
-
- if(compareDNASeq(oligo, rawChunk)) {
- primerStart = j;
- primerEnd = primerStart + olength;
- return true;
- }
-
- }
- }
-
- primerStart = 0; primerEnd = 0;
- return false;
- }
- catch(exception& e) {
- m->errorOut(e, "PcrSeqsCommand", "findReverse");
- exit(1);
- }
-}
-//********************************************************************/
bool PcrSeqsCommand::isAligned(string seq, map<int, int>& aligned){
try {
+ aligned.clear();
bool isAligned = false;
int countBases = 0;
m->openInputFile(oligosfile, inOligos);
string type, oligo, group;
+ int primerCount = 0;
while(!inOligos.eof()){
if (c == 10 || c == 13){ break; }
else if (c == 32 || c == 9){;} //space or tab
}
- primers.push_back(oligo);
+ primers[oligo] = primerCount; primerCount++;
}else if(type == "REVERSE"){
string oligoRC = reverseOligo(oligo);
revPrimer.push_back(oligoRC);
exit(1);
}
-}
-//******************************************************************/
-bool PcrSeqsCommand::compareDNASeq(string oligo, string seq){
- try {
- bool success = 1;
- int length = oligo.length();
-
- for(int i=0;i<length;i++){
-
- if(oligo[i] != seq[i]){
- if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C') { success = 0; }
- else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N')) { success = 0; }
- else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G')) { success = 0; }
- else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T')) { success = 0; }
- else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A')) { success = 0; }
- else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
- else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A')) { success = 0; }
- else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
- else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
- else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G')) { success = 0; }
- else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C')) { success = 0; }
- else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G')) { success = 0; }
-
- if(success == 0) { break; }
- }
- else{
- success = 1;
- }
- }
-
- return success;
- }
- catch(exception& e) {
- m->errorOut(e, "PcrSeqsCommand", "compareDNASeq");
- exit(1);
- }
-
}
//***************************************************************************************************************
int PcrSeqsCommand::readName(set<string>& names){
int score;
seqName = getSequenceName(qFile);
+ if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "'\n."); }
+
if (!m->control_pressed) {
string qScoreString = m->getline(qFile);
- //cout << qScoreString << endl;
+
+ if (m->debug) { m->mothurOut("[DEBUG]: scores = '" + qScoreString + "'\n."); }
+
while(qFile.peek() != '>' && qFile.peek() != EOF){
if (m->control_pressed) { break; }
string temp = m->getline(qFile);
- //cout << temp << endl;
+ if (m->debug) { m->mothurOut("[DEBUG]: scores = '" + temp + "'\n."); }
qScoreString += ' ' + temp;
}
//cout << "done reading " << endl;
string temp;
qScoreStringStream >> temp; m->gobble(qScoreStringStream);
+ if (m->debug) { m->mothurOut("[DEBUG]: score " + toString(qScores.size()) + " = '" + temp + "'\n."); }
+
//check temp to make sure its a number
if (!m->isContainingOnlyDigits(temp)) { m->mothurOut("[ERROR]: In sequence " + seqName + "'s quality scores, expected a number and got " + temp + ", setting score to 0."); m->mothurOutEndLine(); temp = "0"; }
convert(temp, score);
string sname = ""; nameStream >> sname;
sname = sname.substr(1);
+
+ for (int i = 0; i < sname.length(); i++) {
+ if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; }
+ }
map<string, int>::iterator it = firstSeqNames.find(sname);
istringstream nameStream(input);
string sname = ""; nameStream >> sname;
+ for (int i = 0; i < sname.length(); i++) {
+ if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; }
+ }
+
map<string, int>::iterator it = firstSeqNamesReport.find(sname);
if(it != firstSeqNamesReport.end()) { //this is the start of a new chunk
if (trim) {
if(header.clipQualRight < header.clipQualLeft){
- seq = "NNNN";
+ if (header.clipQualRight == 0) { //don't trim right
+ seq = seq.substr(header.clipQualLeft-1);
+ }else {
+ seq = "NNNN";
+ }
}
else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));
if (trim) {
if(header.clipQualRight < header.clipQualLeft){
- seq = "NNNN";
+ if (header.clipQualRight == 0) { //don't trim right
+ seq = seq.substr(header.clipQualLeft-1);
+ }else {
+ seq = "NNNN";
+ }
}
else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));
if (trim) {
if(header.clipQualRight < header.clipQualLeft){
- out << ">" << header.name << " xy=" << header.xy << endl;
- out << "0\t0\t0\t0";
+ if (header.clipQualRight == 0) { //don't trim right
+ out << ">" << header.name << " xy=" << header.xy << " length=" << (read.qualScores.size()-header.clipQualLeft) << endl;
+ for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) { out << read.qualScores[i] << '\t'; }
+ }else {
+ out << ">" << header.name << " xy=" << header.xy << endl;
+ out << "0\t0\t0\t0";
+ }
}
else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;
//**********************************************************************************************************************
int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) {
try {
+ if (header.clipQualRight == 0) { header.clipQualRight = read.flowgram.size(); }
+
if(header.clipQualRight > header.clipQualLeft){
int rightIndex = 0;
CommandParameter plarge("large", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(plarge);
CommandParameter psigma("sigma", "Number", "", "60", "", "", "","",false,false); parameters.push_back(psigma);
CommandParameter pmindelta("mindelta", "Number", "", "0.000001", "", "", "","",false,false); parameters.push_back(pmindelta);
- CommandParameter porder("order", "String", "", "", "", "", "","",false,false); parameters.push_back(porder);
- CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+ CommandParameter porder("order", "Multiple", "A-B", "A", "", "", "","",false,false, true); parameters.push_back(porder); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
vector<string> myArray;
try {
string helpString = "";
helpString += "The shhh.flows command reads a file containing flowgrams and creates a file of corrected sequences.\n";
+ helpString += "The shhh.flows command parameters are flow, file, lookup, cutoff, processors, large, maxiter, sigma, mindelta and order.\n";
+ helpString += "The flow parameter is used to input your flow file.\n";
+ helpString += "The file parameter is used to input the *flow.files file created by trim.flows.\n";
+ helpString += "The lookup parameter is used specify the lookup file you would like to use. http://www.mothur.org/wiki/Lookup_files.\n";
+ helpString += "The order parameter options are A or B. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC.\n";
return helpString;
}
catch(exception& e) {
temp = validParameter.validFile(parameters, "sigma", false);if (temp == "not found") { temp = "60"; }
m->mothurConvert(temp, sigma);
- flowOrder = validParameter.validFile(parameters, "order", false);
- if (flowOrder == "not found"){ flowOrder = "TACG"; }
- else if(flowOrder.length() != 4){
- m->mothurOut("The value of the order option must be four bases long\n");
- }
+ temp = validParameter.validFile(parameters, "order", false); if (temp == "not found"){ temp = "A"; }
+ if (temp.length() > 1) { m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A or B. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC.\n"); abort=true;
+ }
+ else {
+ if (toupper(temp[0]) == 'A') { flowOrder = "TACG"; }
+ else if(toupper(temp[0]) == 'B'){
+ flowOrder = "TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC"; }
+ else {
+ m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A or B. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC.\n"); abort=true;
+ }
+ }
+
}
#ifdef USE_MPI
for(int j=0;j<numFlowCells;j++){
- char base = flowOrder[j % 4];
+ char base = flowOrder[j % flowOrder.length()];
for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
newSeq += base;
}
otuCountsFile << "ideal\t";
for(int j=8;j<numFlowCells;j++){
- char base = bases[j % 4];
+ char base = bases[j % bases.length()];
for(int s=0;s<uniqueFlowgrams[index * numFlowCells + j];s++){
otuCountsFile << base;
}
string newSeq = "";
for(int k=0;k<lengths[sequence];k++){
- char base = bases[k % 4];
+ char base = bases[k % bases.length()];
int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
for(int s=0;s<freq;s++){
if (numFiles < processors) { processors = numFiles; }
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
- if (processors == 1) { driver(flowFileVector, compositeFASTAFileName, compositeNamesFileName, 0, flowFileVector.size()); }
+ if (processors == 1) { driver(flowFileVector, compositeFASTAFileName, compositeNamesFileName); }
else { createProcesses(flowFileVector); } //each processor processes one file
#else
- driver(flowFileVector, compositeFASTAFileName, compositeNamesFileName, 0, flowFileVector.size());
+ driver(flowFileVector, compositeFASTAFileName, compositeNamesFileName);
#endif
if(compositeFASTAFileName != ""){
}
}
#endif
+//********************************************************************************************************************
+//sorts biggest to smallest
+inline bool compareFileSizes(string left, string right){
+
+ FILE * pFile;
+ long leftsize = 0;
+
+ //get num bytes in file
+ string filename = left;
+ pFile = fopen (filename.c_str(),"rb");
+ string error = "Error opening " + filename;
+ if (pFile==NULL) perror (error.c_str());
+ else{
+ fseek (pFile, 0, SEEK_END);
+ leftsize=ftell (pFile);
+ fclose (pFile);
+ }
+
+ FILE * pFile2;
+ long rightsize = 0;
+
+ //get num bytes in file
+ filename = right;
+ pFile2 = fopen (filename.c_str(),"rb");
+ error = "Error opening " + filename;
+ if (pFile2==NULL) perror (error.c_str());
+ else{
+ fseek (pFile2, 0, SEEK_END);
+ rightsize=ftell (pFile2);
+ fclose (pFile2);
+ }
+
+ return (leftsize > rightsize);
+}
/**************************************************************************************************/
int ShhherCommand::createProcesses(vector<string> filenames){
//sanity check
if (filenames.size() < processors) { processors = filenames.size(); }
-
+
+ //sort file names by size to divide load better
+ sort(filenames.begin(), filenames.end(), compareFileSizes);
+
+ vector < vector <string> > dividedFiles; //dividedFiles[1] = vector of filenames for process 1...
+ dividedFiles.resize(processors);
+
+ //for each file, figure out which process will complete it
+ //want to divide the load intelligently so the big files are spread between processes
+ for (int i = 0; i < filenames.size(); i++) {
+ int processToAssign = (i+1) % processors;
+ if (processToAssign == 0) { processToAssign = processors; }
+
+ dividedFiles[(processToAssign-1)].push_back(filenames[i]);
+ }
+
+ //now lets reverse the order of ever other process, so we balance big files running with little ones
+ for (int i = 0; i < processors; i++) {
+ int remainder = ((i+1) % processors);
+ if (remainder) { reverse(dividedFiles[i].begin(), dividedFiles[i].end()); }
+ }
+
+
//divide the groups between the processors
- vector<linePair> lines;
+ /*vector<linePair> lines;
vector<int> numFilesToComplete;
int numFilesPerProcessor = filenames.size() / processors;
for (int i = 0; i < processors; i++) {
if(i == (processors - 1)){ endIndex = filenames.size(); }
lines.push_back(linePair(startIndex, endIndex));
numFilesToComplete.push_back((endIndex-startIndex));
- }
+ }*/
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- num = driver(filenames, compositeFASTAFileName + toString(getpid()) + ".temp", compositeNamesFileName + toString(getpid()) + ".temp", lines[process].start, lines[process].end);
+ num = driver(dividedFiles[process], compositeFASTAFileName + toString(getpid()) + ".temp", compositeNamesFileName + toString(getpid()) + ".temp");
//pass numSeqs to parent
ofstream out;
}
//do my part
- driver(filenames, compositeFASTAFileName, compositeNamesFileName, lines[0].start, lines[0].end);
+ driver(dividedFiles[0], compositeFASTAFileName, compositeNamesFileName);
//force parent to wait until all the processes are done
for (int i=0;i<processIDS.size();i++) {
if (!in.eof()) {
int tempNum = 0;
in >> tempNum;
- if (tempNum != numFilesToComplete[i+1]) {
- m->mothurOut("[ERROR]: main process expected " + toString(processIDS[i]) + " to complete " + toString(numFilesToComplete[i+1]) + " files, and it only reported completing " + toString(tempNum) + ". This will cause file mismatches. The flow files may be too large to process with multiple processors. \n");
+ if (tempNum != dividedFiles[i+1].size()) {
+ m->mothurOut("[ERROR]: main process expected " + toString(processIDS[i]) + " to complete " + toString(dividedFiles[i+1].size()) + " files, and it only reported completing " + toString(tempNum) + ". This will cause file mismatches. The flow files may be too large to process with multiple processors. \n");
}
}
in.close(); m->mothurRemove(tempFile);
}
/**************************************************************************************************/
-int ShhherCommand::driver(vector<string> filenames, string thisCompositeFASTAFileName, string thisCompositeNamesFileName, int start, int end){
+int ShhherCommand::driver(vector<string> filenames, string thisCompositeFASTAFileName, string thisCompositeNamesFileName){
try {
int numCompleted = 0;
- for(int i=start;i<end;i++){
+ for(int i=0;i<filenames.size();i++){
if (m->control_pressed) { break; }
for(int j=0;j<numFlowCells;j++){
- char base = flowOrder[j % 4];
+ char base = flowOrder[j % flowOrder.length()];
for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
newSeq += base;
}
otuCountsFile << "ideal\t";
for(int j=8;j<numFlowCells;j++){
- char base = bases[j % 4];
+ char base = bases[j % bases.length()];
for(int s=0;s<uniqueFlowgrams[index * numFlowCells + j];s++){
otuCountsFile << base;
}
string newSeq = "";
for(int k=0;k<lengths[sequence];k++){
- char base = bases[k % 4];
+ char base = bases[k % bases.length()];
int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
for(int s=0;s<freq;s++){
vector<string> flowFileVector;
vector<string> parseFlowFiles(string);
- int driver(vector<string>, string, string, int, int);
+ int driver(vector<string>, string, string);
int createProcesses(vector<string>);
int getFlowData(string, vector<string>&, vector<int>&, vector<short>&, map<string, int>&, int&);
int getUniques(int, int, vector<short>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<double>&, vector<short>&);
}else {
m->clearGroups();
Groups.clear();
+ m->Treenames.clear();
vector<SharedRAbundVector*> temp;
for (int i = 0; i < lookup.size(); i++) {
if (lookup[i]->getNumSeqs() < subsampleSize) {
}else {
Groups.push_back(lookup[i]->getGroup());
temp.push_back(lookup[i]);
+ m->Treenames.push_back(lookup[i]->getGroup());
}
}
lookup = temp;
for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); }
}
}
-
+
+ if (m->debug) { m->mothurOut("[DEBUG]: done with iters.\n"); }
+
if (iters != 1) {
//we need to find the average distance and standard deviation for each groups distance
vector< vector<seqDist> > calcAverages = m->getAverages(calcDistsTotals);
+ if (m->debug) { m->mothurOut("[DEBUG]: found averages.\n"); }
+
//create average tree for each calc
for (int i = 0; i < calcDists.size(); i++) {
vector< vector<double> > matrix; //square matrix to represent the distance
if (newTree != NULL) { writeTree(outputFile, newTree); }
}
+ if (m->debug) { m->mothurOut("[DEBUG]: done averages trees.\n"); }
+
//create all trees for each calc and find their consensus tree
for (int i = 0; i < calcDists.size(); i++) {
if (m->control_pressed) { break; }
int row = calcDistsTotals[myIter][i][j].seq1;
int column = calcDistsTotals[myIter][i][j].seq2;
double dist = calcDistsTotals[myIter][i][j].dist;
-
+
matrix[row][column] = dist;
matrix[column][row] = dist;
}
outAll.close();
if (m->control_pressed) { for (int k = 0; k < trees.size(); k++) { delete trees[k]; } }
+ if (m->debug) { m->mothurOut("[DEBUG]: done all trees.\n"); }
+
Consensus consensus;
//clear old tree names if any
m->Treenames.clear(); m->Treenames = m->getGroups(); //may have changed if subsample eliminated groups
Tree* conTree = consensus.getTree(trees);
+ if (m->debug) { m->mothurOut("[DEBUG]: done cons tree.\n"); }
+
//create a new filename
variables["[tag]"] = "cons";
string conFile = getOutputFileName("tree",variables);
CommandParameter psignal("signal", "Number", "", "0.50", "", "", "","",false,false); parameters.push_back(psignal);
CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "","",false,false); parameters.push_back(pnoise);
CommandParameter pallfiles("allfiles", "Boolean", "", "t", "", "", "","",false,false); parameters.push_back(pallfiles);
- CommandParameter porder("order", "String", "", "TACG", "", "", "","",false,false); parameters.push_back(porder);
+ CommandParameter porder("order", "Multiple", "A-B", "A", "", "", "","",false,false, true); parameters.push_back(porder);
CommandParameter pfasta("fasta", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pfasta);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
m->setProcessors(temp);
m->mothurConvert(temp, processors);
- flowOrder = validParameter.validFile(parameters, "order", false);
- if (flowOrder == "not found"){ flowOrder = "TACG"; }
- else if(flowOrder.length() != 4){
- m->mothurOut("The value of the order option must be four bases long\n");
- }
-
+ temp = validParameter.validFile(parameters, "order", false); if (temp == "not found"){ temp = "A"; }
+ if (temp.length() > 1) { m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A or B. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC.\n"); abort=true;
+ }
+ else {
+ if (toupper(temp[0]) == 'A') { flowOrder = "TACG"; }
+ else if(toupper(temp[0]) == 'B'){
+ flowOrder = "TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC"; }
+ else {
+ m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A or B. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC.\n"); abort=true;
+ }
+ }
+
if(oligoFileName == "") { allFiles = 0; }
else { allFiles = 1; }
numLinkers = 0;
numSpacers = 0;
}
-
}
catch(exception& e) {
m->errorOut(e, "TrimFlowsCommand", "TrimFlowsCommand");
barcodes = br;
primers = pr;
revPrimer = r;
+
+ maxFBarcodeLength = 0;
+ for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
+ string oligo = it->first;
+ if(oligo.length() > maxFBarcodeLength){
+ maxFBarcodeLength = oligo.length();
+ }
+ }
+ maxRBarcodeLength = maxFBarcodeLength;
+
+ maxFPrimerLength = 0;
+ for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
+ string oligo = it->first;
+ if(oligo.length() > maxFPrimerLength){
+ maxFPrimerLength = oligo.length();
+ }
+ }
+ maxRPrimerLength = maxFPrimerLength;
}
catch(exception& e) {
m->errorOut(e, "TrimOligos", "TrimOligos");
}
/********************************************************************/
TrimOligos::~TrimOligos() {}
+//********************************************************************/
+bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){
+ try {
+
+ string rawSequence = seq.getUnaligned();
+
+ for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
+ string oligo = it->first;
+
+ if(rawSequence.length() < oligo.length()) { break; }
+
+ //search for primer
+ int olength = oligo.length();
+ for (int j = 0; j < rawSequence.length()-olength; j++){
+ if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
+ string rawChunk = rawSequence.substr(j, olength);
+ if(compareDNASeq(oligo, rawChunk)) {
+ primerStart = j;
+ primerEnd = primerStart + olength;
+ return true;
+ }
+
+ }
+ }
+
+ primerStart = 0; primerEnd = 0;
+ //if you don't want to allow for diffs
+ if (pdiffs == 0) { return false; }
+ else { //try aligning and see if you can find it
+
+ //can you find the barcode
+ int minDiff = 1e6;
+ int minCount = 1;
+
+ Alignment* alignment;
+ if (primers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
+ else{ alignment = NULL; }
+
+ for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
+
+ string prim = it->first;
+ //search for primer
+ int olength = prim.length();
+ if (rawSequence.length() < olength) {} //ignore primers too long for this seq
+ else{
+ for (int j = 0; j < rawSequence.length()-olength; j++){
+
+ string oligo = it->first;
+
+ if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
+
+ string rawChunk = rawSequence.substr(j, olength+pdiffs);
+
+ //use needleman to align first primer.length()+numdiffs of sequence to each barcode
+ alignment->align(oligo, rawChunk);
+ oligo = alignment->getSeqAAln();
+ string temp = alignment->getSeqBAln();
+
+ int alnLength = oligo.length();
+
+ for(int i=oligo.length()-1;i>=0;i--){
+ if(oligo[i] != '-'){ alnLength = i+1; break; }
+ }
+ oligo = oligo.substr(0,alnLength);
+ temp = temp.substr(0,alnLength);
+
+ int numDiff = countDiffs(oligo, temp);
+
+ if(numDiff < minDiff){
+ minDiff = numDiff;
+ minCount = 1;
+ primerStart = j;
+ primerEnd = primerStart + alnLength;
+ }else if(numDiff == minDiff){ minCount++; }
+ }
+ }
+ }
+
+ if (alignment != NULL) { delete alignment; }
+
+ if(minDiff > pdiffs) { primerStart = 0; primerEnd = 0; return false; } //no good matches
+ else if(minCount > 1) { primerStart = 0; primerEnd = 0; return false; } //can't tell the difference between multiple primers
+ else{ return true; }
+ }
+
+ primerStart = 0; primerEnd = 0;
+ return false;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "TrimOligos", "stripForward");
+ exit(1);
+ }
+}
+//******************************************************************/
+bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
+ try {
+
+ string rawSequence = seq.getUnaligned();
+
+ for(int i=0;i<revPrimer.size();i++){
+ string oligo = revPrimer[i];
+ if(rawSequence.length() < oligo.length()) { break; }
+
+ //search for primer
+ int olength = oligo.length();
+ for (int j = rawSequence.length()-olength; j >= 0; j--){
+ if (m->control_pressed) { primerStart = 0; primerEnd = 0; return false; }
+ string rawChunk = rawSequence.substr(j, olength);
+
+ if(compareDNASeq(oligo, rawChunk)) {
+ primerStart = j;
+ primerEnd = primerStart + olength;
+ return true;
+ }
+
+ }
+ }
+
+ primerStart = 0; primerEnd = 0;
+ return false;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PcrSeqsCommand", "findReverse");
+ exit(1);
+ }
+}
//*******************************************************************/
+
int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
try {
bool stripSpacer(Sequence&);
bool stripSpacer(Sequence&, QualityScores&);
+
+ //seq, primerStart, primerEnd
+ bool findForward(Sequence&, int&, int&);
+ bool findReverse(Sequence&, int&, int&);
+
private: