A778FE6B134CA6CA00C0BA33 /* getcommandinfocommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */; };
A77A221F139001B600B0BE70 /* deuniquetreecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */; };
A79234D713C74BF6002B08E2 /* mothurfisher.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A79234D613C74BF6002B08E2 /* mothurfisher.cpp */; };
+ A795840D13F13CD900F201D5 /* countgroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A795840C13F13CD900F201D5 /* countgroupscommand.cpp */; };
A799F5B91309A3E000AEEFA0 /* makefastqcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */; };
A7A61F2D130062E000E05B6B /* amovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7A61F2C130062E000E05B6B /* amovacommand.cpp */; };
A7E9B88112D37EC400DA6239 /* ace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B64F12D37EC300DA6239 /* ace.cpp */; };
A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deuniquetreecommand.cpp; sourceTree = "<group>"; };
A79234D513C74BF6002B08E2 /* mothurfisher.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mothurfisher.h; sourceTree = "<group>"; };
A79234D613C74BF6002B08E2 /* mothurfisher.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mothurfisher.cpp; sourceTree = "<group>"; };
+ A795840B13F13CD900F201D5 /* countgroupscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = countgroupscommand.h; sourceTree = "<group>"; };
+ A795840C13F13CD900F201D5 /* countgroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = countgroupscommand.cpp; sourceTree = "<group>"; };
A799F5B71309A3E000AEEFA0 /* makefastqcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makefastqcommand.h; sourceTree = "<group>"; };
A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makefastqcommand.cpp; sourceTree = "<group>"; };
A7A61F1A130035C800E05B6B /* LICENSE */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = LICENSE; sourceTree = "<group>"; };
A7E9B84E12D37EC400DA6239 /* structpearson.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = structpearson.h; sourceTree = "<group>"; };
A7E9B84F12D37EC400DA6239 /* subsamplecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = subsamplecommand.cpp; sourceTree = "<group>"; };
A7E9B85012D37EC400DA6239 /* subsamplecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = subsamplecommand.h; sourceTree = "<group>"; };
- A7E9B85112D37EC400DA6239 /* suffixdb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = suffixdb.cpp; sourceTree = SOURCE_ROOT; };
- A7E9B85212D37EC400DA6239 /* suffixdb.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = suffixdb.hpp; sourceTree = SOURCE_ROOT; };
- A7E9B85312D37EC400DA6239 /* suffixnodes.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = suffixnodes.cpp; sourceTree = SOURCE_ROOT; };
- A7E9B85412D37EC400DA6239 /* suffixnodes.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = suffixnodes.hpp; sourceTree = SOURCE_ROOT; };
- A7E9B85512D37EC400DA6239 /* suffixtree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = suffixtree.cpp; sourceTree = SOURCE_ROOT; };
- A7E9B85612D37EC400DA6239 /* suffixtree.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = suffixtree.hpp; sourceTree = SOURCE_ROOT; };
+ A7E9B85112D37EC400DA6239 /* suffixdb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = suffixdb.cpp; sourceTree = "<group>"; };
+ A7E9B85212D37EC400DA6239 /* suffixdb.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = suffixdb.hpp; sourceTree = "<group>"; };
+ A7E9B85312D37EC400DA6239 /* suffixnodes.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = suffixnodes.cpp; sourceTree = "<group>"; };
+ A7E9B85412D37EC400DA6239 /* suffixnodes.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = suffixnodes.hpp; sourceTree = "<group>"; };
+ A7E9B85512D37EC400DA6239 /* suffixtree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = suffixtree.cpp; sourceTree = "<group>"; };
+ A7E9B85612D37EC400DA6239 /* suffixtree.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = suffixtree.hpp; sourceTree = "<group>"; };
A7E9B85712D37EC400DA6239 /* summarycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = summarycommand.cpp; sourceTree = "<group>"; };
A7E9B85812D37EC400DA6239 /* summarycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = summarycommand.h; sourceTree = "<group>"; };
A7E9B85912D37EC400DA6239 /* summarysharedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = summarysharedcommand.cpp; sourceTree = "<group>"; };
A7E9B82412D37EC400DA6239 /* sharedutilities.h */,
A7E9B82D12D37EC400DA6239 /* singlelinkage.cpp */,
A7E9B83012D37EC400DA6239 /* slibshuff.cpp */,
- A7E9B85112D37EC400DA6239 /* suffixdb.cpp */,
A7E9B83112D37EC400DA6239 /* slibshuff.h */,
- A7E9B85212D37EC400DA6239 /* suffixdb.hpp */,
- A7E9B85312D37EC400DA6239 /* suffixnodes.cpp */,
- A7E9B85412D37EC400DA6239 /* suffixnodes.hpp */,
- A7E9B85512D37EC400DA6239 /* suffixtree.cpp */,
- A7E9B85612D37EC400DA6239 /* suffixtree.hpp */,
A7E9B87412D37EC400DA6239 /* validcalculator.cpp */,
A7E9B87512D37EC400DA6239 /* validcalculator.h */,
A7E9B87612D37EC400DA6239 /* validparameter.cpp */,
A7E9B6B712D37EC400DA6239 /* consensusseqscommand.cpp */,
A7E9B6BA12D37EC400DA6239 /* corraxescommand.h */,
A7E9B6B912D37EC400DA6239 /* corraxescommand.cpp */,
+ A795840B13F13CD900F201D5 /* countgroupscommand.h */,
+ A795840C13F13CD900F201D5 /* countgroupscommand.cpp */,
A7730EFD13967241007433A3 /* countseqscommand.h */,
A7730EFE13967241007433A3 /* countseqscommand.cpp */,
A7E9B6C412D37EC400DA6239 /* deconvolutecommand.h */,
A7E9B81412D37EC400DA6239 /* sharedsabundvector.h */,
A7E9B83912D37EC400DA6239 /* sparsematrix.cpp */,
A7E9B83A12D37EC400DA6239 /* sparsematrix.hpp */,
+ A7E9B85112D37EC400DA6239 /* suffixdb.cpp */,
+ A7E9B85212D37EC400DA6239 /* suffixdb.hpp */,
+ A7E9B85312D37EC400DA6239 /* suffixnodes.cpp */,
+ A7E9B85412D37EC400DA6239 /* suffixnodes.hpp */,
+ A7E9B85512D37EC400DA6239 /* suffixtree.cpp */,
+ A7E9B85612D37EC400DA6239 /* suffixtree.hpp */,
A7E9B85F12D37EC400DA6239 /* tree.cpp */,
A7E9B86012D37EC400DA6239 /* tree.h */,
A7E9B86412D37EC400DA6239 /* treemap.cpp */,
A73DDBBA13C4A0D1006AAE38 /* clearmemorycommand.cpp in Sources */,
A73DDC3813C4BF64006AAE38 /* mothurmetastats.cpp in Sources */,
A79234D713C74BF6002B08E2 /* mothurfisher.cpp in Sources */,
+ A795840D13F13CD900F201D5 /* countgroupscommand.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
*/
#include "aligncommand.h"
-#include "sequence.hpp"
-
-#include "gotohoverlap.hpp"
-#include "needlemanoverlap.hpp"
-#include "blastalign.hpp"
-#include "noalign.hpp"
-
-#include "nast.hpp"
-#include "nastreport.hpp"
#include "referencedb.h"
//**********************************************************************************************************************
temp = validParameter.validFile(parameters, "save", false); if (temp == "not found"){ temp = "f"; }
save = m->isTrue(temp);
+ //this is so the threads can quickly load the reference data
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ #else
+ if (processors != 1) { save = true; }
+ #endif
rdb->save = save;
if (save) { //clear out old references
rdb->clearMemory();
if (abort == false) {
for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
delete templateDB;
- delete alignment;
}
}
//**********************************************************************************************************************
if (abort == true) { if (calledHelp) { return 0; } return 2; }
templateDB = new AlignmentDB(templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch);
- int longestBase = templateDB->getLongestBase();
-
- if(align == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase); }
- else if(align == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); }
- else if(align == "blast") { alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch); }
- else if(align == "noalign") { alignment = new NoAlign(); }
- else {
- m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
- m->mothurOutEndLine();
- alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
- }
for (int s = 0; s < candidateFileNames.size(); s++) {
if (m->control_pressed) { outputTypes.clear(); return 0; }
#else
- vector<unsigned long int> positions = m->divideFile(candidateFileNames[s], processors);
- for (int i = 0; i < (positions.size()-1); i++) {
- lines.push_back(new linePair(positions[i], positions[(i+1)]));
- }
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ vector<unsigned long int> positions;
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ positions = m->divideFile(candidateFileNames[s], processors);
+ for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); }
+ #else
+ positions = m->setFilePosFasta(candidateFileNames[s], numFastaSeqs);
+
+ //figure out how many sequences you have to process
+ int numSeqsPerProcessor = numFastaSeqs / processors;
+ for (int i = 0; i < processors; i++) {
+ int startIndex = i * numSeqsPerProcessor;
+ if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; }
+ lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor));
+ }
+ #endif
+
if(processors == 1){
numFastaSeqs = driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]);
}else{
numFastaSeqs = createProcesses(alignFileName, reportFileName, accnosFileName, candidateFileNames[s]);
}
- #else
- numFastaSeqs = driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]);
- #endif
-
+
if (m->control_pressed) { m->mothurRemove(accnosFileName); m->mothurRemove(alignFileName); m->mothurRemove(reportFileName); outputTypes.clear(); return 0; }
//delete accnos file if its blank else report to user
}
//**********************************************************************************************************************
-
int AlignCommand::driver(linePair* filePos, string alignFName, string reportFName, string accnosFName, string filename){
try {
ofstream alignmentFile;
bool done = false;
int count = 0;
+
+ //moved this into driver to avoid deep copies in windows paralellized version
+ Alignment* alignment;
+ int longestBase = templateDB->getLongestBase();
+ if(align == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase); }
+ else if(align == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); }
+ else if(align == "blast") { alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch); }
+ else if(align == "noalign") { alignment = new NoAlign(); }
+ else {
+ m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
+ m->mothurOutEndLine();
+ alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
+ }
while (!done) {
- if (m->control_pressed) { return 0; }
+ if (m->control_pressed) { break; }
Sequence* candidateSeq = new Sequence(inFASTA); m->gobble(inFASTA);
report.setCandidate(candidateSeq);
Sequence temp = templateDB->findClosestSequence(candidateSeq);
Sequence* templateSeq = &temp;
-
+
float searchScore = templateDB->getSearchScore();
Nast* nast = new Nast(alignment, candidateSeq, templateSeq);
//report progress
if((count) % 100 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
+ delete alignment;
alignmentFile.close();
inFASTA.close();
accnosFile.close();
delete buf;
}
+ Alignment* alignment;
+ int longestBase = templateDB->getLongestBase();
+ if(align == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase); }
+ else if(align == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); }
+ else if(align == "blast") { alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch); }
+ else if(align == "noalign") { alignment = new NoAlign(); }
+ else {
+ m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
+ m->mothurOutEndLine();
+ alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
+ }
+
+
for(int i=0;i<num;i++){
- if (m->control_pressed) { return 0; }
+ if (m->control_pressed) { delete alignment; return 0; }
//read next sequence
int length = MPIPos[start+i+1] - MPIPos[start+i];
int AlignCommand::createProcesses(string alignFileName, string reportFileName, string accnosFName, string filename) {
try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ int num = 0;
processIDS.resize(0);
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
int process = 1;
- int num = 0;
- // processIDS.resize(0);
//loop through and create all the processes you want
while (process != processors) {
m->openOutputFile(accnosFName, out);
out.close();
}
+#else
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+ //Windows version shared memory, so be careful when passing variables through the alignData struct.
+ //Above fork() will clone, so memory is separate, but that's not the case with windows,
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ vector<alignData*> pDataArray;
+ DWORD dwThreadIdArray[processors-1];
+ HANDLE hThreadArray[processors-1];
+
+ //Create processor worker threads.
+ for( int i=0; i<processors-1; i++ ){
+ //copy templateDb
+ //AlignmentDB* tempDB = new AlignmentDB(*templateDB);
+
+ // Allocate memory for thread data.
+ string extension = "";
+ if (i != 0) { extension = toString(i) + ".temp"; }
+
+ alignData* tempalign = new alignData((alignFileName + extension), (reportFileName + extension), (accnosFName + extension), filename, align, search, kmerSize, m, lines[i]->start, lines[i]->end, flip, match, misMatch, gapOpen, gapExtend, threshold);
+ pDataArray.push_back(tempalign);
+ processIDS.push_back(i);
+
+ //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
+ //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
+ hThreadArray[i] = CreateThread(NULL, 0, MyAlignThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);
+ }
+
+ //need to check for line ending error
+ ifstream inFASTA;
+ m->openInputFile(filename, inFASTA);
+ inFASTA.seekg(lines[processors-1]->start-1);
+ char c = inFASTA.peek();
+
+ if (c == '>') { //we need to move back
+ lines[processors-1]->start--;
+ }
+
+ //using the main process as a worker saves time and memory
+ //do my part - do last piece because windows is looking for eof
+ num = driver(lines[processors-1], (alignFileName + toString(processors-1) + ".temp"), (reportFileName + toString(processors-1) + ".temp"), (accnosFName + toString(processors-1) + ".temp"), filename);
+
+ //Wait until all threads have terminated.
+ WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+
+ //Close all thread handles and free memory allocations.
+ for(int i=0; i < pDataArray.size(); i++){
+ num += pDataArray[i]->count;
+ CloseHandle(hThreadArray[i]);
+ delete pDataArray[i];
+ }
+
+ vector<string> nonBlankAccnosFiles;
+ if (!(m->isBlank(accnosFName))) { nonBlankAccnosFiles.push_back(accnosFName); }
+ else { m->mothurRemove(accnosFName); } //remove so other files can be renamed to it
+
+ for (int i = 1; i < processors; i++) {
+ appendAlignFiles((alignFileName + toString(i) + ".temp"), alignFileName);
+ m->mothurRemove((alignFileName + toString(i) + ".temp"));
+
+ appendReportFiles((reportFileName + toString(i) + ".temp"), reportFileName);
+ m->mothurRemove((reportFileName + toString(i) + ".temp"));
+
+ if (!(m->isBlank(accnosFName + toString(i) + ".temp"))) {
+ nonBlankAccnosFiles.push_back(accnosFName + toString(i) + ".temp");
+ }else { m->mothurRemove((accnosFName + toString(i) + ".temp")); }
+ }
+
+ //append accnos files
+ if (nonBlankAccnosFiles.size() != 0) {
+ rename(nonBlankAccnosFiles[0].c_str(), accnosFName.c_str());
+
+ for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
+ appendAlignFiles(nonBlankAccnosFiles[h], accnosFName);
+ m->mothurRemove(nonBlankAccnosFiles[h]);
+ }
+ }else { //recreate the accnosfile if needed
+ ofstream out;
+ m->openOutputFile(accnosFName, out);
+ out.close();
+ }
+#endif
return num;
-#endif
}
catch(exception& e) {
m->errorOut(e, "AlignCommand", "createProcesses");
#include "database.hpp"
#include "alignment.hpp"
#include "alignmentdb.h"
+#include "sequence.hpp"
+
+#include "gotohoverlap.hpp"
+#include "needlemanoverlap.hpp"
+#include "blastalign.hpp"
+#include "noalign.hpp"
+
+#include "nast.hpp"
+#include "nastreport.hpp"
+
class AlignCommand : public Command {
bool MPIWroteAccnos;
AlignmentDB* templateDB;
- Alignment* alignment;
int driver(linePair*, string, string, string, string);
int createProcesses(string, string, string, string);
};
+/**************************************************************************************************/
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+typedef struct alignData {
+ string alignFName;
+ string reportFName;
+ string accnosFName;
+ string filename;
+ string align;
+ string search;
+ bool flip;
+ unsigned long int start;
+ unsigned long int end;
+ MothurOut* m;
+ //AlignmentDB* templateDB;
+ float match, misMatch, gapOpen, gapExtend, threshold;
+ int count, kmerSize;
+
+ alignData(){}
+ alignData(string a, string r, string ac, string f, string al, string se, int ks, MothurOut* mout, unsigned long int st, unsigned long int en, bool fl, float ma, float misMa, float gapO, float gapE, float thr) {
+ alignFName = a;
+ reportFName = r;
+ accnosFName = ac;
+ filename = f;
+ flip = fl;
+ m = mout;
+ start = st;
+ end = en;
+ //templateDB = tdb;
+ match = ma;
+ misMatch = misMa;
+ gapOpen = gapO;
+ gapExtend = gapE;
+ threshold = thr;
+ align = al;
+ search = se;
+ count = 0;
+ kmerSize = ks;
+ }
+};
+
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#else
+static DWORD WINAPI MyAlignThreadFunction(LPVOID lpParam){
+ alignData* pDataArray;
+ pDataArray = (alignData*)lpParam;
+
+ try {
+ ofstream alignmentFile;
+ pDataArray->m->openOutputFile(pDataArray->alignFName, alignmentFile);
+
+ ofstream accnosFile;
+ pDataArray->m->openOutputFile(pDataArray->accnosFName, accnosFile);
+
+ NastReport report(pDataArray->reportFName);
+
+ ifstream inFASTA;
+ pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
+
+ //print header if you are process 0
+ if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
+ inFASTA.seekg(0);
+ }else { //this accounts for the difference in line endings.
+ inFASTA.seekg(pDataArray->start-1); pDataArray->m->gobble(inFASTA);
+ }
+
+ pDataArray->count = pDataArray->end;
+
+ AlignmentDB* templateDB = new AlignmentDB("saved-silent", pDataArray->search, pDataArray->kmerSize, pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch);
+
+ //moved this into driver to avoid deep copies in windows paralellized version
+ Alignment* alignment;
+ int longestBase = templateDB->getLongestBase();
+ if(pDataArray->align == "gotoh") { alignment = new GotohOverlap(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, longestBase); }
+ else if(pDataArray->align == "needleman") { alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, longestBase); }
+ else if(pDataArray->align == "blast") { alignment = new BlastAlignment(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch); }
+ else if(pDataArray->align == "noalign") { alignment = new NoAlign(); }
+ else {
+ pDataArray->m->mothurOut(pDataArray->align + " is not a valid alignment option. I will run the command using needleman.");
+ pDataArray->m->mothurOutEndLine();
+ alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, longestBase);
+ }
+
+ int count = 0;
+ for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
+
+ if (pDataArray->m->control_pressed) { break; }
+
+ Sequence* candidateSeq = new Sequence(inFASTA); pDataArray->m->gobble(inFASTA);
+ report.setCandidate(candidateSeq);
+
+ int origNumBases = candidateSeq->getNumBases();
+ string originalUnaligned = candidateSeq->getUnaligned();
+ int numBasesNeeded = origNumBases * pDataArray->threshold;
+
+ if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
+ if (candidateSeq->getUnaligned().length() > alignment->getnRows()) {
+ alignment->resize(candidateSeq->getUnaligned().length()+1);
+ }
+
+ Sequence temp = templateDB->findClosestSequence(candidateSeq);
+ Sequence* templateSeq = &temp;
+
+ float searchScore = templateDB->getSearchScore();
+
+ Nast* nast = new Nast(alignment, candidateSeq, templateSeq);
+
+ Sequence* copy;
+
+ Nast* nast2;
+ bool needToDeleteCopy = false; //this is needed in case you have you enter the ifs below
+ //since nast does not make a copy of hte sequence passed, and it is used by the reporter below
+ //you can't delete the copy sequence til after you report, but you may choose not to create it in the first place
+ //so this bool tells you if you need to delete it
+
+ //if there is a possibility that this sequence should be reversed
+ if (candidateSeq->getNumBases() < numBasesNeeded) {
+
+ string wasBetter = "";
+ //if the user wants you to try the reverse
+ if (pDataArray->flip) {
+
+ //get reverse compliment
+ copy = new Sequence(candidateSeq->getName(), originalUnaligned);
+ copy->reverseComplement();
+
+ //rerun alignment
+ Sequence temp2 = templateDB->findClosestSequence(copy);
+ Sequence* templateSeq2 = &temp2;
+
+ searchScore = templateDB->getSearchScore();
+
+ nast2 = new Nast(alignment, copy, templateSeq2);
+
+ //check if any better
+ if (copy->getNumBases() > candidateSeq->getNumBases()) {
+ candidateSeq->setAligned(copy->getAligned()); //use reverse compliments alignment since its better
+ templateSeq = templateSeq2;
+ delete nast;
+ nast = nast2;
+ needToDeleteCopy = true;
+ wasBetter = "\treverse complement produced a better alignment, so mothur used the reverse complement.";
+ }else{
+ wasBetter = "\treverse complement did NOT produce a better alignment so it was not used, please check sequence.";
+ delete nast2;
+ delete copy;
+ }
+ }
+
+ //create accnos file with names
+ accnosFile << candidateSeq->getName() << wasBetter << endl;
+ }
+
+ report.setTemplate(templateSeq);
+ report.setSearchParameters(pDataArray->search, searchScore);
+ report.setAlignmentParameters(pDataArray->align, alignment);
+ report.setNastParameters(*nast);
+
+ alignmentFile << '>' << candidateSeq->getName() << '\n' << candidateSeq->getAligned() << endl;
+
+ report.print();
+ delete nast;
+ if (needToDeleteCopy) { delete copy; }
+
+ count++;
+ }
+ delete candidateSeq;
+
+ //report progress
+ if((count) % 100 == 0){ pDataArray->m->mothurOut(toString(count)); pDataArray->m->mothurOutEndLine(); }
+
+ }
+ //report progress
+ if((count) % 100 != 0){ pDataArray->m->mothurOut(toString(count)); pDataArray->m->mothurOutEndLine(); }
+
+ delete alignment;
+ delete templateDB;
+ alignmentFile.close();
+ inFASTA.close();
+ accnosFile.close();
+ }
+ catch(exception& e) {
+ pDataArray->m->errorOut(e, "AlignCommand", "MyAlignThreadFunction");
+ exit(1);
+ }
+}
+#endif
+
+
+
#endif
#include "blastdb.hpp"
#include "referencedb.h"
-
+/**************************************************************************************************/
+//deep copy
+AlignmentDB::AlignmentDB(const AlignmentDB& adb) : numSeqs(adb.numSeqs), longest(adb.longest), method(adb.method), emptySequence(adb.emptySequence) {
+ try {
+
+ m = MothurOut::getInstance();
+ if (adb.method == "blast") {
+ search = new BlastDB(*((BlastDB*)adb.search));
+ }else if(adb.method == "kmer") {
+ search = new KmerDB(*((KmerDB*)adb.search));
+ }else if(adb.method == "suffix") {
+ search = new SuffixDB(*((SuffixDB*)adb.search));
+ }else {
+ m->mothurOut("[ERROR]: cannot create copy of alignment database, unrecognized method - " + adb.method); m->mothurOutEndLine();
+ }
+
+ for (int i = 0; i < adb.templateSequences.size(); i++) {
+ Sequence temp(adb.templateSequences[i]);
+ templateSequences.push_back(temp);
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "AlignmentDB", "AlignmentDB");
+ exit(1);
+ }
+
+}
/**************************************************************************************************/
AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){ // This assumes that the template database is in fasta format, may
try { // need to alter this in the future?
method = s;
bool needToGenerate = true;
ReferenceDB* rdb = ReferenceDB::getInstance();
+ bool silent = false;
+
+ if (fastaFileName == "saved-silent") {
+ fastaFileName = "saved"; silent = true;
+ }
if (fastaFileName == "saved") {
int start = time(NULL);
- m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory."); m->mothurOutEndLine();
+
+ if (!silent) { m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory."); m->mothurOutEndLine(); }
for (int i = 0; i < rdb->referenceSeqs.size(); i++) {
templateSequences.push_back(rdb->referenceSeqs[i]);
fastaFileName = rdb->getSavedReference();
numSeqs = templateSequences.size();
- m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(rdb->referenceSeqs.size()) + " sequences.");m->mothurOutEndLine();
+ if (!silent) { m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(rdb->referenceSeqs.size()) + " sequences.");m->mothurOutEndLine(); }
}else {
int start = time(NULL);
else if(method == "suffix") { search = new SuffixDB(numSeqs); }
else if(method == "blast") { search = new BlastDB(fastaFileName.substr(0,fastaFileName.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, ""); }
else {
+ method = "kmer";
m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
m->mothurOutEndLine();
search = new KmerDB(fastaFileName, 8);
AlignmentDB(string, string, int, float, float, float, float); //reads fastafile passed in and stores sequences
AlignmentDB(string);
+ AlignmentDB(const AlignmentDB& adb);
~AlignmentDB();
Sequence findClosestSequence(Sequence*);
public:
BlastDB(string, float, float, float, float, string);
BlastDB(string);
+ BlastDB(const BlastDB& bdb) : dbFileName(bdb.dbFileName), queryFileName(bdb.queryFileName), blastFileName(bdb.blastFileName), path(bdb.path),
+ count(bdb.count), gapOpen(bdb.gapOpen), gapExtend(bdb.gapExtend), match(bdb.match), misMatch(bdb.misMatch), Database(bdb) {}
~BlastDB();
void generateDB();
#include "getcommandinfocommand.h"
#include "deuniquetreecommand.h"
#include "countseqscommand.h"
+#include "countgroupscommand.h"
#include "clearmemorycommand.h"
/*******************************************************/
commands["get.commandinfo"] = "get.commandinfo";
commands["deunique.tree"] = "deunique.tree";
commands["count.seqs"] = "count.seqs";
+ commands["count.groups"] = "count.groups";
commands["clear.memory"] = "clear.memory";
commands["pairwise.seqs"] = "MPIEnabled";
commands["pipeline.pds"] = "MPIEnabled";
else if(commandName == "get.commandinfo") { command = new GetCommandInfoCommand(optionString); }
else if(commandName == "deunique.tree") { command = new DeuniqueTreeCommand(optionString); }
else if(commandName == "count.seqs") { command = new CountSeqsCommand(optionString); }
+ else if(commandName == "count.groups") { command = new CountGroupsCommand(optionString); }
else if(commandName == "clear.memory") { command = new ClearMemoryCommand(optionString); }
else { command = new NoCommand(optionString); }
else if(commandName == "get.commandinfo") { pipecommand = new GetCommandInfoCommand(optionString); }
else if(commandName == "deunique.tree") { pipecommand = new DeuniqueTreeCommand(optionString); }
else if(commandName == "count.seqs") { pipecommand = new CountSeqsCommand(optionString); }
+ else if(commandName == "count.groups") { pipecommand = new CountGroupsCommand(optionString); }
else if(commandName == "clear.memory") { pipecommand = new ClearMemoryCommand(optionString); }
else { pipecommand = new NoCommand(optionString); }
else if(commandName == "get.commandinfo") { shellcommand = new GetCommandInfoCommand(); }
else if(commandName == "deunique.tree") { shellcommand = new DeuniqueTreeCommand(); }
else if(commandName == "count.seqs") { shellcommand = new CountSeqsCommand(); }
+ else if(commandName == "count.groups") { shellcommand = new CountGroupsCommand(); }
else if(commandName == "clear.memory") { shellcommand = new ClearMemoryCommand(); }
else { shellcommand = new NoCommand(); }
--- /dev/null
+/*
+ * countgroupscommand.cpp
+ * Mothur
+ *
+ * Created by westcott on 8/9/11.
+ * Copyright 2011 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "countgroupscommand.h"
+#include "sharedutilities.h"
+#include "inputdata.h"
+
+//**********************************************************************************************************************
+vector<string> CountGroupsCommand::setParameters(){
+ try {
+ CommandParameter pshared("shared", "InputTypes", "", "", "sharedGroup", "sharedGroup", "none",false,false); parameters.push_back(pshared);
+ CommandParameter pgroup("group", "InputTypes", "", "", "sharedGroup", "sharedGroup", "none",false,false); parameters.push_back(pgroup);
+ CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos);
+ CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+
+ vector<string> myArray;
+ for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountGroupsCommand", "setParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string CountGroupsCommand::getHelpString(){
+ try {
+ string helpString = "";
+ helpString += "The count.groups command counts sequences from a specfic group or set of groups from the following file types: group or shared file.\n";
+ helpString += "The count.groups command parameters are accnos, group, shared and groups. You must provide a group or shared file.\n";
+ helpString += "The accnos parameter allows you to provide a file containing the list of groups.\n";
+ helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like. You can separate group names with dashes.\n";
+ helpString += "The count.groups command should be in the following format: count.groups(accnos=yourAccnos, group=yourGroupFile).\n";
+ helpString += "Example count.groups(accnos=amazon.accnos, group=amazon.groups).\n";
+ helpString += "or count.groups(groups=pasture, group=amazon.groups).\n";
+ helpString += "Note: No spaces between parameter labels (i.e. group), '=' and parameters (i.e.yourGroupFile).\n";
+ return helpString;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountGroupsCommand", "getHelpString");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+CountGroupsCommand::CountGroupsCommand(){
+ try {
+ abort = true; calledHelp = true;
+ setParameters();
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountGroupsCommand", "CountGroupsCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+CountGroupsCommand::CountGroupsCommand(string option) {
+ try {
+ abort = false; calledHelp = false;
+
+ //allow user to run help
+ if(option == "help") { help(); abort = true; calledHelp = true; }
+ else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+
+ else {
+ vector<string> myArray = setParameters();
+
+ OptionParser parser(option);
+ map<string,string> parameters = parser.getParameters();
+
+ ValidParameters validParameter;
+ map<string,string>::iterator it;
+
+ //check to make sure all parameters are valid for command
+ for (it = parameters.begin(); it != parameters.end(); it++) {
+ if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
+ }
+
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
+
+ //if the user changes the input directory command factory will send this info to us in the output parameter
+ string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ if (inputDir == "not found"){ inputDir = ""; }
+ else {
+ string path;
+ it = parameters.find("accnos");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["accnos"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("group");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["group"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("shared");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["shared"] = inputDir + it->second; }
+ }
+ }
+
+
+ //check for required parameters
+ accnosfile = validParameter.validFile(parameters, "accnos", true);
+ if (accnosfile == "not open") { abort = true; }
+ else if (accnosfile == "not found") { accnosfile = ""; }
+ else { m->setAccnosFile(accnosfile); }
+
+ groups = validParameter.validFile(parameters, "groups", false);
+ if (groups == "not found") { groups = ""; }
+ else {
+ m->splitAtDash(groups, Groups);
+ m->Groups = Groups;
+ }
+
+ sharedfile = validParameter.validFile(parameters, "shared", true);
+ if (sharedfile == "not open") { sharedfile = ""; abort = true; }
+ else if (sharedfile == "not found") { sharedfile = ""; }
+ else { m->setSharedFile(sharedfile); }
+
+ groupfile = validParameter.validFile(parameters, "group", true);
+ if (groupfile == "not open") { groupfile = ""; abort = true; }
+ else if (groupfile == "not found") { groupfile = ""; }
+ else { m->setGroupFile(groupfile); }
+
+ if ((sharedfile == "") && (groupfile == "")) {
+ //give priority to shared, then group
+ sharedfile = m->getSharedFile();
+ if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+ else {
+ groupfile = m->getGroupFile();
+ if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
+ else {
+ m->mothurOut("You have no current groupfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
+ }
+ }
+ }
+
+ if ((accnosfile == "") && (Groups.size() == 0)) { Groups.push_back("all"); m->Groups = Groups; }
+ }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountGroupsCommand", "CountGroupsCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+int CountGroupsCommand::execute(){
+ try {
+
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
+ //get groups you want to remove
+ if (accnosfile != "") { readAccnos(); }
+
+ if (groupfile != "") {
+ GroupMap groupMap(groupfile);
+ groupMap.readMap();
+
+ //make sure groups are valid
+ //takes care of user setting groupNames that are invalid or setting groups=all
+ SharedUtil util;
+ util.setGroups(Groups, groupMap.namesOfGroups);
+
+ for (int i = 0; i < Groups.size(); i++) {
+ m->mothurOut(Groups[i] + " contains " + toString(groupMap.getNumSeqs(Groups[i])) + "."); m->mothurOutEndLine();
+ }
+ }
+
+ if (m->control_pressed) { return 0; }
+
+ if (sharedfile != "") {
+ InputData input(sharedfile, "sharedfile");
+ vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
+
+ for (int i = 0; i < lookup.size(); i++) {
+ m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + "."); m->mothurOutEndLine();
+ delete lookup[i];
+ }
+ }
+
+ return 0;
+ }
+
+ catch(exception& e) {
+ m->errorOut(e, "CountGroupsCommand", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+void CountGroupsCommand::readAccnos(){
+ try {
+ Groups.clear();
+
+ ifstream in;
+ m->openInputFile(accnosfile, in);
+ string name;
+
+ while(!in.eof()){
+ in >> name;
+
+ Groups.push_back(name);
+
+ m->gobble(in);
+ }
+ in.close();
+
+ m->Groups = Groups;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountGroupsCommand", "readAccnos");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+
--- /dev/null
+#ifndef COUNTGROUPSCOMMAND_H
+#define COUNTGROUPSCOMMAND_H
+
+/*
+ * countgroupscommand.h
+ * Mothur
+ *
+ * Created by westcott on 8/9/11.
+ * Copyright 2011 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "command.hpp"
+
+class CountGroupsCommand : public Command {
+
+public:
+
+ CountGroupsCommand(string);
+ CountGroupsCommand();
+ ~CountGroupsCommand(){}
+
+ vector<string> setParameters();
+ string getCommandName() { return "count.groups"; }
+ string getCommandCategory() { return "Sequence Processing"; }
+ string getHelpString();
+ string getCitation() { return "http://www.mothur.org/wiki/Count.groups"; }
+ string getDescription() { return "counts the number of sequences in each group"; }
+
+ int execute();
+ void help() { m->mothurOut(getHelpString()); }
+
+
+private:
+ string sharedfile, groupfile, outputDir, groups, accnosfile;
+ bool abort;
+ vector<string> Groups;
+
+ void readAccnos();
+};
+
+#endif
public:
Database();
+ Database(const Database& db) : numSeqs(db.numSeqs), longest(db.longest), searchScore(db.searchScore), results(db.results), Scores(db.Scores) { m = MothurOut::getInstance(); }
virtual ~Database();
virtual void generateDB() = 0;
virtual void addSequence(Sequence) = 0; //add sequence to search engine
virtual void readKmerDB(ifstream&){};
virtual void setNumSeqs(int i) { numSeqs = i; }
virtual vector<int> getSequencesWithKmer(int){ vector<int> filler; return filler; };
- virtual int getMaxKmer(){ return 1; };
+ virtual int getMaxKmer(){ return 1; }
protected:
MothurOut* m;
class Dist {
public:
- Dist(){dist = 0; m = MothurOut::getInstance(); }
+ Dist(){ dist = 0; m = MothurOut::getInstance(); }
+ Dist(const Dist& d) : dist(d.dist) { m = MothurOut::getInstance(); }
virtual ~Dist() {}
virtual void calcDist(Sequence, Sequence) = 0;
double getDist() { return dist; }
#include "distancedb.hpp"
#include "onegapignore.h"
+
+/**************************************************************************************************/
+DistanceDB::DistanceDB(const DistanceDB& ddb) : data(ddb.data), templateSeqsLength(ddb.templateSeqsLength), templateAligned(ddb.templateAligned), Database(ddb) {
+ distCalculator = new oneGapIgnoreTermGapDist();
+}
/**************************************************************************************************/
-DistanceDB::DistanceDB() {
+DistanceDB::DistanceDB() : Database() {
try {
templateAligned = true;
templateSeqsLength = 0;
public:
DistanceDB();
+ DistanceDB(const DistanceDB& ddb);
~DistanceDB() { delete distCalculator; }
void generateDB() {} //doesn't generate a search db
public:
+ eachGapDist() {}
+ eachGapDist(const eachGapDist& ddb) {}
+
void calcDist(Sequence A, Sequence B){
int diff = 0;
int length = 0;
class eachGapIgnoreTermGapDist : public Dist {
public:
+ eachGapIgnoreTermGapDist() {}
+ eachGapIgnoreTermGapDist(const eachGapIgnoreTermGapDist& ddb) {}
void calcDist(Sequence A, Sequence B){
int diff = 0;
public:
+ ignoreGaps() {}
+ ignoreGaps(const ignoreGaps& ddb) {}
+
void calcDist(Sequence A, Sequence B){
int diff = 0;
int length = 0;
public:
KmerDB(string, int);
+ KmerDB(const KmerDB& kdb) : kmerSize(kdb.kmerSize), maxKmer(kdb.maxKmer), count(kdb.count), kmerDBName(kdb.kmerDBName), kmerLocations(kdb.kmerLocations), Database(kdb) {}
KmerDB();
~KmerDB();
class oneGapDist : public Dist {
public:
+
+ oneGapDist() {}
+ oneGapDist(const oneGapDist& ddb) {}
+
void calcDist(Sequence A, Sequence B){
int difference = 0;
class oneGapIgnoreTermGapDist : public Dist {
public:
+
+ oneGapIgnoreTermGapDist() {}
+ oneGapIgnoreTermGapDist(const oneGapIgnoreTermGapDist& ddb) {}
+
void calcDist(Sequence A, Sequence B){
int difference = 0;
read->AssembleTrees();
T = read->getTrees();
delete read;
-
+
//make sure all files match
//if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
int numNamesInTree;
Sequence(string, string);
Sequence(ifstream&);
Sequence(istringstream&);
+ Sequence(const Sequence& se) : name(se.name), unaligned(se.unaligned), aligned(se.aligned), pairwise(se.pairwise), numBases(se.numBases), startPos(se.startPos), endPos(se.endPos),
+ alignmentLength(se.alignmentLength), isAligned(se.isAligned), longHomoPolymer(se.longHomoPolymer), ambigBases(se.ambigBases) { m = MothurOut::getInstance(); }
//these constructors just set the unaligned string to save space
Sequence(string, string, string);
#include "mothur.h"
#include "database.hpp"
-
-class SuffixTree;
+#include "suffixtree.hpp"
+//class SuffixTree;
class SuffixDB : public Database {
public:
SuffixDB(int);
SuffixDB();
+ SuffixDB(const SuffixDB& sdb) : count(sdb.count), Database(sdb) {
+ for (int i = 0; i < sdb.suffixForest.size(); i++) {
+ SuffixTree temp(sdb.suffixForest[i]);
+ suffixForest.push_back(temp);
+ }
+ }
~SuffixDB();
void generateDB() {}; //adding sequences generates the db
public:
SuffixNode(int, int, int);
+ SuffixNode(const SuffixNode& sn) : parentNode(sn.parentNode), startCharPosition(sn.startCharPosition), endCharPosition(sn.endCharPosition) {m = MothurOut::getInstance();}
virtual ~SuffixNode() {}
virtual void print(string, int) = 0;
virtual void setChildren(char, int);
public:
SuffixBranch(int, int, int);
+ SuffixBranch(const SuffixBranch& sb) : suffixNode(sb.suffixNode), childNodes(sb.childNodes), SuffixNode(sb.parentNode, sb.startCharPosition, sb.endCharPosition) {}
~SuffixBranch() {}
void print(string, int); // need a special method for printing the node because there are children
void eraseChild(char); // need a special method for erasing the children
return (left->getParentNode() < right->getParentNode()); // nodes in order of their parent
}
+//********************************************************************************************************************
+
+SuffixTree::SuffixTree(const SuffixTree& st) : root(st.root), activeEndPosition(st.activeEndPosition), activeStartPosition(st.activeStartPosition), activeNode(st.activeNode),
+ nodeCounter(st.nodeCounter), seqName(st.seqName), sequence(st.sequence) {
+ try {
+ m = MothurOut::getInstance();
+
+ for (int i = 0; i < st.nodeVector.size(); i++) {
+ SuffixNode* temp = new SuffixBranch(*((SuffixBranch*)st.nodeVector[i]));
+ nodeVector.push_back(temp);
+ }
+
+
+ }catch(exception& e) {
+ m->errorOut(e, "SuffixTree", "SuffixTree");
+ exit(1);
+ }
+}
+
//********************************************************************************************************************
SuffixTree::SuffixTree(){ m = MothurOut::getInstance(); }
SuffixTree();
~SuffixTree();
// SuffixTree(string, string);
+ SuffixTree(const SuffixTree&);
void loadSequence(Sequence);
string getSeqName();
/************************************************************/
TreeMap::TreeMap(string filename) {
+ m = MothurOut::getInstance();
groupFileName = filename;
m->openInputFile(filename, fileHandle);
}
TreeMap::~TreeMap(){}
/************************************************************/
-void TreeMap::readMap() {
+int TreeMap::readMap() {
string seqName, seqGroup;
-
+ int error = 0;
+
while(fileHandle){
- fileHandle >> seqName; //read from first column
+ fileHandle >> seqName; //read from first column
fileHandle >> seqGroup; //read from second column
-
- namesOfSeqs.push_back(seqName);
+
+ if (m->control_pressed) { fileHandle.close(); return 1; }
+
setNamesOfGroups(seqGroup);
- treemap[seqName].groupname = seqGroup; //store data in map
-
- it2 = seqsPerGroup.find(seqGroup);
- if (it2 == seqsPerGroup.end()) { //if it's a new group
- seqsPerGroup[seqGroup] = 1;
- }else {//it's a group we already have
- seqsPerGroup[seqGroup]++;
+ map<string, GroupIndex>::iterator itCheck = treemap.find(seqName);
+ if (itCheck != treemap.end()) { error = 1; m->mothurOut("[WARNING]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); }
+ else {
+ namesOfSeqs.push_back(seqName);
+ treemap[seqName].groupname = seqGroup; //store data in map
+
+ it2 = seqsPerGroup.find(seqGroup);
+ if (it2 == seqsPerGroup.end()) { //if it's a new group
+ seqsPerGroup[seqGroup] = 1;
+ }else {//it's a group we already have
+ seqsPerGroup[seqGroup]++;
+ }
}
-
+
m->gobble(fileHandle);
}
fileHandle.close();
+
+
+ return error;
}
/************************************************************/
void TreeMap::addSeq(string seqName, string seqGroup) {
TreeMap() { m = MothurOut::getInstance(); }
TreeMap(string);
~TreeMap();
- void readMap();
+ int readMap();
int getNumGroups();
int getNumSeqs();
void setIndex(string, int); //sequencename, index