From: westcott Date: Wed, 1 Jun 2011 14:36:06 +0000 (+0000) Subject: added count.seqs command and made some modifcations to the uchime code to allow it... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=75962c819c16814e5c2340d99af1aa62e564dc20 added count.seqs command and made some modifcations to the uchime code to allow it to build properly in cygwin --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 546a2f6..9699584 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -39,6 +39,7 @@ A74D369C137DAB8400332B0C /* writechhit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3686137DAB8300332B0C /* writechhit.cpp */; }; A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */; }; A75790591301749D00A30DAB /* homovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75790581301749D00A30DAB /* homovacommand.cpp */; }; + A7730EFF13967241007433A3 /* countseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7730EFE13967241007433A3 /* countseqscommand.cpp */; }; A778FE6B134CA6CA00C0BA33 /* getcommandinfocommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */; }; A77A221F139001B600B0BE70 /* deuniquetreecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */; }; A799F5B91309A3E000AEEFA0 /* makefastqcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */; }; @@ -401,6 +402,8 @@ A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimerauchimecommand.cpp; sourceTree = ""; }; A75790571301749D00A30DAB /* homovacommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = homovacommand.h; sourceTree = ""; }; A75790581301749D00A30DAB /* homovacommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = homovacommand.cpp; sourceTree = ""; }; + A7730EFD13967241007433A3 /* countseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = countseqscommand.h; sourceTree = ""; }; + A7730EFE13967241007433A3 /* countseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = countseqscommand.cpp; sourceTree = ""; }; A778FE69134CA6CA00C0BA33 /* getcommandinfocommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getcommandinfocommand.h; sourceTree = ""; }; A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getcommandinfocommand.cpp; sourceTree = ""; }; A77A221D139001B600B0BE70 /* deuniquetreecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deuniquetreecommand.h; sourceTree = ""; }; @@ -1221,6 +1224,8 @@ A7E9B6B712D37EC400DA6239 /* consensusseqscommand.cpp */, A7E9B6BA12D37EC400DA6239 /* corraxescommand.h */, A7E9B6B912D37EC400DA6239 /* corraxescommand.cpp */, + A7730EFD13967241007433A3 /* countseqscommand.h */, + A7730EFE13967241007433A3 /* countseqscommand.cpp */, A7E9B6C412D37EC400DA6239 /* deconvolutecommand.h */, A7E9B6C312D37EC400DA6239 /* deconvolutecommand.cpp */, A7E9B6C612D37EC400DA6239 /* degapseqscommand.h */, @@ -2101,6 +2106,7 @@ A74D369C137DAB8400332B0C /* writechhit.cpp in Sources */, A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */, A77A221F139001B600B0BE70 /* deuniquetreecommand.cpp in Sources */, + A7730EFF13967241007433A3 /* countseqscommand.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/blastdb.cpp b/blastdb.cpp index c7003f4..48b09fb 100644 --- a/blastdb.cpp +++ b/blastdb.cpp @@ -35,9 +35,9 @@ gapOpen(gO), gapExtend(gE), match(mm), misMatch(mM) { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) formatdbCommand = path + "blast/bin/formatdb"; // format the database, -o option gives us the ability #else - formatdbCommand = path + "blast\\bin\\formatdb"; + formatdbCommand = path + "blast\\bin\\formatdb.exe"; //wrap entire string in "" - formatdbCommand = "\"" + formatdbCommand + "\""; + //formatdbCommand = "\"" + formatdbCommand + "\""; #endif //test to make sure formatdb exists @@ -50,9 +50,9 @@ gapOpen(gO), gapExtend(gE), match(mm), misMatch(mM) { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) blastCommand = path + "blast/bin/blastall"; // format the database, -o option gives us the ability #else - blastCommand = path + "blast\\bin\\blastall"; + blastCommand = path + "blast\\bin\\blastall.exe"; //wrap entire string in "" - blastCommand = "\"" + blastCommand + "\""; + //blastCommand = "\"" + blastCommand + "\""; #endif //test to make sure formatdb exists @@ -66,9 +66,9 @@ gapOpen(gO), gapExtend(gE), match(mm), misMatch(mM) { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) megablastCommand = path + "blast/bin/megablast"; // format the database, -o option gives us the ability #else - megablastCommand = path + "blast\\bin\\megablast"; + megablastCommand = path + "blast\\bin\\megablast.exe"; //wrap entire string in "" - megablastCommand = "\"" + blastCommmegablastCommandand + "\""; + //megablastCommand = "\"" + megablastCommand + "\""; #endif //test to make sure formatdb exists @@ -105,9 +105,9 @@ BlastDB::BlastDB() : Database() { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) formatdbCommand = path + "blast/bin/formatdb"; // format the database, -o option gives us the ability #else - formatdbCommand = path + "blast\\bin\\formatdb"; + formatdbCommand = path + "blast\\bin\\formatdb.exe"; //wrap entire string in "" - formatdbCommand = "\"" + formatdbCommand + "\""; + //formatdbCommand = "\"" + formatdbCommand + "\""; #endif //test to make sure formatdb exists @@ -120,9 +120,9 @@ BlastDB::BlastDB() : Database() { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) blastCommand = path + "blast/bin/blastall"; // format the database, -o option gives us the ability #else - blastCommand = path + "blast\\bin\\blastall"; + blastCommand = path + "blast\\bin\\blastall.exe"; //wrap entire string in "" - blastCommand = "\"" + blastCommand + "\""; + //blastCommand = "\"" + blastCommand + "\""; #endif //test to make sure formatdb exists @@ -136,9 +136,9 @@ BlastDB::BlastDB() : Database() { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) megablastCommand = path + "blast/bin/megablast"; // format the database, -o option gives us the ability #else - megablastCommand = path + "blast\\bin\\megablast"; + megablastCommand = path + "blast\\bin\\megablast.exe"; //wrap entire string in "" - megablastCommand = "\"" + blastCommmegablastCommandand + "\""; + //megablastCommand = "\"" + megablastCommand + "\""; #endif //test to make sure formatdb exists diff --git a/chimeraslayer.cpp b/chimeraslayer.cpp index b00cedf..97b331b 100644 --- a/chimeraslayer.cpp +++ b/chimeraslayer.cpp @@ -1127,7 +1127,7 @@ vector ChimeraSlayer::getBlastSeqs(Sequence q, vector& db, delete queryRight; delete queryLeft; - if (refResults.size() == 0) { m->mothurOut("[WARNING]: megablast found 0 potential parents, so we are not able to check " + q.getName() + ". This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); } + if (refResults.size() == 0) { m->mothurOut("[WARNING]: megablast returned 0 potential parents, so we are not able to check " + q.getName() + ". This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); } return refResults; } diff --git a/chimerauchimecommand.cpp b/chimerauchimecommand.cpp index d4cb57a..33b48bd 100644 --- a/chimerauchimecommand.cpp +++ b/chimerauchimecommand.cpp @@ -744,7 +744,7 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename processIDS.clear(); int process = 1; int num = 0; - +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) //break up file into multiple files vector files; m->divideFile(filename, processors, files); @@ -852,7 +852,7 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename #endif //get rid of the file pieces. for (int i = 0; i < files.size(); i++) { remove(files[i].c_str()); } - +#endif return num; } catch(exception& e) { diff --git a/commandfactory.cpp b/commandfactory.cpp index 71558a9..620536d 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -119,6 +119,7 @@ #include "sharedcommand.h" #include "getcommandinfocommand.h" #include "deuniquetreecommand.h" +#include "countseqscommand.h" /*******************************************************/ @@ -241,6 +242,7 @@ CommandFactory::CommandFactory(){ commands["set.current"] = "set.current"; commands["get.commandinfo"] = "get.commandinfo"; commands["deunique.tree"] = "deunique.tree"; + commands["count.seqs"] = "count.seqs"; commands["pairwise.seqs"] = "MPIEnabled"; commands["pipeline.pds"] = "MPIEnabled"; commands["classify.seqs"] = "MPIEnabled"; @@ -412,6 +414,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "make.shared") { command = new SharedCommand(optionString); } else if(commandName == "get.commandinfo") { command = new GetCommandInfoCommand(optionString); } else if(commandName == "deunique.tree") { command = new DeuniqueTreeCommand(optionString); } + else if(commandName == "count.seqs") { command = new CountSeqsCommand(optionString); } else { command = new NoCommand(optionString); } return command; @@ -549,6 +552,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str else if(commandName == "make.shared") { pipecommand = new SharedCommand(optionString); } else if(commandName == "get.commandinfo") { pipecommand = new GetCommandInfoCommand(optionString); } else if(commandName == "deunique.tree") { pipecommand = new DeuniqueTreeCommand(optionString); } + else if(commandName == "count.seqs") { pipecommand = new CountSeqsCommand(optionString); } else { pipecommand = new NoCommand(optionString); } return pipecommand; @@ -674,6 +678,7 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "make.shared") { shellcommand = new SharedCommand(); } else if(commandName == "get.commandinfo") { shellcommand = new GetCommandInfoCommand(); } else if(commandName == "deunique.tree") { shellcommand = new DeuniqueTreeCommand(); } + else if(commandName == "count.seqs") { shellcommand = new CountSeqsCommand(); } else { shellcommand = new NoCommand(); } return shellcommand; diff --git a/countseqscommand.cpp b/countseqscommand.cpp index 8164e27..a446469 100644 --- a/countseqscommand.cpp +++ b/countseqscommand.cpp @@ -33,7 +33,7 @@ vector CountSeqsCommand::setParameters(){ string CountSeqsCommand::getHelpString(){ try { string helpString = ""; - helpString += "The count.seqs command reads a name file and outputs a .count.summary file. You may also provide a group file to get the counts broken down by group.\n"; + helpString += "The count.seqs command reads a name file and outputs a .seq.count file. You may also provide a group file to get the counts broken down by group.\n"; helpString += "The groups parameter allows you to indicate which groups you want to include in the counts, by default all groups in your groupfile are used.\n"; helpString += "When you use the groups parameter and a sequence does not represent any sequences from the groups you specify it is not included in the .count.summary file.\n"; helpString += "The count.seqs command should be in the following format: count.seqs(name=yourNameFile).\n"; @@ -146,7 +146,7 @@ int CountSeqsCommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } ofstream out; - string outputFileName = outputDir + m->getRootName(m->getSimpleName(namefile)) + "count.summary"; + string outputFileName = outputDir + m->getRootName(m->getSimpleName(namefile)) + ".seq.count"; m->openOutputFile(outputFileName, out); outputTypes["summary"].push_back(outputFileName); out << "Representative Sequence\t total\t"; diff --git a/countseqscommand.h b/countseqscommand.h new file mode 100644 index 0000000..385cd13 --- /dev/null +++ b/countseqscommand.h @@ -0,0 +1,41 @@ +#ifndef COuNTSEQSCOMMAND_H +#define COuNTSEQSCOMMAND_H + +/* + * countseqscommand.h + * Mothur + * + * Created by westcott on 6/1/11. + * Copyright 2011 Schloss Lab. All rights reserved. + * + */ + +#include "command.hpp" + +class CountSeqsCommand : public Command { + +public: + + CountSeqsCommand(string); + CountSeqsCommand(); + ~CountSeqsCommand(){} + + vector setParameters(); + string getCommandName() { return "count.seqs"; } + string getCommandCategory() { return "Sequence Processing"; } + string getHelpString(); + string getCitation() { return "http://www.mothur.org/wiki/Count.seqs"; } + + int execute(); + void help() { m->mothurOut(getHelpString()); } + + +private: + string namefile, groupfile, outputDir, groups; + bool abort; + vector Groups; +}; + +#endif + + diff --git a/myutils.cpp b/myutils.cpp index e9d52c2..b9c0147 100755 --- a/myutils.cpp +++ b/myutils.cpp @@ -13,19 +13,19 @@ #include #include -#ifdef _MSC_VER -#include -#include -#include -#include -#include -#else +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) #include #include #include #include #include #include +#else +//#include +#include +#include +#include +#include #endif #include "myutils.h" @@ -141,7 +141,9 @@ bool myisatty(int fd) return isatty(fd) != 0; } -#ifdef _MSC_VER +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#else +#ifdef BIT_VERSION #include int fseeko(FILE *stream, off_t offset, int whence) { @@ -149,6 +151,14 @@ int fseeko(FILE *stream, off_t offset, int whence) return (FilePos == -1L) ? -1 : 0; } #define ftello(fm) (off_t) _ftelli64(fm) +#else +int fseeko(FILE *stream, off_t offset, int whence) +{ + off_t FilePos = fseek(stream, offset, whence); + return (FilePos == -1L) ? -1 : 0; +} +#define ftello(fm) (off_t) ftell(fm) +#endif #endif void LogStdioFileState(FILE *f) @@ -168,9 +178,16 @@ void LogStdioFileState(FILE *f) Log("fpos %ld (retval %d)\n", (long) fpos, fgetpos_retval); // Log("eof %d\n", _eof(fd)); #endif -#ifdef _MSC_VER +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#else +#ifdef BIT_VERSION __int64 pos64 = _ftelli64(f); Log("_ftelli64 %lld\n", pos64); +#else + __int32 pos32 = ftell(f); + Log("ftell %lld\n", pos32); + +#endif #endif } @@ -596,10 +613,11 @@ void Die(const char *Format, ...) fprintf(stderr, "\n---Fatal error---\n%s\n", szStr); Log("\n---Fatal error---\n%s\n", szStr); -#ifdef _MSC_VER - if (IsDebuggerPresent()) - __debugbreak(); - _CrtSetDbgFlag(0); +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#else + //if (IsDebuggerPresent()) + // __debugbreak(); + //_CrtSetDbgFlag(0); #endif exit(1); @@ -624,20 +642,7 @@ void Warning(const char *Format, ...) } } -#ifdef _MSC_VER -double GetMemUseBytes() - { - HANDLE hProc = GetCurrentProcess(); - PROCESS_MEMORY_COUNTERS PMC; - BOOL bOk = GetProcessMemoryInfo(hProc, &PMC, sizeof(PMC)); - if (!bOk) - return 1000000; - double Bytes = (double) PMC.WorkingSetSize; - if (Bytes > g_PeakMemUseBytes) - g_PeakMemUseBytes = Bytes; - return Bytes; - } -#elif linux || __linux__ +#if defined linux || __linux__ double GetMemUseBytes() { static char statm[64]; @@ -668,7 +673,7 @@ double GetMemUseBytes() g_PeakMemUseBytes = Bytes; return Bytes; } -#elif defined(__MACH__) +#elif defined(__APPLE__) || (__MACH__) #include #include #include @@ -709,9 +714,9 @@ double GetMemUseBytes() } #else double GetMemUseBytes() - { +{ return 0; - } +} #endif double GetPeakMemUseBytes() @@ -1201,7 +1206,8 @@ static void AddOpt(const OptInfo &Opt) g_Opts.insert(Opt); } -#ifdef _MSC_VER +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#else #pragma warning(disable: 4505) // unreferenced local function #endif diff --git a/myutils.h b/myutils.h index 6122054..6374c8b 100644 --- a/myutils.h +++ b/myutils.h @@ -16,30 +16,32 @@ #include #include -#ifndef _MSC_VER +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#else #include #endif using namespace std; -#ifdef _MSC_VER -#include +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#else +//#include #pragma warning(disable: 4996) // deprecated functions #define _CRT_SECURE_NO_DEPRECATE 1 #endif -#if defined(_DEBUG) && !defined(DEBUG) +//#if defined(_DEBUG) && !defined(DEBUG) #define DEBUG 1 -#endif +//#endif -#if defined(DEBUG) && !defined(_DEBUG) +//#if defined(DEBUG) && !defined(_DEBUG) #define _DEBUG 1 -#endif +//#endif -#ifndef NDEBUG +//#ifndef NDEBUG #define DEBUG 1 #define _DEBUG 1 -#endif +//#endif typedef unsigned char byte; typedef unsigned short uint16; @@ -49,10 +51,10 @@ typedef double float32; typedef signed char int8; typedef unsigned char uint8; -#ifdef _MSC_VER +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) -typedef __int64 int64; -typedef unsigned __int64 uint64; +typedef long long int64; +typedef unsigned long long uint64; #define INT64_PRINTF "lld" #define UINT64_PRINTF "llu" @@ -66,6 +68,7 @@ typedef unsigned __int64 uint64; #define SIZE_T_PRINTFX "x" #define OFF64_T_PRINTFX "llx" + #elif defined(__x86_64__) typedef long int64; @@ -85,8 +88,8 @@ typedef unsigned long uint64; #else -typedef long long int64; -typedef unsigned long long uint64; +typedef __int64 int64; +typedef unsigned __int64 uint64; #define INT64_PRINTF "lld" #define UINT64_PRINTF "llu" @@ -99,6 +102,7 @@ typedef unsigned long long uint64; #define SIZE_T_PRINTFX "x" #define OFF64_T_PRINTFX "llx" + #endif #define d64 INT64_PRINTF @@ -160,7 +164,8 @@ void myfree(void *p); bool myisatty(int fd); -#ifdef _MSC_VER +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#else #define off_t __int64 #endif diff --git a/seqerrorcommand.cpp b/seqerrorcommand.cpp index 2774922..5efb1ce 100644 --- a/seqerrorcommand.cpp +++ b/seqerrorcommand.cpp @@ -347,6 +347,7 @@ int SeqErrorCommand::createProcesses(string filename, string qFileName, string r processIDS.clear(); map >::iterator it; int num = 0; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) //loop through and create all the processes you want while (process != processors) { @@ -569,7 +570,7 @@ int SeqErrorCommand::createProcesses(string filename, string qFileName, string r in.close(); remove(tempFile.c_str()); } - +#endif return num; } catch(exception& e) { diff --git a/systemcommand.cpp b/systemcommand.cpp index e5aeb3e..5ace75e 100644 --- a/systemcommand.cpp +++ b/systemcommand.cpp @@ -91,20 +91,23 @@ int SystemCommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } - //system(command.c_str()); - FILE *lsofFile_p = popen(command.c_str(), "r"); + command += " > ./commandScreen.output 2>&1"; + system(command.c_str()); - if (!lsofFile_p) { return 0; } + ifstream in; + string filename = "./commandScreen.output"; + m->openInputFile(filename, in); - char buffer[1024]; - while ( fgets(buffer, 1024, lsofFile_p) != NULL ) { - string temp = buffer; - m->mothurOut(temp); + string output = ""; + while(char c = in.get()){ + if(in.eof()) { break; } + else { output += c; } } - m->mothurOutEndLine(); - - pclose(lsofFile_p); + in.close(); + m->mothurOut(output); m->mothurOutEndLine(); + remove(filename.c_str()); + return 0; } diff --git a/timing.h b/timing.h index 10bd1f7..4db7847 100644 --- a/timing.h +++ b/timing.h @@ -59,30 +59,29 @@ const unsigned AllocerCount = #undef A ; -#ifdef _MSC_VER +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +typedef uint64_t TICKS; +__inline__ uint64_t GetClockTicks() +{ + uint32_t lo, hi; + /* We cannot use "=A", since this would use %rax on x86_64 */ + __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); + return (uint64_t)hi << 32 | lo; +} + +#else // ifdef _MSC_VER typedef unsigned __int64 TICKS; #pragma warning(disable:4035) inline TICKS GetClockTicks() - { +{ _asm - { + { _emit 0x0f _emit 0x31 - } - } - -#else // ifdef _MSC_VER - -typedef uint64_t TICKS; -__inline__ uint64_t GetClockTicks() - { - uint32_t lo, hi; - /* We cannot use "=A", since this would use %rax on x86_64 */ - __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); - return (uint64_t)hi << 32 | lo; } +} #endif // ifdef _MSC_VER