]> git.donarmstrong.com Git - mothur.git/blobdiff - uchime_src/mx.h
added uchime_src folder. added biom parameter to make.shared. added biom as a current...
[mothur.git] / uchime_src / mx.h
diff --git a/uchime_src/mx.h b/uchime_src/mx.h
new file mode 100644 (file)
index 0000000..1438900
--- /dev/null
@@ -0,0 +1,454 @@
+#ifndef mx_h\r
+#define mx_h\r
+\r
+#include <list>\r
+#include <limits.h>\r
+#include <math.h>\r
+#include "timing.h"\r
+#include "myutils.h"\r
+\r
+const int OPT_LOG = 0x01;\r
+const int OPT_EXP = 0x02;\r
+const int OPT_ZERO_BASED = 0x04;\r
+const float MINUS_INFINITY = -9e9f;\r
+const float UNINIT = -8e8f;\r
+\r
+struct SeqData;\r
+\r
+template<class T> const char *TypeToStr(T t)\r
+       {\r
+       Die("Unspecialised TypeToStr() called");\r
+       ureturn(0);\r
+       }\r
+\r
+template<> inline const char *TypeToStr<unsigned short>(unsigned short f)\r
+       {\r
+       static char s[16];\r
+\r
+       sprintf(s, "%12u", f);\r
+       return s;\r
+       }\r
+\r
+template<> inline const char *TypeToStr<short>(short f)\r
+       {\r
+       static char s[16];\r
+\r
+       sprintf(s, "%12d", f);\r
+       return s;\r
+       }\r
+\r
+template<> inline const char *TypeToStr<int>(int f)\r
+       {\r
+       static char s[16];\r
+\r
+       sprintf(s, "%5d", f);\r
+       return s;\r
+       }\r
+\r
+template<> inline const char *TypeToStr<float>(float f)\r
+       {\r
+       static char s[16];\r
+\r
+       if (f == UNINIT)\r
+               sprintf(s, "%12.12s", "?");\r
+       else if (f < MINUS_INFINITY/2)\r
+               sprintf(s, "%12.12s", "*");\r
+       else if (f == 0.0f)\r
+               sprintf(s, "%12.12s", ".");\r
+       else if (f >= -1e5 && f <= 1e5)\r
+               sprintf(s, "%12.5f", f);\r
+       else\r
+               sprintf(s, "%12.4g", f);\r
+       return s;\r
+       }\r
+\r
+template<> inline const char *TypeToStr<double>(double f)\r
+       {\r
+       static char s[16];\r
+\r
+       if (f < -1e9)\r
+               sprintf(s, "%12.12s", "*");\r
+       else if (f == 0.0f)\r
+               sprintf(s, "%12.12s", ".");\r
+       else if (f >= -1e-5 && f <= 1e5)\r
+               sprintf(s, "%12.5f", f);\r
+       else\r
+               sprintf(s, "%12.4g", f);\r
+       return s;\r
+       }\r
+\r
+static inline const char *FloatToStr(float f, string &s)\r
+       {\r
+       s = TypeToStr<float>(f);\r
+       return s.c_str();\r
+       }\r
+\r
+template<> inline const char *TypeToStr<char>(char c)\r
+       {\r
+       static char s[2];\r
+       s[0] = c;\r
+       return s;\r
+       }\r
+\r
+template<> inline const char *TypeToStr<byte>(byte c)\r
+       {\r
+       static char s[2];\r
+       s[0] = c;\r
+       return s;\r
+       }\r
+\r
+template<> inline const char *TypeToStr<bool>(bool tof)\r
+       {\r
+       static char s[2];\r
+       s[0] = tof ? 'T' : 'F';\r
+       return s;\r
+       }\r
+\r
+struct SeqDB;\r
+\r
+struct MxBase\r
+       {\r
+private:\r
+       MxBase(const MxBase &rhs);\r
+       MxBase &operator=(const MxBase &rhs);\r
+\r
+public:\r
+       char m_Name[32];\r
+       char m_Alpha[32];\r
+       char m_Alpha2[32];\r
+       unsigned m_RowCount;\r
+       unsigned m_ColCount;\r
+       unsigned m_AllocatedRowCount;\r
+       unsigned m_AllocatedColCount;\r
+       const SeqDB *m_SeqDB;\r
+       unsigned m_IdA;\r
+       unsigned m_IdB;\r
+       const SeqData *m_SA;\r
+       const SeqData *m_SB;\r
+\r
+       static list<MxBase *> *m_Matrices;\r
+       //static MxBase *Get(const string &Name);\r
+       //static float **Getf(const string &Name);\r
+       //static double **Getd(const string &Name);\r
+       //static char **Getc(const string &Name);\r
+\r
+       static unsigned m_AllocCount;\r
+       static unsigned m_ZeroAllocCount;\r
+       static unsigned m_GrowAllocCount;\r
+       static double m_TotalBytes;\r
+       static double m_MaxBytes;\r
+\r
+       static void OnCtor(MxBase *Mx);\r
+       static void OnDtor(MxBase *Mx);\r
+\r
+       MxBase()\r
+               {\r
+               m_AllocatedRowCount = 0;\r
+               m_AllocatedColCount = 0;\r
+               m_RowCount = 0;\r
+               m_ColCount = 0;\r
+               m_IdA = UINT_MAX;\r
+               m_IdB = UINT_MAX;\r
+               m_SeqDB = 0;\r
+               OnCtor(this);\r
+               }\r
+       virtual ~MxBase()\r
+               {\r
+               OnDtor(this);\r
+               }\r
+\r
+       virtual unsigned GetTypeSize() const = 0;\r
+       virtual unsigned GetBytes() const = 0;\r
+\r
+       void Clear()\r
+               {\r
+               FreeData();\r
+               m_AllocatedRowCount = 0;\r
+               m_AllocatedColCount = 0;\r
+               m_RowCount = 0;\r
+               m_ColCount = 0;\r
+               m_IdA = UINT_MAX;\r
+               m_IdB = UINT_MAX;\r
+               m_SA = 0;\r
+               m_SB = 0;\r
+               }\r
+\r
+       bool Empty() const\r
+               {\r
+               return m_RowCount == 0;\r
+               }\r
+\r
+       virtual void AllocData(unsigned RowCount, unsigned ColCount) = 0;\r
+       virtual void FreeData() = 0;\r
+       virtual const char *GetAsStr(unsigned i, unsigned j) const = 0;\r
+\r
+       void SetAlpha(const char *Alpha)\r
+               {\r
+               unsigned n = sizeof(m_Alpha);\r
+               strncpy(m_Alpha, Alpha, n);\r
+               m_Alpha[n] = 0;\r
+               }\r
+\r
+       void Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
+         const SeqDB *DB, unsigned IdA, unsigned IdB,\r
+         const SeqData *SA, const SeqData *SB);\r
+\r
+       void Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
+         const SeqDB *DB = 0, unsigned IdA = UINT_MAX, unsigned IdB = UINT_MAX);\r
+\r
+       void Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
+         const SeqData *SA, const SeqData *SB);\r
+\r
+       static void LogAll()\r
+               {\r
+               Log("\n");\r
+               if (m_Matrices == 0)\r
+                       {\r
+                       Log("MxBase::m_Matrices=0\n");\r
+                       return;\r
+                       }\r
+               Log("\n");\r
+               Log("AllRows  AllCols    Sz        MB  Name\n");\r
+               Log("-------  -------  ----  --------  ----\n");\r
+               double TotalMB = 0;\r
+               for (list<MxBase *>::const_iterator p = m_Matrices->begin();\r
+                 p != m_Matrices->end(); ++p)\r
+                       {\r
+                       const MxBase *Mx = *p;\r
+                       if (Mx == 0)\r
+                               continue;\r
+                       //if (Mx->m_RowCount != 0 || ShowEmpty)\r
+                       //      Mx->LogMe(WithData);\r
+                       unsigned ar = Mx->m_AllocatedRowCount;\r
+                       if (ar == 0)\r
+                               continue;\r
+                       unsigned ac = Mx->m_AllocatedColCount;\r
+                       unsigned sz = Mx->GetTypeSize();\r
+                       double MB = (double) ar*(double) ac*(double) sz/1e6;\r
+                       TotalMB += MB;\r
+                       Log("%7u  %7u  %4u  %8.2f  %s\n", ar, ac, sz, MB, Mx->m_Name);\r
+                       }\r
+               Log("                        --------\n");\r
+               Log("%7.7s  %7.7s  %4.4s  %8.2f\n", "", "", "", TotalMB);\r
+               }\r
+\r
+       void LogMe(bool WithData = true, int Opts = 0) const;\r
+       static void LogCounts();\r
+       };\r
+\r
+template<class T> struct Mx : public MxBase\r
+       {\r
+// Disable unimplemented stuff\r
+private:\r
+       Mx(Mx &rhs);\r
+       Mx &operator=(Mx &rhs);\r
+       // const Mx &operator=(const Mx &rhs) const;\r
+\r
+public:\r
+       T **m_Data;\r
+\r
+       Mx()\r
+               {\r
+               m_Data = 0;\r
+               }\r
+       \r
+       ~Mx()\r
+               {\r
+               FreeData();\r
+               }\r
+\r
+       virtual void AllocData(unsigned RowCount, unsigned ColCount)\r
+               {\r
+               if (opt_logmemgrows)\r
+                       Log("MxBase::AllocData(%u,%u) %s bytes, Name=%s\n",\r
+                         RowCount, ColCount, IntToStr(GetBytes()), m_Name);\r
+               // m_Data = myalloc<T *>(RowCount);\r
+               m_Data = MYALLOC(T *, RowCount, Mx);\r
+               for (unsigned i = 0; i < RowCount; ++i)\r
+                       // m_Data[i] = myalloc<T>(ColCount);\r
+                       m_Data[i] = MYALLOC(T, ColCount, Mx);\r
+               AddBytes("Mx_AllocData", RowCount*sizeof(T *) + RowCount*ColCount*sizeof(T));\r
+\r
+               m_AllocatedRowCount = RowCount;\r
+               m_AllocatedColCount = ColCount;\r
+               }\r
+\r
+       virtual void FreeData()\r
+               {\r
+               for (unsigned i = 0; i < m_AllocatedRowCount; ++i)\r
+                       MYFREE(m_Data[i], m_AllocatedColCount, Mx);\r
+               MYFREE(m_Data, m_AllocatedRowCount, Mx);\r
+               SubBytes("Mx_AllocData",\r
+                 m_AllocatedRowCount*sizeof(T *) + m_AllocatedRowCount*m_AllocatedColCount*sizeof(T));\r
+\r
+               m_Data = 0;\r
+               m_RowCount = 0;\r
+               m_ColCount = 0;\r
+               m_AllocatedRowCount = 0;\r
+               m_AllocatedColCount = 0;\r
+               }\r
+\r
+       T **GetData()\r
+               {\r
+               return (T **) m_Data;\r
+               }\r
+\r
+       T Get(unsigned i, unsigned j) const\r
+               {\r
+               assert(i < m_RowCount);\r
+               assert(j < m_ColCount);\r
+               return m_Data[i][j];\r
+               }\r
+\r
+       void Put(unsigned i, unsigned j, T x) const\r
+               {\r
+               assert(i < m_RowCount);\r
+               assert(j < m_ColCount);\r
+               m_Data[i][j] = x;\r
+               }\r
+\r
+       T GetOffDiagAvgs(vector<T> &Avgs) const\r
+               {\r
+               if (m_RowCount != m_ColCount)\r
+                       Die("GetOffDiagAvgs, not symmetrical");\r
+               Avgs.clear();\r
+               T Total = T(0);\r
+               for (unsigned i = 0; i < m_RowCount; ++i)\r
+                       {\r
+                       T Sum = T(0);\r
+                       for (unsigned j = 0; j < m_ColCount; ++j)\r
+                               {\r
+                               if (j == i)\r
+                                       continue;\r
+                               Sum += m_Data[i][j];\r
+                               }\r
+                       T Avg = Sum/(m_RowCount-1);\r
+                       Total += Avg;\r
+                       Avgs.push_back(Avg);\r
+                       }\r
+               return m_RowCount == 0 ? T(0) : Total/m_RowCount;\r
+               }\r
+\r
+       unsigned GetTypeSize() const\r
+               {\r
+               return sizeof(T);\r
+               }\r
+\r
+       virtual unsigned GetBytes() const\r
+               {\r
+               return m_AllocatedRowCount*m_AllocatedColCount*GetTypeSize() +\r
+                 m_AllocatedRowCount*sizeof(T *);\r
+               }\r
+\r
+       const char *GetAsStr(unsigned i, unsigned j) const\r
+               {\r
+               return TypeToStr<T>(Get(i, j));\r
+               }\r
+\r
+       const T *const *const GetData() const\r
+               {\r
+               return (const T *const *) m_Data;\r
+               }\r
+\r
+       void Copy(const Mx<T> &rhs)\r
+               {\r
+               Alloc("Copy", rhs.m_RowCount, rhs.m_ColCount, rhs.m_SeqDB, rhs.m_IdA, rhs.m_IdB);\r
+               const T * const *Data = rhs.GetData();\r
+               for (unsigned i = 0; i < m_RowCount; ++i)\r
+                       for (unsigned j = 0; j < m_ColCount; ++j)\r
+                               m_Data[i][j] = Data[i][j];\r
+               }\r
+\r
+       void Assign(T v)\r
+               {\r
+               for (unsigned i = 0; i < m_RowCount; ++i)\r
+                       for (unsigned j = 0; j < m_ColCount; ++j)\r
+                               m_Data[i][j] = v;\r
+               }\r
+\r
+       bool Eq(const Mx &rhs, bool Bwd = false) const\r
+               {\r
+               if (rhs.m_ColCount != m_ColCount)\r
+                       return false;\r
+               if (rhs.m_RowCount != m_RowCount)\r
+                       return false;\r
+               const T * const*d = rhs.GetData();\r
+               int i1 = Bwd ? m_RowCount : 0;\r
+               int j1 = Bwd ? m_ColCount : 0;\r
+               int i2 = Bwd ? -1 : m_RowCount;\r
+               int j2 = Bwd ? -1 : m_ColCount;\r
+               for (int i = i1; i != i2; Bwd ? --i : ++i)\r
+                       for (int j = j1; j != j2; Bwd ? --j : ++j)\r
+                               {\r
+                               float x = m_Data[i][j];\r
+                               float y = d[i][j];\r
+                               if (x < -1e10 && y < -1e10)\r
+                                       continue;\r
+                               if (!feq(x, y))\r
+                                       {\r
+                                       Warning("%s[%d][%d] = %g, %s = %g",\r
+                                         m_Name, i, j, x, rhs.m_Name, y);\r
+                                       return false;\r
+                                       }\r
+                               }\r
+               return true;\r
+               }\r
+\r
+       bool EqMask(const Mx &rhs, const Mx<bool> &Mask) const\r
+               {\r
+               if (rhs.m_ColCount != m_ColCount)\r
+                       return false;\r
+               if (rhs.m_RowCount != m_RowCount)\r
+                       return false;\r
+\r
+               if (Mask.m_ColCount != m_ColCount)\r
+                       return false;\r
+               if (Mask.m_RowCount != m_RowCount)\r
+                       return false;\r
+\r
+               const T * const*d = rhs.GetData();\r
+               bool Bwd = false;\r
+               int i1 = Bwd ? m_RowCount : 0;\r
+               int j1 = Bwd ? m_ColCount : 0;\r
+               int i2 = Bwd ? -1 : m_RowCount;\r
+               int j2 = Bwd ? -1 : m_ColCount;\r
+               for (int i = i1; i != i2; Bwd ? --i : ++i)\r
+                       for (int j = j1; j != j2; Bwd ? --j : ++j)\r
+                               {\r
+                               if (!Mask.m_Data[i][j])\r
+                                       continue;\r
+                               float x = m_Data[i][j];\r
+                               float y = d[i][j];\r
+                               if (x < -1e10 && y < -1e10)\r
+                                       continue;\r
+                               if (!feq(x, y))\r
+                                       {\r
+                                       Warning("%s[%d][%d] = %g, %s = %g",\r
+                                         m_Name, i, j, x, rhs.m_Name, y);\r
+                                       return false;\r
+                                       }\r
+                               }\r
+               return true;\r
+               }\r
+\r
+       void Init(T v)\r
+               {\r
+               for (unsigned i = 0; i < m_RowCount; ++i)\r
+                       for (unsigned j = 0; j < m_ColCount; ++j)\r
+                               m_Data[i][j] = v;\r
+               }\r
+       };\r
+\r
+void WriteMx(const string &Name, Mx<float> &Mxf);\r
+\r
+template<class T> void ReserveMx(Mx<T> &Mxf, unsigned N = UINT_MAX)\r
+       {\r
+       if (Mxf.m_AllocatedRowCount > 0)\r
+               return;\r
+       extern unsigned g_MaxInputSeqLength;\r
+       if (N == UINT_MAX)\r
+               N = g_MaxInputSeqLength+1;\r
+       Mxf.Alloc("(Reserved)", N, N);\r
+       }\r
+\r
+#endif // mx_h\r