1 //uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain.
\r
// Integer aliases used throughout the word-index declarations below.
// uint16/uint32 are declared elsewhere; their widths are assumed from the
// names (16/32-bit unsigned) -- TODO confirm against the project's typedefs.

// Value type of a single word: returned by SeqToWord and taken by WordToStr.
9 typedef uint32 word_t;
\r
// NOTE(review): not referenced in the visible declarations; presumably a
// per-word tally -- confirm against the definitions.
10 typedef uint16 wordcount_t;
\r
// Element type of the m_Capacities array.
11 typedef uint32 arrsize_t;
\r
// Element type of the Counts output array of WordsToCounts.
12 typedef uint16 seqcountperword_t;
\r
// Element type stored in the m_SeedIndexes arrays; presumably a sequence
// index -- confirm against the definitions.
13 typedef uint32 seqindex_t;
\r
// NOTE(review): not referenced in the visible declarations; presumably a
// count of words shared between sequences -- confirm.
14 typedef uint16 commonwordcount_t;
\r
// Four consecutive magic constants (0x312DE41 .. 0x312DE44).
// NOTE(review): the names suggest they are markers for the index file format
// described by WindexFileHdr, but neither the header layout nor the
// read/write code is visible here -- confirm where each value is
// written/checked.
16 const uint32 WindexFileHdr_Magic1 = 0x312DE41;
\r
17 const uint32 WindexFileHdr_Magic2 = 0x312DE42;
\r
18 const uint32 WindexFileHdr_Magic3 = 0x312DE43;
\r
19 const uint32 WindexFileHdr_Magic4 = 0x312DE44;
\r
21 struct WindexFileHdr
\r
// Data members of the word index. The enclosing class header is not visible
// in this view, and the meanings below are inferred from names only -- treat
// each as an assumption to verify against the implementation.

// Presumably the number of letters per word -- TODO confirm.
34 unsigned m_WordLength;
\r
// Presumably the alphabet size (number of distinct letters) -- TODO confirm.
35 unsigned m_AlphaSize;
\r
// Presumably the number of distinct words -- TODO confirm.
36 unsigned m_WordCount;
\r
// Presumably the growth step applied when a capacity is enlarged -- TODO confirm.
38 unsigned m_CapacityInc;
\r
// Pointer to an array of arrsize_t; length and ownership are not visible here.
39 arrsize_t *m_Capacities;
\r
// Pointer to an array of float scores; presumably filled by InitWordScores -- confirm.
41 float *m_WordScores;
\r
// Pointer to an array of seqindex_t arrays; presumably one array per word -- confirm.
42 seqindex_t **m_SeedIndexes;
\r
// Pointer to an array of byte-sized counters; indexing is not visible here.
43 byte *m_UniqueCounts;
\r
// 256-entry lookup table; presumably indexed by character (byte) value and
// yielding a letter code -- TODO confirm.
44 unsigned m_CharToLetter[256];
\r
// Persistence, construction and initialisation entry points. Only the
// declarations are visible; behaviour notes are assumptions drawn from the
// names and signatures and should be checked against the definitions.

// Const member taking a file name; presumably writes the index to that file.
48 void ToFile(const string &FileName) const;
\r
// Non-const counterpart of ToFile; presumably loads the index from a file.
49 void FromFile(const string &FileName);
\r
// Takes a mutable SFasta reference; presumably builds the index from it.
50 void FromSFasta(SFasta &SF);
\r
// Takes a const SeqDB reference; presumably builds the index from it.
51 void FromSeqDB(const SeqDB &DB);
\r
// ctor defaults to false; presumably true only when called from a
// constructor, where members are not yet initialised -- TODO confirm.
52 void Clear(bool ctor = false);
\r
// Takes a sequence index and an array of N words; presumably records those
// words for that sequence -- confirm.
53 void AddWords(unsigned SeqIndex, const word_t *Words, unsigned N);
\r
// Initialisation variants: by word length, by table size, and a "Red"
// variant taking only a word length (meaning of "Red" is not visible here).
54 void Init(bool Nucleo, unsigned WordLength);
\r
55 void Init2(bool Nucleo, unsigned TableSize);
\r
56 void InitRed(unsigned WordLength);
\r
// Takes a read-only 2-D float matrix (array of row pointers); presumably a
// substitution matrix used to fill m_WordScores -- confirm.
57 void InitWordScores(const float *const *SubstMx);
\r
// Logging and word-conversion helpers; all are const members. Only the
// declarations are visible; behaviour notes are assumptions to verify.

// Returns an unsigned value; presumably logs and returns memory usage
// (units not visible here).
60 unsigned LogMemSize() const;
\r
// TopWords defaults to 10; presumably limits how many words are reported.
61 void LogWordStats(unsigned TopWords = 10) const;
\r
// Returns a C string for a word. NOTE(review): lifetime/ownership of the
// returned buffer is not visible here -- check before storing the pointer.
62 const char *WordToStr(word_t Word) const;
\r
// Converts the bytes at Seq into a single word; no length parameter, so
// presumably reads m_WordLength bytes -- TODO confirm.
63 word_t SeqToWord(const byte *Seq) const;
\r
// Writes words derived from a sequence of length L into caller-provided
// Words; the unsigned return is presumably the number written. Required
// capacity of Words is not visible here.
64 unsigned SeqToWords(const byte *Seq, unsigned L, word_t *Words) const;
\r
// As SeqToWords, with an additional Step parameter (presumably the stride
// between word start positions -- confirm).
65 unsigned SeqToWordsStep(unsigned Step, const byte *Seq, unsigned L, word_t *Words) const;
\r
// Takes N input words and two caller-provided output arrays (UniqueWords,
// Counts); the unsigned return is presumably the number of unique words.
66 unsigned WordsToCounts(const word_t *Words, unsigned N,
\r
67 word_t *UniqueWords, seqcountperword_t *Counts) const;
\r
// As WordsToCounts but without the Counts output.
68 unsigned GetUniqueWords(const word_t *Words, unsigned N,
\r
69 word_t *UniqueWords) const;
\r
// Presumably logs a histogram of sizes; which sizes is not visible here.
70 void LogSizeHisto() const;
\r