1 //uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain.
\r
// Function-pointer type for a callback that takes a const SeqData
// reference and returns nothing.
// NOTE(review): the name suggests it is invoked when handling of a
// sequence begins -- confirm at the call sites, which are not visible here.
9 typedef void (*ON_START_XSEQ)(const SeqData &SD);
\r
// Function-pointer type for a callback that takes a const SeqData
// reference and returns nothing (same signature as ON_START_XSEQ).
// NOTE(review): the name suggests it is invoked when handling of a
// sequence finishes -- confirm at the call sites, which are not visible here.
10 typedef void (*ON_END_XSEQ)(const SeqData &SD);
\r
12 // Sequential reader for FASTA file format.
\r
13 // Serves sequences in file order to save memory.
\r
14 // Caches fairly large chunks of the file to balance memory use against speed.
\r
24 // Position to start next read
\r
30 // Bytes allocated to m_Buffer
\r
// Capacity of m_Buffer; compare m_BufferBytes, which is the part of that
// capacity holding file data.
31 unsigned m_BufferSize;
\r
33 // Current position in buffer, normally points to '>'
\r
// NOTE(review): '>' is presumably the character that begins a FASTA
// record, so this offset would normally sit at the start of a record --
// confirm against the reader implementation.
34 unsigned m_BufferOffset;
\r
36 // File data in buffer <= m_BufferSize
\r
// Amount of file data held in the buffer, bounded above by m_BufferSize.
37 unsigned m_BufferBytes;
\r
40 // Points into m_Buffer, not a separate buffer.
\r
43 // Current sequence length
\r
// Exposed read-only through GetSeqLength().
44 unsigned m_SeqLength;
\r
46 // Current seq index
\r
// Exposed read-only through GetSeqIndex().
// NOTE(review): whether indexes start at 0 or 1 is not visible here.
47 unsigned m_SeqIndex;
\r
// NOTE(review): presumably the length of the shortest sequence read so
// far -- where it is updated is not visible here; confirm.
49 unsigned m_ShortestLength;
\r
// NOTE(review): presumably the length of the longest sequence read so
// far -- where it is updated is not visible here; confirm.
50 unsigned m_LongestLength;
\r
// NOTE(review): presumably a tally of sequences rejected for being too
// short -- the threshold and the update site are not visible here; confirm.
51 unsigned m_TooShortCount;
\r
// NOTE(review): presumably a tally of sequences rejected for being too
// long -- the threshold and the update site are not visible here; confirm.
52 unsigned m_TooLongCount;
\r
// NOTE(review): meaning of "TooPoly" is not established by the visible
// code (possibly a count of sequences rejected by some "poly" filter) --
// confirm against the implementation.
53 unsigned m_TooPolyCount;
\r
// Declared here, defined elsewhere. Takes the file name by const reference
// and returns nothing.
// NOTE(review): presumably opens the named FASTA file for subsequent
// GetNextSeq()/GetNextSD() calls; how failure is reported is not visible
// here -- confirm.
64 void Open(const string &FileName);
\r
// Returns m_IsNucleo after checking m_IsNucleoSet with asserta().
// NOTE(review): asserta is defined elsewhere; presumably it aborts when
// the flag has not been set yet -- confirm. The ';' after the closing
// brace is redundant.
67 bool GetIsNucleo() const { asserta(m_IsNucleoSet); return m_IsNucleo; };
\r
69 // Get next sequence.
\r
70 // Returns a null pointer (zero) on end-of-file.
\r
// Returns a pointer to the sequence bytes, or a null pointer at
// end-of-file. After a successful call, GetSeqLength(), GetLabel() and
// GetSeqIndex() describe the sequence just returned.
// NOTE(review): the pointer presumably refers into the reader's internal
// buffer and may be invalidated by the next call -- confirm.
71 const byte *GetNextSeq();
\r
73 // Get next sequence as SeqData object, return false on end-of-file.
\r
// SD is passed by non-const reference and receives the next sequence.
// Returns false at end-of-file.
// NOTE(review): which SeqData fields are filled in, and whether SD is
// left untouched on end-of-file, is not visible here -- confirm.
74 bool GetNextSD(SeqData &SD);
\r
76 // Length of most recent sequence returned by GetNextSeq().
\r
// Accessor: returns m_SeqLength unchanged.
77 unsigned GetSeqLength() const { return m_SeqLength; }
\r
79 // Label of most recent sequence returned by GetNextSeq().
\r
// Accessor: returns m_Label (its declaration is outside this excerpt).
// NOTE(review): the returned pointer is owned by the reader; how long it
// stays valid after the next read is not visible here -- confirm.
80 const char *GetLabel() const { return m_Label; }
\r
82 // Index of most recent sequence returned by GetNextSeq().
\r
// Accessor: returns m_SeqIndex unchanged.
83 unsigned GetSeqIndex() const { return m_SeqIndex; }
\r
// Declared here, defined elsewhere.
// NOTE(review): the name suggests progress through the file as a
// percentage multiplied by 10 (i.e. tenths of a percent) -- confirm
// against the definition.
85 unsigned GetPctDoneX10() const;
\r
// Declared here, defined elsewhere.
// NOTE(review): the name suggests progress through the file as a
// percentage; the exact range and basis (bytes vs. sequences) are not
// visible here -- confirm against the definition.
86 double GetPctDone() const;
\r
// Declared here, defined elsewhere; same signature as GetNextSeq().
// NOTE(review): presumably the lower-level worker that GetNextSeq() wraps
// -- confirm against the definition.
92 const byte *GetNextSeqLo();
\r