2 //uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain.
\r
// Forward declaration of a routine defined outside this listing. It reads a
// vector of float values and writes into a vector of unsigned values.
// NOTE(review): the name suggests Order receives indexes into Values sorted
// by descending value -- confirm against the definition.
8 void SortDescending(const vector<float> &Values, vector<unsigned> &Order);
\r
// Byte table indexed by word value (see the g_QueryHasWord[Word] accesses in
// SetQuery and GetUniqueWordsInCommon). Starts out null and is assigned from
// myalloc(byte, g_WordCount) in SetQuery.
10 static byte *g_QueryHasWord;
\r
// Size used when allocating and when zero-filling g_QueryHasWord.
11 static unsigned g_WordCount;
\r
// Packs the opt_w characters starting at Seq into one unsigned value.
// Each character is translated through g_CharToLetterNucleo and folded in
// with Word = Word*4 + Letter, so the first character ends up in the most
// significant position.
// Reads opt_w bytes from Seq; no bounds check is visible here, so the caller
// has to guarantee that many bytes are readable.
// NOTE(review): this listing omits source lines (the embedded numbers jump
// from 13 to 16 and from 20 to 25). The declaration and initial value of Word
// and the return statement are not visible -- presumably Word starts at 0 and
// is returned; confirm against the full file.
13 unsigned GetWord(const byte *Seq)
\r
// Read cursor, advanced once per letter by the loop below.
16 const byte *Front = Seq;
\r
17 for (unsigned i = 0; i < opt_w; ++i)
\r
// Translate the current character to its letter code and step the cursor.
// NOTE(review): the *4 packing assumes letter codes are in 0..3 -- TODO confirm
// what g_CharToLetterNucleo yields for non-ACGT characters.
19 unsigned Letter = g_CharToLetterNucleo[*Front++];
\r
// Shift the accumulated word by one base-4 position and append the letter.
20 Word = (Word*4) + Letter;
\r
// Rebuilds the g_QueryHasWord table for a new query: the table is zeroed and
// then g_QueryHasWord[Word] is set to 1 for the word found at each start
// position of Query.Seq.
// NOTE(review): this listing omits source lines (embedded numbers 26, 28-29,
// 31-32, 34-35, 37, 39-40, 44 and 47-49 are absent), so braces and some
// statements are not visible; comments below hedge where that matters.
25 static void SetQuery(const SeqData &Query)
\r
// A null table pointer appears to trigger the one-time setup that follows.
27 if (g_QueryHasWord == 0)
\r
// Runs opt_w-1 times; the loop body is not visible in this listing.
// NOTE(review): presumably this computes g_WordCount as 4^opt_w (one slot per
// possible word produced by GetWord) -- confirm against the full file.
30 for (unsigned i = 1; i < opt_w; ++i)
\r
// myalloc is defined elsewhere; it is invoked with the element type and count.
33 g_QueryHasWord = myalloc(byte, g_WordCount);
\r
// Clear every flag (g_WordCount bytes) before marking the new query's words.
36 memset(g_QueryHasWord, 0, g_WordCount);
\r
// Guard for queries no longer than one word; the guarded statement is not
// visible here.
// NOTE(review): presumably an early return, which would also keep the unsigned
// subtraction below from wrapping. Note that <= also catches Query.L == opt_w,
// a query holding exactly one full word -- confirm that is intended.
38 if (Query.L <= opt_w)
\r
// Number of start positions at which a full opt_w-letter word fits.
41 const unsigned L = Query.L - opt_w + 1;
\r
42 const byte *Seq = Query.Seq;
\r
43 for (unsigned i = 0; i < L; ++i)
\r
// Encode the word at the current position, then advance one letter.
45 unsigned Word = GetWord(Seq++);
\r
// Mark this word as present in the query.
46 g_QueryHasWord[Word] = 1;
\r
// Scans every opt_w-letter word of Target.Seq and tests it against the
// g_QueryHasWord table, which SetQuery fills for the current query.
// NOTE(review): this listing omits source lines (embedded numbers 51, 53-55,
// 59 and 62-66 are absent). The counter, the statement executed on a hit and
// the return are not visible -- presumably a count of hits is returned.
// Whether a word repeated in Target is counted once or several times cannot
// be determined from the visible lines; confirm against the full file.
50 static unsigned GetUniqueWordsInCommon(const SeqData &Target)
\r
// Guard for targets no longer than one word; the guarded statement is not
// visible here (presumably an early return -- confirm).
52 if (Target.L <= opt_w)
\r
// Number of start positions at which a full opt_w-letter word fits.
56 const unsigned L = Target.L - opt_w + 1;
\r
57 const byte *Seq = Target.Seq;
\r
58 for (unsigned i = 0; i < L; ++i)
\r
// Encode the word at the current position, then advance one letter.
60 unsigned Word = GetWord(Seq++);
\r
// Non-zero means SetQuery marked this word for the current query.
61 if (g_QueryHasWord[Word])
\r
// For each sequence in DB (indexes 0 .. GetSeqCount()-1), fetches the sequence
// data, obtains its score from GetUniqueWordsInCommon, and appends that score
// as a float to WordCounts. Afterwards SortDescending(WordCounts, Order) is
// called to fill Order.
// NOTE(review): this listing omits source lines (embedded numbers 69-74, 77-78
// and 82 are absent). The declaration of Target, any clearing of WordCounts or
// Order, and the point where the query table is prepared from Query are not
// visible -- confirm against the full file before relying on this summary.
67 void USort(const SeqData &Query, const SeqDB &DB, vector<float> &WordCounts,
\r
68 vector<unsigned> &Order)
\r
75 const unsigned SeqCount = DB.GetSeqCount();
\r
76 for (unsigned SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)
\r
// Load the SeqIndex-th database sequence into Target.
79 DB.GetSeqData(SeqIndex, Target);
\r
// The unsigned result is converted to float to match WordCounts' element type.
80 float WordCount = (float) GetUniqueWordsInCommon(Target);
\r
// WordCounts gains one entry per database sequence, in index order.
81 WordCounts.push_back(WordCount);
\r
// Runs after the loop over all sequences; Order is produced from the scores.
83 SortDescending(WordCounts, Order);
\r