--- /dev/null
+//#if UCHIMES\r
+\r
+#include "myutils.h"\r
+#include "seqdb.h"\r
+#include "seq.h"\r
+#include "alpha.h"\r
+\r
+// Defined in another translation unit. Presumably fills Order with the\r
+// indexes of Values arranged by decreasing value -- TODO confirm against its\r
+// definition.\r
+void SortDescending(const vector<float> &Values, vector<unsigned> &Order);\r
+\r
+// Flag table indexed by word value (see GetWord). It is allocated on the\r
+// first call to SetQuery() and reused afterwards; a non-zero entry means the\r
+// most recently set query contains that word.\r
+static byte *g_QueryHasWord;\r
+// Number of entries in g_QueryHasWord, computed from opt_w when the table is\r
+// allocated.\r
+static unsigned g_WordCount;\r
+\r
+// Packs the opt_w bytes starting at Seq into one integer. Each byte is\r
+// translated through g_CharToLetterNucleo and appended as the next base-4\r
+// digit, first byte most significant.\r
+// The caller must guarantee that at least opt_w bytes are readable at Seq.\r
+// NOTE(review): assumes the table yields codes in 0..3 -- confirm; a larger\r
+// code would carry into the neighbouring digits.\r
+unsigned GetWord(const byte *Seq)\r
+	{\r
+	unsigned Packed = 0;\r
+	for (unsigned Pos = 0; Pos < opt_w; ++Pos)\r
+		{\r
+		const unsigned Code = g_CharToLetterNucleo[Seq[Pos]];\r
+		Packed = (Packed << 2) + Code;\r
+		}\r
+	return Packed;\r
+	}\r
+\r
+// Rebuilds the query word-presence table from Query.\r
+//\r
+// On the first call the table g_QueryHasWord is allocated with g_WordCount\r
+// one-byte flags (4^opt_w for opt_w >= 1). It is reused on later calls, so\r
+// its size reflects the value opt_w had at that first call. Every call clears\r
+// all flags and then sets the flag of each opt_w-letter word in Query.Seq.\r
+//\r
+// Queries with Query.L <= opt_w leave the table empty.\r
+// NOTE(review): a query of exactly opt_w letters contains one complete word\r
+// but is skipped by that test -- confirm this is intended.\r
+static void SetQuery(const SeqData &Query)\r
+	{\r
+	if (g_QueryHasWord == 0)\r
+		{\r
+		g_WordCount = 4;\r
+		for (unsigned i = 1; i < opt_w; ++i)\r
+			g_WordCount *= 4;\r
+\r
+		g_QueryHasWord = myalloc(byte, g_WordCount);\r
+		}\r
+\r
+	memset(g_QueryHasWord, 0, g_WordCount);\r
+\r
+	if (Query.L <= opt_w)\r
+		return;\r
+\r
+	// Number of start positions that have a full opt_w-letter window.\r
+	const unsigned L = Query.L - opt_w + 1;\r
+	const byte *Seq = Query.Seq;\r
+	for (unsigned i = 0; i < L; ++i)\r
+		{\r
+		const unsigned Word = GetWord(Seq++);\r
+		// GetWord() does not range-check letter codes. If the lookup table\r
+		// ever yields a code above 3 (not verifiable from this file) the word\r
+		// can be >= g_WordCount, so skip it rather than write past the end of\r
+		// the table.\r
+		if (Word < g_WordCount)\r
+			g_QueryHasWord[Word] = 1;\r
+		}\r
+	}\r
+\r
+// Counts the word positions in Target whose opt_w-letter word is flagged in\r
+// g_QueryHasWord, i.e. occurs in the query most recently passed to\r
+// SetQuery().\r
+//\r
+// Despite "Unique" in the name, a word that occurs several times in Target is\r
+// counted once per occurrence; no de-duplication is done here.\r
+//\r
+// Returns 0 when Target.L <= opt_w.\r
+// NOTE(review): a target of exactly opt_w letters contains one complete word\r
+// but is skipped by that test -- confirm this is intended.\r
+static unsigned GetUniqueWordsInCommon(const SeqData &Target)\r
+	{\r
+	if (Target.L <= opt_w)\r
+		return 0;\r
+\r
+	unsigned Count = 0;\r
+	// Number of start positions that have a full opt_w-letter window.\r
+	const unsigned L = Target.L - opt_w + 1;\r
+	const byte *Seq = Target.Seq;\r
+	for (unsigned i = 0; i < L; ++i)\r
+		{\r
+		const unsigned Word = GetWord(Seq++);\r
+		// GetWord() does not range-check letter codes, so test the index\r
+		// before reading the table. While the table is unallocated\r
+		// g_WordCount is 0 and this test also prevents a null dereference.\r
+		if (Word < g_WordCount && g_QueryHasWord[Word])\r
+			++Count;\r
+		}\r
+	return Count;\r
+	}\r
+\r
+// Scores every sequence of DB against Query by shared opt_w-letter words.\r
+//\r
+// WordCounts and Order are cleared first. WordCounts[i] then receives, as a\r
+// float, the value GetUniqueWordsInCommon() returns for sequence i of DB\r
+// against the word table built from Query. Order is filled by\r
+// SortDescending(WordCounts, Order) -- presumably the sequence indexes by\r
+// decreasing count; confirm against that function's definition.\r
+void USort(const SeqData &Query, const SeqDB &DB, vector<float> &WordCounts,\r
+  vector<unsigned> &Order)\r
+	{\r
+	WordCounts.clear();\r
+	Order.clear();\r
+\r
+	SetQuery(Query);\r
+\r
+	const unsigned SeqCount = DB.GetSeqCount();\r
+	// The final size is known up front, so allocate once instead of letting\r
+	// push_back grow the vector repeatedly.\r
+	WordCounts.reserve(SeqCount);\r
+	for (unsigned SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)\r
+		{\r
+		SeqData Target;\r
+		DB.GetSeqData(SeqIndex, Target);\r
+		const float WordCount = (float) GetUniqueWordsInCommon(Target);\r
+		WordCounts.push_back(WordCount);\r
+		}\r
+	SortDescending(WordCounts, Order);\r
+	}\r
+\r
+//#endif // UCHIMES\r