]> git.donarmstrong.com Git - mothur.git/blobdiff - uchime_src/usort.cpp
added uchime_src folder. added biom parameter to make.shared. added biom as a current...
[mothur.git] / uchime_src / usort.cpp
diff --git a/uchime_src/usort.cpp b/uchime_src/usort.cpp
new file mode 100644 (file)
index 0000000..922dcb4
--- /dev/null
@@ -0,0 +1,86 @@
+#if    UCHIMES\r
+\r
+#include "myutils.h"\r
+#include "seqdb.h"\r
+#include "seq.h"\r
+#include "alpha.h"\r
+\r
+// Declared here, not defined in this file; USort() below passes it the\r
+// per-sequence word counts and the Order vector to fill.\r
+void SortDescending(const vector<float> &Values, vector<unsigned> &Order);\r
+\r
+// Table indexed by the packed word value returned by GetWord(). SetQuery()\r
+// allocates it on first use, zeroes it on every call, and stores 1 at the\r
+// index of each word found in the query.\r
+static byte *g_QueryHasWord;\r
+// Number of bytes in g_QueryHasWord; computed once, from opt_w, on the first\r
+// SetQuery() call (4 multiplied by 4 a further opt_w-1 times).\r
+static unsigned g_WordCount;\r
+\r
+unsigned GetWord(const byte *Seq)\r
+       {\r
+       unsigned Word = 0;\r
+       const byte *Front = Seq;\r
+       for (unsigned i = 0; i < opt_w; ++i)\r
+               {\r
+               unsigned Letter = g_CharToLetterNucleo[*Front++];\r
+               Word = (Word*4) + Letter;\r
+               }\r
+       return Word;\r
+       }\r
+\r
+static void SetQuery(const SeqData &Query)\r
+       {\r
+       if (g_QueryHasWord == 0)\r
+               {\r
+               g_WordCount = 4;\r
+               for (unsigned i = 1; i < opt_w; ++i)\r
+                       g_WordCount *= 4;\r
+\r
+               g_QueryHasWord = myalloc(byte, g_WordCount);\r
+               }\r
+\r
+       memset(g_QueryHasWord, 0, g_WordCount);\r
+\r
+       if (Query.L <= opt_w)\r
+               return;\r
+\r
+       const unsigned L = Query.L - opt_w + 1;\r
+       const byte *Seq = Query.Seq;\r
+       for (unsigned i = 0; i < L; ++i)\r
+               {\r
+               unsigned Word = GetWord(Seq++);\r
+               g_QueryHasWord[Word] = 1;\r
+               }\r
+       }\r
+\r
+static unsigned GetUniqueWordsInCommon(const SeqData &Target)\r
+       {\r
+       if (Target.L <= opt_w)\r
+               return 0;\r
+\r
+       unsigned Count = 0;\r
+       const unsigned L = Target.L - opt_w + 1;\r
+       const byte *Seq = Target.Seq;\r
+       for (unsigned i = 0; i < L; ++i)\r
+               {\r
+               unsigned Word = GetWord(Seq++);\r
+               if (g_QueryHasWord[Word])\r
+                       ++Count;\r
+               }\r
+       return Count;\r
+       }\r
+\r
+void USort(const SeqData &Query, const SeqDB &DB, vector<float> &WordCounts, \r
+  vector<unsigned> &Order)\r
+       {\r
+       WordCounts.clear();\r
+       Order.clear();\r
+\r
+       SetQuery(Query);\r
+\r
+       const unsigned SeqCount = DB.GetSeqCount();\r
+       for (unsigned SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex)\r
+               {\r
+               SeqData Target;\r
+               DB.GetSeqData(SeqIndex, Target);\r
+               float WordCount = (float) GetUniqueWordsInCommon(Target);\r
+               WordCounts.push_back(WordCount);\r
+               }\r
+       SortDescending(WordCounts, Order);\r
+       }\r
+\r
+#endif // UCHIMES\r