]> git.donarmstrong.com Git - mothur.git/blobdiff - uchime_src/writechhit.cpp
added uchime_src folder. added biom parameter to make.shared. added biom as a current...
[mothur.git] / uchime_src / writechhit.cpp
diff --git a/uchime_src/writechhit.cpp b/uchime_src/writechhit.cpp
new file mode 100644 (file)
index 0000000..ea67061
--- /dev/null
@@ -0,0 +1,329 @@
+#include "myutils.h"\r
+#include "chime.h"\r
+\r
+void WriteChimeFileHdr(FILE *f)\r
+       {\r
+       if (f == 0)\r
+               return;\r
+\r
+       fprintf(f,\r
+               "\tQuery"               // 1\r
+               "\tA"                   // 2\r
+               "\tB"                   // 3\r
+               "\tIdQM"                // 4\r
+               "\tIdQA"                // 5\r
+               "\tIdQB"                // 6\r
+               "\tIdAB"                // 7\r
+               "\tIdQT"                // 8\r
+               "\tLY"                  // 9\r
+               "\tLN"                  // 10\r
+               "\tLA"                  // 11\r
+               "\tRY"                  // 12\r
+               "\tRN"                  // 13\r
+               "\tRA"                  // 14\r
+               "\tDiv"                 // 15\r
+               "\tY"                   // 16\r
+               "\n"\r
+               );\r
+       }\r
+\r
+void WriteChimeHit(FILE *f, const ChimeHit2 &Hit)\r
+       {\r
+       if (f == 0)\r
+               return;\r
+\r
+       if (Hit.Div <= 0.0)\r
+               {\r
+               fprintf(f, "0.0000");           // 0\r
+\r
+               fprintf(f,\r
+                 "\t%s", Hit.QLabel.c_str());  // 1\r
+\r
+               fprintf(f,\r
+                 "\t*"                                         // 2\r
+                 "\t*"                                         // 3\r
+                 "\t*"                                         // 4\r
+                 "\t*"                                         // 5\r
+                 "\t*"                                         // 6\r
+                 "\t*"                                         // 7\r
+                 "\t*"                                         // 8\r
+                 "\t*"                                         // 9\r
+                 "\t*"                                         // 10\r
+                 "\t*"                                         // 11\r
+                 "\t*"                                         // 12\r
+                 "\t*"                                         // 13\r
+                 "\t*"                                         // 14\r
+                 "\t*"                                         // 15\r
+                 "\tN"                                         // 16\r
+                 "\n"\r
+                 );\r
+               return;\r
+               }\r
+\r
+       fprintf(f, "%.4f", Hit.Score);          // 0\r
+\r
+       fputc('\t', f);\r
+       fputs(Hit.QLabel.c_str(), f);           // 1\r
+\r
+       fputc('\t', f);\r
+       fputs(Hit.ALabel.c_str(), f);           // 2\r
+\r
+       fputc('\t', f);\r
+       fputs(Hit.BLabel.c_str(), f);           // 3\r
+\r
+       fprintf(f, "\t%.1f", Hit.PctIdQM);      // 4\r
+       fprintf(f, "\t%.1f", Hit.PctIdQA);      // 5\r
+       fprintf(f, "\t%.1f", Hit.PctIdQB);      // 6\r
+       fprintf(f, "\t%.1f", Hit.PctIdAB);      // 7\r
+       fprintf(f, "\t%.1f", Hit.PctIdQT);      // 8\r
+\r
+       fprintf(f, "\t%u", Hit.CS_LY);          // 9\r
+       fprintf(f, "\t%u", Hit.CS_LN);          // 10\r
+       fprintf(f, "\t%u", Hit.CS_LA);          // 11\r
+\r
+       fprintf(f, "\t%u", Hit.CS_RY);          // 12\r
+       fprintf(f, "\t%u", Hit.CS_RN);          // 13\r
+       fprintf(f, "\t%u", Hit.CS_RA);          // 14\r
+\r
+       fprintf(f, "\t%.2f", Hit.Div);          // 15\r
+\r
+       fprintf(f, "\t%c", yon(Hit.Accept())); // 16\r
+       fputc('\n', f);\r
+       }\r
+\r
+unsigned GetUngappedLength(const byte *Seq, unsigned L)\r
+       {\r
+       unsigned UL = 0;\r
+       for (unsigned i = 0; i < L; ++i)\r
+               if (!isgap(Seq[i]))\r
+                       ++UL;\r
+       return UL;\r
+       }\r
+\r
+void WriteChimeHitX(FILE *f, const ChimeHit2 &Hit)\r
+       {\r
+       if (f == 0)\r
+               return;\r
+\r
+       if (Hit.Div <= 0.0)\r
+               return;\r
+\r
+       const string &Q3 = Hit.Q3;\r
+       const string &A3 = Hit.A3;\r
+       const string &B3 = Hit.B3;\r
+\r
+       const byte *Q3Seq = (const byte *) Q3.c_str();\r
+       const byte *A3Seq = (const byte *) A3.c_str();\r
+       const byte *B3Seq = (const byte *) B3.c_str();\r
+\r
+// Aligned\r
+       unsigned ColCount = SIZE(Q3);\r
+       asserta(SIZE(A3) == ColCount && SIZE(B3) == ColCount);\r
+\r
+       unsigned LQ = GetUngappedLength(Q3Seq, ColCount);\r
+       unsigned LA = GetUngappedLength(A3Seq, ColCount);\r
+       unsigned LB = GetUngappedLength(B3Seq, ColCount);\r
+\r
+       fprintf(f, "\n");\r
+       fprintf(f, "------------------------------------------------------------------------\n");\r
+       fprintf(f, "Query   (%5u nt) %s\n", LQ, Hit.QLabel.c_str());\r
+       fprintf(f, "ParentA (%5u nt) %s\n", LA, Hit.ALabel.c_str());\r
+       fprintf(f, "ParentB (%5u nt) %s\n", LB, Hit.BLabel.c_str());\r
+\r
+// Strip terminal gaps in query\r
+       unsigned FromCol = UINT_MAX;\r
+       unsigned ToCol = UINT_MAX;\r
+       for (unsigned Col = 0; Col < ColCount; ++Col)\r
+               {\r
+               if (!isgap(Q3Seq[Col]))\r
+                       {\r
+                       if (FromCol == UINT_MAX)\r
+                               FromCol = Col;\r
+                       ToCol = Col;\r
+                       }\r
+               }\r
+\r
+       unsigned QPos = 0;\r
+       unsigned APos = 0;\r
+       unsigned BPos = 0;\r
+       for (unsigned Col = 0; Col < FromCol; ++Col)\r
+               {\r
+               if (!isgap(A3Seq[Col]))\r
+                       ++APos;\r
+               if (!isgap(B3Seq[Col]))\r
+                       ++BPos;\r
+               }\r
+\r
+       unsigned Range = ToCol - FromCol + 1;\r
+       unsigned RowCount = (Range + 79)/80;\r
+       unsigned RowFromCol = FromCol;\r
+       for (unsigned RowIndex = 0; RowIndex < RowCount; ++RowIndex)\r
+               {\r
+               fprintf(f, "\n");\r
+               unsigned RowToCol = RowFromCol + 79;\r
+               if (RowToCol > ToCol)\r
+                       RowToCol = ToCol;\r
+\r
+       // A row\r
+               fprintf(f, "A %5u ", APos + 1);\r
+               for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
+                       {\r
+                       char q = Q3Seq[Col];\r
+                       char a = A3Seq[Col];\r
+                       if (a != q)\r
+                               a = tolower(a);\r
+                       fprintf(f, "%c", a);\r
+                       if (!isgap(a))\r
+                               ++APos;\r
+                       }\r
+               fprintf(f, " %u\n", APos);\r
+\r
+       // Q row\r
+               fprintf(f, "Q %5u ", QPos + 1);\r
+               for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
+                       {\r
+                       char q = Q3Seq[Col];\r
+                       fprintf(f, "%c", q);\r
+                       if (!isgap(q))\r
+                               ++QPos;\r
+                       }\r
+               fprintf(f, " %u\n", QPos);\r
+\r
+       // B row\r
+               fprintf(f, "B %5u ", BPos + 1);\r
+               for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
+                       {\r
+                       char q = Q3Seq[Col];\r
+                       char b = B3Seq[Col];\r
+                       if (b != q)\r
+                               b = tolower(b);\r
+                       fprintf(f, "%c", b);\r
+                       if (!isgap(b))\r
+                               ++BPos;\r
+                       }\r
+               fprintf(f, " %u\n", BPos);\r
+\r
+       // Diffs\r
+               fprintf(f, "Diffs   ");\r
+               for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
+                       {\r
+                       char q = Q3Seq[Col];\r
+                       char a = A3Seq[Col];\r
+                       char b = B3Seq[Col];\r
+\r
+                       char c = ' ';\r
+                       if (isgap(q) || isgap(a) || isgap(b))\r
+                               c = ' ';\r
+                       else if (Col < Hit.ColXLo)\r
+                               {\r
+                               if (q == a && q == b)\r
+                                       c = ' ';\r
+                               else if (q == a && q != b)\r
+                                       c = 'A';\r
+                               else if (q == b && q != a)\r
+                                       c = 'b';\r
+                               else if (a == b && q != a)\r
+                                       c = 'N';\r
+                               else\r
+                                       c = '?';\r
+                               }\r
+                       else if (Col > Hit.ColXHi)\r
+                               {\r
+                               if (q == a && q == b)\r
+                                       c = ' ';\r
+                               else if (q == b && q != a)\r
+                                       c = 'B';\r
+                               else if (q == a && q != b)\r
+                                       c = 'a';\r
+                               else if (a == b && q != a)\r
+                                       c = 'N';\r
+                               else\r
+                                       c = '?';\r
+                               }\r
+\r
+                       fprintf(f, "%c", c);\r
+                       }\r
+               fprintf(f, "\n");\r
+\r
+       // SNPs\r
+               fprintf(f, "Votes   ");\r
+               for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
+                       {\r
+                       char q = Q3Seq[Col];\r
+                       char a = A3Seq[Col];\r
+                       char b = B3Seq[Col];\r
+\r
+                       bool PrevGap = Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1]));\r
+                       bool NextGap = Col+1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1]));\r
+\r
+                       char c = ' ';\r
+                       if (isgap(q) || isgap(a) || isgap(b) || PrevGap || NextGap)\r
+                               c = ' ';\r
+                       else if (Col < Hit.ColXLo)\r
+                               {\r
+                               if (q == a && q == b)\r
+                                       c = ' ';\r
+                               else if (q == a && q != b)\r
+                                       c = '+';\r
+                               else if (q == b && q != a)\r
+                                       c = '!';\r
+                               else\r
+                                       c = '0';\r
+                               }\r
+                       else if (Col > Hit.ColXHi)\r
+                               {\r
+                               if (q == a && q == b)\r
+                                       c = ' ';\r
+                               else if (q == b && q != a)\r
+                                       c = '+';\r
+                               else if (q == a && q != b)\r
+                                       c = '!';\r
+                               else\r
+                                       c = '0';\r
+                               }\r
+\r
+                       fprintf(f, "%c", c);\r
+                       }\r
+               fprintf(f, "\n");\r
+\r
+       // LR row\r
+               fprintf(f, "Model   ");\r
+               for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
+                       {\r
+                       if (Col < Hit.ColXLo)\r
+                               fprintf(f, "A");\r
+                       else if (Col >= Hit.ColXLo && Col <= Hit.ColXHi)\r
+                               fprintf(f, "x");\r
+                       else\r
+                               fprintf(f, "B");\r
+                       }\r
+\r
+               fprintf(f, "\n");\r
+\r
+               RowFromCol += 80;\r
+               }\r
+       fprintf(f, "\n");\r
+\r
+       double PctIdBestP = max(Hit.PctIdQA, Hit.PctIdQB);\r
+       double Div = (Hit.PctIdQM - PctIdBestP)*100.0/PctIdBestP;\r
+\r
+       unsigned LTot = Hit.CS_LY + Hit.CS_LN + Hit.CS_LA;\r
+       unsigned RTot = Hit.CS_RY + Hit.CS_RN + Hit.CS_RA;\r
+\r
+       double PctL = Pct(Hit.CS_LY, LTot);\r
+       double PctR = Pct(Hit.CS_RY, RTot);\r
+\r
+       fprintf(f,\r
+         "Ids.  QA %.1f%%, QB %.1f%%, AB %.1f%%, QModel %.1f%%, Div. %+.1f%%\n",\r
+         Hit.PctIdQA,\r
+         Hit.PctIdQB,\r
+         Hit.PctIdAB,\r
+         Hit.PctIdQM,\r
+         Div);\r
+\r
+       fprintf(f,\r
+         "Diffs Left %u: N %u, A %u, Y %u (%.1f%%); Right %u: N %u, A %u, Y %u (%.1f%%), Score %.4f\n",\r
+         LTot, Hit.CS_LN, Hit.CS_LA, Hit.CS_LY, PctL,\r
+         RTot, Hit.CS_RN, Hit.CS_RA, Hit.CS_RY, PctR,\r
+         Hit.Score);\r
+       }\r