comment out analyzing output section

[ool/lipid_simulation_formalism.git] / kinetic_formalism_competition.Rnw
diff --git a/kinetic_formalism_competition.Rnw b/kinetic_formalism_competition.Rnw

index 505fb008ac93341170f2d90155dc475cb12b4027..54e7fa8c02f5a022c0ffb1447bf0495cf14970f1 100644 (file)
--- a/kinetic_formalism_competition.Rnw
+++ b/kinetic_formalism_competition.Rnw
@@ -18,6 +18,7 @@
  \usepackage{booktabs}
  \usepackage[noblocks]{authblk}
  \usepackage[hyperfigures,bookmarks,colorlinks,citecolor=black,filecolor=black,linkcolor=black,urlcolor=black]{hyperref}
+\usepackage[capitalise]{cleveref}
  \usepackage[sectionbib,sort&compress,numbers]{natbib}
  \usepackage[nomargin,inline,draft]{fixme}
  \usepackage[x11names,svgnames]{xcolor}
@@ -99,35 +100,35 @@ to.kcal <- function(k,temp=300) {
  \renewcommand{\thetable}{S\@arabic\c@table}
  \makeatother
  
-\section{Competition Implementation}
-\subsection{Implementation changes}
-
-\begin{itemize}
-\item settable maximum number of vesicles to track (default $10^4$)
-\item start with 1~L ($10^{-3}$~m$^3$) cube
-\item if at any point the number of vesicles exceeds the maximum
-  number, chop the volume and environment molecule number into tenths,
-  randomly select one tenth of the vesicles, and continue tracking.
-\item generations will be counted per vesicle, and each progeny
-  vesicle will have a generation number one greater than the parental
-  vesicle.
-\item 100 generations can result in as many as $2^{100}$
-  ($\Sexpr{to.latex(format(digits=3,2^100))}$) vesicles or as few as
-  101 vesicles.
-\item Environment will use a specific number of each component instead
-  of a constant concentration; as the number may be larger than
-  \texttt{long long} ($2^{64}$), we use libgmp to handle an arbitrary
-  precision number of components
-\end{itemize}
-
-\subsection{Infrastructure changes}
-\begin{itemize}
-\item Rewrite core bits in C
-\item Use libgmp for handling large ints
-\item Use openmpi to split the calculations out over multiple
-  machines/processors and allow deploying to larger
-  clusters/supercomputers
-\end{itemize}
+% \section{Competition Implementation}
+% \subsection{Implementation changes}
+% 
+% \begin{itemize}
+% \item settable maximum number of vesicles to track (default $10^4$)
+% \item start with 1~L ($10^{-3}$~m$^3$) cube
+% \item if at any point the number of vesicles exceeds the maximum
+%   number, chop the volume and environment molecule number into tenths,
+%   randomly select one tenth of the vesicles, and continue tracking.
+% \item generations will be counted per vesicle, and each progeny
+%   vesicle will have a generation number one greater than the parental
+%   vesicle.
+% \item 100 generations can result in as many as $2^{100}$
+%   ($\Sexpr{2^100}$) vesicles or as few as
+%   101 vesicles.
+% \item Environment will use a specific number of each component instead
+%   of a constant concentration; as the number may be larger than
+%   \texttt{long long} ($2^{64}$), we use libgmp to handle an arbitrary
+%   precision number of components
+% \end{itemize}
+% 
+% \subsection{Infrastructure changes}
+% \begin{itemize}
+% \item Rewrite core bits in C
+% \item Use libgmp for handling large ints
+% \item Use openmpi to split the calculations out over multiple
+%   machines/processors and allow deploying to larger
+%   clusters/supercomputers
+% \end{itemize}
  
  
  
@@ -149,20 +150,18 @@ The base forward kinetic parameter for the $i$th component is $k_{\mathrm{f}i}$
  and is dependent on the particular lipid type (PC, PS, SM, etc.). The
  forward adjustment parameter, $k_{\mathrm{f}i\mathrm{adj}}$, is based on the
  properties of the vesicle and the specific component (type, length,
-unsaturation, etc.) (see Equation~\ref{eq:kf_adj}, and
-Section~\ref{sec:kinetic_adjustments}).
+unsaturation, etc.) (see \cref{eq:kf_adj,sec:kinetic_adjustments}).
  $\left[C_{i_\mathrm{monomer}}\right]$ is the molar concentration of
  monomer of the $i$th component. $\left[S_\mathrm{vesicle}\right]$ is the surface
  area of the vesicle per volume. The base backwards kinetic parameter
  for the $i$th component is $k_{\mathrm{b}i}$ and its adjustment parameter
-$k_{\mathrm{b}i\mathrm{adj}}$ (see Equation~\ref{eq:kb_adj}, and
-Section~\ref{sec:kinetic_adjustments}).
+$k_{\mathrm{b}i\mathrm{adj}}$ (see \cref{eq:kb_adj,sec:kinetic_adjustments}).
  $\left[C_{i_\mathrm{vesicle}}\right]$ is the molar concentration of
  the $i$th component in the vesicle.
  
  \subsection{Per-Lipid Kinetic Parameters}
  
-<<echo=FALSE,results='hide'>>=
+<<kf_prime,echo=FALSE,results='hide'>>=
  kf.prime <- c(3.7e6,3.7e6,5.1e7,3.7e6,2.3e6)
  kf <- (as.numeric(kf.prime)*10^-3)/(63e-20*6.022e23)
  @ 
@@ -174,7 +173,7 @@ available, these were taken from literature.
  %   \centering
  %   \begin{tabular}{c c c c c c c c}
  %     \toprule
-%     Type & $k_\mathrm{f}$ $\left(\frac{\mathrm{m}}{\mathrm{s}}\right)$ & $k'_\mathrm{f}$ $\left(\frac{1}{\mathrm{M} \mathrm{s}}\right)$ & $k_\mathrm{b}$ $\left(\mathrm{s}^{-1}\right)$ & Area $\left({\AA}^2\right)$ & Charge & $\mathrm{CF}1$ & Curvature \\
+%     Type & $k_\mathrm{f}$ $\left(\frac{\mathrm{m}}{\mathrm{s}}\right)$ & $k'_\mathrm{f}$ $\left(\frac{1}{\mathrm{M} \mathrm{s}}\right)$ & $k_\mathrm{b}$ $\left(\mathrm{s}^{-1}\right)$ & Area $\left({Å}^2\right)$ & Charge & $\mathrm{CF}1$ & Curvature \\
  %         \midrule
  %     PC   & $\Sexpr{to.latex(format(digits=3,scientific=TRUE,kf[1]))}$ & $3.7 \times 10^6$ & $2   \times 10^{-5}$ & 63 & 0  & 2  & 0.8  \\
  %     PS   & $\Sexpr{to.latex(format(digits=3,scientific=TRUE,kf[2]))}$ & $3.7 \times 10^6$ & $1.25\times 10^{-5}$ & 54 & -1 & 0  & 1    \\
@@ -188,7 +187,7 @@ available, these were taken from literature.
  % \end{table}
  
  %%% \DLA{I think we may just reduce these three sections; area, $k_\mathrm{f}$
-%%%   and $k_\mathrm{b}$ to Table~\ref{tab:kinetic_parameters_lipid_types} with
+%%%   and $k_\mathrm{b}$ to \cref{tab:kinetic_parameters_lipid_types} with
  %%%   references.}
  %%% 
  %%% \RZ{Yes, but we also have to have then as comments the numbers that
@@ -576,7 +575,7 @@ The mean $\mathrm{stdev}\left(un_\mathrm{vesicle}\right)$ in our
  simulations is around $1.5$, which leads to a $\Delta \Delta
  G^\ddagger$ of $\Sexpr{to.latex(format(digits=3,to.kcal(2^1.5)))}
  \frac{\mathrm{kcal}}{\mathrm{mol}}$, and a total range of possible
-values depicted in Figure~\ref{fig:unf_graph}.
+values depicted in \cref{fig:unf_graph}.
  
  % \RZ{Explain here, or even earlier that the formulas were ad hoc
  %   adjusted to correspond to ``reasonable'' changes in the Eyring
@@ -650,7 +649,7 @@ to a range of $\Delta \Delta G^\ddagger$ from
  $\Sexpr{format(digits=3,to.kcal(60^(-.165*-1)))}
  \frac{\mathrm{kcal}}{\mathrm{mol}}$ to
  $0\frac{\mathrm{kcal}}{\mathrm{mol}}$, and the total range of possible
-values seen in Figure~\ref{fig:chf_graph}.
+values seen in \cref{fig:chf_graph}.
  
  
  \begin{figure}
@@ -745,7 +744,7 @@ cu_\mathrm{vesicle}$ of $0.213$ leads to a $\Delta \Delta G^\ddagger$
  of $\Sexpr{format(digits=3,to.kcal(10^(0.13*0.213)))}
  \frac{\mathrm{kcal}}{\mathrm{mol}}$. This is a consequence of the
  relatively matched curvatures in our environment. The full range of
-$cu_\mathrm{f}$ values possible are shown in Figure~\ref{fig:cuf_graph}.
+$cu_\mathrm{f}$ values possible are shown in \cref{fig:cuf_graph}.
  
  % 1.5 to 0.75 3 to 0.33
  \begin{figure}
@@ -956,7 +955,7 @@ $\Sexpr{format(digits=3,to.kcal(7^(1-1/(5*(2^-1.7-2^-0)^2+1))))}
  \frac{\mathrm{kcal}}{\mathrm{mol}}$ for monomers with 0 unsaturation
  to
  $\Sexpr{format(digits=3,to.kcal(7^(1-1/(5*(2^-1.7-2^-4)^2+1))))}\frac{\mathrm{kcal}}{\mathrm{mol}}$
-for monomers with 4 unsaturations. See Figure~\ref{fig:unb_graph} for
+for monomers with 4 unsaturations. See \cref{fig:unb_graph} for
  the full range of possible values.
  
  
@@ -1024,7 +1023,7 @@ a range of $\Delta \Delta G^\ddagger$ from
  $\Sexpr{format(digits=3,to.kcal(20^(-.164*-1)))}
  \frac{\mathrm{kcal}}{\mathrm{mol}}$ for monomers with charge $-1$ to
  $0\frac{\mathrm{kcal}}{\mathrm{mol}}$ for monomers with charge $0$.
-See Figure~\ref{fig:chb_graph} for the full range of possible values
+See \cref{fig:chb_graph} for the full range of possible values
  of $ch_\mathrm{b}$.
  
  
@@ -1111,7 +1110,7 @@ $\Sexpr{format(digits=3,to.kcal(7^(1-1/(20*(-0.013-log(1.3))^2+1))))}\frac{\math
  for monomers with curvature 1.3 to
  $0\frac{\mathrm{kcal}}{\mathrm{mol}}$ for monomers with curvature near
  1. The full range of values possible for $cu_\mathrm{b}$ are shown in
-Figure~\ref{fig:cub_graph}
+\cref{fig:cub_graph}
  
  % \RZ{What about the opposite curvatures that actually do fit to each
  %   other?}
@@ -1200,7 +1199,7 @@ $\Sexpr{format(digits=3,to.kcal(3.2^abs(12-17.75)))}
  $\Sexpr{format(digits=3,to.kcal(3.2^abs(24-17.75)))}\frac{\mathrm{kcal}}{\mathrm{mol}}$
  for monomers with length 24 to $0\frac{\mathrm{kcal}}{\mathrm{mol}}$
  for monomers with length near 18. The full range of possible values of
-$l_\mathrm{b}$ are shown in Figure~\ref{fig:lb_graph}
+$l_\mathrm{b}$ are shown in \cref{fig:lb_graph}
  
  % (for methods? From McLean84LIB: The activation free energies and free
  % energies of transfer from self-micelles to water increase by 2.2 and
@@ -1294,7 +1293,7 @@ $\Sexpr{format(digits=3,to.kcal(1.5^(0.925*2-abs(0.925*2))))}\frac{\mathrm{kcal}
  for monomers with complex formation $2$ to
  $0\frac{\mathrm{kcal}}{\mathrm{mol}}$ for monomers with complex
  formation $0$. The full range of possible values for $CF1_\mathrm{b}$ are
-depicted in Figure~\ref{fig:cf1b_graph}.
+depicted in \cref{fig:cf1b_graph}.
  
  
  
@@ -1458,15 +1457,15 @@ vesicle properties, we only plot the mean of the property.
  
  Determining the number of molecules to add to the lipid membrane
  ($n_i$) requires knowing $k_{\mathrm{f}i_\mathrm{adj}}$, the surface area of the
-vesicle $S_\mathrm{vesicle}$ (see Section \ref{sec:ves_prop}), the time interval
+vesicle $S_\mathrm{vesicle}$ (see \cref{sec:ves_prop}), the time interval
  $dt$ during which lipids are added, the base $k_{\mathrm{f}i}$, and the
  concentration of the monomer in the environment
-$\left[C_{i\mathrm{vesicle}}\right]$ (see Equation~\ref{eq:state}).
-$k_{\mathrm{f}i\mathrm{adj}}$ is calculated (see Equation~\ref{eq:kf_adj}) based on the
+$\left[C_{i\mathrm{vesicle}}\right]$ (see \cref{eq:state}).
+$k_{\mathrm{f}i\mathrm{adj}}$ is calculated (see \cref{eq:kf_adj}) based on the
  vesicle properties and their hypothesized effect on the rate (in as
  many cases as possible, experimentally based)
-(see Section~\ref{sec:kinetic_adjustments} for details). $dt$ can be varied
-(see Section~\ref{sec:step_duration}), but for a given step is constant. This
+(see \cref{sec:kinetic_adjustments} for details). $dt$ can be varied
+(see \cref{sec:step_duration}), but for a given step is constant. This
  leads to the following:
  
  $n_i = k_{\mathrm{f}i}k_{\mathrm{f}i_\mathrm{adj}}\left[C_{i_\mathrm{monomer}}\right]S_\mathrm{vesicle}\mathrm{N_A}dt$
@@ -1518,59 +1517,59 @@ The environment, initial vesicle, and the state of the vesicle
  immediately before and immediately after splitting are stored
  to produce later output.
  
-\section{Analyzing output}
-
-The analysis of output is handled by a separate perl program which
-shares many common modules with the simulation program. Current output
-includes simulation progress, summary tables, summary statistics, and
-various graphs.
-
-\subsection{PCA plots}
-
-Two major groups of axes that contribute to vesicle variation between
-subsequent generations are the components and properties of vesicles.
-Each component in a vesicle is an axis on its own; it can be measured
-either as an absolute number of molecules in each component, or the
-fraction of molecules of that component over the total number of
-molecules; the second approach is often more convenient, as it allows
-vesicles with different number of molecules to be directly compared
-(though it hides any effect of vesicle size).
-
-In order to visualize the variation between and transition of
-subsequent generations of vesicles from their initial state in the
-simulation, to their final state at the termination of the simulation,
-we plot the projection of the generations onto the two principle PCA
-axes (and alternatively, any pairing of the axes).
-
-\subsection{Carpet plots}
-
-Carpet plots show the distance/similarity between the composition or
-properties of all generations in a single run. The difference between
-each group of vesicle can be calculated using Euclidean distance
-(properties and compositions) or H similarity (composition only). We
-must use Euclidean distance for properties because the H distance
-metric is invalid when comparing negative vector elements to positive
-vector elements.
-
-In addition to showing distance or similarity, carpet plots also
-depict propersomes and composomes as square boxes on the diagonals and
-propertypes and compotypes as rectangles off the diagonals, each
-propertype or compotype with a distinct color.
-
-\subsection{Propersomes, propertypes, composomes and compotypes}
-
-A generation is considered to be a propersome if it is less than $0.6$
-units (by Euclidean distance of normalized properties) away from the
-generation immediately following and preceding. Likewise, a generation
-is in a composome if its H similarity is more than $0.9$ (by the
-normalized H metric) from the preceding and following generations.
-Propersomes and composomes are then assigned to propertypes and
-compotypes using Paritioning Around Medioids
-(PAM). All values of $k$ between 2 and 15
-(or the number of propersomes and composomes, if that is less than 15)
-are tried, and the clustering with the smallest
-silhouette~\citep{Rousseeuw1987:silhouettes} is chosen as the ideal
-clustering~\citep{Shenhav2005:pgard}.
+% \section{Analyzing output}
+% 
+% The analysis of output is handled by a separate perl program which
+% shares many common modules with the simulation program. Current output
+% includes simulation progress, summary tables, summary statistics, and
+% various graphs.
+% 
+% \subsection{PCA plots}
+% 
+% Two major groups of axes that contribute to vesicle variation between
+% subsequent generations are the components and properties of vesicles.
+% Each component in a vesicle is an axis on its own; it can be measured
+% either as an absolute number of molecules in each component, or the
+% fraction of molecules of that component over the total number of
+% molecules; the second approach is often more convenient, as it allows
+% vesicles with different number of molecules to be directly compared
+% (though it hides any effect of vesicle size).
+% 
+% In order to visualize the variation between and transition of
+% subsequent generations of vesicles from their initial state in the
+% simulation, to their final state at the termination of the simulation,
+% we plot the projection of the generations onto the two principle PCA
+% axes (and alternatively, any pairing of the axes).
+% 
+% \subsection{Carpet plots}
+% 
+% Carpet plots show the distance/similarity between the composition or
+% properties of all generations in a single run. The difference between
+% each group of vesicle can be calculated using Euclidean distance
+% (properties and compositions) or H similarity (composition only). We
+% must use Euclidean distance for properties because the H distance
+% metric is invalid when comparing negative vector elements to positive
+% vector elements.
+% 
+% In addition to showing distance or similarity, carpet plots also
+% depict propersomes and composomes as square boxes on the diagonals and
+% propertypes and compotypes as rectangles off the diagonals, each
+% propertype or compotype with a distinct color.
+% 
+% \subsection{Propersomes, propertypes, composomes and compotypes}
+% 
+% A generation is considered to be a propersome if it is less than $0.6$
+% units (by Euclidean distance of normalized properties) away from the
+% generation immediately following and preceding. Likewise, a generation
+% is in a composome if its H similarity is more than $0.9$ (by the
+% normalized H metric) from the preceding and following generations.
+% Propersomes and composomes are then assigned to propertypes and
+% compotypes using Paritioning Around Medioids
+% (PAM). All values of $k$ between 2 and 15
+% (or the number of propersomes and composomes, if that is less than 15)
+% are tried, and the clustering with the smallest
+% silhouette~\citep{Rousseeuw1987:silhouettes} is chosen as the ideal
+% clustering~\citep{Shenhav2005:pgard}.
  
  
  \bibliographystyle{unsrtnat}