X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=kinetic_formalism_competition.Rnw;h=79900c83bf128a12dc6dab04ba36a7f6213b0171;hb=63f84533ffc7746ae328e34999dc7d63b56f70c8;hp=917ef8537f06260418f71abc33fc50e32ae7e2d4;hpb=a58f8a92449aa803bf4396ae5f62f463ab1caf9c;p=ool%2Flipid_simulation_formalism.git diff --git a/kinetic_formalism_competition.Rnw b/kinetic_formalism_competition.Rnw index 917ef85..79900c8 100644 --- a/kinetic_formalism_competition.Rnw +++ b/kinetic_formalism_competition.Rnw @@ -18,6 +18,7 @@ \usepackage{booktabs} \usepackage[noblocks]{authblk} \usepackage[hyperfigures,bookmarks,colorlinks,citecolor=black,filecolor=black,linkcolor=black,urlcolor=black]{hyperref} +\usepackage[capitalise]{cleveref} \usepackage[sectionbib,sort&compress,numbers]{natbib} \usepackage[nomargin,inline,draft]{fixme} \usepackage[x11names,svgnames]{xcolor} @@ -60,7 +61,6 @@ \newcommand{\OM}[1]{\textcolor{red}{\fxnote{OM: #1}}} -%\SweaveOpts{prefix.string=pub_318_phys_bio_submission_figures_suppl/pub_318_phys_bio_sub_suppl_fig,pdf=TRUE,eps=TRUE} \oddsidemargin 0.0in \textwidth 6.5in \raggedbottom @@ -76,11 +76,14 @@ \begin{document} \maketitle -<>= -require(lattice) -require(grid) -require(Hmisc) -require(gridBase) +<>= +opts_chunk$set(dev="CairoPDF",out.width="\\columnwidth",out.height="0.7\\textheight",out.extra="keepaspectratio") +opts_chunk$set(cache=TRUE, autodep=TRUE) +options(scipen = -2, digits = 1) +library("lattice") +library("grid") +library("Hmisc") +library("gridBase") to.latex <- function(x){ gsub("\\\\","\\\\\\\\",latexSN(x)) } @@ -97,35 +100,35 @@ to.kcal <- function(k,temp=300) { \renewcommand{\thetable}{S\@arabic\c@table} \makeatother -\section{Competition Implementation} -\subsection{Implementation changes} - -\begin{itemize} -\item settable maximum number of vesicles to track (default $10^4$) -\item start with 1~L ($10^{-3}$~m$^3$) cube -\item if at any point the number of vesicles exceeds the maximum - number, chop the volume and environment molecule number into tenths, - randomly select one tenth of the vesicles, and continue tracking. -\item generations will be counted per vesicle, and each progeny - vesicle will have a generation number one greater than the parental - vesicle. -\item 100 generations can result in as many as $2^{100}$ - ($\Sexpr{to.latex(format(digits=3,2^100))}$) vesicles or as few as - 101 vesicles. -\item Environment will use a specific number of each component instead - of a constant concentration; as the number may be larger than - \texttt{long long} ($2^{64}$), we use libgmp to handle an arbitrary - precision number of components -\end{itemize} - -\subsection{Infrastructure changes} -\begin{itemize} -\item Rewrite core bits in C -\item Use libgmp for handling large ints -\item Use openmpi to split the calculations out over multiple - machines/processors and allow deploying to larger - clusters/supercomputers -\end{itemize} +% \section{Competition Implementation} +% \subsection{Implementation changes} +% +% \begin{itemize} +% \item settable maximum number of vesicles to track (default $10^4$) +% \item start with 1~L ($10^{-3}$~m$^3$) cube +% \item if at any point the number of vesicles exceeds the maximum +% number, chop the volume and environment molecule number into tenths, +% randomly select one tenth of the vesicles, and continue tracking. +% \item generations will be counted per vesicle, and each progeny +% vesicle will have a generation number one greater than the parental +% vesicle. +% \item 100 generations can result in as many as $2^{100}$ +% ($\Sexpr{2^100}$) vesicles or as few as +% 101 vesicles. +% \item Environment will use a specific number of each component instead +% of a constant concentration; as the number may be larger than +% \texttt{long long} ($2^{64}$), we use libgmp to handle an arbitrary +% precision number of components +% \end{itemize} +% +% \subsection{Infrastructure changes} +% \begin{itemize} +% \item Rewrite core bits in C +% \item Use libgmp for handling large ints +% \item Use openmpi to split the calculations out over multiple +% machines/processors and allow deploying to larger +% clusters/supercomputers +% \end{itemize} @@ -147,20 +150,18 @@ The base forward kinetic parameter for the $i$th component is $k_{\mathrm{f}i}$ and is dependent on the particular lipid type (PC, PS, SM, etc.). The forward adjustment parameter, $k_{\mathrm{f}i\mathrm{adj}}$, is based on the properties of the vesicle and the specific component (type, length, -unsaturation, etc.) (see Equation~\ref{eq:kf_adj}, and -Section~\ref{sec:kinetic_adjustments}). +unsaturation, etc.) (see \cref{eq:kf_adj,sec:kinetic_adjustments}). $\left[C_{i_\mathrm{monomer}}\right]$ is the molar concentration of monomer of the $i$th component. $\left[S_\mathrm{vesicle}\right]$ is the surface area of the vesicle per volume. The base backwards kinetic parameter for the $i$th component is $k_{\mathrm{b}i}$ and its adjustment parameter -$k_{\mathrm{b}i\mathrm{adj}}$ (see Equation~\ref{eq:kb_adj}, and -Section~\ref{sec:kinetic_adjustments}). +$k_{\mathrm{b}i\mathrm{adj}}$ (see \cref{eq:kb_adj,sec:kinetic_adjustments}). $\left[C_{i_\mathrm{vesicle}}\right]$ is the molar concentration of the $i$th component in the vesicle. \subsection{Per-Lipid Kinetic Parameters} -<>= +<>= kf.prime <- c(3.7e6,3.7e6,5.1e7,3.7e6,2.3e6) kf <- (as.numeric(kf.prime)*10^-3)/(63e-20*6.022e23) @ @@ -172,7 +173,7 @@ available, these were taken from literature. % \centering % \begin{tabular}{c c c c c c c c} % \toprule -% Type & $k_\mathrm{f}$ $\left(\frac{\mathrm{m}}{\mathrm{s}}\right)$ & $k'_\mathrm{f}$ $\left(\frac{1}{\mathrm{M} \mathrm{s}}\right)$ & $k_\mathrm{b}$ $\left(\mathrm{s}^{-1}\right)$ & Area $\left({\AA}^2\right)$ & Charge & $\mathrm{CF}1$ & Curvature \\ +% Type & $k_\mathrm{f}$ $\left(\frac{\mathrm{m}}{\mathrm{s}}\right)$ & $k'_\mathrm{f}$ $\left(\frac{1}{\mathrm{M} \mathrm{s}}\right)$ & $k_\mathrm{b}$ $\left(\mathrm{s}^{-1}\right)$ & Area $\left({Å}^2\right)$ & Charge & $\mathrm{CF}1$ & Curvature \\ % \midrule % PC & $\Sexpr{to.latex(format(digits=3,scientific=TRUE,kf[1]))}$ & $3.7 \times 10^6$ & $2 \times 10^{-5}$ & 63 & 0 & 2 & 0.8 \\ % PS & $\Sexpr{to.latex(format(digits=3,scientific=TRUE,kf[2]))}$ & $3.7 \times 10^6$ & $1.25\times 10^{-5}$ & 54 & -1 & 0 & 1 \\ @@ -186,7 +187,7 @@ available, these were taken from literature. % \end{table} %%% \DLA{I think we may just reduce these three sections; area, $k_\mathrm{f}$ -%%% and $k_\mathrm{b}$ to Table~\ref{tab:kinetic_parameters_lipid_types} with +%%% and $k_\mathrm{b}$ to \cref{tab:kinetic_parameters_lipid_types} with %%% references.} %%% %%% \RZ{Yes, but we also have to have then as comments the numbers that @@ -217,7 +218,7 @@ exchange of [$^3$H]DMPC between LUV at 30\textdegree C, and found it to be 9.6 hr. As this is a first order reaction, and the primary limiting step in exchange is lipid desorption, $k_\mathrm{b}$ for DMPC is $k_{\mathrm{b}_\mathrm{PC}}=\frac{\log 2}{9.6 \times 60 \times 60} \approx -\Sexpr{to.latex(format(log(2)/(9.6*60*60),digits=2))} +\Sexpr{log(2)/(9.6*60*60)} \mathrm{s}^{-1}$. We assume that $k_\mathrm{b}$ for SM is the same as for PC. To estimate the $k_\mathrm{b}$ of PE and PS, we used the data from \citet{Nichols1982:ret_amphiphile_transfer} who measured the rate of @@ -237,9 +238,9 @@ us to $k_{\mathrm{b}_\mathrm{PE}} = \frac{k_{\mathrm{b}_\mathrm{PC}}\times\mathrm{PE}}{\mathrm{PC}} \approx \frac{2\times 10^{-5}\,\mathrm{s}^{-1} \times 0.45\,\mathrm{min}^{-1}}{0.89\,\mathrm{min}^{-1}} \approx -\Sexpr{to.latex(format(log(2)/(9.6*60*60)*0.45/0.89,digits=2))}$~$\mathrm{s}^{-1}$ +\Sexpr{log(2)/(9.6*60*60)*0.45/0.89}$~$\mathrm{s}^{-1}$ and likewise, $k_{\mathrm{b}_\mathrm{PS}}\approx -\Sexpr{to.latex(format(log(2)/(9.6*60*60)*0.55/0.89,digits=3))}$~$\mathrm{s}^{-1}$. +\Sexpr{log(2)/(9.6*60*60)*0.55/0.89}$~$\mathrm{s}^{-1}$. The $k_\mathrm{b}$ of SM was determined using the work of \citet{Bai1997:lipid_movementbodipy}, who measured spontaneous transfer of C$_5$-DMB-SM and C$_5$-DMB-PC from donor and acceptor @@ -247,15 +248,15 @@ vesicles, finding $3.4\times10^{-2}$~$\mathrm{s}^{-1}$ and $2.2\times10^{-3}$~$\mathrm{s}^{-1}$ respectively; using the ratio of $k_\mathrm{b}$ of C$_5$-DMB-SM to the $k_\mathrm{b}$ of C$_5$-DMB-PC times the $k_\mathrm{b}$ of PC ($\frac{3.4 \times 10^{-2}\mathrm{s}^{-1}}{2.2 \times - 10^{-3}\mathrm{s}^{-1}} -\Sexpr{to.latex(format(log(2)/(9.6*60*60),digits=2))}\mathrm{s}^{-1}$, + 10^{-3}\mathrm{s}^{-1}}\approx +\Sexpr{log(2)/(9.6*60*60)}\mathrm{s}^{-1}$), we obtain $k_{\mathrm{b}_\mathrm{SM}} \approx -\Sexpr{to.latex(format(log(2)/(9.6*60*60)*3.4e-2/2.2e-3,digits=2,scientific=TRUE))}$. +\Sexpr{log(2)/(9.6*60*60)*3.4e-2/2.2e-3}$. In the case of CHOL, \citet{Jones1990:lipid_transfer} measured the $t_{1/2}$ of [$^3$H] transfer from POPC vesicles and found it to be 41 minutes, leading to a $k_{\mathrm{b}_\mathrm{CHOL}} = \frac{\log 2}{41\times 60} \approx -\Sexpr{to.latex(format(log(2)/(41*60),digits=2,scientific=TRUE))}$~$\mathrm{s}^{-1}$. +\Sexpr{log(2)/(41*60)}$~$\mathrm{s}^{-1}$. \subsubsection{Headgroup Surface Area for lipid types} @@ -267,16 +268,16 @@ minutes, leading to a $k_{\mathrm{b}_\mathrm{CHOL}} = \frac{\log 2}{41\times Different lipids have different headgroup surface areas, which contributes to $\left[S_\mathrm{vesicle}\right]$. \citet{Smaby1997:pc_area_with_chol} measured the surface area of POPC with a Langmuir film balance, and -found it to be 63~$\AA^2$ at $30$~$\frac{\mathrm{mN}}{\mathrm{m}}$. -Molecular dynamic simulations found an area of 54 $\AA^2$ for +found it to be 63~Å$^2$ at $30$~$\frac{\mathrm{mN}}{\mathrm{m}}$. +Molecular dynamic simulations found an area of 54 Å$^2$ for DPPS\citep{Cascales1996:mds_dpps_area,Pandit2002:mds_dpps}, which is -in agreement with the experimental value of 56~$\AA^2$ found using a +in agreement with the experimental value of 56~Å$^2$ found using a Langmuir balance by \citet{Demel1987:ps_area}. \citet{Shaikh2002:pe_phase_sm_area} measured the area of SM using a -Langmuir film balance, and found it to be 61~$\AA^2$. Using $^2$H NMR, +Langmuir film balance, and found it to be 61~Å$^2$. Using $^2$H NMR, \citet{Thurmond1991:area_of_pc_pe_2hnmr} found the area of -DPPE-d$_{62}$ to be 55.4 $\AA^2$. \citet{Robinson1995:mds_chol_area} -found an area for CHOL of 38~$\AA^2$ using molecular dynamic +DPPE-d$_{62}$ to be 55.4 Å$^2$. \citet{Robinson1995:mds_chol_area} +found an area for CHOL of 38~Å$^2$ using molecular dynamic simulations. % robinson's chol area is kind of crappy; they did it using MDS, but @@ -574,7 +575,7 @@ The mean $\mathrm{stdev}\left(un_\mathrm{vesicle}\right)$ in our simulations is around $1.5$, which leads to a $\Delta \Delta G^\ddagger$ of $\Sexpr{to.latex(format(digits=3,to.kcal(2^1.5)))} \frac{\mathrm{kcal}}{\mathrm{mol}}$, and a total range of possible -values depicted in Figure~\ref{fig:unf_graph}. +values depicted in \cref{fig:unf_graph}. % \RZ{Explain here, or even earlier that the formulas were ad hoc % adjusted to correspond to ``reasonable'' changes in the Eyring @@ -585,9 +586,8 @@ values depicted in Figure~\ref{fig:unf_graph}. % not include a term for it in this formalism. -\setkeys{Gin}{width=6.4in} \begin{figure} -<>= +<>= layout(matrix(1:2,nrow=1,ncol=2)) curve(2^x,from=0,to=sd(c(0,4)), xlab=expression(paste(stdev,group("(",italic(un[vesicle]),")"))), @@ -649,11 +649,11 @@ to a range of $\Delta \Delta G^\ddagger$ from $\Sexpr{format(digits=3,to.kcal(60^(-.165*-1)))} \frac{\mathrm{kcal}}{\mathrm{mol}}$ to $0\frac{\mathrm{kcal}}{\mathrm{mol}}$, and the total range of possible -values seen in Figure~\ref{fig:chf_graph}. +values seen in \cref{fig:chf_graph}. \begin{figure} -<>= +<>= trellis.device(new=F,color=TRUE) trellis.par.set(list(axis.line =list(col="transparent"))) pushViewport(viewport(layout=grid.layout(nrow=1,ncol=2),clip="off")) @@ -744,11 +744,11 @@ cu_\mathrm{vesicle}$ of $0.213$ leads to a $\Delta \Delta G^\ddagger$ of $\Sexpr{format(digits=3,to.kcal(10^(0.13*0.213)))} \frac{\mathrm{kcal}}{\mathrm{mol}}$. This is a consequence of the relatively matched curvatures in our environment. The full range of -$cu_\mathrm{f}$ values possible are shown in Figure~\ref{fig:cuf_graph}. +$cu_\mathrm{f}$ values possible are shown in \cref{fig:cuf_graph}. % 1.5 to 0.75 3 to 0.33 \begin{figure} -<>= +<>= trellis.device(new=F,color=TRUE) trellis.par.set(list(axis.line =list(col="transparent"))) pushViewport(viewport(layout=grid.layout(nrow=1,ncol=2),clip="off")) @@ -862,7 +862,7 @@ $\Sexpr{format(digits=3,to.kcal(2^(3.4)))} \begin{figure} -<>= +<>= layout(matrix(1:2,nrow=1,ncol=2)) curve(2^x,from=0,to=sd(c(12,24)), xlab=expression(paste(stdev,group("(",italic(l[vesicle]),")"))), @@ -955,12 +955,12 @@ $\Sexpr{format(digits=3,to.kcal(7^(1-1/(5*(2^-1.7-2^-0)^2+1))))} \frac{\mathrm{kcal}}{\mathrm{mol}}$ for monomers with 0 unsaturation to $\Sexpr{format(digits=3,to.kcal(7^(1-1/(5*(2^-1.7-2^-4)^2+1))))}\frac{\mathrm{kcal}}{\mathrm{mol}}$ -for monomers with 4 unsaturations. See Figure~\ref{fig:unb_graph} for +for monomers with 4 unsaturations. See \cref{fig:unb_graph} for the full range of possible values. \begin{figure} -<>= +<>= trellis.device(new=F,color=TRUE) trellis.par.set(list(axis.line =list(col="transparent"))) pushViewport(viewport(layout=grid.layout(nrow=1,ncol=2),clip="off")) @@ -1023,12 +1023,12 @@ a range of $\Delta \Delta G^\ddagger$ from $\Sexpr{format(digits=3,to.kcal(20^(-.164*-1)))} \frac{\mathrm{kcal}}{\mathrm{mol}}$ for monomers with charge $-1$ to $0\frac{\mathrm{kcal}}{\mathrm{mol}}$ for monomers with charge $0$. -See Figure~\ref{fig:chb_graph} for the full range of possible values +See \cref{fig:chb_graph} for the full range of possible values of $ch_\mathrm{b}$. \begin{figure} -<>= +<>= trellis.device(new=F,color=TRUE) trellis.par.set(list(axis.line =list(col="transparent"))) pushViewport(viewport(layout=grid.layout(nrow=1,ncol=2),clip="off")) @@ -1110,7 +1110,7 @@ $\Sexpr{format(digits=3,to.kcal(7^(1-1/(20*(-0.013-log(1.3))^2+1))))}\frac{\math for monomers with curvature 1.3 to $0\frac{\mathrm{kcal}}{\mathrm{mol}}$ for monomers with curvature near 1. The full range of values possible for $cu_\mathrm{b}$ are shown in -Figure~\ref{fig:cub_graph} +\cref{fig:cub_graph} % \RZ{What about the opposite curvatures that actually do fit to each % other?} @@ -1125,7 +1125,7 @@ Figure~\ref{fig:cub_graph} \begin{figure} -<>= +<>= trellis.device(new=F,color=TRUE) trellis.par.set(list(axis.line =list(col="transparent"))) pushViewport(viewport(layout=grid.layout(nrow=1,ncol=2),clip="off")) @@ -1199,7 +1199,7 @@ $\Sexpr{format(digits=3,to.kcal(3.2^abs(12-17.75)))} $\Sexpr{format(digits=3,to.kcal(3.2^abs(24-17.75)))}\frac{\mathrm{kcal}}{\mathrm{mol}}$ for monomers with length 24 to $0\frac{\mathrm{kcal}}{\mathrm{mol}}$ for monomers with length near 18. The full range of possible values of -$l_\mathrm{b}$ are shown in Figure~\ref{fig:lb_graph} +$l_\mathrm{b}$ are shown in \cref{fig:lb_graph} % (for methods? From McLean84LIB: The activation free energies and free % energies of transfer from self-micelles to water increase by 2.2 and @@ -1209,7 +1209,7 @@ $l_\mathrm{b}$ are shown in Figure~\ref{fig:lb_graph} \begin{figure} -<>= +<>= trellis.device(new=F,color=TRUE) trellis.par.set(list(axis.line =list(col="transparent"))) pushViewport(viewport(layout=grid.layout(nrow=1,ncol=2),clip="off")) @@ -1293,12 +1293,12 @@ $\Sexpr{format(digits=3,to.kcal(1.5^(0.925*2-abs(0.925*2))))}\frac{\mathrm{kcal} for monomers with complex formation $2$ to $0\frac{\mathrm{kcal}}{\mathrm{mol}}$ for monomers with complex formation $0$. The full range of possible values for $CF1_\mathrm{b}$ are -depicted in Figure~\ref{fig:cf1b_graph}. +depicted in \cref{fig:cf1b_graph}. \begin{figure} -<>= +<>= trellis.device(new=F,color=TRUE) trellis.par.set(list(axis.line =list(col="transparent"))) pushViewport(viewport(layout=grid.layout(nrow=1,ncol=2),clip="off")) @@ -1406,6 +1406,12 @@ $10^{-10}\mathrm{M}$. The base concentration ($10^{-10}\mathrm{M}$) can also be altered at the initialization of the experiment to specific values for specific lipid types or components. +The environment is a volume which is the maximum number of vesicles +from a single simulation (4096) times the maximum size of the vesicle +(usually 10000) divided by Avagadro's number divided by the total +environmental lipid concentration, or usually +\Sexpr{4096*10000/6.022E23/141E-10}~L. + \subsection{Initial Vesicle Creation} Initial vesicles are seeded by selecting lipid molecules from the @@ -1457,15 +1463,15 @@ vesicle properties, we only plot the mean of the property. Determining the number of molecules to add to the lipid membrane ($n_i$) requires knowing $k_{\mathrm{f}i_\mathrm{adj}}$, the surface area of the -vesicle $S_\mathrm{vesicle}$ (see Section \ref{sec:ves_prop}), the time interval +vesicle $S_\mathrm{vesicle}$ (see \cref{sec:ves_prop}), the time interval $dt$ during which lipids are added, the base $k_{\mathrm{f}i}$, and the concentration of the monomer in the environment -$\left[C_{i\mathrm{vesicle}}\right]$ (see Equation~\ref{eq:state}). -$k_{\mathrm{f}i\mathrm{adj}}$ is calculated (see Equation~\ref{eq:kf_adj}) based on the +$\left[C_{i\mathrm{vesicle}}\right]$ (see \cref{eq:state}). +$k_{\mathrm{f}i\mathrm{adj}}$ is calculated (see \cref{eq:kf_adj}) based on the vesicle properties and their hypothesized effect on the rate (in as many cases as possible, experimentally based) -(see Section~\ref{sec:kinetic_adjustments} for details). $dt$ can be varied -(see Section~\ref{sec:step_duration}), but for a given step is constant. This +(see \cref{sec:kinetic_adjustments} for details). $dt$ can be varied +(see \cref{sec:step_duration}), but for a given step is constant. This leads to the following: $n_i = k_{\mathrm{f}i}k_{\mathrm{f}i_\mathrm{adj}}\left[C_{i_\mathrm{monomer}}\right]S_\mathrm{vesicle}\mathrm{N_A}dt$ @@ -1517,59 +1523,59 @@ The environment, initial vesicle, and the state of the vesicle immediately before and immediately after splitting are stored to produce later output. -\section{Analyzing output} - -The analysis of output is handled by a separate perl program which -shares many common modules with the simulation program. Current output -includes simulation progress, summary tables, summary statistics, and -various graphs. - -\subsection{PCA plots} - -Two major groups of axes that contribute to vesicle variation between -subsequent generations are the components and properties of vesicles. -Each component in a vesicle is an axis on its own; it can be measured -either as an absolute number of molecules in each component, or the -fraction of molecules of that component over the total number of -molecules; the second approach is often more convenient, as it allows -vesicles with different number of molecules to be directly compared -(though it hides any effect of vesicle size). - -In order to visualize the variation between and transition of -subsequent generations of vesicles from their initial state in the -simulation, to their final state at the termination of the simulation, -we plot the projection of the generations onto the two principle PCA -axes (and alternatively, any pairing of the axes). - -\subsection{Carpet plots} - -Carpet plots show the distance/similarity between the composition or -properties of all generations in a single run. The difference between -each group of vesicle can be calculated using Euclidean distance -(properties and compositions) or H similarity (composition only). We -must use Euclidean distance for properties because the H distance -metric is invalid when comparing negative vector elements to positive -vector elements. - -In addition to showing distance or similarity, carpet plots also -depict propersomes and composomes as square boxes on the diagonals and -propertypes and compotypes as rectangles off the diagonals, each -propertype or compotype with a distinct color. - -\subsection{Propersomes, propertypes, composomes and compotypes} - -A generation is considered to be a propersome if it is less than $0.6$ -units (by Euclidean distance of normalized properties) away from the -generation immediately following and preceding. Likewise, a generation -is in a composome if its H similarity is more than $0.9$ (by the -normalized H metric) from the preceding and following generations. -Propersomes and composomes are then assigned to propertypes and -compotypes using Paritioning Around Medioids -(PAM). All values of $k$ between 2 and 15 -(or the number of propersomes and composomes, if that is less than 15) -are tried, and the clustering with the smallest -silhouette~\citep{Rousseeuw1987:silhouettes} is chosen as the ideal -clustering~\citep{Shenhav2005:pgard}. +% \section{Analyzing output} +% +% The analysis of output is handled by a separate perl program which +% shares many common modules with the simulation program. Current output +% includes simulation progress, summary tables, summary statistics, and +% various graphs. +% +% \subsection{PCA plots} +% +% Two major groups of axes that contribute to vesicle variation between +% subsequent generations are the components and properties of vesicles. +% Each component in a vesicle is an axis on its own; it can be measured +% either as an absolute number of molecules in each component, or the +% fraction of molecules of that component over the total number of +% molecules; the second approach is often more convenient, as it allows +% vesicles with different number of molecules to be directly compared +% (though it hides any effect of vesicle size). +% +% In order to visualize the variation between and transition of +% subsequent generations of vesicles from their initial state in the +% simulation, to their final state at the termination of the simulation, +% we plot the projection of the generations onto the two principle PCA +% axes (and alternatively, any pairing of the axes). +% +% \subsection{Carpet plots} +% +% Carpet plots show the distance/similarity between the composition or +% properties of all generations in a single run. The difference between +% each group of vesicle can be calculated using Euclidean distance +% (properties and compositions) or H similarity (composition only). We +% must use Euclidean distance for properties because the H distance +% metric is invalid when comparing negative vector elements to positive +% vector elements. +% +% In addition to showing distance or similarity, carpet plots also +% depict propersomes and composomes as square boxes on the diagonals and +% propertypes and compotypes as rectangles off the diagonals, each +% propertype or compotype with a distinct color. +% +% \subsection{Propersomes, propertypes, composomes and compotypes} +% +% A generation is considered to be a propersome if it is less than $0.6$ +% units (by Euclidean distance of normalized properties) away from the +% generation immediately following and preceding. Likewise, a generation +% is in a composome if its H similarity is more than $0.9$ (by the +% normalized H metric) from the preceding and following generations. +% Propersomes and composomes are then assigned to propertypes and +% compotypes using Paritioning Around Medioids +% (PAM). All values of $k$ between 2 and 15 +% (or the number of propersomes and composomes, if that is less than 15) +% are tried, and the clustering with the smallest +% silhouette~\citep{Rousseeuw1987:silhouettes} is chosen as the ideal +% clustering~\citep{Shenhav2005:pgard}. \bibliographystyle{unsrtnat}