More figure stuff.

author Michael Hanke <michael.hanke@gmail.com>

Sun, 15 May 2011 19:35:50 +0000 (15:35 -0400)

committer Michael Hanke <michael.hanke@gmail.com>

Sun, 15 May 2011 19:37:00 +0000 (15:37 -0400)
author Michael Hanke <michael.hanke@gmail.com>
Sun, 15 May 2011 19:35:50 +0000 (15:35 -0400)
committer Michael Hanke <michael.hanke@gmail.com>
Sun, 15 May 2011 19:37:00 +0000 (15:37 -0400)
diff --git a/survey/makestats b/survey/makestats

index 776ab7a8b2221c7e192d7f1aba300a6bc8f9038b..b0932fdb6401ddcfd2525d6799f716036b52cad3 100755 (executable)
--- a/survey/makestats
+++ b/survey/makestats
@@ -6,6 +6,33 @@ import sys
  import pylab as pl
  import numpy as np
  
+# some meaningful groups of OSes
+redhat_family = ["rhel", "centos", "fedora", "scilinux"]
+debian_family = ["debian", "ubuntu", "biolinux"]
+suse_family = ["suse", "slel"]
+other_linux_family = ["gentoo", "mandriva", "arch", "slackware", "otherlinux"]
+other_family = ["starbsd", "unix", "qnx", "beos", "solaris", "other"]
+
+os_cat_names = {
+        'win': 'Windows',
+        'mac': 'Mac OS',
+        'linux': 'GNU/Linux',
+        'otheros': 'Other OS'
+        }
+
+os_family = {
+        'win': ["windows"],
+        'mac': ["macosx"],
+        'linux': redhat_family + debian_family + suse_family + other_linux_family,
+        'otheros': other_family
+        }
+# and the reverse mapping (OS id -> key of its os_family group)
+os_family_rev = {}
+for ost in os_family:
+    for os in os_family[ost]:
+        os_family_rev[os] = ost
+
+
  class DB(dict):
      def __init__(self, srcdir):
          # eats the whole directory
@@ -14,6 +41,8 @@ class DB(dict):
              rawdata = json.load(open(dfn))
              self[rawdata['timestamp']] = rawdata
  
+        self.os_dict = load_list2dict('oslist.txt')
+
      def get_unique(self, key):
          # return a set of all (unique) values for a field id
          uniq = set()
@@ -69,6 +98,13 @@ class DB(dict):
                  match[k] = v
          return match
  
+    def get_nice_name(self, id):
+        srcs = [self.os_dict, os_cat_names]
+        for src in srcs:
+            if id in src:
+                return src[id]
+        # not found, nothing nicer
+        return id
  
  
  def load_list2dict(name):
@@ -79,22 +115,55 @@ def load_list2dict(name):
          d[kv[0]] = kv[1].strip().strip('"')
      return d
  
+def mkpic_os_per_env(db, destdir):
+    envs = ['pers_os', 'man_os', 'virt_host_os', 'virt_guest_os']
+    env_names = ['Personal', 'Managed', 'Virt. Host', 'Virt. Guest']
+    env_stats = {}
+    offset = 0
+    for env in envs:
+        counts = db.get_counts(env)
+        stats = dict(zip(os_family.keys(), [0] * len(os_family)))
+        for os in counts:
+            stats[os_family_rev[os]] += counts[os]
+        total_count = np.sum(stats.values())
+        for osf in stats:
+            stats[osf] = float(stats[osf]) / total_count
+        env_stats[env] = stats
+    # make stacked barplot
+    pl.figure(figsize=(6.4, 4.8), facecolor='w', edgecolor='k')
+    x = np.arange(len(envs))
+    bottoms = np.zeros(len(envs))
+    os_order = ['linux', 'mac', 'win', 'otheros']
+    colors = ['#B63537', '#4E4DA0', '#008200', 'gray']
+    for i, os in enumerate(os_order):
+        stat = [env_stats[e][os] for e in envs]
+        pl.bar(x, stat, bottom=bottoms, color=colors[i],
+               label=db.get_nice_name(os), width=0.8)
+        bottoms += stat
+    pl.legend(loc='lower right')
+    pl.xticks(x + 0.4,  [db.get_nice_name(e) for e in env_names])
+    pl.xlim(-0.25, len(envs))
+    pl.title("Operating system preference by environment")
+    pl.ylabel("Fraction of submissions")
+    pl.savefig('%s/ospref_by_env.png' % destdir, format='png')
+
  def mkpic_submissions_per_datamod(db, destdir):
      # simple demo
      dmd = load_list2dict('datamodlist.txt')
      spd = db.get_counts('bg_datamod')
      spd = sorted(spd.items(), cmp=lambda x, y: cmp(x[1], y[1]))[::-1]
      x = np.arange(len(spd))
-    pl.figure(figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')
-    pl.title('Data modality')
+    pl.figure(figsize=(6.4, 4.8), facecolor='w', edgecolor='k')
+    pl.title('Submissions per data modality')
      pl.bar(x, [s[1] for s in spd])
-    pl.xticks(x + 0.5,  [dmd[k[0]] for k in spd], rotation=-15)
+    pl.xticks(x + 0.5,  [dmd[k[0]] for k in spd], rotation=-10)
      pl.ylabel('Survey submissions per data modality\n(multiple choices per submission possible)')
      pl.savefig('%s/submissions_per_datamod.png' % destdir, format='png')
  
  def main(srcdir, destdir):
      db = DB(srcdir)
-    mkpic_submissions_per_datamod(db, destdir)
+    for pic in [mkpic_submissions_per_datamod, mkpic_os_per_env]:
+        pic(db, destdir)
  
  if __name__ == '__main__':
      main(sys.argv[1], sys.argv[2])
author	Michael Hanke <michael.hanke@gmail.com>
	Sun, 15 May 2011 19:35:50 +0000 (15:35 -0400)
committer	Michael Hanke <michael.hanke@gmail.com>
	Sun, 15 May 2011 19:37:00 +0000 (15:37 -0400)