Towards survey stats.

author Michael Hanke <michael.hanke@gmail.com>

Sat, 14 May 2011 01:16:44 +0000 (21:16 -0400)

committer Michael Hanke <michael.hanke@gmail.com>

Sat, 14 May 2011 01:17:53 +0000 (21:17 -0400)
author Michael Hanke <michael.hanke@gmail.com>
Sat, 14 May 2011 01:16:44 +0000 (21:16 -0400)
committer Michael Hanke <michael.hanke@gmail.com>
Sat, 14 May 2011 01:17:53 +0000 (21:17 -0400)
diff --git a/survey/Makefile b/survey/Makefile

index 11348dbc5adf13f530dbfc20303fb05d76222078..7806eabd663daf766f1c9a88a847a32e1db08ae1 100644 (file)
--- a/survey/Makefile
+++ b/survey/Makefile
@@ -9,6 +9,22 @@ jquery.form.js:
  index.html: survey.rst
         rst2html --link-stylesheet --stylesheet-path=survey.css survey.rst > $@
  
+getdata:  # mirror the server's surveydata directory into the current directory (rsync --delete)
+       rsync -rvzlhp --delete \
+               neurodebian@www.pymvpa.org:/home/neurodebian/surveydata .
+
+oslist.txt: select_os_options.inc  # rewrite the <option> lines not containing "selected" as: value: "label"
+       grep -v selected select_os_options.inc | sed -e 's/<option value="//' \
+               -e 's/" label="/: "/' -e 's/">.*$$/"/' > $@
+
+swlist.txt: survey.rst  # rewrite the "sw" field lines of survey.rst as: value: "label"; a line left ending in a bare quote gets "Other"
+       grep '"sw"' survey.rst | sed -e 's/.*name="sw" value="//' -e 's," />,: ",' \
+               -e 's,</td>$$,",' -e 's/ "$$/ "Other"/' > $@
+
+datamodlist.txt: survey.rst  # same extraction as swlist.txt, for the "bg_datamod" field lines (terminated by <br /> instead of </td>)
+       grep '"bg_datamod"' survey.rst | sed -e 's/.*name="bg_datamod" value="//' \
+               -e 's," />,: ",' -e 's,<br />$$,",' -e 's/ "$$/ "Other"/' > $@
+
  upload: index.html jquery.form.js survey.css
         rsync -rvzlhp --delete  \
          --exclude=Makefile --exclude='*.inc' --exclude='*.rst' \
diff --git a/survey/makestats b/survey/makestats

new file mode 100755 (executable)

index 0000000..776ab7a
--- /dev/null
+++ b/survey/makestats
@@ -0,0 +1,100 @@
+#!/usr/bin/python
+
+from glob import glob
+import json
+import sys
+import pylab as pl
+import numpy as np
+
+class DB(dict):  # survey database: maps each submission's 'timestamp' value to its decoded JSON record
+    def __init__(self, srcdir):
+        # load every file matching <srcdir>/*.json into the dict
+        datafilenames = glob('%s/*.json' % srcdir)
+        for dfn in datafilenames:
+            rawdata = json.load(open(dfn))  # NOTE(review): the file object is never closed explicitly
+            self[rawdata['timestamp']] = rawdata  # a later record with the same timestamp replaces the earlier one
+
+    def get_unique(self, key):
+        # return the set of all distinct values stored under field id `key`
+        uniq = set()
+        for d in self.values():
+            if key in d:
+                el = d[key]
+                if isinstance(el, list):
+                    uniq = uniq.union(el)  # list-valued field: every item is added
+                else:
+                    uniq = uniq.union((el,))  # scalar field: the value itself is added
+        return uniq
+
+    def get_not_none(self, key):
+        # return (values, missing): a flat list of all values of field id `key`
+        # (items of list-valued fields are appended individually) and the count of
+        # submissions that lack the field or hold the scalar string 'none'
+        val = []
+        missing = 0
+        for d in self.values():
+            if key in d:
+                el = d[key]
+                if isinstance(el, list):
+                    val.extend(el)  # list items are taken as-is, without a 'none' check
+                else:
+                    if el == 'none':
+                        missing += 1
+                    else:
+                        val.append(el)
+            else:
+                missing += 1
+        return val, missing
+
+    def get_counts(self, key):
+        # return a dict mapping each distinct value of field id `key` to the number
+        # of times it occurs in the value list returned by get_not_none()
+        vals = self.get_not_none(key)[0]
+        uniq = np.unique(vals)
+        counts = dict(zip(uniq, [vals.count(u) for u in uniq]))
+        return counts
+
+    def select_match(self, key, values):
+        # return a plain dict (not a DB) of all submissions where field id `key`
+        # equals, or for list-valued fields contains, one of the supplied values
+        match = {}
+        for k, v in self.items():
+            if not key in v:
+                continue  # submissions without the field never match
+            el = v[key]
+            if isinstance(el, list):
+                if len(set(values).intersection(el)):
+                    match[k] = v
+            elif el in values:
+                match[k] = v
+        return match
+
+
+
+def load_list2dict(name):  # read lines of the form  key: "label"  from file `name` into a {key: label} dict
+    d = {}
+    lfile = open(name)  # NOTE(review): the file object is never closed explicitly
+    for line in lfile:
+        kv = line.split(':')  # a line without ':' raises IndexError below; text after a second ':' is dropped
+        d[kv[0]] = kv[1].strip().strip('"')  # strip surrounding whitespace first, then the double quotes
+    return d
+
+def mkpic_submissions_per_datamod(db, destdir):
+    # bar chart of 'bg_datamod' value counts, saved as <destdir>/submissions_per_datamod.png
+    dmd = load_list2dict('datamodlist.txt')  # tick-label lookup; the path is relative to the working directory
+    spd = db.get_counts('bg_datamod')
+    spd = sorted(spd.items(), cmp=lambda x, y: cmp(x[1], y[1]))[::-1]  # (value, count) pairs, highest count first; cmp= exists only in Python 2
+    x = np.arange(len(spd))
+    pl.figure(figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')
+    pl.title('Data modality')
+    pl.bar(x, [s[1] for s in spd])
+    pl.xticks(x + 0.5,  [dmd[k[0]] for k in spd], rotation=-15)  # a value missing from datamodlist.txt raises KeyError here
+    pl.ylabel('Survey submissions per data modality\n(multiple choices per submission possible)')
+    pl.savefig('%s/submissions_per_datamod.png' % destdir, format='png')
+
+def main(srcdir, destdir):  # load all submissions from `srcdir` and write the statistics figure into `destdir`
+    db = DB(srcdir)
+    mkpic_submissions_per_datamod(db, destdir)
+
+if __name__ == '__main__':
+    main(sys.argv[1], sys.argv[2])  # command line: makestats <srcdir> <destdir>
diff --git a/survey/surveycollector.cgi b/survey/surveycollector.cgi

new file mode 100755 (executable)

index 0000000..84cbeea
--- /dev/null
+++ b/survey/surveycollector.cgi
@@ -0,0 +1,104 @@
+#!/usr/bin/python
+
+import json
+import cgi
+import time
+import os
+
+def validate_form(form):  # return a list of user-facing problem messages; an empty list means the submission is acceptable
+    messages = []
+    if not 'pers_time' in form or form['pers_time'].value == 'none':  # field must be present and not the string 'none'
+        messages.append("Please indicate how much time you spend in your personal computing environment.")
+    if not 'man_time' in form or form['man_time'].value == 'none':
+        messages.append("Please indicate how much time you spend in a managed computing environment.")
+    if not 'virt_time' in form or form['virt_time'].value == 'none':
+        messages.append("Please indicate how often you use virtual machines.")
+    if not 'bg_datamod' in form or not len(form.getlist('bg_datamod')):  # at least one 'bg_datamod' value is required
+        messages.append("Please indicate want kind of data you are working with.")
+    if 'bg_datamod' in form and 'other' in form.getlist('bg_datamod') and not form['bg_datamod_other'].value:  # NOTE(review): form['bg_datamod_other'] presumably raises KeyError if the field was not submitted at all -- confirm
+        messages.append("You selected 'Other data modality' but did not specific which one.")
+    if 'pers_maint_time' in form and form['pers_maint_time'].value:  # optional field: only checked when non-empty
+        try:
+            t = float(form['pers_maint_time'].value)  # `t` is unused; only the conversion attempt matters
+        except:  # NOTE(review): bare except, any failure is reported as "not a number"
+            messages.append("The value you entered as maintenance effort per month needs to be a (floating point) number. For example: 1.2 or 5")
+    return messages
+
+
+def format_message(mesgs):  # return one string: a fixed headline followed by the messages, separated by blank lines
+    msg = '>>> We found a problem with your submission <<<\n\n'
+    return msg + '\n\n'.join(mesgs)
+
+def extract_results(form, result):  # copy the known form fields into `result` (modified in place) and return it
+    # fields stored unchanged as strings; fields absent from the form are skipped
+    for ff in ["bg_country", "bg_employer", "bg_position", "bg_developer", "bg_datamod_other",
+               "pers_time", "pers_hardware", "pers_os",
+               "man_time", "man_hardware", "man_os",
+               "virt_time", "virt_other", "virt_guest_os", "virt_host_os",
+               "software_resource_other", "sw_other"]:
+        if ff in form:
+            result[ff] = form[ff].value
+    # fields converted with int(); a value that is not an integer literal raises ValueError
+    for ff in ["pers_r1", "pers_r2", "pers_r3", "pers_r4", "pers_r5", "pers_r6", "pers_r7", "pers_r8",
+               "man_r1", "man_r2", "man_r3", "man_r4", "man_r5",
+               "virt_r1", "virt_r2", "virt_r3", "virt_r4"]:
+        if ff in form:
+            result[ff] = int(form[ff].value)
+    # fields stored as lists of all submitted values (form.getlist)
+    for ff in ["bg_datamod", "virt_prod", "software_resource", "sw"]:
+        if ff in form:
+            result[ff] = form.getlist(ff)
+        
+    # optional float field; skipped when absent or empty
+    if "pers_maint_time" in form and form["pers_maint_time"].value:
+        result["pers_maint_time"] = float(form["pers_maint_time"].value)
+
+    # time of processing, as returned by time.time()
+    result['timestamp'] = time.time()
+
+    # client address, only when REMOTE_ADDR is set in the environment
+    if 'REMOTE_ADDR' in os.environ:
+        result['remote_addr'] = os.environ['REMOTE_ADDR']
+
+    return result
+
+def main():  # validate the submitted form, store it as a JSON file on success, and print a JSON result for the client
+    # list of form data keys (NOTE(review): `formkeys` is never used in this function)
+    formkeys = []
+
+    # get the form data; keep_blank_values=True keeps fields that were submitted empty
+    form = cgi.FieldStorage(keep_blank_values=True)
+
+    # compose the server response; 'success' stays False unless validation passes
+    result ={}
+    result['success'] = False
+
+    # Make sure we always return something meaningful (there is no except clause: errors still propagate after the finally block)
+    try:
+        messages = validate_form(form)
+        if not len(messages):
+            # no messages means all good
+            result['success'] = True
+            result = extract_results(form, result)
+            try:
+                logfile = open('/home/neurodebian/surveydata/%s.json' % result['timestamp'], 'w+')  # one file per submission, named after its timestamp
+                logfile.write(json.write(result))  # NOTE(review): the standard-library json module offers dumps(), not write(); presumably a legacy json package is expected here -- confirm
+                logfile.write('\n')
+                logfile.close()
+            except:  # NOTE(review): bare except, any failure in the block above is reported as a storage problem
+                result['success'] = False
+                result['message'] = 'We are very sorry, but the server is unable to store your submission. Please contact team@neuro.debian.net.'
+        else:
+            result['message'] = format_message(messages)
+
+    finally:
+        # always send the result to the client, even while an exception is propagating
+        print json.write(result)
+
+
+if __name__ == '__main__':
+    print "Content-Type: text/xhtml"    # response header; NOTE(review): main() prints a JSON document, not (X)HTML -- confirm the intended type
+    print                               # blank line, end of headers
+
+    main()
+
author	Michael Hanke <michael.hanke@gmail.com>
	Sat, 14 May 2011 01:16:44 +0000 (21:16 -0400)
committer	Michael Hanke <michael.hanke@gmail.com>
	Sat, 14 May 2011 01:17:53 +0000 (21:17 -0400)
survey/Makefile		patch | blob | history
survey/makestats	[new file with mode: 0755]	patch | blob
survey/surveycollector.cgi	[new file with mode: 0755]	patch | blob