import os, sys, glob, json, re, shutil
from copy import copy
from mvpa.base import verbose
+from common import *
+
# Verbosity of the mvpa `verbose` reporter; the "Storing file" message in
# this script is issued at level 3.
# NOTE(review): presumably level-3 messages are suppressed at level 2 -- confirm.
verbose.level = 2
# NOTE(review): presumably the directory holding the incoming *.json
# submissions; its use is not visible in this excerpt -- confirm.
datain = 'data'
# Directory the processed per-submission JSON files are dumped into.
dataout = 'dataout'
# Submission file names singled out for special handling.
# NOTE(review): presumably these are skipped entirely; the code consuming
# this list is not visible in this excerpt -- confirm.
blacklist = ['1305808539.9.json', '1305808540.1.json', '1305808541.03.json', # persistent and curious mind-ware guy from Israel
]
# Substitution table. Every line carries a '-' marker: this change removes
# the table, and the processing loop iterates `entries_to_refresh` instead.
# Layout: {field read from a submission: {field whose record gets adjusted:
#          {canonical name: regex}}}.
# The processing loop tests each regex with re.match(), which anchors the
# pattern at the start of the value only (not at its end).
# NOTE(review): `entries_to_refresh` presumably comes from `common` and maps
# each name to a (regex, isnew) pair -- confirm.
# NOTE(review): 'trackvis' is listed both under sw_img and in the ignore regex.
# NOTE(review): '.net' in the ignore regex has an unescaped '.', so it
# matches any single character followed by 'net'.
-all_subs = dict(
- sw_other_name=dict(
- sw_electro=dict(
- cedspike='ced *spike2*', # NEW: http://www.ced.co.uk/pru.shtml?spk4wglu.htm
- datariver='exp control: datariver', # NEW: http://sccn.ucsd.edu/wiki/DataSuite
- eeglab='(eeglab|http://sccn.ucsd.edu/eeglab/)',
- emse='emse', # REFRESH
- erplab='erplab', # NEW: ERPLAB
- klusters='klusters.*', # REFRESH
- netstation='egi net station', # NEW: EGI Net Station
- neuroscan='(curry|neuroscan(| curry))', # REFRESH
- neuroscope='.*neuroscope', # REFRESH
- nutmeg='.*nutmeg', # NEW
- ),
- sw_img=dict(
- mricron='mricrogl',
- afni='afni for bci',
- dtistudio='dti-*studio', # NEW: or MRIStudio?
- brainsight='brainsight', # NEW: BrainSight
- nordicice='nordic ice', # NEW: NordicICE -- just 1
- trackvis='trackvis',
- xmedcon='xmedcon', # NEW
- ),
- sw_general=dict(
- lua='lua', # NEW
- stata='stata', # NEW
- statistica='statistica', # NEW
- java='java', # REFRESH
- ),
- sw_neusys=dict(
- neuroml='neuroml', # NEW: NeuroML -- more of a framework/standard than software
- xpp='xpp(|y|aut)', # REFRESH: XPP/XPPAUT and Python interface
- ),
- sw_psychphys=dict(
- asf='asf', # NEW: ASF http://code.google.com/p/asf/
- cogent='cogent(|2000)', # REFRESH
- crsvsg='crs toolbox.*', # NEW: CRS VSG Toolbox http://www.crsltd.com/catalog/vsgtoolbox/
- mindware='mind-ware', # NEW: MindWare
- nordicaktiva='nordic aktiva', # NEW: NordicActiva -- just 1 http://www.nordicneurolab.com/Products_and_Solutions/Clinical_Software_Solutions/nordicActiva.aspx http://www.nordicneurolab.com/Products_and_Solutions/Clinical_Software_Solutions/nordicAktiva.aspx
- superlab='superlab', # REFRESH
- psignifit='psignifit(|3)', # NEW
- ),
- ignore=dict(ignore=
- '(zsh vim mutt git'
- # just ignore
- '|my overall time.*|separate work.*|60% windows'
- '|.*my own .*software'
- # Different generic visualization solutions
- '|gnupot|.*gnu plot.*xmgrace|mayavi|matplotlib'
- '|trackvis'
- '|opengl|itk|vtk'
- '|paraview'
- # Really cool one for graphs
- '|gephi'
- # Generic DBs
- '|mysql|postgresql'
- # DB with imaging data (Italy?) but just once
- '|loris multi-site database system'
- # More languages/platforms?
- '|.net|haskel|gsl|cuda'
- # Python lovers
- '|theano|pygame|numpy|mdp|joblib|scipy|pytables|sympy'
- # ML toolboxes
- '|scikits-learn|probid .*'
- # Reference managers
- '|mendeley|jabref'
- # Python IDE?? quite nice btw
- '|spyder'
- # Move into survey?
- '|.*magnetic source locator.*' # Some kind of MEG inverse solver -- publications but no public project
- ')'
- ),
- ),
- )
# NOTE(review): the lines below appear to be non-contiguous pieces of the
# per-submission processing loop (statements between them are not visible
# and indentation was not preserved); read each group on its own.
#
# Check both output directories for existence (the action taken is not
# visible in this excerpt).
for d in dataout, dataorig:
if os.path.exists(d):
# Record submission `j` under its (ip, agent) key, after any earlier entries.
ips[(ip, agent)] = previous_entries + [j]
# Dump the submission into the `dataorig` directory.
# NOTE(review): the file object passed to json.dump is never explicitly closed.
json.dump(j, open(os.path.join(dataorig, fname), 'w'), indent=2)
# Walk the substitution table; this change switches the table from
# `all_subs` to `entries_to_refresh`.
- for ofield, osubs in all_subs.iteritems():
+ for ofield, osubs in entries_to_refresh.iteritems():
# Skip fields that are absent from, or empty in, this submission.
if not (ofield in j and j[ofield]):
continue
csv = j[ofield]
for sfield, ssubs in osubs.iteritems():
# `srecord` is a shallow copy of the target record; `old_srecord` is the
# record as stored in `j` (or a fresh empty list when the key is absent).
srecord = copy(j.get(sfield, []))
old_srecord = j.get(sfield, [])
# With this change each table entry is a (regex, isnew) pair rather than a
# bare regex.
- for name, regex in ssubs.iteritems():
+ for name, (regex, isnew) in ssubs.iteritems():
for i, v in enumerate(values):
# re.match anchors at the start of `v` only; None entries are skipped.
if v is not None and re.match(regex, v):
# Found a match -- need to adjust the record
# Count how often each value in `values` is seen at this point.
# NOTE(review): presumably `values` holds only the unmatched leftovers
# here -- confirm against the code not visible in this excerpt.
for v in values:
unhandled[v] = unhandled.get(v, 0) + 1
verbose(3, "Storing file %s" % fname)
# Added by this change: keep only the first two dot-separated components of
# the address and replace the rest with '.x.x'.
# NOTE(review): assumes 'remote_addr' is present and is a dotted IPv4
# string -- confirm.
# NOTE(review): the dump into `dataorig` above appears to run before this
# truncation, so that copy would keep the full address -- confirm.
+ # shorten IP
+ j['remote_addr'] = '.'.join(j['remote_addr'].split('.')[:2]) + '.x.x'
# Write the processed submission into `dataout`.
# NOTE(review): the file object passed to json.dump is never explicitly closed.
json.dump(j, open(os.path.join(dataout, fname), 'w'), indent=2)
#open(os.path.join(dataout, fname), 'w').write(json.write(j))