2 #emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
3 #ex: set sts=4 ts=4 sw=4 et:
4 #------------------------- =+- Python script -+= -------------------------
7 @date Tue May 24 10:28:28 2011
11 Yaroslav Halchenko Dartmouth
12 web: http://www.onerussian.com College
13 e-mail: yoh@onerussian.com ICQ#: 60653192
17 COPYRIGHT: Yaroslav Halchenko 2011
21 Permission is hereby granted, free of charge, to any person obtaining a copy
22 of this software and associated documentation files (the "Software"), to deal
23 in the Software without restriction, including without limitation the rights
24 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
25 copies of the Software, and to permit persons to whom the Software is
26 furnished to do so, subject to the following conditions:
28 The above copyright notice and this permission notice shall be included in
29 all copies or substantial portions of the Software.
31 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
34 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
35 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
36 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
39 #-----------------\____________________________________/------------------
41 __author__ = 'Yaroslav Halchenko'
42 __revision__ = '$Revision: $'
44 __copyright__ = 'Copyright (c) 2011 Yaroslav Halchenko'
48 import os, sys, glob, json, re
50 from mvpa.base import verbose
56 blacklist = ['1305808539.9.json', '1305808540.1.json', '1305808541.03.json', # persistent and curious mind-ware guy from Israel
62 cedspike='ced *spike2*', # NEW: http://www.ced.co.uk/pru.shtml?spk4wglu.htm
63 datariver='exp control: datariver', # NEW: http://sccn.ucsd.edu/wiki/DataSuite
64 eeglab='(eeglab|http://sccn.ucsd.edu/eeglab/)',
65 emse='emse', # REFRESH
66 erplab='erplab', # NEW: ERPLAB
67 klusters='klusters.*', # REFRESH
68 netstation='egi net station', # NEW: EGI Net Station
69 neuroscan='(curry|neuroscan(| curry))', # REFRESH
70 neuroscope='.*neuroscope', # REFRESH
71 nutmeg='.*nutmeg', # NEW
76 dtistudio='dti-*studio', # NEW: or MRIStudio?
77 brainsight='brainsight', # NEW: BrainSight
78 nordicice='nordic ice', # NEW: NordicICE -- just 1
80 xmedcon='xmedcon', # NEW
85 statistica='statistica', # NEW
86 java='java', # REFRESH
89 neuroml='neuroml', # NEW: NeuroML -- more of a framework/standard than software
90 xpp='xpp(|y|aut)', # REFRESH: XPP/XPPAUT and Python interface
93 asf='asf', # NEW: ASF http://code.google.com/p/asf/
94 cogent='cogent(|2000)', # REFRESH
95 crsvsg='crs toolbox.*', # NEW: CRS VSG Toolbox http://www.crsltd.com/catalog/vsgtoolbox/
96 mindware='mind-ware', # NEW: MindWare
97 nordicaktiva='nordic aktiva', # NEW: NordicActiva -- just 1 http://www.nordicneurolab.com/Products_and_Solutions/Clinical_Software_Solutions/nordicActiva.aspx http://www.nordicneurolab.com/Products_and_Solutions/Clinical_Software_Solutions/nordicAktiva.aspx
98 superlab='superlab', # REFRESH
99 psignifit='psignifit(|3)', # NEW
104 '|my overall time.*|separate work.*|60% windows'
105 '|.*my own .*software'
106 # Different generic visualization solutions
107 '|gnupot|.*gnu plot.*xmgrace|mayavi|matplotlib'
111 # Really cool one for graphs
115 # DB with imaging data (Italy?) but just once
116 '|loris multi-site database system'
117 # More languages/platforms?
118 '|.net|haskel|gsl|cuda'
120 '|theano|pygame|numpy|mdp|joblib|scipy|pytables|sympy'
122 '|scikits-learn|probid .*'
125 # Python IDE?? quite nice btw
128 '|.*magnetic source locator.*' # Some kind of MEG inverse solver -- publications but no public project
# ---------------------------------------------------------------------------
# Main processing pass and final summary.
# NOTE(review): this listing is incomplete -- several loop headers and branch
# bodies are not visible here, so the comments below describe only what the
# visible statements do and mark everything else as an assumption.
# NOTE(review): `.iteritems()` exists only on Python 2 dicts; assuming
# `all_subs` and its nested values are plain dicts, this is Python 2 code.
# ---------------------------------------------------------------------------
# Check each of the two output directories for existence (the action taken
# when one exists is on a line not shown here).
134 for d in dataout, dataorig:
135 if os.path.exists(d):
# Collect every *.json file located directly inside the input directory.
141 infiles = glob.glob(os.path.join(datain, '*.json'))
143 #infiles = glob.glob(os.path.join(datain, '1305741725.57.json'))
# `f` is presumably one path from `infiles` (loop header not visible).
# Files are matched against the blacklist by base name only.
145 fname = os.path.basename(f)
146 if fname in blacklist:
147 verbose(1, "Skipping %s because of blacklist" % f)
151 verbose(5, "Loading %s" % f)
# NOTE(review): the file objects opened inline here (and for the final dump
# below) are never closed explicitly.
152 j = json.load(open(f))
# Save a copy of the just-loaded record, re-serialized with indent=2, under
# `dataorig` before any adjustment happens.
153 json.dump(j, open(os.path.join(dataorig, fname), 'w'), indent=2)
# `all_subs` maps an origin field name to a mapping of target fields.
154 for ofield, osubs in all_subs.iteritems():
# True when the origin field is absent from the record or holds a falsy
# value (the branch body is not visible; presumably it skips the field).
155 if not (ofield in j and j[ofield]):
# Split the free-form text on '+', ',', '|' or ';', then strip and lowercase
# each piece.  `csv` is assigned on a line not shown -- presumably from
# j[ofield]; verify.
158 values = [x.strip().lower() for x in re.split('[+,|;]', csv)]
# Drop pieces that became empty after stripping.
159 values = [v for v in values if len(v)]
# Snapshot of the cleaned values taken before any of them are matched.
160 original_values = values[:]
161 verbose(3, "Working on %s: %r" % (ofield, values))
# Each target field `sfield` comes with a mapping of name -> regex.
162 for sfield, ssubs in osubs.iteritems():
# `srecord` is a copy of the target field's current content (an empty list
# when absent), `old_srecord` is the uncopied original used for comparison.
# NOTE(review): `copy` is not among the imports visible in this listing.
163 srecord = copy(j.get(sfield, []))
164 old_srecord = j.get(sfield, [])
165 for name, regex in ssubs.iteritems():
166 for i, v in enumerate(values):
# re.match anchors only at the start of the string, so a regex also matches
# any value that merely begins with it.
167 if v is not None and re.match(regex, v):
168 # Found a match -- need to adjust the record
169 # and replace with None in values
171 if name in old_srecord:
172 verbose(1, "Value %s is already in %s=%s" % (v, sfield, old_srecord))
174 verbose(4, "Adding value %s for %s to %s" % (v, name, sfield))
176 if sfield == 'ignore':
177 # unhandled[v] = unhandled.get(v, 0) + 1
# Count how many times each canonical `name` was matched.
180 refreshed[name] = refreshed.get(name, 0) + 1
# Remove the entries that were replaced with None after being matched.
181 values = [v for v in values if v is not None]
182 if sfield == 'ignore':
183 verbose(4, "Skipping ignore")
# Only a changed record is reported (the statement that stores `srecord`
# back is presumably on a line not shown here).
185 if srecord != old_srecord:
186 verbose(4, "Adjusting %s to %s" % (old_srecord, srecord))
189 verbose(4, "Left unhandled: %s" % (values,))
# Tally each value that was left over (presumably inside a loop over
# `values` whose header is not visible).
191 unhandled[v] = unhandled.get(v, 0) + 1
192 verbose(3, "Storing file %s" % fname)
# Write the record under `dataout`, keeping the same file name.
193 json.dump(j, open(os.path.join(dataout, fname), 'w'), indent=2)
194 #open(os.path.join(dataout, fname), 'w').write(json.write(j))
# Body of a helper whose `def` line is not visible (presumably `ppd(d)`, as
# called below): formats `d` as newline-separated "key: count" entries in
# sorted key order; values must be integers for the %d conversion.
197 keys = sorted(d.keys())
198 return '\n '.join(["%s: %d" % (k, d[k]) for k in keys])
# Final summary: per-name match counts, leftover values, and the number of
# skipped files (`skipped` is maintained on lines not shown here).
200 verbose(1, "=== Refreshed ===\n %s" % ppd(refreshed))
201 verbose(1, "=== Unhandled ===\n %s" % ppd(unhandled))
202 verbose(1, "=== Skipped: %d" % skipped)