Classifier SweepΒΆ
This examples shows a test of various classifiers on different datasets.
from mvpa2.suite import *
# no MVPA warnings during this example
warning.handlers = []
def main():
# fix seed or set to None for new each time
np.random.seed(44)
# Load Haxby dataset example
haxby8 = load_example_fmri_dataset(literal=True)
haxby8.samples = haxby8.samples.astype(np.float32)
# preprocess slightly
poly_detrend(haxby8, chunks_attr='chunks', polyord=1)
zscore(haxby8, chunks_attr='chunks', param_est=('targets', 'rest'))
haxby8_no0 = haxby8[haxby8.targets != 'rest']
dummy2 = normal_feature_dataset(perlabel=30, nlabels=2,
nfeatures=100,
nchunks=6, nonbogus_features=[11, 10],
snr=3.0)
for (dataset, datasetdescr), clfs_ in \
[
((dummy2,
"Dummy 2-class univariate with 2 useful features out of 100"),
clfswh[:]),
((pure_multivariate_signal(8, 3),
"Dummy XOR-pattern"),
clfswh['non-linear']),
((haxby8_no0,
"Haxby 8-cat subject 1"),
clfswh['multiclass']),
]:
# XXX put back whenever there is summary() again
#print "%s\n %s" % (datasetdescr, dataset.summary(idhash=False))
print " Classifier on %s\n" \
" : %%corr " \
"#features\t train predict full" % datasetdescr
for clf in clfs_:
print " %-40s: " % clf.descr,
# Let's prevent failures of the entire script if some particular
# classifier is not appropriate for the data
try:
# Change to False if you want to use CrossValidation
# helper, instead of going through splits manually to
# track training/prediction time of the classifiers
do_explicit_splitting = True
if not do_explicit_splitting:
cv = CrossValidation(
clf, NFoldPartitioner(), enable_ca=['stats', 'calling_time'])
error = cv(dataset)
# print cv.ca.stats
print "%5.1f%% - \t - - %.2fs" \
% (cv.ca.stats.percent_correct, cv.ca.calling_time)
continue
# To report transfer error (and possibly some other metrics)
confusion = ConfusionMatrix()
times = []
nf = []
t0 = time.time()
#TODO clf.ca.enable('nfeatures')
partitioner = NFoldPartitioner()
for nfold, ds in enumerate(partitioner.generate(dataset)):
(training_ds, validation_ds) = tuple(
Splitter(attr=partitioner.space).generate(ds))
clf.train(training_ds)
#TODO nf.append(clf.ca.nfeatures)
predictions = clf.predict(validation_ds.samples)
confusion.add(validation_ds.targets, predictions)
times.append([clf.ca.training_time, clf.ca.predicting_time])
tfull = time.time() - t0
times = np.mean(times, axis=0)
#TODO nf = np.mean(nf)
# print confusion
#TODO print "%5.1f%% %-4d\t %.2fs %.2fs %.2fs" % \
print "%5.1f%% - \t %.2fs %.2fs %.2fs" % \
(confusion.percent_correct, times[0], times[1], tfull)
#TODO (confusion.percent_correct, nf, times[0], times[1], tfull)
except LearnerError, e:
print " skipped due to '%s'" % e
if __name__ == "__main__":
main()
See also
The full source code of this example is included in the PyMVPA source distribution (doc/examples/clfs_examples.py
).