Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 22 additions & 4 deletions patsy/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@
import ast
import numbers
import six
from six.moves import cPickle as pickle
from patsy import PatsyError
from patsy.util import PushbackAdapter, no_pickling, assert_no_pickling
from patsy.util import PushbackAdapter, no_pickling, assert_no_pickling, check_pickle_version
from patsy.tokens import (pretty_untokenize, normalize_token_spacing,
python_tokenize)
from patsy.compat import call_and_wrap_exc
Expand Down Expand Up @@ -565,7 +566,26 @@ def eval(self, memorize_state, data):
memorize_state,
data)

__getstate__ = no_pickling
def __getstate__(self):
return (0, self.code, self.origin)

def __setstate__(self, state):
(version, code, origin) = state
check_pickle_version(version, 0, self.__class__.__name__)
self.code = code
self.origin = origin

def test_EvalFactor_pickle_saves_origin():
# The pickling tests use object equality before and after pickling
# to test that pickling worked correctly. But EvalFactor's origin field
# is not used in equality comparisons, so we need a separate test to
# test that it is being pickled.
ORIGIN = 123456
f = EvalFactor('a', ORIGIN)
new_f = pickle.loads(pickle.dumps(f))

assert f.origin is not None
assert f.origin == new_f.origin

def test_EvalFactor_basics():
e = EvalFactor("a+b")
Expand All @@ -577,8 +597,6 @@ def test_EvalFactor_basics():
assert e.origin is None
assert e2.origin == "asdf"

assert_no_pickling(e)

def test_EvalFactor_memorize_passes_needed():
from patsy.state import stateful_transform
foo = stateful_transform(lambda: "FOO-OBJ")
Expand Down
24 changes: 24 additions & 0 deletions patsy/test_highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# Exhaustive end-to-end tests of the top-level API.

import sys
from six.moves import cPickle as pickle
import __future__
import six
import numpy as np
Expand Down Expand Up @@ -758,3 +759,26 @@ def test_C_and_pandas_categorical():
[[1, 0],
[1, 1],
[1, 0]])

def test_pickle_builder_roundtrips():
formulas = ["a + b",
"center(i) + a * b + np.log(x)"]
dataset = {"i": range(3),
"x": [1.1, 2.2, 3.3],
"a": list("abc"),
"b": list("xyx")}

for formula in formulas:
yield check_pickle_builder_roundtrips, formula, dataset

def check_pickle_builder_roundtrips(formula, dataset):
design_matrix = dmatrix(formula, dataset)
# TODO Make new_dataset have different values from dataset?
new_dataset = dataset

m1 = dmatrix(design_matrix.design_info, new_dataset)

unpickled_design_info = pickle.loads(pickle.dumps(design_matrix.design_info))
m2 = dmatrix(unpickled_design_info, new_dataset)

assert np.allclose(m1, m2)
76 changes: 76 additions & 0 deletions patsy/test_pickling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
from __future__ import print_function

import six
from six.moves import cPickle as pickle

import os
import shutil

from patsy import EvalFactor

PICKE_TESTCASES_ROOTDIR = os.path.join(os.path.dirname(__file__), '..', 'pickle_testcases')

pickling_testcases = {
"evalfactor_simple": EvalFactor("a+b"),
}

def test_pickling_same_version_roundtrips():
for obj in six.itervalues(pickling_testcases):
yield (check_pickling_same_version_roundtrips, obj)

def check_pickling_same_version_roundtrips(obj):
assert obj == pickle.loads(pickle.dumps(obj, pickle.HIGHEST_PROTOCOL))

def test_pickling_old_versions_still_work():
for (dirpath, dirnames, filenames) in os.walk(PICKE_TESTCASES_ROOTDIR):
for fname in filenames:
if os.path.splitext(fname)[1] == '.pickle':
yield check_pickling_old_versions_still_work, os.path.join(dirpath, fname)

def check_pickling_old_versions_still_work(pickle_filename):
testcase_name = os.path.splitext(os.path.basename(pickle_filename))[0]
with open(pickle_filename, 'rb') as f:
# When adding features to a class, it will happen that there is no
# way to make an instance of that version version of that class
# equal to any instance of a previous version. How do we handle
# that?
# Maybe adding a minimum version requirement to each test?
assert pickling_testcases[testcase_name] == pickle.load(f)

def test_unpickling_future_gives_sensible_error_msg():
# TODO How would we go about testing this?
pass

def create_pickles(version):
# TODO Add options to overwrite pickles directory, with force=True
# during development.
# TODO Add safety check that said force=True option will still give an
# error when trying to remove pickles for a released version, by
# comparing the version argument here with patsy.__version__.
pickle_testcases_dir = os.path.join(PICKE_TESTCASES_ROOTDIR, version)
if os.path.exists(pickle_testcases_dir):
raise OSError("{} already exists. Aborting.".format(pickle_testcases_dir))
pickle_testcases_tempdir = pickle_testcases_dir + "_inprogress"
os.mkdir(pickle_testcases_tempdir)

try:
for name, obj in six.iteritems(pickling_testcases):
with open(os.path.join(pickle_testcases_tempdir, "{}.pickle".format(name)), "wb") as f:
pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
except Exception:
print("Exception during creation of pickles for {}. " \
"Removing partially created directory.".format(version))
shutil.rmtree(pickle_testcases_tempdir)
raise
finally:
os.rename(pickle_testcases_tempdir, pickle_testcases_dir)
print("Successfully created pickle testcases for new version {}.".format(version))

if __name__ == "__main__":
import argparse

arg_parser = argparse.ArgumentParser(description="Create and save pickle testcases for a new version of patsy.")
arg_parser.add_argument("version", help="The version of patsy for which to save a new set of pickle testcases.")
args = arg_parser.parse_args()

create_pickles(args.version)
22 changes: 21 additions & 1 deletion patsy/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
]

import sys
from nose.tools import assert_raises
import numpy as np
import six
from six.moves import cStringIO as StringIO
Expand Down Expand Up @@ -698,7 +699,6 @@ def no_pickling(*args, **kwargs):

def assert_no_pickling(obj):
import pickle
from nose.tools import assert_raises
assert_raises(NotImplementedError, pickle.dumps, obj)

# Use like:
Expand All @@ -720,3 +720,23 @@ def test_safe_string_eq():
assert safe_string_eq(unicode("foo"), "foo")

assert not safe_string_eq(np.empty((2, 2)), "foo")

def check_pickle_version(version, required_version, name=""):
if version > required_version:
error_msg = "This version of patsy is too old to load this pickle"
elif version < required_version:
error_msg = "This pickle is too old and not supported by this version of patsy anymore"
else:
return

if name:
error_msg += " (for object {})".format(name)
error_msg += "."

# TODO Use a better exception than ValueError.
raise ValueError(error_msg)

def test_check_pickle_version():
assert_raises(ValueError, check_pickle_version, 0, 1)
assert_raises(ValueError, check_pickle_version, 1, 0)
check_pickle_version(0, 0)
Binary file added pickle_testcases/0.5/evalfactor_simple.pickle
Binary file not shown.
1 change: 1 addition & 0 deletions release-checklist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
* verify that the ">97% coverage" claim in overview.rst is still true.
* cd docs; make clean html -- check that there are no warnings
* check MANIFEST.in
* run python -m patsy.test_pickling NEW_VERSION_NUMBER
* update version in doc/changes.rst, patsy/version.py
* make sure there are no uncommitted changes
* clone a clean source directory (so as to get a clean checkout
Expand Down