- Dill (almost) everything.

- Change dill a little to automatically omit undillable parts of objects.
- README.md Limitations
This commit is contained in:
w.pomp
2020-09-03 11:29:15 +02:00
parent 881496b8f4
commit da70cf7a2f
3 changed files with 130 additions and 13 deletions

View File

@@ -2,7 +2,8 @@
Used to parallelize for-loops using parfor in Matlab? This package allows you to do the same in python. Used to parallelize for-loops using parfor in Matlab? This package allows you to do the same in python.
Take any normal serial but parallelizable for-loop and execute it in parallel using easy syntax. Take any normal serial but parallelizable for-loop and execute it in parallel using easy syntax.
Don't worry about the technical details of using the multiprocessing module, race conditions, queues, Don't worry about the technical details of using the multiprocessing module, race conditions, queues,
parfor handles all that. parfor handles all that. Also, parfor uses dill instead of pickle, so that a lot more objects can be used
when parallelizing.
Tested on linux on python 2.7 and 3.8 Tested on linux on python 2.7 and 3.8
@@ -14,14 +15,20 @@ an iterator.
tqdm, dill tqdm, dill
## Limitations ## Limitations
Some objects cannot be passed and or used in child processes. Such objects include objects relying on Objects passed to the pool need to be dillable (dill needs to serialize them). Generators and SwigPyObjects are examples
java-bridge. Examples include reader objects from the python-bioformats package. of objects that cannot be used. They can, however, be used for the iterator argument when using parfor, but its
iterations need to be dillable. You might be able to make objects dillable anyhow using dill.register.
### Required arguments: The function evaluated in parallel needs to terminate. If parfor hangs after seeming to complete the task, it probably
is because the individual processes cannot terminate. Importing javabridge (used in python-bioformats) and starting the
java virtual machine can cause it to hang since the processes only terminate after the java vm has quit.
## Arguments
### Required:
fun: function taking arguments: iteration from iterable, other arguments defined in args & kwargs fun: function taking arguments: iteration from iterable, other arguments defined in args & kwargs
iterable: iterable from which an item is given to fun as a first argument iterable: iterable from which an item is given to fun as a first argument
### Optional arguments: ### Optional:
args: tuple with other unnamed arguments to fun args: tuple with other unnamed arguments to fun
kwargs: dict with other named arguments to fun kwargs: dict with other named arguments to fun
length: give the length of the iterator in cases where len(iterator) results in an error length: give the length of the iterator in cases where len(iterator) results in an error
@@ -32,7 +39,7 @@ java-bridge. Examples include reader objects from the python-bioformats package.
serial: switch to serial if number of tasks less than serial, default: 4 serial: switch to serial if number of tasks less than serial, default: 4
debug: if an error occurs in an iteration, return the error instead of retrying in the main process debug: if an error occurs in an iteration, return the error instead of retrying in the main process
### Output ### Return
list with results from applying the decorated function to each iteration of the iterator list with results from applying the decorated function to each iteration of the iterator
specified as the first argument to the function specified as the first argument to the function

View File

@@ -1,9 +1,118 @@
from __future__ import print_function from __future__ import print_function
import sys
import multiprocessing import multiprocessing
import warnings import warnings
import dill
from tqdm.auto import tqdm from tqdm.auto import tqdm
from dill import dumps, loads
from traceback import format_exc from traceback import format_exc
from pickle import PicklingError, dispatch_table
# True when running on Python 3 (sys.hexversion >= 0x3000000, i.e. 3.0).
PY3 = (sys.hexversion >= 0x3000000)

# Pick an in-memory file type matching what pickling writes:
# cStringIO on py2 when available, else BytesIO on py3 / StringIO on py2.
try:
    from cStringIO import StringIO
except ImportError:
    if PY3:
        from io import BytesIO as StringIO
    else:
        from StringIO import StringIO

# Stand-in reduce tuple used when a real __reduce__ fails:
# (callable, args); unpickling calls the lambda with no args and yields None.
failed_rv = (lambda *args, **kwargs: None, ())
class Pickler(dill.Pickler):
    """ Overload dill to ignore unpicklable parts of objects.
        You probably didn't want to use these parts anyhow.
        However, if you did, you'll have to find some way to make them picklable.
    """
    def save(self, obj, save_persistent_id=True):
        """ Copied from pickle and amended.

            Amendments: wherever stock pickle would raise because obj (or a
            part of it) cannot be reduced, substitute failed_rv so the
            offending part unpickles to None instead of aborting the dump.
        """
        if PY3:
            self.framer.commit_frame()

        # Check for persistent id (defined by a subclass)
        pid = self.persistent_id(obj)
        if pid is not None and save_persistent_id:
            self.save_pers(pid)
            return

        # Check the memo: already-seen objects are written as back-references
        x = self.memo.get(id(obj))
        if x is not None:
            self.write(self.get(x[0]))
            return

        rv = NotImplemented
        reduce = getattr(self, "reducer_override", None)
        if reduce is not None:
            rv = reduce(obj)

        if rv is NotImplemented:
            # Check the type dispatch table
            t = type(obj)
            f = self.dispatch.get(t)
            if f is not None:
                f(self, obj)  # Call unbound method with explicit self
                return

            # Check private dispatch table if any, or else
            # copyreg.dispatch_table
            reduce = getattr(self, 'dispatch_table', dispatch_table).get(t)
            if reduce is not None:
                rv = reduce(obj)
            else:
                # Check for a class with a custom metaclass; treat as regular
                # class
                if issubclass(t, type):
                    self.save_global(obj)
                    return

                # Check for a __reduce_ex__ method, fall back to __reduce__
                reduce = getattr(obj, "__reduce_ex__", None)
                try:
                    if reduce is not None:
                        rv = reduce(self.proto)
                    else:
                        reduce = getattr(obj, "__reduce__", None)
                        if reduce is not None:
                            rv = reduce()
                        else:
                            raise PicklingError("Can't pickle %r object: %r" %
                                                (t.__name__, obj))
                # Amended: bare except is deliberate — any failure in the
                # object's own reduce machinery falls back to failed_rv.
                except:
                    rv = failed_rv

        # Check for string returned by reduce(), meaning "save as global"
        if isinstance(rv, str):
            try:
                self.save_global(obj, rv)
            # Amended: fall back to pickling failed_rv as a global on failure.
            except:
                self.save_global(obj, failed_rv)
            return

        # Assert that reduce() returned a tuple
        if not isinstance(rv, tuple):
            raise PicklingError("%s must return string or tuple" % reduce)

        # Assert that it returned an appropriately sized tuple
        l = len(rv)
        if not (2 <= l <= 6):
            raise PicklingError("Tuple returned by %s must have "
                                "two to six elements" % reduce)

        # Save the reduce() output and finally memoize the object.
        # Amended: if saving the reduce output itself fails (e.g. an
        # unpicklable member deeper in the tuple), save failed_rv instead.
        try:
            self.save_reduce(obj=obj, *rv)
        except:
            self.save_reduce(obj=obj, *failed_rv)
def dumps(obj, protocol=None, byref=None, fmode=None, recurse=None, **kwds):
    """ Pickle an object to a bytes string, tolerating unpicklable parts.

        Drop-in replacement for dill.dumps that routes through the amended
        Pickler above, so parts of obj that cannot be reduced are replaced
        by a stand-in that unpickles to None instead of raising.

        obj:      object to pickle
        protocol: pickle protocol, default: dill.settings['protocol']
        byref, fmode, recurse: forwarded to the Pickler (dill settings)
        kwds:     any further keyword arguments for the Pickler

        Returns the pickled representation as a bytes/str object
        (whatever StringIO yields on this Python version).
    """
    protocol = dill.settings['protocol'] if protocol is None else int(protocol)
    # Copy before updating so the caller's kwds dict is not mutated.
    _kwds = kwds.copy()
    _kwds.update(dict(byref=byref, fmode=fmode, recurse=recurse))
    file = StringIO()
    Pickler(file, protocol, **_kwds).dump(obj)
    return file.getvalue()
def chunks(n, *args): def chunks(n, *args):
""" Yield successive n-sized chunks from lists. """ """ Yield successive n-sized chunks from lists. """
@@ -205,9 +314,9 @@ class parpool(object):
'debuggable error. If it doesn\'t, it\'s an error specific to parallel execution.' 'debuggable error. If it doesn\'t, it\'s an error specific to parallel execution.'
.format(r[1])) .format(r[1]))
warnings.formatwarning = pfmt warnings.formatwarning = pfmt
fun, args, kwargs = [loads(f[1]) for f in r[2][1:]] fun, args, kwargs = [dill.loads(f[1]) for f in r[2][1:]]
r = (False, r[1], fun(r[2][0], *args, **kwargs)) r = (False, r[1], fun(dill.loads(r[2][0]), *args, **kwargs))
self.res[r[1]] = r[2] self.res[r[1]] = dill.loads(r[2])
if not self.bar is None: if not self.bar is None:
self.bar.update() self.bar.update()
self._qbar_update() self._qbar_update()
@@ -216,6 +325,7 @@ class parpool(object):
def __call__(self, n, fun=None, args=None, kwargs=None, handle=None): def __call__(self, n, fun=None, args=None, kwargs=None, handle=None):
""" Add new iteration, using optional manually defined handle.""" """ Add new iteration, using optional manually defined handle."""
n = dumps(n, recurse=True)
if not fun is None: if not fun is None:
self.fun = fun self.fun = fun
if not args is None: if not args is None:
@@ -306,7 +416,7 @@ class parpool(object):
fun = self.get_from_cache(*Fun) fun = self.get_from_cache(*Fun)
args = self.get_from_cache(*Args) args = self.get_from_cache(*Args)
kwargs = self.get_from_cache(*Kwargs) kwargs = self.get_from_cache(*Kwargs)
self.Qo.put((False, i, fun(n, *args, **kwargs))) self.Qo.put((False, i, dumps(fun(dill.loads(n), *args, **kwargs), recurse=True)))
except multiprocessing.queues.Empty: except multiprocessing.queues.Empty:
continue continue
except: except:
@@ -320,7 +430,7 @@ class parpool(object):
hs, objs = zip(*self.cache) hs, objs = zip(*self.cache)
if h in hs: if h in hs:
return objs[hs.index(h)] return objs[hs.index(h)]
obj = loads(ser) obj = dill.loads(ser)
self.cache.append((h, obj)) self.cache.append((h, obj))
while len(self.cache) > self.cachesize: while len(self.cache) > self.cachesize:
self.cache.pop(0) self.cache.pop(0)

View File

@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
setuptools.setup( setuptools.setup(
name="parfor", name="parfor",
version="2020.08.3", version="2020.09.2",
author="Wim Pomp", author="Wim Pomp",
author_email="wimpomp@gmail.com", author_email="wimpomp@gmail.com",
description="A package to mimic the use of parfor as done in Matlab.", description="A package to mimic the use of parfor as done in Matlab.",