- introduce n_processes to change the number of processes in the pool
This commit is contained in:
4
.github/workflows/pytest.yml
vendored
4
.github/workflows/pytest.yml
vendored
@@ -11,9 +11,9 @@ jobs:
|
|||||||
os: [ubuntu-20.04, windows-2019, macOS-11]
|
os: [ubuntu-20.04, windows-2019, macOS-11]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v4
|
||||||
- name: Set up Python ${{ matrix.python-version }}
|
- name: Set up Python ${{ matrix.python-version }}
|
||||||
uses: actions/setup-python@v4
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
- name: Install
|
- name: Install
|
||||||
|
|||||||
21
README.md
21
README.md
@@ -13,6 +13,17 @@ Tested on linux, Windows and OSX with python 3.10.
|
|||||||
- Using dill instead of pickle: a lot more objects can be used when parallelizing
|
- Using dill instead of pickle: a lot more objects can be used when parallelizing
|
||||||
- Progress bars are built-in
|
- Progress bars are built-in
|
||||||
|
|
||||||
|
## How it works
|
||||||
|
The work you want parfor to do is divided over a number of processes. These processes are started by parfor and put
|
||||||
|
together in a pool. This pool is reused when you want parfor to do more work, or shut down when no new work arrives
|
||||||
|
within 10 minutes.
|
||||||
|
|
||||||
|
A handle to each bit of work is put in a queue from which the workers take work. The objects needed to do the work are
|
||||||
|
stored in a memory manager in serialized form (using dill) and the manager hands out an object to a worker when the
|
||||||
|
worker is requesting it. The manager deletes objects automatically when they're not needed anymore.
|
||||||
|
|
||||||
|
When the work is done the result is sent back for collection in the main process.
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
`pip install parfor`
|
`pip install parfor`
|
||||||
|
|
||||||
@@ -41,13 +52,11 @@ iterations need to be dillable. You might be able to make objects dillable anyho
|
|||||||
desc: string with description of the progress bar
|
desc: string with description of the progress bar
|
||||||
bar: bool enable progress bar,
|
bar: bool enable progress bar,
|
||||||
or a callback function taking the number of passed iterations as an argument
|
or a callback function taking the number of passed iterations as an argument
|
||||||
pbar: bool enable buffer indicator bar, or a callback function taking the queue size as an argument
|
|
||||||
rP: ratio workers to cpu cores, default: 1
|
|
||||||
nP: number of workers, default, None, overrides rP if not None
|
|
||||||
serial: execute in series instead of parallel if True, None (default): let pmap decide
|
serial: execute in series instead of parallel if True, None (default): let pmap decide
|
||||||
qsize: maximum size of the task queue
|
|
||||||
length: deprecated alias for total
|
length: deprecated alias for total
|
||||||
**bar_kwargs: keywords arguments for tqdm.tqdm
|
n_processes: number of processes to use,
|
||||||
|
the parallel pool will be restarted if the current pool does not have the right number of processes
|
||||||
|
**bar_kwargs: keyword arguments for tqdm.tqdm
|
||||||
|
|
||||||
### Return
|
### Return
|
||||||
list with results from applying the function 'fun' to each iteration of the iterable / iterator
|
list with results from applying the function 'fun' to each iteration of the iterable / iterator
|
||||||
@@ -149,7 +158,7 @@ Since generators don't have a predefined length, give parfor the length (total)
|
|||||||
|
|
||||||
# Extra's
|
# Extra's
|
||||||
## `pmap`
|
## `pmap`
|
||||||
The function parfor decorates, use it like `map`.
|
The function parfor decorates, it's used similarly to `map`.
|
||||||
|
|
||||||
## `Chunks`
|
## `Chunks`
|
||||||
Split a long iterator in bite-sized chunks to parallelize
|
Split a long iterator in bite-sized chunks to parallelize
|
||||||
|
|||||||
@@ -256,13 +256,13 @@ class ParPool:
|
|||||||
The target function and its argument can be changed at any time.
|
The target function and its argument can be changed at any time.
|
||||||
"""
|
"""
|
||||||
def __init__(self, fun: Callable[[Any, ...], Any] = None,
|
def __init__(self, fun: Callable[[Any, ...], Any] = None,
|
||||||
args: tuple[Any] = None, kwargs: dict[str, Any] = None, bar: Bar = None):
|
args: tuple[Any] = None, kwargs: dict[str, Any] = None, n_processes: int = None, bar: Bar = None):
|
||||||
self.id = id(self)
|
self.id = id(self)
|
||||||
self.handle = 0
|
self.handle = 0
|
||||||
self.tasks = {}
|
self.tasks = {}
|
||||||
self.bar = bar
|
self.bar = bar
|
||||||
self.bar_lengths = {}
|
self.bar_lengths = {}
|
||||||
self.spool = PoolSingleton(self)
|
self.spool = PoolSingleton(n_processes, self)
|
||||||
self.manager = self.spool.manager
|
self.manager = self.spool.manager
|
||||||
self.fun = fun
|
self.fun = fun
|
||||||
self.args = args
|
self.args = args
|
||||||
@@ -372,13 +372,14 @@ class PoolSingleton:
|
|||||||
|
|
||||||
instance = None
|
instance = None
|
||||||
|
|
||||||
def __new__(cls, *args: Any, **kwargs: Any) -> PoolSingleton:
|
def __new__(cls, n_processes: int = None, *args: Any, **kwargs: Any) -> PoolSingleton:
|
||||||
if cls.instance is not None: # restart if any workers have shut down
|
# restart if any workers have shut down or if we want to have a different number of processes
|
||||||
if cls.instance.n_workers.value < cls.instance.n_processes:
|
if cls.instance is not None:
|
||||||
|
if cls.instance.n_workers.value < cls.instance.n_processes or cls.instance.n_processes != n_processes:
|
||||||
cls.instance.close()
|
cls.instance.close()
|
||||||
if cls.instance is None or not cls.instance.is_alive:
|
if cls.instance is None or not cls.instance.is_alive:
|
||||||
new = super().__new__(cls)
|
new = super().__new__(cls)
|
||||||
new.n_processes = cpu_count
|
new.n_processes = n_processes or cpu_count
|
||||||
new.instance = new
|
new.instance = new
|
||||||
new.is_started = False
|
new.is_started = False
|
||||||
ctx = Context()
|
ctx = Context()
|
||||||
@@ -396,7 +397,7 @@ class PoolSingleton:
|
|||||||
cls.instance = new
|
cls.instance = new
|
||||||
return cls.instance
|
return cls.instance
|
||||||
|
|
||||||
def __init__(self, parpool: Parpool = None) -> None: # noqa
|
def __init__(self, n_processes: int = None, parpool: Parpool = None) -> None: # noqa
|
||||||
if parpool is not None:
|
if parpool is not None:
|
||||||
self.pools[parpool.id] = parpool
|
self.pools[parpool.id] = parpool
|
||||||
|
|
||||||
@@ -457,7 +458,7 @@ class PoolSingleton:
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def close(cls) -> None:
|
def close(cls) -> None:
|
||||||
if hasattr(cls, 'instance') and cls.instance is not None:
|
if cls.instance is not None:
|
||||||
instance = cls.instance
|
instance = cls.instance
|
||||||
cls.instance = None
|
cls.instance = None
|
||||||
|
|
||||||
@@ -549,7 +550,7 @@ class Worker:
|
|||||||
def pmap(fun: Callable[[Iteration, Any, ...], Result], iterable: Iterable[Iteration] = None,
|
def pmap(fun: Callable[[Iteration, Any, ...], Result], iterable: Iterable[Iteration] = None,
|
||||||
args: tuple[Any, ...] = None, kwargs: dict[str, Any] = None, total: int = None, desc: str = None,
|
args: tuple[Any, ...] = None, kwargs: dict[str, Any] = None, total: int = None, desc: str = None,
|
||||||
bar: Bar | bool = True, terminator: Callable[[], None] = None, serial: bool = None, length: int = None,
|
bar: Bar | bool = True, terminator: Callable[[], None] = None, serial: bool = None, length: int = None,
|
||||||
**bar_kwargs: Any) -> list[Result]:
|
n_processes: int = None, **bar_kwargs: Any) -> list[Result]:
|
||||||
""" map a function fun to each iteration in iterable
|
""" map a function fun to each iteration in iterable
|
||||||
use as a function: pmap
|
use as a function: pmap
|
||||||
use as a decorator: parfor
|
use as a decorator: parfor
|
||||||
@@ -567,6 +568,8 @@ def pmap(fun: Callable[[Iteration, Any, ...], Result], iterable: Iterable[Iterat
|
|||||||
or a callback function taking the number of passed iterations as an argument
|
or a callback function taking the number of passed iterations as an argument
|
||||||
serial: execute in series instead of parallel if True, None (default): let pmap decide
|
serial: execute in series instead of parallel if True, None (default): let pmap decide
|
||||||
length: deprecated alias for total
|
length: deprecated alias for total
|
||||||
|
n_processes: number of processes to use,
|
||||||
|
the parallel pool will be restarted if the current pool does not have the right number of processes
|
||||||
**bar_kwargs: keywords arguments for tqdm.tqdm
|
**bar_kwargs: keywords arguments for tqdm.tqdm
|
||||||
|
|
||||||
output:
|
output:
|
||||||
@@ -654,7 +657,7 @@ def pmap(fun: Callable[[Iteration, Any, ...], Result], iterable: Iterable[Iterat
|
|||||||
bar = stack.enter_context(ExternalBar(callback=bar))
|
bar = stack.enter_context(ExternalBar(callback=bar))
|
||||||
else:
|
else:
|
||||||
bar = stack.enter_context(tqdm(**bar_kwargs))
|
bar = stack.enter_context(tqdm(**bar_kwargs))
|
||||||
with ParPool(chunk_fun, args, kwargs, bar) as p:
|
with ParPool(chunk_fun, args, kwargs, n_processes, bar) as p:
|
||||||
for i, (j, l) in enumerate(zip(iterable, iterable.lengths)): # add work to the queue
|
for i, (j, l) in enumerate(zip(iterable, iterable.lengths)): # add work to the queue
|
||||||
p(j, handle=i, barlength=iterable.lengths[i])
|
p(j, handle=i, barlength=iterable.lengths[i])
|
||||||
if bar.total is None or bar.total < i+1:
|
if bar.total is None or bar.total < i+1:
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "parfor"
|
name = "parfor"
|
||||||
version = "2024.4.0"
|
version = "2024.5.0"
|
||||||
description = "A package to mimic the use of parfor as done in Matlab."
|
description = "A package to mimic the use of parfor as done in Matlab."
|
||||||
authors = ["Wim Pomp <wimpomp@gmail.com>"]
|
authors = ["Wim Pomp <wimpomp@gmail.com>"]
|
||||||
license = "GPLv3"
|
license = "GPLv3"
|
||||||
|
|||||||
@@ -1,4 +1,9 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from os import getpid
|
||||||
|
from time import sleep
|
||||||
|
from typing import Any, Iterator, Optional, Sequence
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
@@ -6,14 +11,14 @@ from parfor import Chunks, ParPool, parfor, pmap
|
|||||||
|
|
||||||
|
|
||||||
class SequenceIterator:
|
class SequenceIterator:
|
||||||
def __init__(self, sequence):
|
def __init__(self, sequence: Sequence) -> None:
|
||||||
self._sequence = sequence
|
self._sequence = sequence
|
||||||
self._index = 0
|
self._index = 0
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self) -> SequenceIterator:
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def __next__(self):
|
def __next__(self) -> Any:
|
||||||
if self._index < len(self._sequence):
|
if self._index < len(self._sequence):
|
||||||
item = self._sequence[self._index]
|
item = self._sequence[self._index]
|
||||||
self._index += 1
|
self._index += 1
|
||||||
@@ -21,19 +26,19 @@ class SequenceIterator:
|
|||||||
else:
|
else:
|
||||||
raise StopIteration
|
raise StopIteration
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self) -> int:
|
||||||
return len(self._sequence)
|
return len(self._sequence)
|
||||||
|
|
||||||
|
|
||||||
class Iterable:
|
class Iterable:
|
||||||
def __init__(self, sequence):
|
def __init__(self, sequence: Sequence) -> None:
|
||||||
self.sequence = sequence
|
self.sequence = sequence
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self) -> SequenceIterator:
|
||||||
return SequenceIterator(self.sequence)
|
return SequenceIterator(self.sequence)
|
||||||
|
|
||||||
|
|
||||||
def iterators():
|
def iterators() -> tuple[Iterator, Optional[int]]:
|
||||||
yield range(10), None
|
yield range(10), None
|
||||||
yield list(range(10)), None
|
yield list(range(10)), None
|
||||||
yield (i for i in range(10)), 10
|
yield (i for i in range(10)), 10
|
||||||
@@ -42,23 +47,23 @@ def iterators():
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('iterator', iterators())
|
@pytest.mark.parametrize('iterator', iterators())
|
||||||
def test_chunks(iterator):
|
def test_chunks(iterator: tuple[Iterator, Optional[int]]) -> None:
|
||||||
chunks = Chunks(iterator[0], size=2, length=iterator[1])
|
chunks = Chunks(iterator[0], size=2, length=iterator[1])
|
||||||
assert list(chunks) == [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]]
|
assert list(chunks) == [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]]
|
||||||
|
|
||||||
|
|
||||||
def test_parpool():
|
def test_parpool() -> None:
|
||||||
def fun(i, j, k): # noqa
|
def fun(i, j, k) -> int: # noqa
|
||||||
return i * j * k
|
return i * j * k
|
||||||
|
|
||||||
with ParPool(fun, (3,), {'k': 2}) as pool:
|
with ParPool(fun, (3,), {'k': 2}) as pool: # noqa
|
||||||
for i in range(10):
|
for i in range(10):
|
||||||
pool[i] = i
|
pool[i] = i
|
||||||
|
|
||||||
assert [pool[i] for i in range(10)] == [0, 6, 12, 18, 24, 30, 36, 42, 48, 54]
|
assert [pool[i] for i in range(10)] == [0, 6, 12, 18, 24, 30, 36, 42, 48, 54]
|
||||||
|
|
||||||
|
|
||||||
def test_parfor():
|
def test_parfor() -> None:
|
||||||
@parfor(range(10), (3,), {'k': 2})
|
@parfor(range(10), (3,), {'k': 2})
|
||||||
def fun(i, j, k):
|
def fun(i, j, k):
|
||||||
return i * j * k
|
return i * j * k
|
||||||
@@ -66,14 +71,14 @@ def test_parfor():
|
|||||||
assert fun == [0, 6, 12, 18, 24, 30, 36, 42, 48, 54]
|
assert fun == [0, 6, 12, 18, 24, 30, 36, 42, 48, 54]
|
||||||
|
|
||||||
|
|
||||||
def test_pmap():
|
def test_pmap() -> None:
|
||||||
def fun(i, j, k):
|
def fun(i, j, k):
|
||||||
return i * j * k
|
return i * j * k
|
||||||
|
|
||||||
assert pmap(fun, range(10), (3,), {'k': 2}) == [0, 6, 12, 18, 24, 30, 36, 42, 48, 54]
|
assert pmap(fun, range(10), (3,), {'k': 2}) == [0, 6, 12, 18, 24, 30, 36, 42, 48, 54]
|
||||||
|
|
||||||
|
|
||||||
def test_id_reuse():
|
def test_id_reuse() -> None:
|
||||||
def fun(i):
|
def fun(i):
|
||||||
return i[0].a
|
return i[0].a
|
||||||
|
|
||||||
@@ -87,5 +92,16 @@ def test_id_reuse():
|
|||||||
yield t
|
yield t
|
||||||
del t
|
del t
|
||||||
|
|
||||||
a = pmap(fun, Chunks(gen(1000), size=1, length=1000), total=1000)
|
a = pmap(fun, Chunks(gen(1000), size=1, length=1000), total=1000) # noqa
|
||||||
assert all([i == j for i, j in enumerate(a)])
|
assert all([i == j for i, j in enumerate(a)])
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('n_processes', (2, 4, 6))
|
||||||
|
def test_n_processes(n_processes) -> None:
|
||||||
|
|
||||||
|
@parfor(range(12), n_processes=n_processes)
|
||||||
|
def fun(i): # noqa
|
||||||
|
sleep(0.25)
|
||||||
|
return getpid()
|
||||||
|
|
||||||
|
assert len(set(fun)) == n_processes
|
||||||
|
|||||||
Reference in New Issue
Block a user