- add gmap: function like pmap, but returning a generator instead of a list

- add arguments for returning results out/in order and returning result indices
This commit is contained in:
Wim Pomp
2024-09-05 18:37:47 +02:00
parent 29203dd128
commit 4d80316244
4 changed files with 58 additions and 16 deletions

View File

@@ -7,8 +7,8 @@ jobs:
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
strategy: strategy:
matrix: matrix:
python-version: ["3.10"] python-version: ["3.10", "3.12"]
os: [ubuntu-20.04, windows-2019, macOS-11] os: [ubuntu-latest, windows-latest, macOS-latest]
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4

View File

@@ -6,7 +6,7 @@ Take any normal serial but parallelizable for-loop and execute it in parallel us
Don't worry about the technical details of using the multiprocessing module, race conditions, queues, Don't worry about the technical details of using the multiprocessing module, race conditions, queues,
parfor handles all that. parfor handles all that.
Tested on linux, Windows and OSX with python 3.10. Tested on linux, Windows and OSX with python 3.10 and 3.12.
## Why is parfor better than just using multiprocessing? ## Why is parfor better than just using multiprocessing?
- Easy to use - Easy to use
@@ -56,6 +56,8 @@ iterations need to be dillable. You might be able to make objects dillable anyho
length: deprecated alias for total length: deprecated alias for total
n_processes: number of processes to use, n_processes: number of processes to use,
the parallel pool will be restarted if the current pool does not have the right number of processes the parallel pool will be restarted if the current pool does not have the right number of processes
yield_ordered: if True (default), return the results in the same order as the iterable; if False, results may be returned out of order
yield_index: also return the index of each result, as (index, result) pairs
**bar_kwargs: keyword arguments for tqdm.tqdm **bar_kwargs: keyword arguments for tqdm.tqdm
### Return ### Return
@@ -158,7 +160,10 @@ Since generators don't have a predefined length, give parfor the length (total)
# Extra's # Extra's
## `pmap` ## `pmap`
The function parfor decorates, it's used similarly to `map`. The function parfor decorates, it's used similarly to `map`, it returns a list with the results.
## `gmap`
Same as `pmap`, but returns a generator instead of a list, so each result can be used as soon as it is generated.
## `Chunks` ## `Chunks`
Split a long iterator in bite-sized chunks to parallelize Split a long iterator in bite-sized chunks to parallelize

View File

@@ -8,7 +8,7 @@ from importlib.metadata import version
from os import devnull, getpid from os import devnull, getpid
from time import time from time import time
from traceback import format_exc from traceback import format_exc
from typing import Any, Callable, Hashable, Iterable, Iterator, NoReturn, Optional, Protocol, Sized, TypeVar from typing import Any, Callable, Generator, Hashable, Iterable, Iterator, NoReturn, Optional, Protocol, Sized, TypeVar
from warnings import warn from warnings import warn
from tqdm.auto import tqdm from tqdm.auto import tqdm
@@ -551,10 +551,11 @@ class Worker:
self.n_workers.value -= 1 self.n_workers.value -= 1
def pmap(fun: Callable[[Iteration, Any, ...], Result], iterable: Iterable[Iteration] = None, def gmap(fun: Callable[[Iteration, Any, ...], Result], iterable: Iterable[Iteration] = None,
args: tuple[Any, ...] = None, kwargs: dict[str, Any] = None, total: int = None, desc: str = None, args: tuple[Any, ...] = None, kwargs: dict[str, Any] = None, total: int = None, desc: str = None,
bar: Bar | bool = True, terminator: Callable[[], None] = None, serial: bool = None, length: int = None, bar: Bar | bool = True, terminator: Callable[[], None] = None, serial: bool = None, length: int = None,
n_processes: int = None, **bar_kwargs: Any) -> list[Result]: n_processes: int = None, yield_ordered: bool = True, yield_index: bool = False,
**bar_kwargs: Any) -> Generator[Result, None, None]:
""" map a function fun to each iteration in iterable """ map a function fun to each iteration in iterable
use as a function: pmap use as a function: pmap
use as a decorator: parfor use as a decorator: parfor
@@ -574,10 +575,13 @@ def pmap(fun: Callable[[Iteration, Any, ...], Result], iterable: Iterable[Iterat
length: deprecated alias for total length: deprecated alias for total
n_processes: number of processes to use, n_processes: number of processes to use,
the parallel pool will be restarted if the current pool does not have the right number of processes the parallel pool will be restarted if the current pool does not have the right number of processes
yield_ordered: if True (default), return the results in the same order as the iterable; if False, results may be returned out of order
yield_index: also return the index of each result, as (index, result) pairs
**bar_kwargs: keyword arguments for tqdm.tqdm **bar_kwargs: keyword arguments for tqdm.tqdm
output: output:
list with results from applying the function \'fun\' to each iteration of the iterable / iterator list (pmap) or generator (gmap) with results from applying the function \'fun\' to each iteration
of the iterable / iterator
examples: examples:
<< from time import sleep << from time import sleep
@@ -663,16 +667,49 @@ def pmap(fun: Callable[[Iteration, Any, ...], Result], iterable: Iterable[Iterat
bar = stack.enter_context(tqdm(**bar_kwargs)) bar = stack.enter_context(tqdm(**bar_kwargs))
with ParPool(chunk_fun, args, kwargs, n_processes, bar) as p: with ParPool(chunk_fun, args, kwargs, n_processes, bar) as p:
for i, (j, l) in enumerate(zip(iterable, iterable.lengths)): # add work to the queue for i, (j, l) in enumerate(zip(iterable, iterable.lengths)): # add work to the queue
p(j, handle=i, barlength=iterable.lengths[i]) p(j, handle=i, barlength=l)
if bar.total is None or bar.total < i + 1: if bar.total is None or bar.total < i + 1:
bar.total = i + 1 bar.total = i + 1
if is_chunked: if is_chunked:
return [p[i] for i in range(len(iterable))] if yield_ordered:
if yield_index:
for i in range(len(iterable)):
yield i, p[i]
else: else:
return sum([p[i] for i in range(len(iterable))], []) # collect the results for i in range(len(iterable)):
yield p[i]
else:
if yield_index:
for _ in range(len(iterable)):
yield p.get_newest()
else:
for _ in range(len(iterable)):
yield p.get_newest()[1]
else:
if yield_ordered:
if yield_index:
for i in range(len(iterable)):
yield i, p[i][0]
else:
for i in range(len(iterable)):
yield p[i][0]
else:
if yield_index:
for _ in range(len(iterable)):
i, n = p.get_newest()
yield i, n[0]
else:
for _ in range(len(iterable)):
yield p.get_newest()[1][0]
@wraps(pmap) @wraps(gmap)
def pmap(*args, **kwargs) -> list[Result]:
return list(gmap(*args, **kwargs)) # type: ignore
@wraps(gmap)
def parfor(*args: Any, **kwargs: Any) -> Callable[[Callable[[Iteration, Any, ...], Result]], list[Result]]: def parfor(*args: Any, **kwargs: Any) -> Callable[[Callable[[Iteration, Any, ...], Result]], list[Result]]:
def decfun(fun: Callable[[Iteration, Any, ...], Result]) -> list[Result]: def decfun(fun: Callable[[Iteration, Any, ...], Result]) -> list[Result]:
return pmap(fun, *args, **kwargs) return pmap(fun, *args, **kwargs)

View File

@@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "parfor" name = "parfor"
version = "2024.7.1" version = "2024.9.0"
description = "A package to mimic the use of parfor as done in Matlab." description = "A package to mimic the use of parfor as done in Matlab."
authors = ["Wim Pomp <wimpomp@gmail.com>"] authors = ["Wim Pomp <wimpomp@gmail.com>"]
license = "GPLv3" license = "GPLv3"