From fb7757828f3a8c82364fd61feb363f8cfebdbd9d Mon Sep 17 00:00:00 2001 From: Wim Pomp Date: Wed, 11 Sep 2024 13:08:03 +0200 Subject: [PATCH] - fix serial computation - some more tests --- README.md | 14 +++++++------- parfor/__init__.py | 26 +++++++++++++++++++++++--- pyproject.toml | 2 +- tests/test_parfor.py | 23 +++++++++++++++++++++-- 4 files changed, 52 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 8d10976..e5ef78a 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,7 @@ iterations need to be dillable. You might be able to make objects dillable anyho fun = [] for i in range(10): sleep(1) - fun.append(a*i**2) + fun.append(a * i ** 2) print(fun) >> [0, 3, 12, 27, 48, 75, 108, 147, 192, 243] @@ -84,7 +84,7 @@ iterations need to be dillable. You might be able to make objects dillable anyho @parfor(range(10), (3,)) def fun(i, a): sleep(1) - return a*i**2 + return a * i ** 2 print(fun) >> [0, 3, 12, 27, 48, 75, 108, 147, 192, 243] @@ -93,7 +93,7 @@ iterations need to be dillable. You might be able to make objects dillable anyho @parfor(range(10), (3,), bar=False) def fun(i, a): sleep(1) - return a*i**2 + return a * i ** 2 print(fun) >> [0, 3, 12, 27, 48, 75, 108, 147, 192, 243] @@ -110,7 +110,7 @@ use the `if __name__ == '__main__':` structure: @parfor(range(10), (3,)) def fun(i, a): sleep(1) - return a*i**2 + return a * i ** 2 print(fun) >> [0, 3, 12, 27, 48, 75, 108, 147, 192, 243] @@ -124,7 +124,7 @@ or: @parfor(range(10), (3,)) def fun(i, a): sleep(1) - return a*i**2 + return a * i ** 2 return fun if __name__ == '__main__': @@ -140,7 +140,7 @@ pmap maps an iterator to a function like map does, but in parallel from time import sleep def fun(i, a): sleep(1) - return a*i**2 + return a * i ** 2 print(pmap(fun, range(10), (3,))) >> [0, 3, 12, 27, 48, 75, 108, 147, 192, 243] @@ -170,5 +170,5 @@ Split a long iterator in bite-sized chunks to parallelize ## `ParPool` More low-level accessibility to parallel execution. Submit tasks and request the result at any time, -(although necessarily submit first, then request a specific task), use different functions and function +(although to avoid breaking causality, submit first, then request), use different functions and function arguments for different tasks. diff --git a/parfor/__init__.py b/parfor/__init__.py index 5bfc0f7..7083674 100644 --- a/parfor/__init__.py +++ b/parfor/__init__.py @@ -655,10 +655,30 @@ def gmap(fun: Callable[[Iteration, Any, ...], Result], iterable: Iterable[Iterat if 'disable' not in bar_kwargs: bar_kwargs['disable'] = not bar if serial is True or (serial is None and len(iterable) < min(cpu_count, 4)) or Worker.nested: # serial case - if callable(bar): - return sum([chunk_fun(c, *args, **kwargs) for c in ExternalBar(iterable, bar)], []) + + def tqdm_chunks(chunks: Chunks, *args, **kwargs) -> Iterable[Iteration]: # noqa + with tqdm(*args, **kwargs) as b: + for chunk, length in zip(chunks, chunks.lengths): # noqa + yield chunk + b.update(length) + + iterable = (ExternalBar(iterable, bar, sum(iterable.lengths)) if callable(bar) + else tqdm_chunks(iterable, **bar_kwargs)) + if is_chunked: + if yield_index: + for i, c in enumerate(iterable): + yield i, chunk_fun(c, *args, **kwargs) + else: + for c in iterable: + yield chunk_fun(c, *args, **kwargs) else: - return sum([chunk_fun(c, *args, **kwargs) for c in tqdm(iterable, **bar_kwargs)], []) + if yield_index: + for i, c in enumerate(iterable): + yield i, chunk_fun(c, *args, **kwargs)[0] + else: + for c in iterable: + yield chunk_fun(c, *args, **kwargs)[0] + else: # parallel case with ExitStack() as stack: if callable(bar): diff --git a/pyproject.toml b/pyproject.toml index b19a67d..6019a13 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "parfor" -version = "2024.9.0" +version = "2024.9.1" description = "A package to mimic the use of parfor as done in Matlab." authors = ["Wim Pomp "] license = "GPLv3" diff --git a/tests/test_parfor.py b/tests/test_parfor.py index e78b32f..091beb3 100644 --- a/tests/test_parfor.py +++ b/tests/test_parfor.py @@ -71,11 +71,30 @@ def test_parfor() -> None: assert fun == [0, 6, 12, 18, 24, 30, 36, 42, 48, 54] -def test_pmap() -> None: +@pytest.mark.parametrize('serial', (True, False)) +def test_pmap(serial) -> None: def fun(i, j, k): return i * j * k - assert pmap(fun, range(10), (3,), {'k': 2}) == [0, 6, 12, 18, 24, 30, 36, 42, 48, 54] + assert pmap(fun, range(10), (3,), {'k': 2}, serial=serial) == [0, 6, 12, 18, 24, 30, 36, 42, 48, 54] + + +@pytest.mark.parametrize('serial', (True, False)) +def test_pmap_with_idx(serial) -> None: + def fun(i, j, k): + return i * j * k + + assert (pmap(fun, range(10), (3,), {'k': 2}, serial=serial, yield_index=True) == + [(0, 0), (1, 6), (2, 12), (3, 18), (4, 24), (5, 30), (6, 36), (7, 42), (8, 48), (9, 54)]) + + +@pytest.mark.parametrize('serial', (True, False)) +def test_pmap_chunks(serial) -> None: + def fun(i, j, k): + return [i_ * j * k for i_ in i] + + chunks = Chunks(range(10), size=2) + assert pmap(fun, chunks, (3,), {'k': 2}, serial=serial) == [[0, 6], [12, 18], [24, 30], [36, 42], [48, 54]] def test_id_reuse() -> None: