From f62b711692d75523b527fd85542dbf379c9cefa4 Mon Sep 17 00:00:00 2001 From: Wim Pomp Date: Wed, 9 Oct 2024 15:07:38 +0200 Subject: [PATCH] - add tiffwrite function for python - parallel zstd compression --- .github/workflows/mypy.yml | 21 -------- .gitignore | 2 + Cargo.toml | 3 +- {tiffwrite => py/tiffwrite}/__init__.py | 51 ++++++++++++++---- pyproject.toml | 6 ++- src/lib.rs | 72 +++++++++++++------------ src/py.rs | 15 ++++-- 7 files changed, 97 insertions(+), 73 deletions(-) delete mode 100644 .github/workflows/mypy.yml rename {tiffwrite => py/tiffwrite}/__init__.py (64%) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml deleted file mode 100644 index 9146014..0000000 --- a/.github/workflows/mypy.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: MyPy - -on: [push, pull_request] - -jobs: - mypy: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.10", "3.12"] - - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: pip install .[test] - - name: Test with mypy - run: mypy . \ No newline at end of file diff --git a/.gitignore b/.gitignore index f5af85d..889ae51 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,5 @@ /.pytest_cache/ /venv/ /target/ +/Cargo.lock +/foo.tif diff --git a/Cargo.toml b/Cargo.toml index 71603b6..0bc729b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,12 +12,11 @@ crate-type = ["cdylib", "rlib"] pyo3 = { version = "0.21.2", features = ["extension-module", "abi3-py310", "generate-import-lib", "anyhow", "multiple-pymethods"] } anyhow = "1.0.89" rayon = "1.10.0" -fraction = "0.15.3" num = "0.4.3" ndarray = "0.15.6" chrono = "0.4.38" numpy = "0.21.0" -futures = "0.3.31" +zstd = "0.13.2" [features] nopython = [] diff --git a/tiffwrite/__init__.py b/py/tiffwrite/__init__.py similarity index 64% rename from tiffwrite/__init__.py rename to py/tiffwrite/__init__.py index ebf2282..bef208d 100644 --- a/tiffwrite/__init__.py +++ b/py/tiffwrite/__init__.py @@ -1,16 +1,22 @@ from __future__ import annotations -import numpy as np -from typing import Any, Self, Sequence +from itertools import product from pathlib import Path +from typing import Any, Sequence + +import numpy as np from numpy.typing import ArrayLike, DTypeLike +from tqdm.auto import tqdm -from . import tiffwrite as rs +from . import tiffwrite_rs as rs # noqa -__all__ = ['Tag', 'IJTiffFile', 'tiffwrite'] +__all__ = ['Header', 'IJTiffFile', 'IFD', 'FrameInfo', 'Tag', 'Strip', 'tiffwrite'] +class Header: + pass + class IFD(dict): pass @@ -19,11 +25,16 @@ class Tag(rs.Tag): pass +Strip = tuple[list[int], list[int]] +CZT = tuple[int, int, int] +FrameInfo = tuple[IFD, Strip, CZT] + + class IJTiffFile(rs.IJTiffFile): def __new__(cls, path: str | Path, shape: tuple[int, int, int], dtype: DTypeLike = 'uint16', colors: Sequence[str] = None, colormap: str = None, pxsize: float = None, deltaz: float = None, timeinterval: float = None, comment: str = None, - **extratags: Tag.Value | Tag) -> None: + **extratags: Tag.Value | Tag) -> IJTiffFile: new = super().__new__(cls, str(path), shape) if colors is not None: new = new.with_colors(colors) @@ -41,14 +52,14 @@ class IJTiffFile(rs.IJTiffFile): new = new.extend_extratags(extratags) return new - def __init__(self, path: str | Path, shape: tuple[int, int, int], dtype: DTypeLike = 'uint16', - colors: Sequence[str] = None, colormap: str = None, pxsize: float = None, - deltaz: float = None, timeinterval: float = None, comment: str = None, - **extratags: Tag.Value | Tag) -> None: + def __init__(self, path: str | Path, shape: tuple[int, int, int], dtype: DTypeLike = 'uint16', # noqa + colors: Sequence[str] = None, colormap: str = None, pxsize: float = None, # noqa + deltaz: float = None, timeinterval: float = None, comment: str = None, # noqa + **extratags: Tag.Value | Tag) -> None: # noqa self.path = Path(path) self.dtype = np.dtype(dtype) - def __enter__(self) -> Self: + def __enter__(self) -> IJTiffFile: return self def __exit__(self, exc_type, exc_val, exc_tb): @@ -81,7 +92,7 @@ class IJTiffFile(rs.IJTiffFile): raise TypeError(f'Cannot save type {self.dtype}') -def tiffwrite(file: str | Path, data: ArrayLike, axes: str = 'TZCXY', dtype: DTypeLike = None, bar: bool = False, +def tiffwrite(file: str | Path, data: np.ndarray, axes: str = 'TZCXY', dtype: DTypeLike = None, bar: bool = False, *args: Any, **kwargs: Any) -> None: """ file: string; filename of the new tiff file data: 2 to 5D numpy array @@ -90,3 +101,21 @@ def tiffwrite(file: str | Path, data: ArrayLike, axes: str = 'TZCXY', dtype: DTy bar: bool; whether to show a progress bar other args: see IJTiffFile """ + + axes = axes[-np.ndim(data):].upper() + if not axes == 'CZTXY': + axes_shuffle = [axes.find(i) for i in 'CZTXY'] + axes_add = [i for i, j in enumerate(axes_shuffle) if j < 0] + axes_shuffle = [i for i in axes_shuffle if i >= 0] + data = np.transpose(data, axes_shuffle) + for axis in axes_add: + data = np.expand_dims(data, axis) + + shape = data.shape[:3] + with IJTiffFile(file, shape, data.dtype if dtype is None else dtype, *args, **kwargs) as f: # type: ignore + at_least_one = False + for n in tqdm(product(*[range(i) for i in shape]), total=np.prod(shape), # type: ignore + desc='Saving tiff', disable=not bar): + if np.any(data[n]) or not at_least_one: + f.save(data[n], *n) + at_least_one = True diff --git a/pyproject.toml b/pyproject.toml index 484a62e..7e13236 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,9 +13,13 @@ classifiers = [ "Programming Language :: Python :: Implementation :: PyPy", ] +[project.optional-dependencies] +test = ["pytest"] + [tool.maturin] +python-source = "py" features = ["pyo3/extension-module"] -module-name = "tiffwrite" +module-name = "tiffwrite.tiffwrite_rs" [tool.isort] line_length = 119 \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 0953369..e900cf5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,19 +6,19 @@ use std::collections::HashMap; use std::fs::{File, OpenOptions}; use std::io::{Read, Seek, SeekFrom, Write}; use anyhow::Result; -use fraction::Fraction; -use num::{Complex, Zero}; -use num::complex::ComplexFloat; +use num::{Complex, Rational32, Zero}; use ndarray::{s, Array2}; use num::traits::ToBytes; use std::hash::{DefaultHasher, Hash, Hasher}; use chrono::Utc; +use zstd::stream::encode_all; +use rayon::prelude::*; const TAG_SIZE: usize = 20; const OFFSET_SIZE: usize = 8; const OFFSET: u64 = 16; -const COMPRESSION: u16 = 1; +const COMPRESSION: u16 = 50000; #[derive(Clone, Debug)] @@ -116,10 +116,10 @@ impl Tag { Tag::new(code, long.into_iter().map(|x| x.to_le_bytes()).flatten().collect(), 4) } - pub fn rational(code: u16, rational: Vec) -> Self { + pub fn rational(code: u16, rational: Vec) -> Self { Tag::new(code, rational.into_iter().map(|x| - u32::try_from(*x.denom().unwrap()).unwrap().to_le_bytes().into_iter().chain( - u32::try_from(*x.numer().unwrap()).unwrap().to_le_bytes()).collect::>() + u32::try_from(*x.denom()).unwrap().to_le_bytes().into_iter().chain( + u32::try_from(*x.numer()).unwrap().to_le_bytes()).collect::>() ).flatten().collect(), 5) } @@ -135,10 +135,10 @@ impl Tag { Tag::new(code, slong.into_iter().map(|x| x.to_le_bytes()).flatten().collect(), 9) } - pub fn srational(code: u16, srational: Vec) -> Self { + pub fn srational(code: u16, srational: Vec) -> Self { Tag::new(code, srational.into_iter().map(|x| - i32::try_from(*x.denom().unwrap()).unwrap().to_le_bytes().into_iter().chain( - i32::try_from(*x.numer().unwrap()).unwrap().to_le_bytes()).collect::>() + i32::try_from(*x.denom()).unwrap().to_le_bytes().into_iter().chain( + i32::try_from(*x.numer()).unwrap().to_le_bytes()).collect::>() ).flatten().collect(), 10) } @@ -162,7 +162,7 @@ impl Tag { pub fn complex(code: u16, complex: Vec>) -> Self { Tag::new(code, complex.into_iter().map(|x| - x.re().to_le_bytes().into_iter().chain(x.im().to_le_bytes()).collect::>() + x.re.to_le_bytes().into_iter().chain(x.im.to_le_bytes()).collect::>() ).flatten().collect(), 15) } @@ -237,7 +237,7 @@ impl Tag { #[derive(Clone, Debug)] struct Frame { - tilebyteoffsets: Vec, + tileoffsets: Vec, tilebytecounts: Vec, image_width: u32, image_length: u32, @@ -250,11 +250,11 @@ struct Frame { impl Frame { fn new( - tilebyteoffsets: Vec, tilebytecounts: Vec, image_width: u32, image_length: u32, + tileoffsets: Vec, tilebytecounts: Vec, image_width: u32, image_length: u32, bits_per_sample: u16, sample_format: u16, tile_width: u16, tile_length: u16 ) -> Self { Frame { - tilebyteoffsets, tilebytecounts, image_width, image_length, bits_per_sample, + tileoffsets, tilebytecounts, image_width, image_length, bits_per_sample, sample_format, tile_width, tile_length, extra_tags: Vec::new() } } @@ -276,8 +276,7 @@ macro_rules! bytes_impl { const SAMPLE_FORMAT: u16 = $sample_format; #[inline] - fn bytes(&self) -> Vec - { + fn bytes(&self) -> Vec { self.to_le_bytes().to_vec() } } @@ -293,7 +292,6 @@ bytes_impl!(u128, 128, 1); bytes_impl!(usize, 64, 1); #[cfg(target_pointer_width = "32")] bytes_impl!(usize, 32, 1); - bytes_impl!(i8, 8, 2); bytes_impl!(i16, 16, 2); bytes_impl!(i32, 32, 2); @@ -422,30 +420,38 @@ impl IJTiffFile { pub fn save(&mut self, frame: Array2, c: usize, z: usize, t: usize, extra_tags: Option>) -> Result<()> { - self.compress_frame(frame, c, z, t, extra_tags); + self.compress_frame(frame.reversed_axes(), c, z, t, extra_tags)?; Ok(()) } - fn compress_frame(&mut self, frame: Array2, c: usize, z: usize, t: usize, - extra_tags: Option>) { + fn compress_frame(&mut self, frame: Array2, + c: usize, z: usize, t: usize, + extra_tags: Option>) -> Result<()> { let image_width = frame.shape()[0] as u32; let image_length = frame.shape()[1] as u32; - let mut tilebyteoffsets = Vec::new(); + let tile_size = 2usize.pow(((image_width as f64 * image_length as f64 / 64f64).log2() / 2f64).round() as u32).max(16).min(1024); + let mut tileoffsets = Vec::new(); let mut tilebytecounts = Vec::new(); - let tiles = IJTiffFile::tile(frame.reversed_axes(), 64); - for tile in tiles { - let bytes: Vec = tile.map(|x| x.bytes()).into_iter().flatten().collect(); - tilebytecounts.push(bytes.len() as u64); - tilebyteoffsets.push(self.write(&bytes).unwrap()); + let tiles = IJTiffFile::tile(frame.reversed_axes(), tile_size); + let byte_tiles: Vec> = tiles.into_iter().map( + |tile| tile.map(|x| x.bytes()).into_iter().flatten().collect() + ).collect(); + for tile in byte_tiles.into_par_iter().map(|x| encode_all(&*x, 3)).collect::>() { + if let Ok(bytes) = tile { + tilebytecounts.push(bytes.len() as u64); + tileoffsets.push(self.write(&bytes)?); + } } - let mut frame = Frame::new(tilebyteoffsets, tilebytecounts, image_width, image_length, - T::BITS_PER_SAMPLE, T::SAMPLE_FORMAT, 64, 64); + + let mut frame = Frame::new(tileoffsets, tilebytecounts, image_width, image_length, + T::BITS_PER_SAMPLE, T::SAMPLE_FORMAT, tile_size as u16, tile_size as u16); if let Some(tags) = extra_tags { for tag in tags { frame.extra_tags.push(tag); } } self.frames.insert(self.get_frame_number(c, z, t), frame); + Ok(()) } fn tile(frame: Array2, size: usize) -> Vec> { @@ -484,11 +490,11 @@ impl IJTiffFile { tiles } - fn get_colormap(&self, colormap: &Vec) -> Result> { + fn get_colormap(&self, _colormap: &Vec) -> Result> { todo!(); } - fn get_color(&self, colors: (u8, u8, u8)) -> Result> { + fn get_color(&self, _colors: (u8, u8, u8)) -> Result> { todo!(); } @@ -497,12 +503,12 @@ impl IJTiffFile { let mut warn = false; for frame_number in 0..self.n_frames { if let Some(frame) = self.frames.get(&(frame_number, 0)) { - let mut tilebyteoffsets = Vec::new(); + let mut tileoffsets = Vec::new(); let mut tilebytecounts = Vec::new(); let mut frame_count = 0; for channel in 0..self.samples_per_pixel { if let Some(frame_n) = self.frames.get(&(frame_number, channel)) { - tilebyteoffsets.extend(frame_n.tilebyteoffsets.iter()); + tileoffsets.extend(frame_n.tileoffsets.iter()); tilebytecounts.extend(frame_n.tilebytecounts.iter()); frame_count += 1; } else { @@ -519,7 +525,7 @@ impl IJTiffFile { ifd.push_tag(Tag::ascii(305, "tiffwrite_rs")); ifd.push_tag(Tag::short(322, vec![frame.tile_width])); ifd.push_tag(Tag::short(323, vec![frame.tile_length])); - ifd.push_tag(Tag::long8(324, tilebyteoffsets)); + ifd.push_tag(Tag::long8(324, tileoffsets)); ifd.push_tag(Tag::long8(325, tilebytecounts)); ifd.push_tag(Tag::short(339, vec![frame.sample_format])); if frame_number == 0 { diff --git a/src/py.rs b/src/py.rs index e8f92ef..5da17e5 100644 --- a/src/py.rs +++ b/src/py.rs @@ -1,7 +1,6 @@ use pyo3::prelude::*; use crate::{IJTiffFile, Tag}; -use fraction::Fraction; -use num::Complex; +use num::{Complex, Rational32, FromPrimitive}; use numpy::{PyReadonlyArray2, PyArrayMethods}; @@ -36,7 +35,7 @@ impl PyTag { #[staticmethod] fn rational(code: u16, rational: Vec) -> Self { - PyTag { tag: Tag::rational(code, rational.into_iter().map(|x| Fraction::from(x)).collect()) } + PyTag { tag: Tag::rational(code, rational.into_iter().map(|x| Rational32::from_f64(x).unwrap()).collect()) } } #[staticmethod] @@ -56,7 +55,7 @@ impl PyTag { #[staticmethod] fn srational(code: u16, srational: Vec) -> Self { - PyTag { tag: Tag::srational(code, srational.into_iter().map(|x| Fraction::from(x)).collect()) } + PyTag { tag: Tag::srational(code, srational.into_iter().map(|x| Rational32::from_f64(x).unwrap()).collect()) } } #[staticmethod] @@ -98,6 +97,10 @@ impl PyTag { fn ifd8(code: u16, ifd8: Vec) -> Self { PyTag { tag: Tag::ifd8(code, ifd8) } } + + fn count(&self) -> u64 { + self.tag.count() + } } @@ -197,8 +200,10 @@ impl_save!(i64, save_i64); impl_save!(f32, save_f32); impl_save!(f64, save_f64); + #[pymodule] -fn tiffwrite(m: &Bound<'_, PyModule>) -> PyResult<()> { +#[pyo3(name = "tiffwrite_rs")] +fn tiffwrite_rs(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; Ok(())