From cbeca59989aa908622dac578665120dc670e927f Mon Sep 17 00:00:00 2001 From: Wim Pomp Date: Tue, 5 Dec 2023 13:45:56 +0100 Subject: [PATCH] - Zstd decompression for czi files. --- ndbioimage/readers/cziread.py | 135 ++++++++++++++++++++++++++++++++++ pyproject.toml | 5 +- 2 files changed, 138 insertions(+), 2 deletions(-) diff --git a/ndbioimage/readers/cziread.py b/ndbioimage/readers/cziread.py index 013a414..43d276b 100644 --- a/ndbioimage/readers/cziread.py +++ b/ndbioimage/readers/cziread.py @@ -1,16 +1,151 @@ import re from abc import ABC from functools import cached_property +from io import BytesIO from itertools import product from pathlib import Path import czifile +import imagecodecs import numpy as np from lxml import etree from ome_types import model +from tifffile import repeat_nd from .. import AbstractReader +try: + # TODO: use zoom from imagecodecs implementation when available + from scipy.ndimage.interpolation import zoom +except ImportError: + try: + from ndimage.interpolation import zoom + except ImportError: + zoom = None + + +def zstd_decode(data: bytes) -> bytes: + """ decode zstd bytes, copied from BioFormats ZeissCZIReader """ + def read_var_int(stream: BytesIO) -> int: + a = stream.read(1)[0] + if a & 128: + b = stream.read(1)[0] + if b & 128: + c = stream.read(1)[0] + return (c << 14) | ((b & 127) << 7) | (a & 127) + return (b << 7) | (a & 127) + return a & 255 + + try: + with BytesIO(data) as stream: + size_of_header = read_var_int(stream) + high_low_unpacking = False + while stream.tell() < size_of_header: + chunk_id = read_var_int(stream) + # only one chunk ID defined so far + if chunk_id == 1: + high_low_unpacking = (stream.read(1)[0] & 1) == 1 + else: + raise ValueError(f'Invalid chunk id: {chunk_id}') + pointer = stream.tell() + except Exception: + high_low_unpacking = False + pointer = 0 + + decoded = imagecodecs.zstd_decode(data[pointer:]) + if high_low_unpacking: + second_half = len(decoded) // 2 + return bytes([decoded[second_half + i // 2] if i % 2 else decoded[i // 2] for i in range(len(decoded))]) + else: + return decoded + + +def data(self, raw=False, resize=True, order=0): + """Read image data from file and return as numpy array.""" + DECOMPRESS = czifile.czifile.DECOMPRESS + DECOMPRESS[5] = imagecodecs.zstd_decode + DECOMPRESS[6] = zstd_decode + + de = self.directory_entry + fh = self._fh + if raw: + with fh.lock: + fh.seek(self.data_offset) + data = fh.read(self.data_size) + return data + if de.compression: + # if de.compression not in DECOMPRESS: + # raise ValueError('compression unknown or not supported') + with fh.lock: + fh.seek(self.data_offset) + data = fh.read(self.data_size) + data = DECOMPRESS[de.compression](data) + if de.compression == 2: + # LZW + data = np.fromstring(data, de.dtype) # noqa + elif de.compression in (5, 6): + # ZSTD + data = np.frombuffer(data, de.dtype) + else: + dtype = np.dtype(de.dtype) + with fh.lock: + fh.seek(self.data_offset) + data = fh.read_array(dtype, self.data_size // dtype.itemsize) + + data = data.reshape(de.stored_shape) + if de.compression != 4 and de.stored_shape[-1] in (3, 4): + if de.stored_shape[-1] == 3: + # BGR -> RGB + data = data[..., ::-1] + else: + # BGRA -> RGBA + tmp = data[..., 0].copy() + data[..., 0] = data[..., 2] + data[..., 2] = tmp + if de.stored_shape == de.shape or not resize: + return data + + # sub / supersampling + factors = [j / i for i, j in zip(de.stored_shape, de.shape)] + factors = [(int(round(f)) if abs(f - round(f)) < 0.0001 else f) + for f in factors] + + # use repeat if possible + if order == 0 and all(isinstance(f, int) for f in factors): + data = repeat_nd(data, factors).copy() + data.shape = de.shape + return data + + # remove leading dimensions with size 1 for speed + shape = list(de.stored_shape) + i = 0 + for s in shape: + if s != 1: + break + i += 1 + shape = shape[i:] + factors = factors[i:] + data.shape = shape + + # resize RGB components separately for speed + if zoom is None: + raise ImportError("cannot import 'zoom' from scipy or ndimage") + if shape[-1] in (3, 4) and factors[-1] == 1.0: + factors = factors[:-1] + old = data + data = np.empty(de.shape, de.dtype[-2:]) + for i in range(shape[-1]): + data[..., i] = zoom(old[..., i], zoom=factors, order=order) + else: + data = zoom(data, zoom=factors, order=order) + + data.shape = de.shape + return data + + +# monkeypatch zstd into czifile +czifile.czifile.SubBlockSegment.data = data + class Reader(AbstractReader, ABC): priority = 0 diff --git a/pyproject.toml b/pyproject.toml index 8b879cf..a7729e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "ndbioimage" -version = "2023.12.0" +version = "2023.12.1" description = "Bio image reading, metadata and some affine registration." authors = ["W. Pomp "] license = "GPLv3" @@ -15,7 +15,7 @@ python = "^3.8" numpy = "*" pandas = "*" tifffile = "*" -czifile = "*" +czifile = "2019.7.2" tiffwrite = "*" ome-types = "^0.4.0" pint = "*" @@ -26,6 +26,7 @@ parfor = ">=2023.10.1" JPype1 = "*" SimpleITK-SimpleElastix = "*" scikit-image = "*" +imagecodecs = "*" pytest = { version = "*", optional = true } [tool.poetry.extras]