- some warnings

- better color(map) support
- make python module optional in rust
- max 48 threads
This commit is contained in:
Wim Pomp
2024-10-12 13:19:22 +02:00
parent 4d31933a38
commit 625b222a0d
9 changed files with 224 additions and 105 deletions

24
.github/workflows/publish.yml vendored Normal file
View File

@@ -0,0 +1,24 @@
name: Publish
on: workflow_dispatch
jobs:
publish_wheels:
uses: ./.github/workflows/wheels.yml
publish:
name: publish
needs: publish_wheels
runs-on: ubuntu-latest
environment: pypi
steps:
- uses: actions/download-artifact@v4
with:
# unpacks all tiffwrite artifacts into dist/
pattern: tiffwrite-*
path: dist
merge-multiple: true
- name: Publish package to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
password: ${{ secrets.PYPI_API_TOKEN }}
repository-url: https://upload.pypi.org/legacy/

View File

@@ -1,6 +1,6 @@
name: PyTest
on: [push, pull_request]
on: [workflow_call, push, pull_request]
jobs:
pytest:

42
.github/workflows/wheels.yml vendored Normal file
View File

@@ -0,0 +1,42 @@
name: Wheels
on: workflow_call
jobs:
wheels_pytest:
uses: ./.github/workflows/pytest.yml
build_wheels:
name: Build wheels on ${{ matrix.os }}
needs: [ wheels_pytest ]
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-latest, windows-latest, macos-13, macos-latest ]
steps:
- uses: actions/checkout@v4
- name: Build wheels
uses: pypa/cibuildwheel@v2.21.2
- uses: actions/upload-artifact@v4
with:
name: tiffwrite-wheels-${{ matrix.os }}-${{ strategy.job-index }}
path: ./wheelhouse/*.whl
build_sdist:
name: Build source distribution
needs: [ wheels_pytest ]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Build sdist
run: |
pip install build
python -m build --sdist
- uses: actions/upload-artifact@v4
with:
name: tiffwrite-sdist
path: dist/*.tar.gz

View File

@@ -3,20 +3,23 @@ name = "tiffwrite"
version = "2024.10.2"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "tiffwrite"
crate-type = ["cdylib", "rlib"]
[dependencies]
pyo3 = { version = "0.21.2", features = ["extension-module", "abi3-py310", "generate-import-lib", "anyhow", "multiple-pymethods"] }
anyhow = "1.0.89"
rayon = "1.10.0"
num = "0.4.3"
ndarray = "0.15.6"
chrono = "0.4.38"
numpy = "0.21.0"
ndarray = "0.15.6"
num = "0.4.3"
rayon = "1.10.0"
zstd = "0.13.2"
numpy = { version = "0.22.0", optional = true }
[dependencies.pyo3]
version = "0.22.2"
features = ["extension-module", "abi3-py310", "generate-import-lib", "anyhow", "multiple-pymethods"]
optional = true
[features]
nopython = []
python = ["dep:pyo3", "dep:numpy"]

View File

@@ -1,23 +1,26 @@
[![mypy](https://github.com/wimpomp/tiffwrite/actions/workflows/mypy.yml/badge.svg)](https://github.com/wimpomp/tiffwrite/actions/workflows/mypy.yml)
[![pytest](https://github.com/wimpomp/tiffwrite/actions/workflows/pytest.yml/badge.svg)](https://github.com/wimpomp/tiffwrite/actions/workflows/pytest.yml)
# Tiffwrite
Exploiting [tifffile](https://pypi.org/project/tifffile/) in parallel to write BioFormats/ImageJ compatible tiffs with
good compression.
Write BioFormats/ImageJ compatible tiffs with zstd compression in parallel using Rust.
## Features
- Writes bigtiff files that open in ImageJ as hyperstack with correct dimensions.
- Parallel compression.
- Write individual frames in random order.
- Compresses even more by referencing tag or image data which otherwise would have been saved several times.
For example empty frames, or a long string tag on every frame.
For example empty frames, or a long string tag on every frame. This makes editing the tiff afterwards nearly impossible,
but compression already makes that very hard anyway.
- Enables memory efficient scripts by saving frames whenever they're ready to be saved, not waiting for the whole stack.
- Colormaps, extra tags, globally or frame dependent.
- Colormaps
- Extra tags, globally or frame dependent.
## Installation
pip install tiffwrite
or
- install [rust](https://rustup.rs/)
pip install tiffwrite@git+https://github.com/wimpomp/tiffwrite
# Usage
@@ -67,11 +70,10 @@ or
from tiffwrite import IJTiffFile
import numpy as np
shape = (3, 5, 10) # channels, z, time
with IJTiffFile('file.tif', shape, pxsize=0.09707) as tif:
for c in range(shape[0]):
for z in range(shape[1]):
for t in range(shape[2]):
with IJTiffFile('file.tif', pxsize=0.09707) as tif:
for c in range(3):
for z in range(5):
for t in range(10):
tif.save(np.random.randint(0, 10, (32, 32)), c, z, t)
## Saving multiple tiffs simultaneously
@@ -79,7 +81,7 @@ or
import numpy as np
shape = (3, 5, 10) # channels, z, time
with IJTiffFile('fileA.tif', shape) as tif_a, IJTiffFile('fileB.tif', shape) as tif_b:
with IJTiffFile('fileA.tif') as tif_a, IJTiffFile('fileB.tif') as tif_b:
for c in range(shape[0]):
for z in range(shape[1]):
for t in range(shape[2]):

View File

@@ -3,8 +3,10 @@ from __future__ import annotations
from itertools import product
from pathlib import Path
from typing import Any, Sequence
from warnings import warn
import colorcet
import matplotlib
import numpy as np
from matplotlib import colors as mpl_colors
from numpy.typing import ArrayLike, DTypeLike
@@ -12,13 +14,13 @@ from tqdm.auto import tqdm
from . import tiffwrite_rs as rs # noqa
__all__ = ['Header', 'IJTiffFile', 'IFD', 'FrameInfo', 'Tag', 'Strip', 'tiffwrite']
class Header:
pass
class IFD(dict):
pass
@@ -32,38 +34,60 @@ CZT = tuple[int, int, int]
FrameInfo = tuple[np.ndarray, None, CZT]
class TiffWriteWarning(UserWarning):
pass
class IJTiffFile(rs.IJTiffFile):
def __new__(cls, path: str | Path, shape: tuple[int, int, int] = None, dtype: DTypeLike = 'uint16',
colors: Sequence[str] = None, colormap: str = None, pxsize: float = None,
deltaz: float = None, timeinterval: float = None, compression: int = None, comment: str = None,
**extratags: Tag) -> IJTiffFile:
new = super().__new__(cls, str(path))
""" Writes a tiff file in a format that the BioFormats reader in Fiji understands.
path: filename of the new tiff file
shape: not used anymore, the argument will be removed in the future
dtype: datatype to use when saving to tiff
colors: a tuple with a color per channel, chosen from matplotlib.colors, html colors are also possible
colormap: name of a colormap from colorcet or matplotlib
pxsize: pixel size in um
deltaz: z slice interval in um
timeinterval: time between frames in seconds
compression: zstd compression level
extratags: other tags to be saved, example: (Tag.ascii(315, 'John Doe'), Tag.bytes(4567, [400, 500]))
"""
def __new__(cls, path: str | Path, *args, **kwargs) -> IJTiffFile:
return super().__new__(cls, str(path))
def __init__(self, path: str | Path, shape: tuple[int, int, int] = None, dtype: DTypeLike = 'uint16',
colors: Sequence[str] = None, colormap: str = None, pxsize: float = None,
deltaz: float = None, timeinterval: float = None, compression: int = None, comment: str = None,
extratags: Sequence[Tag] = None) -> None:
self.path = Path(path)
self.shape = shape
self.dtype = np.dtype(dtype)
if compression is not None:
if isinstance(compression, Sequence):
compression = compression[-1]
new.set_compression_level(compression)
self.set_compression_level(compression)
if colors is not None:
new.colors = np.array([get_color(color) for color in colors])
self.colors = np.array([get_color(color) for color in colors])
if colormap is not None:
new.colormap = get_colormap(colormap)
self.colormap = get_colormap(colormap)
if pxsize is not None:
new.px_size = float(pxsize)
self.px_size = float(pxsize)
if deltaz is not None:
new.delta_z = float(deltaz)
self.delta_z = float(deltaz)
if timeinterval is not None:
new.time_interval = float(timeinterval)
self.time_interval = float(timeinterval)
if comment is not None:
new.comment = comment
for extra_tag in extratags:
new.append_extra_tag(extra_tag, None)
return new
def __init__(self, path: str | Path, shape: tuple[int, int, int] = None, dtype: DTypeLike = 'uint16', # noqa
colors: Sequence[str] = None, colormap: str = None, pxsize: float = None, # noqa
deltaz: float = None, timeinterval: float = None, comment: str = None, # noqa
**extratags: Tag.Value | Tag) -> None: # noqa
self.path = Path(path)
self.dtype = np.dtype(dtype)
self.comment = comment
if extratags is not None:
for extra_tag in extratags:
self.append_extra_tag(extra_tag, None)
if self.dtype.itemsize == 1 and colors is not None:
warn('Fiji will not interpret colors saved in an (u)int8 tif, save as (u)int16 instead.',
TiffWriteWarning, stacklevel=2)
if shape is not None:
warn('Providing shape is not needed anymore, the argument will be removed in the future.',
DeprecationWarning, stacklevel=2)
if colors is not None and colormap is not None:
warn('Cannot have colors and colormap simultaneously.', TiffWriteWarning, stacklevel=2)
def __enter__(self) -> IJTiffFile:
return self
@@ -71,7 +95,8 @@ class IJTiffFile(rs.IJTiffFile):
def __exit__(self, exc_type, exc_val, exc_tb):
self.close()
def save(self, frame: ArrayLike, c: int, z: int, t: int) -> None:
def save(self, frame: ArrayLike, c: int, z: int, t: int, extratags: Sequence[Tag] = None) -> None:
""" save a 2d numpy array to the tiff at channel=c, slice=z, time=t, with optional extra tif tags """
for frame, _, (cn, zn, tn) in self.compress_frame(frame):
frame = np.asarray(frame).astype(self.dtype)
match self.dtype:
@@ -97,15 +122,36 @@ class IJTiffFile(rs.IJTiffFile):
self.save_f64(frame, c + cn, z + zn, t + tn)
case _:
raise TypeError(f'Cannot save type {self.dtype}')
if extratags is not None:
for extra_tag in extratags:
self.append_extra_tag(extra_tag, (c, z, t))
def compress_frame(self, frame: ArrayLike) -> tuple[FrameInfo]:  # noqa
    """ Kept for backwards compatibility: returns a one-element tuple holding
        (frame, None, (c, z, t) offset), with the frame untouched and a zero offset. """
    zero_offset = (0, 0, 0)
    info = (frame, None, zero_offset)
    return (info,)
def get_colormap(colormap: str) -> np.ndarray:
    """ Look up a colormap by name and return it as an array with one uint8 (r, g, b) row per entry.

        colormap: name of a colorcet palette, a trailing '_r' selects the reversed palette;
                  names colorcet does not know are looked up in matplotlib instead
    """
    # Remove an exact '_r' suffix only: str.rstrip('_r') strips every trailing '_' and 'r' character,
    # which turns e.g. 'kr' into 'k' so that the palette is not found.
    name = colormap.removesuffix('_r')
    if hasattr(colorcet, name):
        # the palette is a sequence of '#rrggbb' strings: split each into its three hex pairs
        cm = np.array([[int(''.join(i), 16) for i in zip(*[iter(s[1:])] * 2)]
                       for s in getattr(colorcet, name)]).astype('uint8')
        if colormap.endswith('_r'):
            cm = cm[::-1]
        if colormap.startswith('glasbey') or colormap.endswith('glasbey'):
            # glasbey palettes get a black first and a white last entry
            cm[0] = 0, 0, 0
            cm[-1] = 255, 255, 255
    else:
        cmap = matplotlib.colormaps.get_cmap(colormap)
        if cmap.N < 256:
            # Colormaps with fewer than 256 colors: entry 0 is black, entry 255 is white and the
            # entries 1..254 (inclusive, hence arange(1, 255)) are spread over the colormap, 256 rows in total.
            mappable = matplotlib.cm.ScalarMappable(matplotlib.colors.Normalize(1, 254), cmap)
            cm = (255 * np.vstack(((0, 0, 0),
                                   mappable.to_rgba(np.arange(1, 255))[:, :3],
                                   (1, 1, 1)))).astype('uint8')
        else:
            cm = (255 * matplotlib.cm.ScalarMappable(matplotlib.colors.Normalize(0, 255), cmap)
                  .to_rgba(np.arange(256))[:, :3]).astype('uint8')
    return cm
def get_color(color: str) -> np.ndarray:
    """ Convert a color understood by matplotlib into an array of uint8 components,
        read pairwise from the hex representation of the color. """
    hex_digits = mpl_colors.to_hex(color)[1:]  # drop the leading '#'
    # take the digits two at a time, an unpaired trailing digit is ignored
    components = [int(hex_digits[k:k + 2], 16) for k in range(0, len(hex_digits) - 1, 2)]
    return np.array(components).astype('uint8')
@@ -131,10 +177,7 @@ def tiffwrite(file: str | Path, data: np.ndarray, axes: str = 'TZCXY', dtype: DT
data = np.expand_dims(data, axis)
shape = data.shape[:3]
with IJTiffFile(file, shape, data.dtype if dtype is None else dtype, *args, **kwargs) as f: # type: ignore
at_least_one = False
with IJTiffFile(file, dtype=data.dtype if dtype is None else dtype, *args, **kwargs) as f:
for n in tqdm(product(*[range(i) for i in shape]), total=np.prod(shape), # type: ignore
desc='Saving tiff', disable=not bar):
if np.any(data[n]) or not at_least_one:
f.save(data[n], *n)
at_least_one = True
f.save(data[n], *n)

View File

@@ -15,11 +15,11 @@ classifiers = [
dependencies = ["colorcet", "matplotlib", "numpy", "tqdm"]
[project.optional-dependencies]
test = ["pytest", "tifffile"]
test = ["pytest", "tifffile", "imagecodecs"]
[tool.maturin]
python-source = "py"
features = ["pyo3/extension-module"]
features = ["pyo3/extension-module", "python"]
module-name = "tiffwrite.tiffwrite_rs"
[tool.isort]

View File

@@ -1,4 +1,4 @@
#[cfg(not(feature = "nopython"))]
#[cfg(feature = "python")]
mod py;
use anyhow::Result;
@@ -6,26 +6,30 @@ use chrono::Utc;
use ndarray::{s, Array2};
use num::{traits::ToBytes, Complex, FromPrimitive, Rational32, Zero};
use rayon::prelude::*;
use std::{cmp::Ordering, collections::HashMap};
use std::fs::{File, OpenOptions};
use std::hash::{DefaultHasher, Hash, Hasher};
use std::io::{copy, Read, Seek, SeekFrom, Write};
use std::{thread, thread::JoinHandle};
use zstd::{DEFAULT_COMPRESSION_LEVEL, stream::Encoder};
use std::time::Duration;
use std::{cmp::Ordering, collections::HashMap};
use std::{
thread,
thread::{sleep, JoinHandle},
};
use zstd::{stream::Encoder, DEFAULT_COMPRESSION_LEVEL};
const TAG_SIZE: usize = 20;
const OFFSET_SIZE: usize = 8;
const OFFSET: u64 = 16;
const COMPRESSION: u16 = 50000;
pub fn encode_all(source: Vec<u8>, level: i32) -> Result<Vec<u8>> {
fn encode_all(source: Vec<u8>, level: i32) -> Result<Vec<u8>> {
let mut result = Vec::<u8>::new();
copy_encode(&*source, &mut result, level, source.len() as u64)?;
Ok(result)
}
/// copy_encode from zstd crate, but let it include the content size in the zstd block header
pub fn copy_encode<R, W>(mut source: R, destination: W, level: i32, length: u64) -> Result<()>
fn copy_encode<R, W>(mut source: R, destination: W, level: i32, length: u64) -> Result<()>
where
R: Read,
W: Write,
@@ -518,7 +522,7 @@ impl IJTiffFile {
}
/// Store the compression level used when compressing frames.
///
/// Values outside `-7..=22` are not rejected but clamped to the nearest bound.
pub fn set_compression_level(&mut self, compression_level: i32) {
    self.compression_level = compression_level.clamp(-7, 22);
}
pub fn description(&self, c_size: usize, z_size: usize, t_size: usize) -> String {
@@ -644,10 +648,17 @@ impl IJTiffFile {
.into_iter()
.map(|tile| tile.map(|x| x.bytes()).into_iter().flatten().collect())
.collect();
let bytes = byte_tiles
.into_par_iter()
.map(|x| encode_all(x, compression_level).unwrap())
.collect::<Vec<_>>();
let bytes = if byte_tiles.len() > 4 {
byte_tiles
.into_par_iter()
.map(|x| encode_all(x, compression_level).unwrap())
.collect::<Vec<_>>()
} else {
byte_tiles
.into_iter()
.map(|x| encode_all(x, compression_level).unwrap())
.collect::<Vec<_>>()
};
CompressedFrame {
bytes,
image_width,
@@ -657,22 +668,24 @@ impl IJTiffFile {
sample_format: T::SAMPLE_FORMAT,
}
}
loop {
self.collect_threads(false)?;
if self.threads.len() < 48 {
break;
}
sleep(Duration::from_millis(100));
}
let compression_level = self.compression_level;
self.threads.insert(
(c, z, t),
thread::spawn(move || compress(frame, compression_level)),
);
for key in self
.threads
.keys()
.cloned()
.collect::<Vec<(usize, usize, usize)>>()
{
if self.threads[&key].is_finished() {}
}
Ok(())
}
fn collect_threads(&mut self, block: bool) -> Result<()> {
for (c, z, t) in self.threads.keys().cloned().collect::<Vec<_>>() {
if self.threads[&(c, z, t)].is_finished() {
if block | self.threads[&(c, z, t)].is_finished() {
if let Some(thread) = self.threads.remove(&(c, z, t)) {
self.write_frame(thread.join().unwrap(), c, z, t)?;
}
@@ -739,40 +752,33 @@ impl IJTiffFile {
}
fn get_colormap(&self, colormap: &Vec<Vec<u8>>, bits_per_sample: u16) -> Vec<u16> {
if bits_per_sample == 8 {
colormap
.iter()
.flatten()
.map(|x| (*x as u16) * 256)
.collect()
} else {
colormap
.iter()
.map(|x| vec![x; 256])
.flatten()
.flatten()
.map(|x| (*x as u16) * 256)
.collect()
let mut r = Vec::new();
let mut g = Vec::new();
let mut b = Vec::new();
let n = 2usize.pow(bits_per_sample as u32 - 8);
for color in colormap {
r.extend(vec![(color[0] as u16) * 257; n]);
g.extend(vec![(color[1] as u16) * 257; n]);
b.extend(vec![(color[2] as u16) * 257; n]);
}
r.extend(g);
r.extend(b);
r
}
fn get_color(&self, colors: &Vec<u8>, bits_per_sample: u16) -> Result<Vec<u16>> {
fn get_color(&self, colors: &Vec<u8>, bits_per_sample: u16) -> Vec<u16> {
let mut c = Vec::new();
let lvl = if bits_per_sample == 8 { 255 } else { 65535 };
for i in 0..=lvl {
c.push(i * (colors[0] as u16) / 255);
c.push(i * (colors[1] as u16) / 255);
c.push(i * (colors[2] as u16) / 255);
let n = 2usize.pow(bits_per_sample as u32 - 8);
for color in colors {
for i in 0..256 {
c.extend(vec![i * (*color as u16) / 255 * 257; n])
}
}
Ok(c)
c
}
fn close(&mut self) -> Result<()> {
for (c, z, t) in self.threads.keys().cloned().collect::<Vec<_>>() {
if let Some(thread) = self.threads.remove(&(c, z, t)) {
self.write_frame(thread.join().unwrap(), c, z, t)?;
}
}
self.collect_threads(true)?;
let mut c_size = 1;
let mut z_size = 1;
let mut t_size = 1;
@@ -812,7 +818,7 @@ impl IJTiffFile {
ifd.push_tag(Tag::short(259, &vec![COMPRESSION]));
ifd.push_tag(Tag::ascii(270, &self.description(c_size, z_size, t_size)));
ifd.push_tag(Tag::short(277, &vec![frame_count as u16]));
ifd.push_tag(Tag::ascii(305, "tiffwrite_tllab_NKI"));
ifd.push_tag(Tag::ascii(305, "tiffwrite_rs"));
ifd.push_tag(Tag::short(322, &vec![frame.tile_width]));
ifd.push_tag(Tag::short(323, &vec![frame.tile_length]));
ifd.push_tag(Tag::long8(324, &offsets));
@@ -824,7 +830,6 @@ impl IJTiffFile {
let r = vec![Rational32::from_f64(px_size).unwrap()];
ifd.push_tag(Tag::rational(282, &r));
ifd.push_tag(Tag::rational(283, &r));
ifd.push_tag(Tag::short(296, &vec![1]));
}
if let Colors::Colormap(_) = &self.colors {
ifd.push_tag(Tag::short(262, &vec![3]));
@@ -839,11 +844,11 @@ impl IJTiffFile {
));
}
}
if frame_number < samples_per_pixel as usize {
if frame_number < c_size {
if let Colors::Colors(colors) = &self.colors {
ifd.push_tag(Tag::short(
320,
&self.get_color(&colors[frame_number], frame.bits_per_sample)?,
&self.get_color(&colors[frame_number], frame.bits_per_sample),
));
ifd.push_tag(Tag::short(262, &vec![3]));
}

View File

@@ -10,7 +10,7 @@ from tiffwrite import IJTiffFile
@pytest.mark.parametrize('dtype', ('uint8', 'uint16', 'uint32', 'uint64',
'int8', 'int16', 'int32', 'int64', 'float32', 'float64'))
def test_single(tmp_path: Path, dtype) -> None:
with IJTiffFile(tmp_path / 'test.tif', dtype=dtype) as tif:
with IJTiffFile(tmp_path / 'test.tif', dtype=dtype, pxsize=0.1, deltaz=0.5, timeinterval=6.5) as tif:
a0, b0 = np.meshgrid(range(100), range(100))
a0[::2, :] = 0
b0[:, ::2] = 1