- First commit.

This commit is contained in:
Wim Pomp
2021-12-03 16:23:54 +01:00
parent 2fd65d8f01
commit e7b999faa7
3 changed files with 630 additions and 2 deletions

520
tiffwrite/__init__.py Executable file
View File

@@ -0,0 +1,520 @@
import sys
import struct
import numpy as np
from io import BytesIO
from multiprocessing import Pool, Process, Queue, Event, cpu_count, Value, queues
from time import sleep
from tqdm.auto import tqdm
import tifffile
import colorcet
from itertools import product
from collections import OrderedDict
from multipledispatch import dispatch
from numbers import Number
from fractions import Fraction
def get_colormap(colormap, dtype='int8', byteorder='<'):
colormap = getattr(colorcet, colormap)
colormap[0] = '#ffffff'
colormap[-1] = '#000000'
colormap = 65535 * np.array([[int(''.join(i), 16) for i in zip(*[iter(s[1:])] * 2)] for s in colormap]) // 255
if np.dtype(dtype).itemsize == 2:
colormap = np.tile(colormap, 256).reshape((-1, 3))
return b''.join([struct.pack(byteorder + 'H', c) for c in colormap.T.flatten()])
def tiffwrite(file, data, axes='TZCXY', bar=False, colormap=None, pxsize=None):
""" file: string; filename of the new tiff file
data: 2 to 5D numpy array
axes: string; order of dimensions in data, default: TZCXY for 5D, ZCXY for 4D, CXY for 3D, XY for 2D data
bar: bool; whether or not to show a progress bar
colormap: string; choose any colormap from the colorcet module
pxsize: float; set tiff tag so ImageJ can read the pixel size
"""
axes = axes[-np.ndim(data):].upper()
if not axes == 'CZTXY':
T = [axes.find(i) for i in 'CZTXY']
E = [i for i, j in enumerate(T) if j < 0]
T = [i for i in T if i >= 0]
data = np.transpose(data, T)
for e in E:
data = np.expand_dims(data, e)
shape = data.shape[:3]
with IJTiffWriter(file, shape, data.dtype, colormap, pxsize=pxsize) as f:
at_least_one = False
for n in tqdm(product(*[range(i) for i in shape]), total=np.prod(shape), desc='Saving tiff', disable=not bar):
if np.any(data[n]) or not at_least_one:
f.save(data[n], *n)
at_least_one = True
def readheader(b):
b.seek(0)
byteorder = {b'II': '<', b'MM': '>'}[b.read(2)]
bigtiff = {42: False, 43: True}[struct.unpack(byteorder + 'H', b.read(2))[0]]
if bigtiff:
tagsize = 20
tagnoformat = 'Q'
offsetsize = struct.unpack(byteorder + 'H', b.read(2))[0]
offsetformat = {8: 'Q', 16: '2Q'}[offsetsize]
assert struct.unpack(byteorder + 'H', b.read(2))[0] == 0, 'Not a TIFF-file'
offset = struct.unpack(byteorder + offsetformat, b.read(offsetsize))[0]
else:
tagsize = 12
tagnoformat = 'H'
offsetformat = 'I'
offsetsize = 4
offset = struct.unpack(byteorder + offsetformat, b.read(offsetsize))[0]
return byteorder, bigtiff, tagsize, tagnoformat, offsetformat, offsetsize, offset
def readifd(b):
""" Reads the first IFD of the tiff file in the file handle b
wp@tl20200214
"""
byteorder, bigtiff, tagsize, tagnoformat, offsetformat, offsetsize, offset = readheader(b)
b.seek(offset)
nTags = struct.unpack(byteorder + tagnoformat, b.read(struct.calcsize(tagnoformat)))[0]
assert nTags < 4096, 'Too many tags'
addr = []
addroffset = []
length = 8 if bigtiff else 2
length += nTags * tagsize + offsetsize
tags = {}
for i in range(nTags):
pos = offset + struct.calcsize(tagnoformat) + tagsize * i
b.seek(pos)
code, ttype = struct.unpack(byteorder + 'HH', b.read(4))
count = struct.unpack(byteorder + offsetformat, b.read(offsetsize))[0]
dtype = tifffile.TIFF.DATA_FORMATS[ttype]
dtypelen = struct.calcsize(dtype)
toolong = struct.calcsize(dtype) * count > offsetsize
if toolong:
addr.append(b.tell() - offset)
caddr = struct.unpack(byteorder + offsetformat, b.read(offsetsize))[0]
addroffset.append(caddr - offset)
cp = b.tell()
b.seek(caddr)
if ttype == 1:
value = b.read(count)
elif ttype == 2:
value = b.read(count).decode('ascii').rstrip('\x00')
elif ttype == 5:
value = [struct.unpack(byteorder + dtype, b.read(dtypelen)) for _ in range(count)]
else:
value = [struct.unpack(byteorder + dtype, b.read(dtypelen))[0] for _ in range(count)]
if toolong:
b.seek(cp)
tags[code] = (ttype, value)
b.seek(offset)
return tags
def getchunks(frame):
with BytesIO(frame) as b:
tags = readifd(b)
stripoffsets = tags[273][1]
stripbytecounts = tags[279][1]
chunks = []
for o, c in zip(stripoffsets, stripbytecounts):
b.seek(o)
chunks.append(b.read(c))
return stripbytecounts, tags, chunks
def fmt_err(exc_info):
t, m, tb = exc_info
while tb.tb_next:
tb = tb.tb_next
return 'line {}: {}'.format(tb.tb_lineno, m)
def multiplexer(files, byteorder, bigtiff, Qo, V, W, E):
try:
w = {file: writer(file, v['shape'], byteorder, bigtiff, W, v['colormap'], v['dtype'], v['extratags'])
for file, v in files.items()}
for v in w.values(): # start writing all files
next(v)
while not V.is_set(): # take frames from queue and write to file
try:
frame, file, n, fmin, fmax = Qo.get(True, 0.02)
w[file].send((frame, n, fmin, fmax))
except queues.Empty:
continue
for v in w.values(): # finish writing files
v.close()
except Exception:
E.put(fmt_err(sys.exc_info()))
def writer(file, shape, byteorder, bigtiff, W, colormap=None, dtype=None, extratags=None):
""" Writes a tiff file, writer function for IJTiffWriter
file: filename of the new tiff file
shape: shape (CZT) of the data to be written
byteorder: byteorder of the file to be written, '<' or '>'
bigtiff: False: file will be normal tiff, True: file will be bigtiff
Qo: Queue from which to take the compressed frames for writing
V: Value; 1 when more frames need to be written, 0 when writer can finish
W: Value in which writer will log how many frames are written
colormap: array with 2^bitspersample x 3 values RGB
wp@tl20200214
"""
spp = shape[0] if colormap is None else 1 # samples/pixel
nframes = np.prod(shape[1:]) if colormap is None else np.prod(shape)
offsetformat, offsetsize, tagnoformat, tagsize = (('I', 4, 'H', 8), ('Q', 8, 'Q', 20))[bigtiff]
strips = {}
tags = {}
hashes = {}
N = []
def hashcheck(bvalue, offset):
addr = fh.tell()
fh.seek(offset)
same = bvalue == fh.read(len(bvalue))
fh.seek(addr)
return same
def frn(n):
if colormap is None:
return n[1] + n[2] * shape[1], n[0]
else:
return n[0] + n[1] * shape[0] + n[2] * shape[0] * shape[1], 0
def addframe(frame, n):
framenr, channel = frn(n)
stripbytecounts, tags[framenr], chunks = getchunks(frame)
stripbyteoffsets = []
for c in chunks:
hc = hash(c)
if hc in hashes and hashcheck(c, hashes[hc]): # reuse previously saved data
stripbyteoffsets.append(hashes[hc])
else:
if fh.tell() % 2:
fh.write(b'\x00')
stripbyteoffsets.append(fh.tell())
hashes[hc] = stripbyteoffsets[-1]
fh.write(c) # write the data now, ifds later
strips[(framenr, channel)] = (stripbyteoffsets, stripbytecounts)
W.value += 1
N.append(n)
return framenr, channel
def addtag(code, ttype, value):
if isinstance(ttype, str):
ttype = tifffile.TIFF.DATATYPES[ttype.upper()]
dtype = tifffile.TIFF.DATA_FORMATS[ttype]
count = len(value) // struct.calcsize(dtype) if isinstance(value, (bytes, str)) else len(value)
offset = fh.tell()
fh.write(struct.pack(byteorder + 'HH', code, ttype))
fh.write(struct.pack(byteorder + offsetformat, count))
if isinstance(value, bytes):
bvalue = value
elif isinstance(value, str):
bvalue = value.encode('ascii')
elif ttype == 5:
bvalue = b''.join([struct.pack(byteorder + dtype, *v) for v in value])
else:
bvalue = b''.join([struct.pack(byteorder + dtype, v) for v in value])
if len(bvalue) <= offsetsize:
fh.write(bvalue)
tagdata = None
else:
tagdata = (fh.tell(), bvalue)
fh.seek(offset + tagsize)
return tagdata
def addtagdata(addr, bvalue):
if fh.tell() % 2:
fh.write(b'\x00')
hc = hash(bvalue)
if hc in hashes and hashcheck(bvalue, hashes[hc]):
tagoffset = hashes[hc]
else:
tagoffset = fh.tell()
hashes[hc] = tagoffset
fh.write(bvalue)
fh.seek(addr)
fh.write(struct.pack(byteorder + offsetformat, tagoffset))
fh.seek(0, 2)
if colormap is None:
description = \
'ImageJ=1.11a\nimages={}\nslices={}\nframes={}\nhyperstack=true\nmode=grayscale\nloop=false\n'. \
format(np.prod(shape[1:]), *shape[1:])
else:
description = \
'ImageJ=1.11a\nimages={}\nchannels={}\nslices={}\nframes={}\nhyperstack=true\nmode=grayscale\nloop=false\n'. \
format(np.prod(shape), *shape)
try:
description = bytes(description, 'ascii') # python 3
except:
pass
with open(file, 'w+b') as fh:
try:
fh.write({'<': b'II', '>': b'MM'}[byteorder])
if bigtiff:
offset = 16
fh.write(struct.pack(byteorder + 'H', 43))
fh.write(struct.pack(byteorder + 'H', 8))
fh.write(struct.pack(byteorder + 'H', 0))
fh.write(struct.pack(byteorder + 'Q', offset))
else:
offset = 8
fh.write(struct.pack(byteorder + 'H', 42))
fh.write(struct.pack(byteorder + 'I', offset))
fminmax = np.tile((np.inf, -np.inf), (shape[0], 1))
while True:
frame, n, fmin, fmax = yield
fminmax[n[0]] = min(fminmax[n[0]][0], fmin), max(fminmax[n[0]][1], fmax)
addframe(frame, n)
except GeneratorExit:
if dtype.kind == 'i':
dmin, dmax = np.iinfo(dtype).min, np.iinfo(dtype).max
else:
dmin, dmax = 0, 65535
fminmax[np.isposinf(fminmax)] = dmin
fminmax[np.isneginf(fminmax)] = dmax
for i in range(fminmax.shape[0]):
if fminmax[i][0] == fminmax[i][1]:
fminmax[i] = dmin, dmax
if len(N) < np.prod(shape): # add empty frames if needed
empty_frame = None
for n in product(*[range(i) for i in shape]):
if not n in N:
framenr, channel = frn(n)
if empty_frame is None:
tag = tags[framenr] if framenr in tags.keys() else tags[list(tags.keys())[-1]]
frame = IJTiffFrame(np.zeros(tag[257][1] + tag[256][1], dtype), byteorder, bigtiff)
empty_frame = addframe(frame, n)
else:
strips[(framenr, channel)] = strips[empty_frame]
if not framenr in tags.keys():
tags[framenr] = tags[empty_frame[0]]
offset_addr = offset - offsetsize
if not colormap is None:
tags[0][320] = (3, get_colormap(colormap, dtype, byteorder))
tags[0][262] = (3, [3])
# unfortunately, ImageJ doesn't read this from bigTiff, maybe we'll figure out how to force IJ in the future
for tag in tifffile.tifffile.imagej_metadata_tag(
{'Ranges': tuple(fminmax.flatten().astype(int))}, byteorder):
tags[0][tag[0]] = ({50839: 1, 50838: 4}[tag[0]], tag[3])
for framenr in range(nframes):
stripbyteoffsets, stripbytecounts = zip(*[strips[(framenr, channel)] for channel in range(spp)])
tp, value = tags[framenr][258]
tags[framenr][258] = (tp, spp * value)
tags[framenr][270] = (2, description)
tags[framenr][273] = (16, sum(stripbyteoffsets, []))
tags[framenr][277] = (3, [spp])
tags[framenr][279] = (16, sum(stripbytecounts, []))
tags[framenr][305] = (2, b'tiffwrite_tllab_NKI')
if extratags is not None:
tags[framenr].update(extratags)
if colormap is None and shape[0] > 1:
tags[framenr][284] = (3, [2])
# write offset to this ifd in the previous one
if fh.tell() % 2:
fh.write(b'\x00')
offset = fh.tell()
fh.seek(offset_addr)
fh.write(struct.pack(byteorder + offsetformat, offset))
# write ifd
fh.seek(offset)
fh.write(struct.pack(byteorder + tagnoformat, len(tags[framenr])))
tagdata = [addtag(code, *tags[framenr][code]) for code in sorted(tags[framenr].keys())]
offset_addr = fh.tell()
fh.write(b'\x00' * offsetsize)
for i in [j for j in tagdata if j is not None]:
addtagdata(*i)
fh.seek(offset_addr)
fh.write(struct.pack(byteorder + tagnoformat, 0))
def IJTiffFrame(frame, byteorder, bigtiff):
with BytesIO() as framedata:
with tifffile.TiffWriter(framedata, bigtiff, byteorder) as t:
t.save(frame, compress=9, contiguous=True)
return framedata.getvalue()
def compressor(byteorder, bigtiff, Qi, Qo, V, E):
""" Compresses tiff frames
byteorder: byteorder of the file to be written, '<' or '>'
bigtiff: False: file will be normal tiff, True: file will be bigtiff
Qi: Queue from which new frames which need to be compressed are taken
Qo: Queue where compressed frames are stored
V: Value; 1 when more frames need to be compressed, 0 when compressor can finish
"""
try:
while not V.is_set():
try:
frame, file, n = Qi.get(True, 0.02)
if isinstance(frame, tuple):
fun, args, kwargs = frame[:3]
frame = fun(*args, **kwargs)
fmin = frame.flatten()
fmin = fmin[fmin > 0]
fmin = np.nanmin(fmin) if len(fmin) else np.inf
fmax = np.nanmax(frame)
Qo.put((IJTiffFrame(frame, byteorder, bigtiff), file, n, fmin, fmax))
except queues.Empty:
continue
except Exception:
E.put(fmt_err(sys.exc_info()))
class IJTiffWriter():
""" Class for writing ImageJ big tiff files using good compression and multiprocessing to compress quickly
Usage:
with IJTiffWriter(file, shape) as t:
t.save(frame, c, z, t)
file: string; filename of the new tiff file, or list of filenames.
shape: iterable; shape (C, Z, T) of data to be written in file, or list of shapes.
dtype: cast data to dtype before saving, only (u)int8, (u)int16 and float32 are supported.
colormap: string; choose any colormap from the colorcet module.
nP: int; number of compressor workers to use
extratags: dict {int tagnr: (int type, data)}, extra tags to save on every frame, will cause a crash if not used correctly!
pxsize: float; set tiff tag so ImageJ can read the pixel size (in um).
frame: 2D numpy array with data
c, z, t: channel, z, time coordinates of the frame
"""
# TODO: better error handling
# TODO: extratags per frame, handled by save method
# TODO: extratags sanity check
def __init__(self, file, shape, dtype='uint16', colormap=None, nP=None, extratags=None, pxsize=None):
files = [file] if isinstance(file, str) else file
shapes = [shape] if isinstance(shape[0], Number) else shape # CZT
dtypes = [np.dtype(dtype)] if isinstance(dtype, (str, np.dtype)) else [np.dtype(d) for d in dtype]
colormaps = [colormap] if colormap is None or isinstance(colormap, str) else colormap
extratagss = [extratags] if extratags is None or isinstance(extratags, dict) else extratags
pxsizes = [pxsize] if pxsize is None or isinstance(pxsize, Number) else pxsize
for i, pxsize in enumerate(pxsizes):
if pxsize is not None:
res = Fraction(pxsize).limit_denominator(2 ** 31 - 1)
res = [res.denominator, res.numerator]
extratagss[i] = {**(extratagss[i] or {}), **{282: (5, [res]), 283: (5, [res])}}
nFiles = len(files)
if not len(shapes) == nFiles:
shapes *= nFiles
if not len(dtypes) == nFiles:
dtypes *= nFiles
if not len(colormaps) == nFiles:
colormaps *= nFiles
if not len(extratagss) == nFiles:
extratagss *= nFiles
self.files = OrderedDict((file,
{'shape': shape, 'dtype': dtype, 'colormap': colormap, 'frames': [], 'extratags': extratags})
for file, shape, dtype, colormap, extratags in zip(files, shapes, dtypes, colormaps, extratagss)
if len(file))
assert np.all([len(s) == 3 for s in shapes]), 'please specify all c, z, t for the shape'
assert np.all([d.char in 'BbHhf' for d in dtypes]), 'datatype not supported'
self.bigtiff = True # normal tiff also possible, but should be opened by bioformats in ImageJ
self.byteorder = '<'
self.nP = nP or max(2, min(cpu_count() // 6, np.prod(shape)))
self.Qi = Queue(10 * self.nP)
self.Qo = Queue(10 * self.nP)
self.E = Queue()
self.V = Event()
self.W = Value('i', 0)
self.Compressor = Pool(self.nP, compressor, (self.byteorder, self.bigtiff, self.Qi, self.Qo, self.V, self.E))
self.Writer = Process(target=multiplexer, args=(self.files, self.byteorder, self.bigtiff, self.Qo, self.V,
self.W, self.E))
self.Writer.start()
@dispatch(object, Number, Number, Number)
def save(self, frame, *n):
self.save(next(iter(self.files.keys())), frame, *n)
@dispatch(Number, object, Number, Number, Number)
def save(self, filenr, frame, *n):
self.save(list(self.files.keys())[filenr], frame, *n)
@dispatch(str, object, Number, Number, Number)
def save(self, file, frame, *n):
assert file in self.files, 'file was not opened by {}'.format(self)
assert n not in self.files[file]['frames'], 'frame {} {} {} is present already'.format(*n)
assert all([0 <= i < s for i, s in zip(n, self.files[file]['shape'])]), \
'frame {} {} {} is outside shape {} {} {}'.format(n[0], n[1], n[2], *self.files[file]['shape'])
if not self.E.empty():
print(self.E.get())
# fun, args, kwargs, dshape = frame
if not isinstance(frame, tuple):
assert frame.ndim == 2, 'data must be 2 dimensional'
if not self.files[file]['dtype'] is None:
frame = frame.astype(self.files[file]['dtype'])
self.files[file]['frames'].append(n)
self.Qi.put((frame, file, n))
def close(self):
nFrames = sum([len(v['frames']) for v in self.files.values()])
if self.W.value < nFrames:
with tqdm(total=nFrames, leave=False, desc='Finishing writing frames',
disable=(nFrames - self.W.value) < 100) as bar:
while self.W.value < nFrames:
if not self.E.empty():
print(self.E.get())
break
bar.n = self.W.value
bar.refresh()
sleep(0.02)
bar.n = sum([len(v['frames']) for v in self.files.values()])
bar.refresh()
self.V.set()
while not self.Qi.empty():
self.Qi.get()
self.Qi.close()
self.Qi.join_thread()
while not self.Qo.empty():
self.Qo.get()
self.Qo.close()
self.Qo.join_thread()
while not self.E.empty():
print(self.E.get())
self.E.close()
self.Compressor.close()
self.Compressor.join()
self.Writer.join(5)
if self.Writer.is_alive():
self.Writer.terminate()
self.Writer.join(5)
if self.Writer.is_alive():
print('Writer process won''t close.')
def __enter__(self):
return self
def __exit__(self, *args, **kwargs):
self.close()