Source code for spotlob.spim

"""A Spim is the object holding the images and metadata.
It has methods, that return a Spim of the next stage.
For example, a blank, empty Spim can be created and is then
in the stage `SpimStage.new`. It contains only the information
where to find the image file. If `Spim.read(Writer)` is called,
a new Spim is returned, which contains the image data and is at
stage `SpimStage.loaded`.

Here is a list of the stages that a Spim can be in and in between,
the methods that return a Spim of the next stage.

.. graphviz::

    strict digraph {
        node [shape=box, width=2]

        0 [label="new", target="_top"];
        1 [label="loaded"];
        2 [label="converted"];
        3 [label="preprocessed", below=2];
        4 [label="binarized", below=1];
        5 [label="postprocessed", below=0];
        6 [label="features_extracted"];
        7 [label="features_filtered"];
        8 [label="analyzed"];
        9 [label="stored"];

        {rank=same;
            0 -> 1 [label="read"];
            1 -> 2 [label="convert"];
        }
        2 -> 3 [label="preprocess"];
        {rank=same;
            4 -> 3 [label="binarize", dir="back"];
            5 -> 4 [label="postprocess", dir="back"];
        }
        5 -> 6 [label="extract_features"];
        {rank=same;
            6 -> 7 [label="filter_features"];
            7 -> 8 [label="analyze"];
        }
        8 -> 9 [label="store"];
    }

With every step, information is collected. A spim at a later stage
does not duplicate the image data from former stages. However, if this
data is still needed, it can contain a reference to its predecessors.
"""


import pandas


[docs]class SpimStage(object): """Enumeration of the stages that a Spim can go through""" new = 0 loaded = 1 converted = 2 preprocessed = 3 binarized = 4 postprocessed = 5 features_extracted = 6 features_filtered = 7 analyzed = 8 stored = 9
[docs]class Spim(object): """Spotlob image item""" # TODO: describe nature of Spim, immutable concept def __init__(self, image, metadata, stage, cached, predecessors): """A Spim is a **Spotlob image item**, an object representing an image and the metadata that is collected along the process through a pipeline. Parameters ---------- image : numpy array an image metadata : dict the data desribing the image and containing results stage : SpimStage the stage along the pipeline the image has passed cached : bool if this is true, a reference to predecessors of this Spim are stored and they are kept in memory. This is required if a process step is to be repeated predecessors : dict(SpimStage, Spim) a registry of predecessors of the current spim, stored alongside the stage they are in """ self._image = image self.metadata = metadata self.stage = stage self.cached = cached self.predecessors = predecessors if image is not None: self.metadata.update({"image_shape": image.shape})
[docs] @classmethod def from_file(cls, image_filepath, cached=False): """Create a Spim object from an image file. The path is stored in the Spim object, but the image is not yet loaded. Parameters ---------- image_filepath : str Path to an image file. The image type must be understood by the reader that is given when the `read`-function is called. If an invalid image type is given at this stage, it will not be recognized cached : bool, optional If the spim is to be cached, a reference to predecessors will be kept and not be deleted by the garbage collector. This allows to go back to an earlier stage after applying processes, but is more memory consuming. (the default is False) Returns ------- Spim An empty Spim at SpimStage.new, that does not contain any data except the filepath """ md = {"filepath": image_filepath} return Spim(None, md, SpimStage.new, cached=cached, predecessors=dict())
@property def image(self): """Gives the image contained in this Spim or in the latest predecessor, that has an image Raises ------ Exception Exception is raised if no image is present, most likely because it has not been cached Returns ------- numpy.array latest image """ if not (self._image is None): return self._image elif self.cached: return self.predecessor_image() else: raise Exception("image not found, has not been cached") def predecessor_image(self): predecessor_stages = self.predecessors.keys() predecessor_stages = sorted(predecessor_stages) for i in predecessor_stages[::-1]: p = self.predecessors[i] if not (p.image is None): return p.image raise Exception("no image found") def read(self, reader): im, metadata = reader.apply(self.metadata["filepath"]) metadata.update(self.metadata) metadata.update({"image_shape": im.shape}) return Spim(im, metadata, SpimStage.loaded, self.cached, self._predecessors_and_self()) def apply_process(self, process): assert self.stage == process.input_stage im = process.apply(self.image) return Spim(im, self.metadata.copy(), self.stage + 1, self.cached, self._predecessors_and_self()) def convert(self, converter): return self.apply_process(converter) def preprocess(self, preprocessor): return self.apply_process(preprocessor) def binarize(self, binarizer): return self.apply_process(binarizer) def postprocess(self, postprocessor): return self.apply_process(postprocessor) def extract_features(self, feature_extractor): contours = feature_extractor.apply(self.image) new_metadata = self.metadata.copy() new_metadata.update({"contours": contours}) newspim = Spim(None, new_metadata, SpimStage.features_extracted, self.cached, self._predecessors_and_self()) return newspim def filter_features(self, feature_filter): filtered_contours = feature_filter.apply(self.metadata["contours"], self.metadata["image_shape"]) metadata = self.metadata.copy() metadata["contours"] = filtered_contours return Spim(None, metadata, SpimStage.features_filtered, self.cached, self._predecessors_and_self()) def analyze(self, analysis): results = analysis.apply(self.metadata) metadata = self.metadata.copy() metadata["results"] = results return Spim(None, metadata, SpimStage.analyzed, self.cached, self._predecessors_and_self()) def store(self, writer): assert self.stage == SpimStage.analyzed metadata = self.metadata.copy() contours = metadata["contours"] fresh_image = self.get_at_stage(SpimStage.loaded).image image_path = writer.store_image(fresh_image, contours) data_path = writer.store_data(self.get_data()) metadata["output_image_filepath"] = image_path metadata["output_data_path"] = data_path return Spim(None, metadata, SpimStage.stored, self.cached, self._predecessors_and_self())
[docs] def func_at_stage(self, spimstage): """The method like `self.read()`, `self.convert()`,... that can be safely called at the given stage Parameters ---------- spimstage : int SpimStage that the requested method corresponds to Returns ------- callable the function, that can be applied the given stage """ # TODO: the static map of functions should be defined elsewhere functions = [self.read, self.convert, self.preprocess, self.binarize, self.postprocess, self.extract_features, self.filter_features, self.analyze, self.store] return functions[spimstage]
[docs] def do_process_at_stage(self, process): """Apply the given process at at this Spim if the process fits this stage or at a predecessor of this Spim that fits the process' input stage Parameters ---------- process : SpotlobProcessStep Process to apply Returns ------- Spim The Spim that results from the process being applied. It is in stage `process.input_stage + 1` """ return self.func_at_stage(process.input_stage)(process)
def _predecessors_and_self(self): if self.cached: outd = dict() for p_stage, p_spim in self.predecessors.items(): if p_stage < self.stage: outd.update({p_stage: p_spim}) outd.update({self.stage: self}) return outd else: # TODO: should this return self return dict()
[docs] def get_at_stage(self, spimstage): """Get the Spim at a given stage. This returns a predecessor if it has been chached Parameters ---------- spimstage : int That the returned Spim should be at Raises ------ Exception If there is no predecessor at the requested stage, for example if Spim has not been cached Returns ------- Spim The Spim at the requested Stage """ if spimstage == self.stage: return self else: try: return self.predecessors[spimstage] except KeyError: # TODO: check if cached = False, then predecessor cannot exist msg = "Spim has no predecessor at stage %s." % spimstage # TODO: more specific exception predecessor does not exist raise Exception(msg)
[docs] def get_data(self): """get all metadata and results as flat metadata RETURNS ------- pandas.Dataframe all metadata including collected results """ # TODO: better tests for get_data if "results" in self.metadata.keys(): # results is a dataframe # flatten to one dataframe md_copy = self.metadata.copy() results = md_copy.pop("results") results["filepath"] = md_copy["filepath"] # TODO: find a way to include contours output in get_data # drop contours _ = md_copy.pop("contours") md = pandas.Series(md_copy).to_frame().T return results.merge(md, on="filepath") else: return pandas.Series(self.metadata).to_frame().T
def __repr__(self): return "<Spim instance %s at stage %s>" % (id(self), self.stage)