#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""These functions provide some mechanisms for dealing with
ISIS cube files.  These functions are not comprehensive, and only
seek to provide functionality that does not exist elsewhere.

For example, there is already a GDAL driver for ISIS cubes, and
access to the primary bands and such can already be accomplished
via GDAL, and in order to get the image pixels as a numpy array::

    import numpy as np
    from osgeo import gdal_array

    cube = 'some.cub'
    img_arr = gdal_array.LoadFile(cube)

If you want to make sure to mask out all of the special pixels
in the image you've read into img_arr above, you can do this::

    import pvl
    import kalasiris as isis

    label = pvl.load(cube)
    specialpix = getattr(isis.specialpixels,
                         label['IsisCube']['Core']['Pixels']['Type'])
    masked_img_arr = np.ma.masked_outside(img_arr,
                                          specialpix.Min, specialpix.Max)

"""

# Copyright 2020, Ross A. Beyer (rbeyer@seti.org)
#
# Reuse is permitted under the terms of the license.
# The AUTHORS file and the LICENSE file are at the
# top level of this library.

import os
import struct
from collections import abc
from typing import Tuple
from warnings import warn

import kalasiris as isis

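# Byte widths and struct-module format characters for each ISIS table
# Field Type; these drive the unpacking and packing done below.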
data_sizes = {"Integer": 4, "Double": 8, "Real": 4, "Text": 1}
data_formats = {"Integer": "i", "Double": "d", "Real": "f"}


def _get_start_size(d: dict) -> Tuple[int, int]:
    """Returns a tuple of ints that represent the true start byte and size
    based on the provided dict.

    This is a convenience function for when you have these values in
    a dictionary with *StartByte* and *Bytes* keys.

    Since the StartByte values in an ISIS cube label are 1-based
    counts, not 0-based offsets, converting the numbers provided in
    the labels to offsets for Python's seek() and read() functions
    requires subtracting one from the listed StartByte, which this
    function does.
    """
    start = int(d["StartByte"]) - 1
    size = int(d["Bytes"])
    return start, size
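
# For example, a minimal sketch of the conversion this helper performs
# (the values here are hypothetical):
#
#   _get_start_size({"StartByte": "65537", "Bytes": "4096"})
#   # returns (65536, 4096): a 0-based offset for seek(), and the size.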


def get_startsize_from(
    label=None, table_name=None, cube_path=None
) -> Tuple[int, int]:
    """Returns a tuple of ints that represent the true start byte and
    size based on the provided *label* or combination of *table_name*
    and *cube_path*.

    Either *label* or both *table_name* and *cube_path* are needed.
    If neither a *label* nor a *table_name* is provided, then this
    function will raise a ValueError.  If both are provided, *label*
    will take precedence and *table_name* will be ignored.

    *label* is a dict which must contain a *StartByte* key and a
    *Bytes* key whose values can be converted to int (if not
    already).  These values should be those in the cube file label
    for the table.

    The name of the table as a string can be provided via
    *table_name*, and the ISIS getkey function will be applied to
    the file at *cube_path* to extract the needed StartByte and
    Bytes values from the label.  However, if there is more than
    one table in the cube, getkey can only find the first, and a
    KeyError may be raised.  If the pvl library is available, this
    function will use it to find *all* of the tables in the
    *cube_path* labels and will find the one named by *table_name*
    if it is present.
    """
    if label is None and table_name is None:
        raise ValueError("Neither label nor table_name were provided.")

    if label is not None:
        return _get_start_size(label)
    else:
        try:
            import pvl

            label = pvl.load(str(cube_path))
            for t in label.getlist("Table"):
                if t["Name"] == table_name:
                    return _get_start_size(t)
            else:
                raise KeyError(
                    f"There is no table '{table_name}' in the "
                    f"labels of {cube_path}."
                )
        except ImportError:
            name = isis.getkey(
                cube_path, objname="Table", keyword="Name"
            ).stdout.strip()
            if table_name == name:
                return _get_start_size(
                    {
                        "StartByte": isis.getkey(
                            cube_path, objname="Table", keyword="StartByte"
                        ).stdout.strip(),
                        "Bytes": isis.getkey(
                            cube_path, objname="Table", keyword="Bytes"
                        ).stdout.strip(),
                    }
                )
            else:
                raise KeyError(
                    f"The first table in {cube_path} that ISIS getkey "
                    f"could find is not named {table_name}; it is {name}. "
                    "If your cube has more than one table, you can provide "
                    "the info via the label dict instead of table_name, or "
                    "if you install the pvl Python library, then this "
                    "function can use it to find all of the tables."
                )
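

# A minimal usage sketch (hypothetical: assumes a cube file "some.cub"
# whose label contains a table named "InstrumentPointing"):
#
#   start, size = get_startsize_from(
#       table_name="InstrumentPointing", cube_path="some.cub"
#   )
#   # Or, if you already have the table's label as a dict:
#   start, size = get_startsize_from(
#       label={"StartByte": 65537, "Bytes": 4096}
#   )
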
# This function is derived from this commit dated Sep 24, 2019:
# https://github.com/USGS-Astrogeology/ale/commit/add5368ba46b2c911de9515afeaccc4d1c981000
def read_table_data(
    cube_path: os.PathLike, label=None, table_name=None
) -> bytes:
    """Returns a bytes object with the contents read from the file at
    *cube_path* based on the elements provided in the *label* or
    *table_name*.

    Either *label* or *table_name* is needed.  If neither a *label*
    nor a *table_name* is provided, then this function will raise a
    ValueError.  If both are provided, *label* will take precedence
    and *table_name* will be ignored.

    *label* is a dict which must contain a *StartByte* key and a
    *Bytes* key whose values can be converted to int (if not
    already).  These values should be those in the cube file label
    for the table.

    The name of the table as a string can be provided via
    *table_name*, and the ISIS getkey function will be applied to
    extract the needed StartByte and Bytes values from the label.
    However, if there is more than one table in the cube, getkey can
    only find the first, and a KeyError may be raised.  If the pvl
    library is available, this function will use it to find *all* of
    the tables in the *cube_path* labels and will find the one named
    by *table_name* if it is present.
    """
    (start, size) = get_startsize_from(label, table_name, cube_path)

    with open(cube_path, "rb") as cubehandle:
        cubehandle.seek(start)
        table = cubehandle.read(size)

    return table
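

# A minimal usage sketch (hypothetical names, assuming the pvl library
# is installed so the table can be found by name):
#
#   raw = read_table_data("some.cub", table_name="InstrumentPointing")
#   # raw is a bytes object exactly as long as the table's Bytes value.
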
# This function is derived from this commit dated Sep 24, 2019:
# https://github.com/USGS-Astrogeology/ale/commit/add5368ba46b2c911de9515afeaccc4d1c981000
def parse_table(data: bytes, fields: list) -> dict:
    """Return a Python dictionary created from the bytes *data* of an
    ISIS cube table (presumably extracted via read_table_data()) and
    described by the *fields* list.

    Please be aware that this does not perform masking of the ISIS
    special pixels that may be present in the table, and simply
    returns them as the appropriate int or float values.

    The *fields* list must be a list of dicts, each of which must
    contain the following keys: 'Name', 'Type', and 'Size'.  The
    'Name' key can be any string (and these will end up being the
    keys in the returned dict).  'Size' is the number of values in
    the field (for 'Text' fields, the number of characters), and
    'Type' is a string that must be one of 'Integer', 'Double',
    'Real', or 'Text'.

    If you are using the pvl library, the get_table() function will
    be easier to use.
    """
    row_len = 0
    for f in fields:
        row_len += data_sizes[f["Type"]] * int(f["Size"])

    if len(data) % row_len != 0:
        raise ValueError(
            f"The total sizes of each field ({row_len}) do not evenly "
            f"divide into the size of the data ({len(data)}), so "
            "something is off."
        )

    # Parse the binary data
    results = {f["Name"]: [] for f in fields}
    offset = 0
    while offset < len(data):
        for f in fields:
            if f["Type"] == "Text":
                field_data = data[offset : offset + int(f["Size"])].decode(
                    encoding="latin_1"
                )
            else:
                data_fmt = data_formats[f["Type"]] * int(f["Size"])
                f_data = struct.unpack_from(data_fmt, data, offset)
                if len(f_data) == 1:
                    field_data = f_data[0]
                else:
                    field_data = list(f_data)

            results[f["Name"]].append(field_data)
            offset += data_sizes[f["Type"]] * int(f["Size"])

    return results
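

# A minimal sketch of the *fields* structure this expects (the field
# names and sizes here are hypothetical, not from any particular cube):
#
#   fields = [
#       {"Name": "ET", "Type": "Double", "Size": 1},
#       {"Name": "Quaternion", "Type": "Double", "Size": 4},
#   ]
#   table = parse_table(raw, fields)
#   # table["ET"] is a list of floats, one per record;
#   # table["Quaternion"] is a list of 4-element lists, one per record.
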
# This function is derived from this commit dated Sep 24, 2019:
# https://github.com/USGS-Astrogeology/ale/commit/add5368ba46b2c911de9515afeaccc4d1c981000
def get_table(cube_path: os.PathLike, table_name: str) -> dict:
    """Return a Python dictionary created from the named table in
    the ISIS cube.

    This function requires the pvl Python library.
    """
    # Toyed with allowing a file_object=None argument; the docstring
    # would have been:
    #
    #   If the optional *file_object* is given, it should be the
    #   result of opening *cube_path* which is readable.  This simply
    #   allows a caller to provide an already-opened file object.
    #   Otherwise, this function will open and then close the file at
    #   *cube_path*.
    #
    # Decided against it: the potential to pass a file_object that
    # was *different* from an opened *cube_path* had the potential
    # for much mayhem without a tremendous amount of gain.

    try:
        import pvl

        label = pvl.load(str(cube_path))
        table_label = None
        for t in label.getlist("Table"):
            if t["Name"] == table_name:
                table_label = t
                break

        # if file_object is not None:
        #     (start, size) = _get_start_size(table_label)
        #     file_object.seek(start)
        #     table_data = file_object.read(size)
        # else:
        table_data = read_table_data(cube_path, table_label)

        return parse_table(table_data, table_label.getlist("Field"))

        # The original ale function added the keywords into the
        # returned table, but that doesn't seem like a great idea,
        # since that means that those keys are 'special' meta-data
        # keys, whereas the other keys in the returned dict are
        # 'regular' field keys, and once returned, there's no way to
        # know which is which.
        #
        # Add the keywords from the label
        # results.update({
        #     key: value for key,
        #     value in table_label.items() if not isinstance(
        #         value, pvl._collections.PVLGroup)})
        #
        # return results

    except ImportError:
        warn(
            "The pvl library is not present, so get_table() cannot be "
            "used.  The parse_table() function might work for you.",
            ImportWarning,
        )
        raise
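

# A minimal usage sketch (hypothetical table name; requires pvl):
#
#   table = get_table("some.cub", "InstrumentPointing")
#   # table maps each Field Name to a list with one entry per record.
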
def overwrite_table_data(
    cube_path: os.PathLike, data: bytes, label=None, table_name=None
):
    """The file at *cube_path* will be modified by overwriting the
    data of the specified table with the contents of *data*.

    Either *label* or *table_name* is needed.  If neither a *label*
    dict nor a *table_name* is provided, then this function will
    raise a ValueError.  If both are provided, *label* will take
    precedence and *table_name* will be ignored.

    *label* is a dict which must contain *Name*, *StartByte*, and
    *Bytes* keys (*StartByte* and *Bytes* must be convertible to int
    if not already).  These values will be used to locate where in
    the file to write the new *data*.

    The name of the table as a string can be provided via
    *table_name*, and the ISIS getkey function will be applied to
    extract the needed StartByte and Bytes values from the label.
    However, if there is more than one table in the cube, getkey can
    only find the first, and a KeyError may be raised, even though
    there is a table of that name in the file.  If the pvl library
    is available, this function will use it to find *all* of the
    tables in the *cube_path* labels and will find the one named by
    *table_name* if it is present.
    """
    (start, size) = get_startsize_from(label, table_name, cube_path)

    if size != len(data):
        raise ValueError(
            f"The size of the table ({size}) to be overwritten in the "
            f"file ({cube_path}) is different from the size of the data "
            f"provided ({len(data)})."
        )

    with open(cube_path, "r+b") as cubehandle:
        cubehandle.seek(start)
        cubehandle.write(data)

    return
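

# A minimal round-trip sketch (hypothetical names): read a table's raw
# bytes and write them back unchanged, which succeeds because the
# lengths match:
#
#   raw = read_table_data("some.cub", table_name="InstrumentPointing")
#   overwrite_table_data(
#       "some.cub", raw, table_name="InstrumentPointing"
#   )
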
def encode_table(table: dict, fields: list) -> bytes:
    """Return a bytes object created from the *table* dict.

    The *table* dict must contain lists of equal length as values.
    If they are not of equal length, an IndexError will be raised.

    The *fields* list must be a list of dicts, each of which must
    contain the following keys: 'Name', 'Type', and 'Size'.  The
    'Name' key can be any string, but must match a key in the
    *table* dict.  'Size' is the number of values in the field (for
    'Text' fields, the maximum number of characters), and 'Type' is
    a string that must be one of 'Integer', 'Double', 'Real', or
    'Text'.  If a field's 'Size' value is more than 1, then each
    element of the list which is the value of the *table* dict with
    that key name must itself be a sequence of length 'Size'.

    If you are using the pvl library, the overwrite_table() function
    will be easier to use.
    """
    field_lengths = set()
    for v in table.values():
        field_lengths.add(len(v))
    if not len(field_lengths) == 1:
        raise IndexError(
            "At least one of the lists in the table has "
            f"a different length than the rest: {field_lengths}"
        )

    data = bytes()
    for row in range(field_lengths.pop()):
        for f in fields:
            obj = table[f["Name"]][row]
            size = int(f["Size"])
            if f["Type"] == "Text":
                if len(obj) > size:
                    raise IndexError(
                        f"The length of {obj} ({len(obj)}) is "
                        "larger than the allowable Size of the "
                        f"field ({size})"
                    )
                else:
                    data += obj.ljust(size).encode(encoding="latin_1")
            else:
                data_fmt = data_formats[f["Type"]] * size
                if isinstance(obj, abc.Sequence):
                    if len(obj) == size:
                        data += struct.pack(data_fmt, *obj)
                    else:
                        raise IndexError(
                            f"The length of {obj} ({len(obj)}) is "
                            f"different than the Size of the field "
                            f"({size})."
                        )
                elif size == 1:
                    data += struct.pack(data_fmt, obj)
                else:
                    raise ValueError(
                        f"There is only a single value ({obj}) but the "
                        f"field indicates there should be "
                        f"{int(f['Size'])}."
                    )

    return data
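

# A minimal sketch (hypothetical field): encode_table() is the inverse
# of parse_table(), so this round-trip reproduces the input:
#
#   fields = [{"Name": "ET", "Type": "Double", "Size": 1}]
#   data = encode_table({"ET": [0.0, 1.5]}, fields)
#   assert parse_table(data, fields) == {"ET": [0.0, 1.5]}
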
def overwrite_table(cube_path: os.PathLike, table_name: str, table: dict):
    """The file at *cube_path* will be modified by overwriting the
    data of the specified table with the contents of *table*.

    The *table* dict must contain lists of equal length as values.
    If they are not of equal length, an IndexError will be raised.
    The *table* dict must also contain, as keys, all of the Field
    names from *table_name* in the *cube_path*.

    This function requires the pvl Python library.
    """
    try:
        import pvl

        label = pvl.load(str(cube_path))
        table_label = None
        for t in label.getlist("Table"):
            if t["Name"] == table_name:
                table_label = t
                break

        data = encode_table(table, table_label.getlist("Field"))
        overwrite_table_data(cube_path, data, table_label)
        return

    except ImportError:
        warn(
            "The pvl library is not present, so overwrite_table() cannot "
            "be used.  The overwrite_table_data() function might work "
            "for you.",
            ImportWarning,
        )
        raise
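

# A minimal usage sketch (hypothetical names; requires pvl): modify one
# field of a table in place:
#
#   table = get_table("some.cub", "InstrumentPointing")
#   table["ET"] = [et + 1.0 for et in table["ET"]]
#   overwrite_table("some.cub", "InstrumentPointing", table)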