Skip to content

data

augmentations

add_depth_dim(X, y)

Add extra dimension at tail for X only. This is trivial to do in-line. This is slightly more convenient than writing a lambda.

Parameters:

Name Type Description Default
X tf.tensor required
y tf.tensor required

Returns:

Type Description
tf.tensor, tf.tensor

X, y tuple, with X having a new trailing dimension.

Source code in indl/data/augmentations.py
def add_depth_dim(X, y):
    """
    Append a singleton trailing dimension to X, leaving y untouched.

    Convenience wrapper around `tf.expand_dims` for use with
    `tf.data.Dataset.map`, saving the caller from writing a lambda.

    Args:
        X (tf.tensor): input tensor.
        y (tf.tensor): labels, passed through unchanged.

    Returns:
        tf.tensor, tf.tensor: X with a new trailing axis, and y.
    """
    # Prepare as an image, with only 1 colour-depth channel.
    return tf.expand_dims(X, -1), y

cast_type(X, y, x_type=tf.float32, y_type=tf.uint8)

Cast input pair to new dtypes.

Parameters:

Name Type Description Default
X tf.tensor

Input tensor

required
y tf.tensor

Input labels

required
x_type tf.dtypes

tf data type

tf.float32
y_type tf.dtypes

tf data type

tf.uint8

Returns:

Type Description
tf.tensor, tf.tensor

X, y tuple, each cast to its new type.

Source code in indl/data/augmentations.py
def cast_type(X, y, x_type=tf.float32, y_type=tf.uint8):
    """
    Cast an (X, y) pair to the requested dtypes.

    Args:
        X (tf.tensor): Input tensor
        y (tf.tensor): Input labels
        x_type (tf.dtypes): target dtype for X.
        y_type (tf.dtypes): target dtype for y.

    Returns:
        tf.tensor, tf.tensor: X cast to x_type, y cast to y_type.
    """
    return tf.cast(X, x_type), tf.cast(y, y_type)

random_slice(X, y, training=True, max_offset=0, axis=1)

Slice a tensor X along axis, beginning at a random offset up to max_offset, taking (X.shape[axis] - max_offset) samples. If training==False, this will take the last N-max_offset samples.

Parameters:

Name Type Description Default
X tf.tensor

input tensor

required
y tf.tensor

input labels

required
training bool

if the model is run in training state

True
max_offset int

number of samples

0
axis int

axis along which to slice

1

Returns:

Type Description
tf.tensor, tf.tensor

X, y tuple randomly sliced.

Source code in indl/data/augmentations.py
def random_slice(X, y, training=True, max_offset=0, axis=1):
    """
    Slice a tensor X along axis, beginning at a random offset up to max_offset,
    taking (X.shape[axis] - max_offset) samples.
    If training==False, this will take the last N-max_offset samples.
    y is sliced the same way only when it has a matching dimension along axis.

    Note: only axis 0 and axis 1 are supported; any axis other than 0 is
    treated as 1, and X/y are assumed to be 2-D for the slice bounds.

    Args:
        X (tf.tensor): input tensor
        y (tf.tensor): input labels
        training (bool): if the model is run in training state
        max_offset (int): number of samples
        axis (int): axis along which to slice

    Returns:
        tf.tensor, tf.tensor: X, y tuple randomly sliced.
    """
    if training and max_offset > 0:
        # Random offset in [0, max_offset). The guard on max_offset > 0 is
        # required because tf.random.uniform with an integer dtype raises
        # when minval == maxval (the previous code crashed for the default
        # max_offset=0 in training mode).
        offset = tf.random.uniform(shape=[], minval=0, maxval=max_offset, dtype=tf.int32)
    else:
        # Deterministic: skip the first max_offset samples (offset == 0 when
        # max_offset == 0, so the slice is a no-op in that case).
        offset = max_offset
    n_subsamps = X.shape[axis] - max_offset
    if axis == 0:
        if len(y.shape) > axis and y.shape[axis] == X.shape[axis]:
            y = tf.slice(y, [offset, 0], [n_subsamps, -1])
        X = tf.slice(X, [offset, 0], [n_subsamps, -1])
    else:  # axis == 1
        if len(y.shape) > axis and y.shape[axis] == X.shape[axis]:
            y = tf.slice(y, [0, offset], [-1, n_subsamps])
        X = tf.slice(X, [0, offset], [-1, n_subsamps])
    return X, y

helper

get_tf_dataset(X, Y, training=True, batch_size=8, max_offset=0, slice_ax=1)

Convert a pair of tf tensors into a tf.data.Dataset with some augmentations. The added augmentations are:

  • add_depth_dim (with default params)
  • cast_type (with default params)
  • random_slice

Parameters:

Name Type Description Default
X tf.tensor

X data - must be compatible with above augmentations.

required
Y tf.tensor

Y data - must be compatible with above augmentations.

required
training bool or tuple

passed to random_slice, or if a tuple (e.g. from sklearn.model_selection.train_test_split) then this function returns training and test sets.

True
batch_size int

Unused I think.

8
max_offset int

Passed to random_slice

0
slice_ax int

Passed to random_slice

1

Returns:

Type Description
tf.data.Dataset(, tf.Dataset)

A tensorflow dataset with extra augmentations. If training is a tuple then two datasets are returned: a training set and a test set.

Source code in indl/data/helper.py
def get_tf_dataset(X, Y, training=True, batch_size=8, max_offset=0, slice_ax=1):
    """
    Convert a pair of tf tensors into a tf.data.Dataset with some augmentations.
    The added augmentations are:

    - `add_depth_dim` (with default params)
    - `cast_type` (with default params)
    - `random_slice`
    Args:
        X (tf.tensor): X data - must be compatible with above augmentations.
        Y (tf.tensor): Y data - must be compatible with above augmentations.
        training (bool or tuple): passed to `random_slice`, or if a tuple
            (e.g. from sklearn.model_selection.train_test_split) then this function returns training and test sets.
        batch_size (int): Unused; the whole dataset is emitted as one batch (see below).
        max_offset (int): Passed to `random_slice`
        slice_ax (int): Passed to `random_slice`

    Returns:
        tf.data.Dataset(, tf.Dataset): A tensorflow dataset with extra augmentations. If training is a tuple
         then two datasets are returned: training set and test set.
    """
    # TODO: trn_test as arg

    if isinstance(training, tuple):
        # training holds (train_indices, test_indices). Forward max_offset and
        # slice_ax too -- previously they were silently dropped on recursion,
        # so the split datasets ignored the caller's slicing parameters.
        ds_train = get_tf_dataset(X[training[0]], Y[training[0]], training=True,
                                  batch_size=batch_size, max_offset=max_offset, slice_ax=slice_ax)
        ds_test = get_tf_dataset(X[training[1]], Y[training[1]], training=False,
                                 batch_size=batch_size, max_offset=max_offset, slice_ax=slice_ax)
        return ds_train, ds_test

    _ds = tf.data.Dataset.from_tensor_slices((X, Y))

    _ds = _ds.map(add_depth_dim)

    _ds = _ds.map(cast_type)

    slice_fun = partial(random_slice, training=training, max_offset=max_offset, axis=slice_ax)
    _ds = _ds.map(slice_fun)

    if training:
        # Dataset.shuffle requires buffer_size (the previous bare call raised
        # TypeError). A buffer covering the full dataset gives a uniform shuffle.
        _ds = _ds.shuffle(buffer_size=X.shape[0])

    # One batch holding the entire dataset (hence batch_size is unused).
    _ds = _ds.batch(X.shape[0] + 1, drop_remainder=not training)

    return _ds