# -*- coding: utf-8 -*-
# -----------------------------------------------------------------------------
# (C) British Crown copyright. The Met Office.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""Module containing a plugin to calculate the modal weather code in a period."""
from typing import Optional
import iris
import numpy as np
from iris.analysis import Aggregator
from iris.cube import Cube, CubeList
from numpy import ndarray
from scipy import stats
from improver import BasePlugin
from improver.blending import RECORD_COORD
from improver.blending.utilities import (
record_run_coord_to_attr,
store_record_run_as_coord,
)
from improver.utilities.cube_manipulation import MergeCubes
from ..metadata.forecast_times import forecast_period_coord
from .utilities import DAYNIGHT_CODES, GROUPED_CODES
CODE_MAX = 100
UNSET_CODE_INDICATOR = -99
[docs]class ModalWeatherCode(BasePlugin):
"""Plugin that returns the modal code over the period spanned by the
input data. In cases of a tie in the mode values, scipy returns the smaller
value. The opposite is desirable in this case as the significance /
importance of the weather codes generally increases with the value. To
achieve this the codes are subtracted from an arbitrarily larger
number prior to calculating the mode, and this operation reversed in the
final output.
If there are many different codes for a single point over the time
spanned by the input cubes it may be that the returned mode is not robust.
Given the preference to return more significant codes explained above,
a 12 hour period with 12 different codes, one of which is thunder, will
return a thunder code to describe the whole period. This is likely not a
good representation. In these cases grouping is used to try and select
a suitable weather code (e.g. a rain shower if the codes include a mix of
rain showers and dynamic rain) by providing a more robust mode. The lowest
number (least significant) member of the group is returned as the code.
Use of the least significant member reflects the lower certainty in the
forecasts.
Where there are different weather codes available for night and day, the
modal code returned is always a day code, regardless of the times
covered by the input files.
"""
[docs] def __init__(
self, model_id_attr: Optional[str] = None, record_run_attr: Optional[str] = None
):
"""
Set up plugin and create an aggregator instance for reuse
Args:
model_id_attr:
Name of attribute recording source models that should be
inherited by the output cube. The source models are expected as
a space-separated string.
record_run_attr:
Name of attribute used to record models and cycles used in
constructing the weather symbols.
"""
self.aggregator_instance = Aggregator("mode", self.mode_aggregator)
self.model_id_attr = model_id_attr
self.record_run_attr = record_run_attr
[docs] @staticmethod
def _unify_day_and_night(cube: Cube):
"""Remove distinction between day and night codes so they can each
contribute when calculating the modal code. The cube of weather
codes is modified in place with all night codes made into their
daytime equivalents.
Args:
A cube of weather codes.
"""
night_codes = np.array(DAYNIGHT_CODES) - 1
for code in night_codes:
cube.data[cube.data == code] += 1
[docs] @staticmethod
def _group_codes(modal: Cube, cube: Cube):
"""In instances where the mode returned is not significant, i.e. the
weather code chosen occurs infrequently in the period, the codes can be
grouped to yield a more definitive period code. Given the uncertainty,
the least significant weather type (lowest number in a group that is
found in the data) is used to replace the other data values that belong
to that group prior to recalculating the modal code.
The modal cube is modified in place.
Args:
modal:
The modal weather code cube which contains UNSET_CODE_INDICATOR
values that need to be replaced with a more definitive period
code.
cube:
The original input data. Data relating to unset points will be
grouped and the mode recalculated."""
undecided_points = np.argwhere(modal.data == UNSET_CODE_INDICATOR)
for point in undecided_points:
data = cube.data[(..., *point)].copy()
for _, codes in GROUPED_CODES.items():
default_code = sorted([code for code in data if code in codes])
if default_code:
data[np.isin(data, codes)] = default_code[0]
mode_result, counts = stats.mode(CODE_MAX - data)
modal.data[tuple(point)] = CODE_MAX - mode_result
[docs] @staticmethod
def mode_aggregator(data: ndarray, axis: int) -> ndarray:
"""An aggregator for use with iris to calculate the mode along the
specified axis. If the modal value selected comprises less than 10%
of data along the dimension being collapsed, the value is set to the
UNSET_CODE_INDICATOR to indicate that the uncertainty was too high to
return a mode.
Args:
data:
The data for which a mode is to be calculated.
axis:
The axis / dimension over which to calculate the mode.
Returns:
The data array collapsed over axis, containing the calculated modes.
"""
# Iris aggregators support indexing from the end of the array.
if axis < 0:
axis += data.ndim
# Aggregation coordinate is moved to the -1 position in initialisation;
# move this back to the leading coordinate
data = np.moveaxis(data, [axis], [0])
minimum_significant_count = 0.1 * data.shape[0]
mode_result, counts = stats.mode(CODE_MAX - data, axis=0)
mode_result[counts < minimum_significant_count] = (
CODE_MAX - UNSET_CODE_INDICATOR
)
return CODE_MAX - np.squeeze(mode_result)
[docs] @staticmethod
def _set_blended_times(cube: Cube) -> None:
"""Updates time coordinates so that time point is at the end of the time bounds,
blend_time and forecast_reference_time (if present) are set to the end of the
bound period and bounds are removed, and forecast_period is updated to match."""
cube.coord("time").points = cube.coord("time").bounds[0][-1]
for coord_name in ["blend_time", "forecast_reference_time"]:
if coord_name in [c.name() for c in cube.coords()]:
coord = cube.coord(coord_name)
if coord.has_bounds():
coord = coord.copy(coord.bounds[0][-1])
cube.replace_coord(coord)
if "forecast_period" in [c.name() for c in cube.coords()]:
calculated_coord = forecast_period_coord(
cube, force_lead_time_calculation=True
)
new_coord = cube.coord("forecast_period").copy(
points=calculated_coord.points, bounds=calculated_coord.bounds
)
cube.replace_coord(new_coord)
[docs] def process(self, cubes: CubeList) -> Cube:
"""Calculate the modal weather code, with handling for edge cases.
Args:
cubes:
A list of weather code cubes at different times. A modal
code will be calculated over the time coordinate to return
the most comon code, which is taken to be the best
representation of the whole period.
Returns:
A single weather code cube with time bounds that span those of
the input weather code cubes.
"""
# Store the information for the record_run attribute on the cubes.
if self.record_run_attr and self.model_id_attr:
store_record_run_as_coord(cubes, self.record_run_attr, self.model_id_attr)
cube = MergeCubes()(cubes)
# Create the expected cell method. The aggregator adds a cell method
# but cannot include an interval, so we create it here manually,
# ensuring to preserve any existing cell methods.
cell_methods = list(cube.cell_methods)
try:
(input_data_period,) = np.unique(np.diff(cube.coord("time").bounds)) / 3600
except ValueError as err:
raise ValueError(
"Input diagnostics do not have consistent periods."
) from err
cell_methods.append(
iris.coords.CellMethod(
"mode", coords="time", intervals=f"{int(input_data_period)} hour"
)
)
self._unify_day_and_night(cube)
# Handle case in which a single time is provided.
if len(cube.coord("time").points) == 1:
result = cube
else:
result = cube.collapsed("time", self.aggregator_instance)
self._set_blended_times(result)
result.cell_methods = None
for cell_method in cell_methods:
result.add_cell_method(cell_method)
if self.model_id_attr:
# Update contributing models
contributing_models = set()
for source_cube in cubes:
for model in source_cube.attributes[self.model_id_attr].split(" "):
contributing_models.update([model])
result.attributes[self.model_id_attr] = " ".join(
sorted(list(contributing_models))
)
if self.record_run_attr and self.model_id_attr:
record_run_coord_to_attr(
result, cube, self.record_run_attr, discard_weights=True
)
result.remove_coord(RECORD_COORD)
# Handle any unset points where it was hard to determine a suitable mode
if (result.data == UNSET_CODE_INDICATOR).any():
self._group_codes(result, cube)
return result