# noqa: D100
import json
import logging
import os
import zipfile
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from itertools import chain
from multiprocessing.pool import ThreadPool
from pathlib import Path
from typing import (
Callable,
Deque,
Dict,
Generator,
Iterable,
List,
Optional,
Tuple,
Union,
)
import cftime
import numpy as np
import pandas as pd
import requests
import sentry_sdk
import xarray as xr
import xclim
import yaml
from netCDF4 import num2date
from pandas.api.types import is_numeric_dtype
from pywps import (
FORMATS,
BoundingBoxInput,
BoundingBoxOutput,
ComplexInput,
ComplexOutput,
LiteralInput,
LiteralOutput,
Process,
configuration,
)
from pywps.configuration import get_config_value
from pywps.inout.outputs import MetaFile, MetaLink4
from requests.exceptions import ConnectionError, InvalidSchema, MissingSchema
from slugify import slugify
from xclim.core.indicator import build_indicator_module_from_yaml
from xclim.core.utils import InputKind
# Module-level logger; it uses the "PYWPS" logger name.
LOGGER = logging.getLogger("PYWPS")
# Type aliases covering the three pywps input classes and the three pywps output classes.
PywpsInput = Union[LiteralInput, ComplexInput, BoundingBoxInput]
PywpsOutput = Union[LiteralOutput, ComplexOutput, BoundingBoxOutput]
# Mapping from an input identifier to the queue of inputs received under that identifier.
RequestInputs = Dict[str, Deque[PywpsInput]]
# These are parameters that set options. They are not `compute` arguments.
# `compute_indices` pops them from the keyword arguments and passes them to
# `xclim.core.options.set_options` instead of to the indicator function.
INDICATOR_OPTIONS = [
"check_missing",
"missing_options",
"cf_compliance",
"data_validation",
]
def get_virtual_modules():
    """Build the indicator modules listed in the ``finch:xclim_modules`` option.

    The option is a comma-separated list of YAML module files. Relative
    entries are resolved against the directory two levels above this file.

    Returns
    -------
    dict
        Mapping from the file name of each entry to ``{"indicators": [...]}``,
        where the list holds one instance per indicator of the built module.
        Empty when the option is not set.
    """
    configured = get_config_value("finch", "xclim_modules")
    if not configured:
        return {}

    base_dir = Path(__file__).parent.parent
    modules = {}
    for modfile in configured.split(","):
        if os.path.isabs(modfile):
            yaml_path = Path(modfile)
        else:
            yaml_path = base_dir.joinpath(modfile)
        mod = build_indicator_module_from_yaml(yaml_path)
        modules[Path(modfile).name] = {
            "indicators": [ind.get_instance() for _, ind in mod.iter_indicators()]
        }
    return modules
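# NOTE(review): a stray "[docs]" marker stood here; it looks like residue from a rendered documentation page rather than module code.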
@dataclass
class DatasetConfiguration:
    """Description of one dataset that can be searched for input files.

    Attributes
    ----------
    path : str
        Path (or url) of the root directory where the data is searched.
    pattern : str
        Pattern of the file names. It must contain at least the "variable",
        "scenario" and "model" fields and be understandable by
        :py:func:`parse.parse`.
    local : bool
        Whether ``path`` is a local directory or a remote THREDDS catalog.
    allowed_values : dict
        Mapping from field name to the list of allowed values. Must include
        "scenario", "model" and "variable"; the latter defines which variables
        are available and thus which indicators can be used.
    depth : int
        How deep below the directory files are searched. A value < 0 searches
        recursively.
    suffix : str
        Suffix of the files, used when the files are local.
    model_lists : dict
        Mapping from a list name to a list of model names, providing special
        sub-lists. Values can also be tuples of (model name, realization
        number), in which case ``pattern`` must include a "realization" field.
    """

    # Required fields (positional order is part of the interface).
    path: str
    pattern: str
    local: bool
    allowed_values: dict
    # Optional fields.
    depth: int = 0
    suffix: str = "*nc"
    model_lists: dict = field(default_factory=dict)
def get_datasets_config():
    """Load the dataset configurations named by the ``finch:datasets_config`` option.

    A relative path is resolved against the directory two levels above this
    file. The YAML file is expected to map dataset names to the keyword
    arguments of :py:class:`DatasetConfiguration`.

    Returns
    -------
    dict
        Mapping from dataset name to :py:class:`DatasetConfiguration`;
        empty when no configuration file is given.
    """
    configured = get_config_value("finch", "datasets_config")
    if not configured:  # No config given.
        return {}

    config_file = Path(configured)
    if not config_file.is_absolute():
        config_file = Path(__file__).parent.parent / configured

    with open(config_file) as stream:
        raw = yaml.safe_load(stream)

    datasets = {}
    for name, options in raw.items():
        datasets[name] = DatasetConfiguration(**options)
    return datasets
def get_available_variables():
    """Return the set of variable names allowed by any configured dataset."""
    available = set()
    for dataset in get_datasets_config().values():
        available.update(dataset.allowed_values["variable"])
    return available
def iter_xc_variables(indicator: xclim.core.indicator.Indicator):
    """Yield the names of the indicator parameters that are (optional) variables."""
    variable_kinds = (InputKind.VARIABLE, InputKind.OPTIONAL_VARIABLE)
    yield from (
        name
        for name, param in indicator.parameters.items()
        if param.kind in variable_kinds
    )
def log_file_path(process: Process) -> Path:
    """Return the path of ``log.txt`` inside the working directory of `process`."""
    workdir = Path(process.workdir)
    return workdir.joinpath("log.txt")
def write_log(
    process: Process,
    message: str,
    level=logging.INFO,
    *,
    process_step: str = None,
    subtask_percentage: int = None,
):
    """Log the process status.

    The message is:

    - sent to the logging module,
    - appended to a log file stored in the process working directory
      (only when ``level >= logging.INFO``),
    - pushed to the response document together with the status percentage.

    Parameters
    ----------
    process : Process
        The running process; its ``response``, ``workdir`` and
        ``status_percentage_steps`` are used.
    message : str
        The message to log.
    level : int
        Logging level of the message.
    process_step : str, optional
        Key of ``process.status_percentage_steps``. When given and known, the
        status percentage is set to the value of that step.
    subtask_percentage : int, optional
        Not the percentage of the whole process, but the percent done in the
        current processing step (see ``process.status_percentage_steps``).
    """
    LOGGER.log(level, message)

    status_percentage = process.response.status_percentage

    # If a process_step is given, set this as the status percentage.
    if process_step:
        status_percentage = process.status_percentage_steps.get(
            process_step, status_percentage
        )

    # If a subtask percentage is given, add this value to the status_percentage.
    if subtask_percentage is not None:
        steps_percentages = list(process.status_percentage_steps.values())
        for n, percent in enumerate(steps_percentages):
            if status_percentage < percent:
                next_step_percentage = percent
                # NOTE(review): when n == 0 this reads steps_percentages[-1]
                # (the last step), which yields a negative step_delta.
                # Behaviour kept as-is; confirm what is intended.
                current_step_percentage = steps_percentages[n - 1]
                break
        else:
            # Already at or past the last step: interpolate up to 100 %.
            current_step_percentage, next_step_percentage = 1, 100
            if steps_percentages:
                current_step_percentage = steps_percentages[-1]
        step_delta = next_step_percentage - current_step_percentage
        sub_percentage = subtask_percentage / 100 * step_delta
        status_percentage = current_step_percentage + int(sub_percentage)

    if level >= logging.INFO:
        # Use a context manager so the file handle is closed (and flushed)
        # right away instead of whenever the object gets garbage collected.
        with log_file_path(process).open("a", encoding="utf8") as log_file:
            log_file.write(message + "\n")

    try:
        process.response.update_status(message, status_percentage=status_percentage)
    except AttributeError:
        # Best effort: the response may not support status updates.
        pass
def get_attributes_from_config():
    """Return the metadata attributes explicitly set in the ``finch:metadata`` config section.

    Every key of the section that is also a key of the parser defaults is
    dropped. This works because no defaults are defined for this section, but
    an explicitly passed field with the same name as a default would be lost.
    This is especially risky since ALL environment variables are listed in
    the defaults.
    """
    section_keys = set(configuration.CONFIG["finch:metadata"].keys())
    default_keys = set(configuration.CONFIG._defaults.keys())

    attributes = {}
    for name in section_keys - default_keys:
        attributes[name] = configuration.get_config_value("finch:metadata", name)
    return attributes
def compute_indices(
    process: Process, func: Callable, inputs: RequestInputs
) -> xr.Dataset:
    """Call an indicator function with arguments built from the request inputs.

    Parameters
    ----------
    process : Process
        The running process, used for logging while datasets are opened.
    func : Callable
        The indicator to compute. It is called with keyword arguments only.
    inputs : RequestInputs
        Mapping from input identifier to the queue of received inputs.
        Literal inputs are passed by value (a list when several were given),
        JSON complex inputs are parsed, and netCDF/DODS complex inputs are
        opened and reduced to a single data variable.

    Returns
    -------
    xr.Dataset
        Dataset holding the computed output, with global attributes taken
        from the first opened dataset that has any, plus the configured ones.

    Raises
    ------
    KeyError
        If a "variable" input is given and is not a data variable of an
        opened dataset.
    """
    kwds = {}
    global_attributes = {}

    # First pass: literal inputs.
    for name, input_queue in inputs.items():
        if isinstance(input_queue[0], LiteralInput):
            values = [inp.data for inp in input_queue]
            kwds[name] = values[0] if len(input_queue) == 1 else values

    # "variable" selects the data variable to read; it is not an indicator argument.
    variable = kwds.pop("variable", None)

    # Second pass: complex inputs (JSON values and datasets).
    for name, input_queue in inputs.items():
        first_input = input_queue[0]
        if not isinstance(first_input, ComplexInput):
            continue

        data_format = first_input.supported_formats[0]
        if data_format == FORMATS.JSON:
            kwds[name] = json.loads(first_input.data)

        elif data_format in [FORMATS.NETCDF, FORMATS.DODS]:
            ds = try_opendap(
                first_input, logging_function=lambda msg: write_log(process, msg)
            )
            # Copy the attributes: they are updated below and the input
            # dataset's own attrs must not be modified as a side effect.
            global_attributes = global_attributes or dict(ds.attrs)

            if variable:
                if variable not in ds.data_vars:
                    raise KeyError(
                        f"Variable name '{variable}' not in data_vars {list(ds.data_vars)}"
                    )
                kwds[name] = ds.data_vars[variable]
            elif name in ds.data_vars:
                # Get variable matching input parameter name.
                kwds[name] = ds.data_vars[name]
            else:
                # If only one variable in dataset, use it.
                data_vars = list(ds.data_vars.values())
                if len(data_vars) == 1:
                    kwds[name] = data_vars[0]

    user_attrs = get_attributes_from_config()
    global_attributes.update(
        {
            "climateindex_package_id": "https://github.com/Ouranosinc/xclim",
            "product": "derived climate index",
        },
        **user_attrs,
    )

    # Option-like inputs are set through xclim's options, not passed to `func`.
    options = {name: kwds.pop(name) for name in INDICATOR_OPTIONS if name in kwds}
    with xclim.core.options.set_options(**options):
        out = func(**kwds)

    output_dataset = xr.Dataset(
        data_vars=None, coords=out.coords, attrs=global_attributes
    )

    # fix frequency of computed output (xclim should handle this)
    if output_dataset.attrs.get("frequency") == "day" and "freq" in kwds:
        conversions = {
            "YS": "yr",
            "MS": "mon",
            "QS-DEC": "seasonal",
            "AS-JUL": "seasonal",
        }
        output_dataset.attrs["frequency"] = conversions.get(kwds["freq"], "day")

    output_dataset[out.name] = out
    return output_dataset
def drs_filename(ds: xr.Dataset, variable: str = None):
    """Generate filename according to the data reference syntax (DRS).

    Parameters
    ----------
    ds : xr.Dataset
        Dataset whose global attributes (and time coordinate, if any) are used.
    variable : str
        Variable to use in the file name. When the dataset has exactly one
        data variable, that one is always used. If still unset, the first
        variable with at least 3 dimensions is used.

    Returns
    -------
    str
        DRS filename

    Raises
    ------
    KeyError
        When the dataset doesn't have the required attributes.

    Notes
    -----
    Copied and modified from https://github.com/bird-house/eggshell which doesn't have a release usable by finch.
    Based on the metadata in the resource.
    http://cmip-pcmdi.llnl.gov/cmip5/docs/cmip5_data_reference_syntax.pdf
    https://pypi.python.org/pypi/drslib
    """
    data_var_names = list(ds.data_vars)
    if len(data_var_names) == 1:
        variable = data_var_names[0]
    if variable is None:
        variable = [k for k, v in ds.variables.items() if len(v.dims) >= 3][0]
    variable = variable.replace("_", "-")

    attrs = ds.attrs
    project = attrs["project_id"]

    if project in ("CORDEX", "EOBS"):
        # CORDEX example: tas_EUR-11_ICHEC-EC-EARTH_historical_r3i1p1_DMI-HIRHAM5_v1_day
        cordex_keys = (
            "CORDEX_domain",
            "driving_model_id",
            "experiment_id",
            "driving_model_ensemble_member",
            "model_id",
            "rcm_version_id",
            "frequency",
        )
        filename = "_".join([variable, *(format(attrs[key]) for key in cordex_keys)])
    elif project == "CMIP5":
        # CMIP5 example: tas_MPI-ESM-LR_historical_r1i1p1
        realization = attrs["driving_realization"]
        initialization = attrs["driving_initialization_method"]
        physics = attrs["driving_physics_version"]
        ensemble = f"r{realization}i{initialization}p{physics}"
        model = attrs["driving_model_id"]
        experiment = attrs["driving_experiment_id"].replace(",", "+")
        filename = f"{variable}_{model}_{experiment}_{ensemble}"
    else:
        # Unknown project: join whatever is available, skipping empty parts.
        candidates = (
            variable,
            attrs.get("frequency"),
            attrs.get("model_id"),
            attrs.get("driving_model_id"),
            attrs.get("experiment_id", "").replace(",", "+"),
            attrs.get("driving_experiment_id", "").replace(",", "+"),
        )
        filename = "_".join(part for part in candidates if part)

    if "time" in ds:
        first, last = ds.time[0].values, ds.time[-1].values

        if "units" in ds.time.attrs:
            # times are encoded
            units = ds.time.units
            calendar = ds.time.attrs.get("calendar", "standard")
            first = num2date(first, units, calendar)
            last = num2date(last, units, calendar)

        start = pd.to_datetime(str(first))
        end = pd.to_datetime(str(last))
        filename += f"_{start:%Y%m%d}-{end:%Y%m%d}"

    # sanitize any spaces that came from the source input's metadata
    return filename.replace(" ", "-") + ".nc"
def try_opendap(
    input: ComplexInput,
    *,
    chunks="auto",
    decode_times=True,
    chunk_dims=None,
    logging_function=lambda message: None,
) -> xr.Dataset:
    """Open an input as an OPeNDAP url when possible, else as a file, and chunk it.

    By default, chunks are to be determined by xarray/dask.
    If `chunks=None` or `chunk_dims` is given, finch rechunks the dataset
    according to the logic of `chunk_dataset`.
    `chunks=False` is passed to xarray as `chunks=None`.
    A dataset that has both a "region" and a "time" dimension is always
    rechunked with the full time axis and 5 regions per chunk.
    """
    url = input.url
    logging_function(f"Try opening DAP link {url}")

    if is_opendap_url(url):
        path = url
        logging_function(f"Opened dataset as an OPeNDAP url: {url}")
    elif url.startswith("http"):
        # Accessing the file property writes it to disk if it's a url
        logging_function(f"Downloading dataset for url: {url}")
        path = input.file
    else:
        logging_function(f"Opening as local file: {input.file}")
        path = input.file

    try:
        ds = xr.open_dataset(path, chunks=chunks or None, decode_times=decode_times)
    except NotImplementedError:
        if chunks != "auto":
            raise
        # Some dtypes are not compatible with auto chunking (object, so unbounded strings)
        logging_function(
            "xarray auto-chunking failed, opening with no chunks and inferring chunks ourselves."
        )
        chunks = None
        ds = xr.open_dataset(path, chunks=None, decode_times=decode_times)

    # To handle large number of grid cells (50+) in subsetted data
    if "region" in ds.dims and "time" in ds.dims:
        return ds.chunk({"time": -1, "region": 5})
    if chunks is None or chunk_dims is not None:
        return ds.chunk(chunk_dataset(ds, max_size=1000000, chunk_dims=chunk_dims))
    return ds
def process_threaded(function: Callable, inputs: Iterable):
    """Apply `function` to every input, threaded or not according to the configuration.

    The ``finch:subset_threads`` option gives the number of threads. With
    more than one thread the outputs are returned in completion order, not
    in input order.

    Parameters
    ----------
    function : Callable
        Function called with one input at a time.
    inputs : Iterable
        The inputs to process.

    Returns
    -------
    list
        The outputs of `function`.
    """
    threads = int(configuration.get_config_value("finch", "subset_threads"))
    if threads <= 1:
        return [function(item) for item in inputs]

    pool = ThreadPool(processes=threads)
    try:
        return list(pool.imap_unordered(function, inputs))
    finally:
        # Always release the worker threads, also when `function` raised.
        pool.close()
        pool.join()
def chunk_dataset(ds, max_size=1000000, chunk_dims=None):
    """Compute chunk sizes so that one chunk of the dataset is below a certain size.

    Cycle through the dimensions (in dataset order), dividing the chunk size
    by 2 until the number of elements per chunk is below `max_size`.
    If `chunk_dims` is given, limits the chunking to those dimensions, if they
    are found in the dataset.

    Parameters
    ----------
    ds : xarray.Dataset
        Only its ``sizes`` and ``dims`` are read.
    max_size : int
        Exclusive upper bound on the number of elements per chunk.
    chunk_dims : sequence of str, optional
        Names of the dimensions that may be chunked. Defaults to all.

    Returns
    -------
    dict
        Mapping from dimension name to chunk size. If the selected dimensions
        cannot be reduced any further, the smallest reachable chunking is
        returned even if it is still above `max_size`.
    """
    from itertools import cycle
    from math import prod

    chunks = dict(ds.sizes)
    wanted = set(chunk_dims or ds.dims)
    # Keep the dataset's dimension order so the result is reproducible.
    dims = [dim for dim in ds.dims if dim in wanted]
    if not dims:
        LOGGER.warning(
            f"Provided dimension names for chunking ({chunk_dims}) were "
            f"not found in dataset dims ({ds.dims}). No chunking was done."
        )
        return chunks

    for dim in cycle(dims):
        if prod(chunks.values()) < max_size:
            break
        if all(chunks[d] <= 1 for d in dims):
            # Nothing left to halve: stop instead of cycling forever.
            break
        chunks[dim] = max(chunks[dim] // 2, 1)

    return chunks
def make_metalink_output(
    process: Process, files: List[Path], description: str = None
) -> MetaLink4:
    """Wrap a list of files in a MetaLink4 output.

    Each file becomes a netCDF ``MetaFile`` identified by its stem. The
    metalink takes its identity and working directory from `process`.
    """
    metalink = MetaLink4(
        identity=process.identifier,
        description=description,
        publisher="Finch",
        workdir=process.workdir,
    )

    for path in files:
        meta_file = MetaFile(identity=path.stem, fmt=FORMATS.NETCDF)
        meta_file.file = str(path)
        metalink.append(meta_file)

    return metalink
def is_opendap_url(url):
    """Check if a provided url is an OpenDAP url.

    The DAP Standard specifies that a specific tag must be included in the
    Content-Description header of every request. This tag is one of:
    "dods-dds" | "dods-das" | "dods-data" | "dods-error"

    So a HEAD request is sent and the url is considered OpenDAP when that
    header starts with `dods`. Some OpenDAP servers seem to not include the
    specified header; those are reported as non-OpenDAP here.

    Returns False when the request fails with a connection or schema error.
    """
    try:
        response = requests.head(url, timeout=5)
    except (ConnectionError, MissingSchema, InvalidSchema):
        return False

    content_description = response.headers.get("Content-Description")
    if not content_description:
        return False
    return content_description.lower().startswith("dods")

    # Disabled alternative: letting netCDF4 open the url and checking that
    # `disk_format` is "DAP2" or "DAP4". For a non-DAP URL, this just hangs python.
    # try:
    #     dataset = netCDF4.Dataset(url)
    # except OSError:
    #     return False
    # return dataset.disk_format in ("DAP2", "DAP4")
def single_input_or_none(inputs, identifier) -> Optional[str]:
    """Return the data of the first input stored under `identifier`, or None on KeyError."""
    try:
        first = inputs[identifier][0]
        value = first.data
    except KeyError:
        value = None
    return value
def netcdf_file_list_to_csv(
    netcdf_files: Union[List[Path], List[str]],
    output_folder,
    filename_prefix,
    csv_precision: Optional[int] = None,
) -> Tuple[List[str], str]:
    """Write csv files for a list of netcdf files.

    Produces one csv file per calendar type, along with a metadata folder in
    the output_folder.

    Parameters
    ----------
    netcdf_files : list of Path or str
        The netCDF files to convert.
    output_folder : Path or str
        Folder where the csv files and the "metadata" folder are written.
        It is created if needed.
    filename_prefix : str
        Prefix of the csv file names; the calendar name is appended.
    csv_precision : int, optional
        Number of decimals written for numeric columns. A negative value
        rounds the data of every file to that precision (e.g. -1 rounds to
        tens) and writes it with 0 decimals. None leaves the values untouched.

    Returns
    -------
    tuple
        The list of written csv paths and the metadata folder (as a string).

    Raises
    ------
    KeyError
        If a file has none of the expected model or experiment attributes.
    """
    output_folder = Path(output_folder)
    output_folder.mkdir(parents=True, exist_ok=True)

    # Keep rounding and formatting precisions apart. Overwriting
    # `csv_precision` after the first rounding (as was done before) left
    # every following file un-rounded.
    round_decimals = (
        csv_precision if csv_precision is not None and csv_precision < 0 else None
    )
    format_precision = None if csv_precision is None else max(csv_precision, 0)

    def get_attrs_fallback(ds, *args):
        """Return the first global attribute of `ds` found among `args`."""
        for key in args:
            try:
                return ds.attrs[key]
            except KeyError:
                continue
        raise KeyError(f"Couldn't find any attribute in [{', '.join(args)}]")

    metadata = {}
    concat_by_calendar = {}
    for file in netcdf_files:
        ds = xr.open_dataset(str(file), decode_times=False)
        # NOTE: the coordinates of the last file are the ones used when
        # formatting the columns below.
        coords = ds.coords
        calendar = ds.time.calendar
        ds["time"] = xr.decode_cf(ds).time

        for variable in ds.data_vars:
            # for a specific dataset the keys are different:
            # BCCAQv2+ANUSPLIN300_BNU-ESM_historical+rcp85_r1i1p1_19500101-21001231
            model = get_attrs_fallback(ds, "driving_model_id", "GCM__model_id")
            experiment = get_attrs_fallback(
                ds, "driving_experiment_id", "GCM__experiment"
            )
            experiment = experiment.replace(",", "_")

            output_variable = f"{variable}_{model}_{experiment}"

            units = ds[variable].units
            if units:
                output_variable += f"_({units})"

            ds = ds.rename({variable: output_variable})

            if round_decimals is not None:
                ds = ds.round(round_decimals)

            df = dataset_to_dataframe(ds)

            if calendar not in concat_by_calendar:
                # TODO: Why was this there? When we have a time axis, this makes the concat fail.
                # if "lat" in df.index.names and "lon" in df.index.names:
                #     df = df.reset_index(["lat", "lon"])
                concat_by_calendar[calendar] = [df]
            else:
                concat_by_calendar[calendar].append(df[output_variable])
            metadata[output_variable] = format_metadata(ds)

    output_csv_list = []
    for calendar_type, data in concat_by_calendar.items():
        output_csv = output_folder / f"{filename_prefix}_{calendar_type}.csv"
        concat = pd.concat(data, axis=1)

        if "region" in concat.reset_index().columns:
            concat = (
                concat.reset_index()
                .sort_values(["region", "time"])
                .set_index(["lat", "lon", "time"])
                .drop(columns="region")
            )
        else:
            concat = (
                concat.reset_index()
                .sort_values(["lat", "lon", "time"])
                .set_index(["lat", "lon", "time"])
            )

        dropna_threshold = 1  # at least one value
        concat.dropna(thresh=dropna_threshold, inplace=True)

        if format_precision is not None:
            for v in concat:
                if v not in coords and is_numeric_dtype(concat[v]):
                    concat[v] = concat[v].map(
                        lambda x: f"{x:.{format_precision}f}" if not pd.isna(x) else ""
                    )

        concat.to_csv(output_csv)
        output_csv_list.append(output_csv)

    metadata_folder = output_folder / "metadata"
    metadata_folder.mkdir(parents=True, exist_ok=True)
    for output_variable, info in metadata.items():
        metadata_file = metadata_folder / f"{output_variable}.csv"
        metadata_file.write_text(info)

    return output_csv_list, str(metadata_folder)
def dataset_to_dataframe(ds: xr.Dataset) -> pd.DataFrame:
    """Convert a Dataset to a DataFrame with every time stamp set to hour=12.

    The time coordinate of `ds` is replaced in place when any hour differs
    from 12. When the dataset has a "realization" dimension, the frame is
    pivoted so that each realization gets its own column, named
    "<variable>:<realization>". The result is sorted and indexed by whichever
    of the lat/lon/time (and scenario/region, with realizations) columns exist.
    """
    if not np.all(ds.time.dt.hour == 12):
        saved_attrs = ds.time.attrs

        # np.datetime64 doesn't have the 'replace' method
        stamps = ds.time.values
        if not hasattr(stamps[0], "replace"):
            stamps = pd.to_datetime(stamps)

        ds["time"] = [stamp.replace(hour=12) for stamp in stamps]
        ds.time.attrs = saved_attrs

    df = ds.to_dataframe().reset_index()

    if "realization" in ds.dims:
        candidates = ["lat", "lon", "time", "scenario", "region"]
        index_cols = [col for col in candidates if col in df.columns]
        value_cols = [
            col
            for col in df.columns
            if col not in index_cols and col != "realization"
        ]
        df = df.pivot(
            index=index_cols,
            columns="realization",
            values=value_cols,
        ).reset_index()
        # pivot table columns are multi-indexes : flatten
        df.columns = [":".join(col) if col[1] else col[0] for col in df.columns]
    else:
        index_cols = [col for col in ["lat", "lon", "time"] if col in df.columns]

    return df.sort_values(index_cols).set_index(index_cols)
def format_metadata(ds) -> str:
"""For an xarray dataset, return its formatted metadata."""
def _fmt_attrs(obj, name="", comment="# ", tab=" "):
"""Return string of an object's attribute."""
lines = ["", name]
for key, val in obj.attrs.items():
lines.append(
tab + key + ":: " + str(val).replace("\n", "\n" + comment + tab + " ")
)
out = ("\n" + comment + tab).join(lines)
return out
objs = [
({"": ds}, "Global attributes"),
(ds.coords, "Coordinates"),
(ds.data_vars, "Data variables"),
]
out = ""
for obj, name in objs:
out += "# " + name
tab = "" if name == "Global attributes" else " "
for key, val in obj.items():
out += _fmt_attrs(val, key, tab=tab)
out += "\n#\n"
return out
def zip_files(
    output_filename, files: Iterable, log_function: Callable[[str, int], None] = None
):
    """Create a zipfile from a list of files or folders.

    Folders are searched recursively and every regular file found is added,
    whether or not its name has an extension. Inside the archive, paths are
    relative to the deepest folder common to all files.

    Parameters
    ----------
    output_filename : Path or str
        Path of the zip file to write.
    files : Iterable
        Paths of files and/or folders to add.
    log_function : Callable, optional
        Function that receives a message and a percentage before each file
        is written.
    """
    log_function = log_function or (lambda *a: None)

    # List the files before creating the archive so the archive itself can
    # never be picked up when it is written inside one of the folders.
    all_files = []
    for entry in files:
        entry = Path(entry)
        if entry.is_dir():
            # "*" (not "*.*") so files without a dot in their name are kept;
            # sorted for a reproducible archive order.
            all_files.extend(sorted(p for p in entry.rglob("*") if p.is_file()))
        else:
            all_files.append(entry)

    # Deepest folder shared by every file: walk the ancestors from the root
    # down, in lockstep, until they differ.
    common_folder = None
    all_parents = [list(reversed(file.parents)) for file in all_files]
    for parents in zip(*all_parents):
        if len(set(parents)) != 1:
            break
        common_folder = parents[0]

    n_files = len(all_files)
    with zipfile.ZipFile(
        output_filename, mode="w", compression=zipfile.ZIP_DEFLATED
    ) as z:
        for n, filename in enumerate(all_files):
            percentage = int(n / n_files * 100)
            message = f"Zipping file {n + 1} of {n_files}"
            log_function(message, percentage)
            arcname = filename.relative_to(common_folder) if common_folder else None
            z.write(filename, arcname=arcname)
def make_tasmin_tasmax_pairs(
    filenames: List[Path],
) -> Generator[Tuple[Path, Path], None, None]:
    """Return pairs of corresponding tasmin-tasmax files based on their filename.

    Two files match when their lower-cased names are identical once "tasmax"
    is replaced by "tasmin". Each file is used in at most one pair.

    Parameters
    ----------
    filenames : list of Path
        Candidate files; those without "tasmin" or "tasmax" in their name
        are ignored.

    Yields
    ------
    tuple of Path
        A (tasmin, tasmax) pair.

    Notes
    -----
    Once all pairs are produced, every tasmin or tasmax file left without a
    partner is reported to sentry as an error message.
    """
    tasmin_files = [f for f in filenames if "tasmin" in f.name.lower()]
    tasmax_files = [f for f in filenames if "tasmax" in f.name.lower()]

    unmatched_tasmin = []
    for tasmin in tasmin_files:
        wanted = tasmin.name.lower()
        partner = next(
            (
                tasmax
                for tasmax in tasmax_files
                if tasmax.name.lower().replace("tasmax", "tasmin") == wanted
            ),
            None,
        )
        if partner is None:
            unmatched_tasmin.append(tasmin)
            continue
        # A tasmax file can only be paired once.
        tasmax_files.remove(partner)
        yield tasmin, partner

    # Report leftovers of BOTH kinds (the tasmax list used to be reported
    # twice while unmatched tasmin files were never reported).
    for f in unmatched_tasmin + tasmax_files:
        sentry_sdk.capture_message(
            f"Couldn't find matching tasmin or tasmax for: {f}", level="error"
        )
def fix_broken_time_index(ds: xr.Dataset):
    """Repair, in place, a single broken value of the time coordinate.

    The broken value is 0 (approximately) when the times are encoded, i.e.
    when the time coordinate has a "units" attribute, or 1850-01-01 00:00
    when they are decoded with a "noleap" calendar. Only the first such value
    is considered. It is replaced by the previous value plus one day, and
    only when it is neither the first nor the last element and its two
    neighbours are exactly two days apart. In any other case the dataset is
    left untouched.
    """
    if "time" not in ds.dims:
        return

    time_values = ds.time.values
    encoded = "units" in ds.time.attrs

    if encoded:
        suspects = np.argwhere(np.isclose(time_values, 0))
    elif ds.time.dt.calendar == "noleap":
        suspects = np.argwhere(
            time_values == cftime.DatetimeNoLeap(year=1850, month=1, day=1, hour=0)
        )
    else:
        return

    if suspects.size == 0:
        return

    idx = suspects[0, 0]
    if idx in (0, len(ds.time) - 1):
        # Both neighbours are needed to validate the fix.
        return

    one_day = 1.0 if encoded else timedelta(days=1)
    if time_values[idx + 1] - time_values[idx - 1] != one_day * 2:
        return

    # `time_values` is the array taken from the coordinate above; it is
    # patched in place and then assigned back, keeping the attributes.
    time_values[idx] = time_values[idx - 1] + one_day
    saved_attrs = ds.time.attrs
    ds["time"] = time_values
    ds.time.attrs = saved_attrs
def dataset_to_netcdf(
    ds: xr.Dataset, output_path: Union[Path, str], compression_level=0
) -> None:
    """Write an :py:class:`xarray.Dataset` dataset to disk, optionally using compression.

    When the dataset has a time dimension, time is encoded as "single" and
    :py:func:`fix_broken_time_index` is applied first. A truthy
    `compression_level` enables zlib compression at that level on every data
    variable. The dataset is loaded before being written as NETCDF4.
    """
    encoding = {}

    if "time" in ds.dims:
        # better compatibility with OpenDAP in thredds
        encoding["time"] = {"dtype": "single"}
        fix_broken_time_index(ds)

    if compression_level:
        encoding.update(
            {
                name: {"zlib": True, "complevel": compression_level}
                for name in ds.data_vars
            }
        )

    # Perform computations
    # This is necessary when running with gunicorn to avoid lock-ups
    ds.load()
    ds.to_netcdf(str(output_path), format="NETCDF4", encoding=encoding)
def update_history(
    hist_str: str,
    *inputs_list: Union[xr.DataArray, xr.Dataset],
    new_name: Optional[str] = None,
    **inputs_kws: Union[xr.DataArray, xr.Dataset],
):
    r"""Return the merged history of all inputs followed by a new timestamped entry.

    The new history entry is formatted as
    "[<timestamp>] <new_name>: <hist_str> - finch version: <finch version>."

    Parameters
    ----------
    hist_str : str
        The string describing what has been done on the data.
    new_name : Optional[str]
        The name of the newly created variable or dataset, used as prefix of
        the new entry.
    \*inputs_list : Union[xr.DataArray, xr.Dataset]
        The datasets or variables that were used to produce the new object.
        Inputs given that way will be prefixed by their "name" attribute if available.
    \*\*inputs_kws : Union[xr.DataArray, xr.Dataset]
        Mapping from names to the datasets or variables that were used to produce the new object.
        Inputs given that way will be prefixed by the passed name.

    Returns
    -------
    str
        The combined history of all inputs, ending with the new entry.

    See Also
    --------
    merge_attributes
    """
    from finch import __version__  # pylint: disable=cyclic-import

    history = xclim.core.formatting.merge_attributes(
        "history",
        *inputs_list,
        new_line="\n",
        missing_str="",
        **inputs_kws,
    )
    # The new entry always starts on its own line.
    if history and not history.endswith("\n"):
        history += "\n"

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    prefix = new_name or ""
    entry = f"[{timestamp}] {prefix}: {hist_str} - finch version: {__version__}."
    return history + entry
def valid_filename(name: Union[Path, str]) -> Union[Path, str]:
    """Remove unsupported characters from a filename.

    Only the stem is slugified (with "_" as separator); the parent folder and
    the suffix are kept as they are.

    Returns
    -------
    str or Path
        Same type as `name`.

    Raises
    ------
    ValueError
        If nothing is left of the stem once slugified.

    Examples
    --------
    >>> valid_filename("summer's tasmin.nc")
    'summers_tasmin.nc'
    """
    path = Path(name)
    slug = slugify(path.stem, separator="_")
    if not slug:
        raise ValueError(f"Filename not valid. Got {name}.")

    cleaned = path.parent / (slug + path.suffix)
    return str(cleaned) if isinstance(name, str) else cleaned