PAVICS catalog search
To find files that meet constraints, PAVICS offers a process called pavicsearch
that searches through a catalog for files matching user-defined criteria. The information for each file is scraped from the attributes of each netCDF file.
---------------------------------------------------------------------------
ServiceException Traceback (most recent call last)
Cell In[1], line 6
3 from birdy import WPSClient
5 url = "https://pavics.ouranos.ca/twitcher/ows/proxy/catalog/wps"
----> 6 wps = WPSClient(url)
7 help(wps.pavicsearch)
File ~/checkouts/readthedocs.org/user_builds/pavics-sdi/conda/latest/lib/python3.10/site-packages/birdy/client/base.py:147, in WPSClient.__init__(self, url, processes, converters, username, password, headers, auth, verify, cert, progress, version, caps_xml, desc_xml, language, lineage, **kwds)
133 self._wps = WebProcessingService(
134 url,
135 version=version,
(...)
143 **kwds,
144 )
146 try:
--> 147 self._wps.getcapabilities(xml=caps_xml)
148 except ServiceException as e:
149 if "AccessForbidden" in str(e):
File ~/checkouts/readthedocs.org/user_builds/pavics-sdi/conda/latest/lib/python3.10/site-packages/owslib/wps.py:278, in WebProcessingService.getcapabilities(self, xml)
276 self._capabilities = reader.readFromString(xml)
277 else:
--> 278 self._capabilities = reader.readFromUrl(
279 self.url, headers=self.headers)
281 LOGGER.debug(element_to_string(self._capabilities))
283 # populate the capabilities metadata obects from the XML tree
File ~/checkouts/readthedocs.org/user_builds/pavics-sdi/conda/latest/lib/python3.10/site-packages/owslib/wps.py:547, in WPSCapabilitiesReader.readFromUrl(self, url, username, password, headers, verify, cert)
540 def readFromUrl(self, url, username=None, password=None,
541 headers=None, verify=None, cert=None):
542 """
543 Method to get and parse a WPS capabilities document, returning an elementtree instance.
544
545 :param str url: WPS service base url, to which is appended the HTTP parameters: service, version, and request.
546 """
--> 547 return self._readFromUrl(url,
548 {'service': 'WPS', 'request':
549 'GetCapabilities', 'version': self.version},
550 self.timeout,
551 username=username, password=password,
552 headers=headers, verify=verify, cert=cert)
File ~/checkouts/readthedocs.org/user_builds/pavics-sdi/conda/latest/lib/python3.10/site-packages/owslib/wps.py:504, in WPSReader._readFromUrl(self, url, data, timeout, method, username, password, headers, verify, cert)
502 # split URL into base url and query string to use utility function
503 spliturl = request_url.split('?')
--> 504 u = openURL(spliturl[0], spliturl[
505 1], method='Get', username=self.auth.username, password=self.auth.password,
506 headers=headers, verify=self.auth.verify, cert=self.auth.cert, timeout=self.timeout)
507 return etree.fromstring(u.read())
509 elif method == 'Post':
File ~/checkouts/readthedocs.org/user_builds/pavics-sdi/conda/latest/lib/python3.10/site-packages/owslib/util.py:210, in openURL(url_base, data, method, cookies, username, password, timeout, headers, verify, cert, auth)
207 req = requests.request(method.upper(), url_base, headers=headers, **rkwargs)
209 if req.status_code in [400, 401, 403]:
--> 210 raise ServiceException(req.text)
212 if req.status_code in [404, 500, 502, 503, 504]: # add more if needed
213 req.raise_for_status()
ServiceException: <?xml version="1.0" encoding="utf-8"?>
<ExceptionReport version="1.0.0"
xmlns="http://www.opengis.net/ows/1.1"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.opengis.net/ows/1.1 http://schemas.opengis.net/ows/1.1.0/owsExceptionReport.xsd">
<Exception exceptionCode="NoApplicableCode" locator="NotAcceptable">
<ExceptionText>Request failed: HTTPConnectionPool(host='pavics.ouranos.ca', port=8086): Max retries exceeded with url: /pywps/wps?service=WPS&request=GetCapabilities&version=1.0.0 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7fc35acb00d0>: Failed to establish a new connection: [Errno 111] Connection refused'))</ExceptionText>
</Exception>
</ExceptionReport>
Potential search constraints are:
project
experiment
model
frequency
variable
variable_long_name
units
institute
Note that the rip label (realization, initialization, physics), e.g. r5i1p1, is missing from search facets.
The process returns an output dictionary storing the search facets of each file found, as well as a simple list of the links.
Note that it is important to specify type="File";
otherwise the process will look for datasets, i.e. file aggregations. At the moment, very few aggregations are available on the PAVICS data server.
# NBVAL_IGNORE_OUTPUT
# Search the catalog for individual files (type="File") rather than datasets,
# combining every facet constraint in a single comma-separated string.
resp = wps.pavicsearch(
    type="File",
    limit=100,
    constraints="variable:tasmax,project:CMIP5,experiment:rcp45,model:MPI-ESM-MR,institute:MPI-M,frequency:mon",
)
# The response unpacks into two outputs: the search result and the file links.
result, files = resp.get(asobj=True)
# Bare expression so the notebook displays the list of links.
files
['https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/cmip5/MPI-M/MPI-ESM-MR/rcp45/mon/atmos/r2i1p1/tasmax/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-210012.nc',
'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/testdata/secure/tasmax_Amon_MPI-ESM-MR_rcp45_r1i1p1_200601-200612.nc',
'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r1i1p1_200701-200712.nc',
'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc',
'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/testdata/secure/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc',
'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/cmip5/MPI-M/MPI-ESM-MR/rcp45/mon/atmos/r3i1p1/tasmax/tasmax_Amon_MPI-ESM-MR_rcp45_r3i1p1_200601-210012.nc',
'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r1i1p1_200601-200612.nc',
'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/testdata/secure/tasmax_Amon_MPI-ESM-MR_rcp45_r1i1p1_200701-200712.nc',
'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/cmip5/MPI-M/MPI-ESM-MR/rcp45/mon/atmos/r1i1p1/tasmax/tasmax_Amon_MPI-ESM-MR_rcp45_r1i1p1_200601-210012.nc']
# NBVAL_IGNORE_OUTPUT
# Keep only the catalog documents whose resource name is the flyingpigeon
# r2i1p1 test file, then display the first (expected only) match.
searchfile = [
    doc for doc in result["response"]["docs"]
    if doc["resourcename"] == "birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc"
]
searchfile[0]
{'cf_standard_name': ['air_temperature'],
'abstract': 'birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc',
'replica': False,
'wms_url': 'https://pavics.ouranos.ca/twitcher/ows/proxy/ncWMS2/wms?SERVICE=WMS&REQUEST=GetCapabilities&VERSION=1.3.0&DATASET=outputs/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc',
'keywords': ['air_temperature',
'mon',
'application/netcdf',
'tasmax',
'thredds',
'CMIP5',
'rcp45',
'MPI-ESM-MR',
'MPI-M'],
'dataset_id': 'testdata.flyingpigeon.cmip5',
'datetime_max': '2006-12-16T12:00:00Z',
'id': '44b680cec0a7d4cc',
'subject': 'Birdhouse Thredds Catalog',
'category': 'thredds',
'opendap_url': 'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc',
'title': 'tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc',
'variable_palette': ['default'],
'variable_min': [0],
'variable_long_name': ['Daily Maximum Near-Surface Air Temperature'],
'source': 'https://pavics.ouranos.ca//twitcher/ows/proxy/thredds/catalog.xml',
'datetime_min': '2006-01-16T12:00:00Z',
'score': 1.0,
'variable_max': [1],
'units': ['K'],
'resourcename': 'birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc',
'type': 'File',
'catalog_url': 'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/flyingpigeon/cmip5/catalog.xml?dataset=birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc',
'experiment': 'rcp45',
'last_modified': '2018-12-21T15:13:38Z',
'content_type': 'application/netcdf',
'_version_': 1658705594373111809,
'variable': ['tasmax'],
'url': 'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/fileServer/birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc',
'project': 'CMIP5',
'institute': 'MPI-M',
'frequency': 'mon',
'model': 'MPI-ESM-MR',
'latest': True,
'fileserver_url': 'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/fileServer/birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc'}
# Print the facets of the selected file as "key: value" lines, in key order,
# with list-like values sorted so the output is stable.
for key in sorted(searchfile[0]):
    # Skip attributes that change between different servers for the same file.
    if key in ["id", "last_modified", "_version_", "source"]:
        continue
    raw = searchfile[0][key]
    # Strings are iterable too, but must be printed as-is rather than sorted.
    if isinstance(raw, str) or not isinstance(raw, collections.abc.Iterable):
        shown = raw
    else:
        shown = sorted(raw)
    print(f"{key}: {shown}")
abstract: birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc
catalog_url: https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/flyingpigeon/cmip5/catalog.xml?dataset=birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc
category: thredds
cf_standard_name: ['air_temperature']
content_type: application/netcdf
dataset_id: testdata.flyingpigeon.cmip5
datetime_max: 2006-12-16T12:00:00Z
datetime_min: 2006-01-16T12:00:00Z
experiment: rcp45
fileserver_url: https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/fileServer/birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc
frequency: mon
institute: MPI-M
keywords: ['CMIP5', 'MPI-ESM-MR', 'MPI-M', 'air_temperature', 'application/netcdf', 'mon', 'rcp45', 'tasmax', 'thredds']
latest: True
model: MPI-ESM-MR
opendap_url: https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc
project: CMIP5
replica: False
resourcename: birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc
score: 1.0
subject: Birdhouse Thredds Catalog
title: tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc
type: File
units: ['K']
url: https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/fileServer/birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc
variable: ['tasmax']
variable_long_name: ['Daily Maximum Near-Surface Air Temperature']
variable_max: [1]
variable_min: [0]
variable_palette: ['default']
wms_url: https://pavics.ouranos.ca/twitcher/ows/proxy/ncWMS2/wms?SERVICE=WMS&REQUEST=GetCapabilities&VERSION=1.3.0&DATASET=outputs/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc