PAVICS catalog search¶
To find files that meet constraints, PAVICS offers a process called pavicsearch
that searches through a catalog for files matching user-defined criteria. The information for each file is scraped from the attributes of each netCDF file.
---------------------------------------------------------------------------
ServiceException Traceback (most recent call last)
Cell In[1], line 6
3 from birdy import WPSClient
5 url = "https://pavics.ouranos.ca/twitcher/ows/proxy/catalog/wps"
----> 6 wps = WPSClient(url)
7 help(wps.pavicsearch)
File ~/checkouts/readthedocs.org/user_builds/pavics-sdi/conda/latest/lib/python3.10/site-packages/birdy/client/base.py:147, in WPSClient.__init__(self, url, processes, converters, username, password, headers, auth, verify, cert, progress, version, caps_xml, desc_xml, language, lineage, **kwds)
133 self._wps = WebProcessingService(
134 url,
135 version=version,
(...)
143 **kwds,
144 )
146 try:
--> 147 self._wps.getcapabilities(xml=caps_xml)
148 except ServiceException as e:
149 if "AccessForbidden" in str(e):
File ~/checkouts/readthedocs.org/user_builds/pavics-sdi/conda/latest/lib/python3.10/site-packages/owslib/wps.py:277, in WebProcessingService.getcapabilities(self, xml)
275 self._capabilities = reader.readFromString(xml)
276 else:
--> 277 self._capabilities = reader.readFromUrl(
278 self.url, headers=self.headers)
280 log.debug(element_to_string(self._capabilities))
282 # populate the capabilities metadata obects from the XML tree
File ~/checkouts/readthedocs.org/user_builds/pavics-sdi/conda/latest/lib/python3.10/site-packages/owslib/wps.py:546, in WPSCapabilitiesReader.readFromUrl(self, url, username, password, headers, verify, cert)
539 def readFromUrl(self, url, username=None, password=None,
540 headers=None, verify=None, cert=None):
541 """
542 Method to get and parse a WPS capabilities document, returning an elementtree instance.
543
544 :param str url: WPS service base url, to which is appended the HTTP parameters: service, version, and request.
545 """
--> 546 return self._readFromUrl(url,
547 {'service': 'WPS', 'request':
548 'GetCapabilities', 'version': self.version},
549 self.timeout,
550 username=username, password=password,
551 headers=headers, verify=verify, cert=cert)
File ~/checkouts/readthedocs.org/user_builds/pavics-sdi/conda/latest/lib/python3.10/site-packages/owslib/wps.py:503, in WPSReader._readFromUrl(self, url, data, timeout, method, username, password, headers, verify, cert)
501 # split URL into base url and query string to use utility function
502 spliturl = request_url.split('?')
--> 503 u = openURL(spliturl[0], spliturl[
504 1], method='Get', username=self.auth.username, password=self.auth.password,
505 headers=headers, verify=self.auth.verify, cert=self.auth.cert, timeout=self.timeout)
506 return etree.fromstring(u.read())
508 elif method == 'Post':
File ~/checkouts/readthedocs.org/user_builds/pavics-sdi/conda/latest/lib/python3.10/site-packages/owslib/util.py:212, in openURL(url_base, data, method, cookies, username, password, timeout, headers, verify, cert, auth)
209 req = requests.request(method.upper(), url_base, headers=headers, **rkwargs)
211 if req.status_code in [400, 401]:
--> 212 raise ServiceException(req.text)
214 if req.status_code in [404, 500, 502, 503, 504]: # add more if needed
215 req.raise_for_status()
ServiceException: <?xml version="1.0" encoding="utf-8"?>
<ExceptionReport version="1.0.0"
xmlns="http://www.opengis.net/ows/1.1"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.opengis.net/ows/1.1 http://schemas.opengis.net/ows/1.1.0/owsExceptionReport.xsd">
<Exception exceptionCode="NoApplicableCode" locator="NotAcceptable">
<ExceptionText>Request failed: HTTPConnectionPool(host='pavics.ouranos.ca', port=8086): Max retries exceeded with url: /pywps/wps?service=WPS&request=GetCapabilities&version=1.0.0 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7f602326de50>: Failed to establish a new connection: [Errno 111] Connection refused'))</ExceptionText>
</Exception>
</ExceptionReport>
Potential search constraints are:
project
experiment
model
frequency
variable
variable_long_name
units
institute
Note that the rip label (realization, initialization, physics), e.g. r5i1p1, is missing from search facets.
The process returns an output dictionary storing the search facets of each file found, as well as a simple list of the links.
Note that it is important to specify type="File", otherwise the process will look for datasets, i.e. file aggregations. At the moment, very few aggregations are available on the PAVICS data server.
# NBVAL_IGNORE_OUTPUT
# Facets the matching files must satisfy, written as comma-separated
# "facet:value" pairs.
search_constraints = "variable:tasmax,project:CMIP5,experiment:rcp45,model:MPI-ESM-MR,institute:MPI-M,frequency:mon"

# type="File" asks for individual files; without it the process looks for
# datasets (file aggregations) instead.
resp = wps.pavicsearch(constraints=search_constraints, limit=100, type="File")

# The process yields two outputs: the search result dictionary and a plain
# list of links to the files found.
result, files = resp.get(asobj=True)
files
['https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/cmip5/MPI-M/MPI-ESM-MR/rcp45/mon/atmos/r2i1p1/tasmax/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-210012.nc',
'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/testdata/secure/tasmax_Amon_MPI-ESM-MR_rcp45_r1i1p1_200601-200612.nc',
'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r1i1p1_200701-200712.nc',
'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc',
'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/testdata/secure/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc',
'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/cmip5/MPI-M/MPI-ESM-MR/rcp45/mon/atmos/r3i1p1/tasmax/tasmax_Amon_MPI-ESM-MR_rcp45_r3i1p1_200601-210012.nc',
'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r1i1p1_200601-200612.nc',
'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/testdata/secure/tasmax_Amon_MPI-ESM-MR_rcp45_r1i1p1_200701-200712.nc',
'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/cmip5/MPI-M/MPI-ESM-MR/rcp45/mon/atmos/r1i1p1/tasmax/tasmax_Amon_MPI-ESM-MR_rcp45_r1i1p1_200601-210012.nc']
# NBVAL_IGNORE_OUTPUT
# Resource name of the single catalog entry to inspect.
target_resource = "birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc"

# Keep only the documents of the search response that describe that resource.
searchfile = [
    doc for doc in result["response"]["docs"] if doc["resourcename"] == target_resource
]
searchfile[0]
{'cf_standard_name': ['air_temperature'],
'abstract': 'birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc',
'replica': False,
'wms_url': 'https://pavics.ouranos.ca/twitcher/ows/proxy/ncWMS2/wms?SERVICE=WMS&REQUEST=GetCapabilities&VERSION=1.3.0&DATASET=outputs/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc',
'keywords': ['air_temperature',
'mon',
'application/netcdf',
'tasmax',
'thredds',
'CMIP5',
'rcp45',
'MPI-ESM-MR',
'MPI-M'],
'dataset_id': 'testdata.flyingpigeon.cmip5',
'datetime_max': '2006-12-16T12:00:00Z',
'id': '44b680cec0a7d4cc',
'subject': 'Birdhouse Thredds Catalog',
'category': 'thredds',
'opendap_url': 'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc',
'title': 'tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc',
'variable_palette': ['default'],
'variable_min': [0],
'variable_long_name': ['Daily Maximum Near-Surface Air Temperature'],
'source': 'https://pavics.ouranos.ca//twitcher/ows/proxy/thredds/catalog.xml',
'datetime_min': '2006-01-16T12:00:00Z',
'score': 1.0,
'variable_max': [1],
'units': ['K'],
'resourcename': 'birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc',
'type': 'File',
'catalog_url': 'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/flyingpigeon/cmip5/catalog.xml?dataset=birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc',
'experiment': 'rcp45',
'last_modified': '2018-12-21T15:13:38Z',
'content_type': 'application/netcdf',
'_version_': 1658705594373111809,
'variable': ['tasmax'],
'url': 'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/fileServer/birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc',
'project': 'CMIP5',
'institute': 'MPI-M',
'frequency': 'mon',
'model': 'MPI-ESM-MR',
'latest': True,
'fileserver_url': 'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/fileServer/birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc'}
# Print every facet of the first matching document in alphabetical key order.
for k in sorted(searchfile[0]):
    # Skip attributes that change between different servers for the same file.
    if k in ["id", "last_modified", "_version_", "source"]:
        continue

    value = searchfile[0][k]
    # Multi-valued facets (any iterable other than a string) are sorted;
    # everything else is printed as is.
    if isinstance(value, collections.abc.Iterable) and not isinstance(value, str):
        valuesorted = sorted(value)
    else:
        valuesorted = value
    print(f"{k}: {valuesorted}")
abstract: birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc
catalog_url: https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/flyingpigeon/cmip5/catalog.xml?dataset=birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc
category: thredds
cf_standard_name: ['air_temperature']
content_type: application/netcdf
dataset_id: testdata.flyingpigeon.cmip5
datetime_max: 2006-12-16T12:00:00Z
datetime_min: 2006-01-16T12:00:00Z
experiment: rcp45
fileserver_url: https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/fileServer/birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc
frequency: mon
institute: MPI-M
keywords: ['CMIP5', 'MPI-ESM-MR', 'MPI-M', 'air_temperature', 'application/netcdf', 'mon', 'rcp45', 'tasmax', 'thredds']
latest: True
model: MPI-ESM-MR
opendap_url: https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc
project: CMIP5
replica: False
resourcename: birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc
score: 1.0
subject: Birdhouse Thredds Catalog
title: tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc
type: File
units: ['K']
url: https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/fileServer/birdhouse/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc
variable: ['tasmax']
variable_long_name: ['Daily Maximum Near-Surface Air Temperature']
variable_max: [1]
variable_min: [0]
variable_palette: ['default']
wms_url: https://pavics.ouranos.ca/twitcher/ows/proxy/ncWMS2/wms?SERVICE=WMS&REQUEST=GetCapabilities&VERSION=1.3.0&DATASET=outputs/testdata/flyingpigeon/cmip5/tasmax_Amon_MPI-ESM-MR_rcp45_r2i1p1_200601-200612.nc