{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Earth System Grid Federation Data Access\n", "\n", "The Earth System Grid Federation (ESGF) has a search API that can be used by clients to query catalog content matching constraints (see [API documentation](https://github.com/ESGF/esgf.github.io/wiki/ESGF_Search_REST_API)). It's possible to send requests directly to the API using a simple function (see [example](https://esgf2.github.io/cmip6-cookbook/notebooks/foundations/esgf-opendap.html)), but here we'll use a python client named `pyesgf` to interact with the search API and get data from the ESGF THREDDS servers. The following shows examples of typical queries for data. \n", "\n", "If a login username and credentials are required, follow these [instructions](https://esgf-pyclient.readthedocs.io/en/latest/notebooks/examples/logon.html)." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of results: 10114\n", "Variables related to humidity: \n" ] }, { "data": { "text/plain": [ "{'tnhusscpbl': 157,\n", " 'tnhusscp': 70,\n", " 'tnhuspbl': 70,\n", " 'tnhusmp': 166,\n", " 'tnhusd': 34,\n", " 'tnhusc': 218,\n", " 'tnhusa': 174,\n", " 'tnhus': 76,\n", " 'hussLut': 34,\n", " 'huss': 1918,\n", " 'hus850': 164,\n", " 'hus': 2294,\n", " 'hursmin': 642,\n", " 'hursmax': 627,\n", " 'hurs': 1918,\n", " 'hur': 1552}" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# NBVAL_IGNORE_OUTPUT\n", "\n", "from pyesgf.search import SearchConnection\n", "\n", "# Create a connection for search on ESGF nodes. Note that setting `distrib=True` can lead to weird failures.\n", "conn = SearchConnection(\"https://esgf-node.llnl.gov/esg-search/\", distrib=False)\n", "\n", "# Launch a search query.\n", "# Here we're looking for any variable related to humidity within the CMIP6 SSP2-4.5 experiment.\n", "# Results will be stored in a dictionary with keys defined by the `facets` argument.\n", "ctx = conn.new_context(\n", " project=\"CMIP6\",\n", " experiment_id=\"ssp245\",\n", " query=\"humidity\",\n", " facets=\"variable_id,source_id\",\n", ")\n", "\n", "print(\"Number of results: \", ctx.hit_count)\n", "print(\"Variables related to humidity: \")\n", "ctx.facet_counts[\"variable_id\"]" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'UKESM1-0-LL': 123,\n", " 'TaiESM1': 20,\n", " 'NorESM2-MM': 36,\n", " 'NorESM2-LM': 186,\n", " 'NESM3': 21,\n", " 'MRI-ESM2-0': 663,\n", " 'MPI-ESM1-2-LR': 799,\n", " 'MPI-ESM1-2-HR': 51,\n", " 'MIROC6': 1989,\n", " 'MIROC-ES2L': 1861,\n", " 'MCM-UA-1-0': 8,\n", " 'KIOST-ESM': 30,\n", " 'KACE-1-0-G': 68,\n", " 'IPSL-CM6A-LR': 198,\n", " 'INM-CM5-0': 26,\n", " 'INM-CM4-8': 26,\n", " 'IITM-ESM': 17,\n", " 'HadGEM3-GC31-LL': 110,\n", " 'GISS-E2-2-G': 40,\n", " 'GISS-E2-1-H': 84,\n", " 'GISS-E2-1-G-CC': 4,\n", " 'GISS-E2-1-G': 325,\n", " 'GFDL-ESM4': 20,\n", " 'GFDL-CM4': 34,\n", " 'FIO-ESM-2-0': 16,\n", " 'FGOALS-g3': 88,\n", " 'FGOALS-f3-L': 12,\n", " 'EC-Earth3-Veg-LR': 37,\n", " 'EC-Earth3-Veg': 100,\n", " 'EC-Earth3-CC': 64,\n", " 'EC-Earth3': 817,\n", " 'E3SM-1-1': 22,\n", " 'CanESM5-CanOE': 24,\n", " 'CanESM5-1': 30,\n", " 'CanESM5': 1033,\n", " 'CNRM-ESM2-1': 108,\n", " 'CNRM-CM6-1-HR': 19,\n", " 'CNRM-CM6-1': 78,\n", " 'CMCC-ESM2': 20,\n", " 'CMCC-CM2-SR5': 18,\n", " 'CIESM': 9,\n", " 'CESM2-WACCM': 168,\n", " 'CESM2': 184,\n", " 'CAS-ESM2-0': 20,\n", " 'CAMS-CSM1-0': 6,\n", " 'BCC-CSM2-MR': 20,\n", " 'AWI-CM-1-1-MR': 19,\n", " 'ACCESS-ESM1-5': 406,\n", " 'ACCESS-CM2': 57}" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# NBVAL_IGNORE_OUTPUT\n", "\n", "# Now let's look for simulations that have the `hurs` variable and pick the first member.\n", "ctx.constrain(variable_id=\"hurs\", ensemble=\"r1i1p1f1\")\n", "ctx.facet_counts[\"source_id\"]" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'CMIP6.ScenarioMIP.CCCma.CanESM5.ssp245.r14i1p2f1.Amon.hus.gn.v20190429|crd-esgf-drc.ec.gc.ca'" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# We can now refine the search and get datasets corresponding within our search context\n", "results = ctx.constrain(source_id=\"CanESM5\").search()\n", "r = results[0]\n", "r.dataset_id" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['http://crd-esgf-drc.ec.gc.ca/thredds/fileServer/esgD_dataroot/AR6/CMIP6/ScenarioMIP/CCCma/CanESM5/ssp245/r14i1p2f1/Amon/hus/gn/v20190429/hus_Amon_CanESM5_ssp245_r14i1p2f1_gn_201501-210012.nc']" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# To get file download links, there's an extra step\n", "file_ctx = r.file_context()\n", "file_ctx.facets = \"*\"\n", "files = file_ctx.search()\n", "[f.download_url for f in files]" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['http://crd-esgf-drc.ec.gc.ca/thredds/dodsC/esgD_dataroot/AR6/CMIP6/ScenarioMIP/CCCma/CanESM5/ssp245/r14i1p2f1/Amon/hus/gn/v20190429/hus_Amon_CanESM5_ssp245_r14i1p2f1_gn_201501-210012.nc']\n", "None\n" ] } ], "source": [ "# Instead of a download URL, we can also get OPeNDAP links.\n", "urls = [f.opendap_url for f in files]\n", "print(urls)\n", "\n", "# It's sometimes possible to request aggregations of multiple netCDF into one OPeNDAP link,\n", "# but this option is often unavailable.\n", "agg_ctx = r.aggregation_context()\n", "agg_ctx.facets = \"*\"\n", "agg = agg_ctx.search()[0]\n", "print(agg.opendap_url)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.Dataset>\n",
       "Dimensions:    (time: 1032, bnds: 2, plev: 19, lat: 64, lon: 128)\n",
       "Coordinates:\n",
       "  * time       (time) object 2015-01-16 12:00:00 ... 2100-12-16 12:00:00\n",
       "  * plev       (plev) float64 1e+05 9.25e+04 8.5e+04 7e+04 ... 1e+03 500.0 100.0\n",
       "  * lat        (lat) float64 -87.86 -85.1 -82.31 -79.53 ... 82.31 85.1 87.86\n",
       "  * lon        (lon) float64 0.0 2.812 5.625 8.438 ... 348.8 351.6 354.4 357.2\n",
       "Dimensions without coordinates: bnds\n",
       "Data variables:\n",
       "    time_bnds  (time, bnds) object dask.array<chunksize=(1032, 2), meta=np.ndarray>\n",
       "    lat_bnds   (lat, bnds) float64 dask.array<chunksize=(64, 2), meta=np.ndarray>\n",
       "    lon_bnds   (lon, bnds) float64 dask.array<chunksize=(128, 2), meta=np.ndarray>\n",
       "    hus        (time, plev, lat, lon) float32 dask.array<chunksize=(1032, 19, 64, 128), meta=np.ndarray>\n",
       "Attributes: (12/54)\n",
       "    CCCma_model_hash:                fc4bb7db954c862d023b546e19aec6c588bc0552\n",
       "    CCCma_parent_runid:              p2-his14\n",
       "    CCCma_pycmor_hash:               26c970628162d607fffd14254956ebc6dd3b6f49\n",
       "    CCCma_runid:                     p2-s4514\n",
       "    Conventions:                     CF-1.7 CMIP-6.2\n",
       "    YMDH_branch_time_in_child:       2015:01:01:00\n",
       "    ...                              ...\n",
       "    variable_id:                     hus\n",
       "    variant_label:                   r14i1p2f1\n",
       "    version:                         v20190429\n",
       "    license:                         CMIP6 model data produced by The Governm...\n",
       "    cmor_version:                    3.5.0\n",
       "    DODS_EXTRA.Unlimited_Dimension:  time
" ], "text/plain": [ "\n", "Dimensions: (time: 1032, bnds: 2, plev: 19, lat: 64, lon: 128)\n", "Coordinates:\n", " * time (time) object 2015-01-16 12:00:00 ... 2100-12-16 12:00:00\n", " * plev (plev) float64 1e+05 9.25e+04 8.5e+04 7e+04 ... 1e+03 500.0 100.0\n", " * lat (lat) float64 -87.86 -85.1 -82.31 -79.53 ... 82.31 85.1 87.86\n", " * lon (lon) float64 0.0 2.812 5.625 8.438 ... 348.8 351.6 354.4 357.2\n", "Dimensions without coordinates: bnds\n", "Data variables:\n", " time_bnds (time, bnds) object dask.array\n", " lat_bnds (lat, bnds) float64 dask.array\n", " lon_bnds (lon, bnds) float64 dask.array\n", " hus (time, plev, lat, lon) float32 dask.array\n", "Attributes: (12/54)\n", " CCCma_model_hash: fc4bb7db954c862d023b546e19aec6c588bc0552\n", " CCCma_parent_runid: p2-his14\n", " CCCma_pycmor_hash: 26c970628162d607fffd14254956ebc6dd3b6f49\n", " CCCma_runid: p2-s4514\n", " Conventions: CF-1.7 CMIP-6.2\n", " YMDH_branch_time_in_child: 2015:01:01:00\n", " ... ...\n", " variable_id: hus\n", " variant_label: r14i1p2f1\n", " version: v20190429\n", " license: CMIP6 model data produced by The Governm...\n", " cmor_version: 3.5.0\n", " DODS_EXTRA.Unlimited_Dimension: time" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Open the opendap link with xarray\n", "import xarray as xr\n", "\n", "ds = xr.open_mfdataset(urls)\n", "ds" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.16" } }, "nbformat": 4, "nbformat_minor": 4 }