diff --git a/PRR/smartch4_signatures_modified.ipynb b/PRR/smartch4_signatures_modified.ipynb new file mode 100755 index 0000000..c299541 --- /dev/null +++ b/PRR/smartch4_signatures_modified.ipynb @@ -0,0 +1,11203 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1f92ff5a", + "metadata": {}, + "source": [ + "# SmartCH4 project - δ13CH4 signatures\n", + "\n", + "This notebook shows how to:\n", + "\n", + "1) Combine a number of .nc files into a .zarr\n", + "2) Generate a valid STAC collection, which is a requirement to upload research outcomes to the [ESA Project Results Repository (PRR)](https://eoresults.esa.int/). The code below demonstrates how to perform the necessary steps using real data from the ESA project **SMART-CH4**. The focus of SMART-CH4 is to build upon previous experience and projects in satellite-based methane quantification, aiming to enhance emission products derived from satellites.\n", + "\n", + "\n", + "Check the [EarthCODE documentation](https://earthcode.esa.int/), and [PRR STAC introduction example](https://esa-earthcode.github.io/tutorials/prr-stac-introduction) for a more general introduction to STAC and the ESA PRR.\n", + "\n", + "\n", + "\n", + "🔗 Check the project website: [SMART-CH4 – Website](https://smart-ch4.lsce.ipsl.fr/) \n", + "\n", + "\n", + "🔗 Check the eo4society page: [SMART-CH4 – eo4society](https://eo4society.esa.int/projects/smart-ch4/)\n", + "\n", + "\n", + "#### Acknowledgment \n", + "We gratefully acknowledge the **SMART-CH4** project team for providing access to the data used in this example, as well as their support in creating it." + ] + }, + { + "cell_type": "markdown", + "id": "a77cab24", + "metadata": {}, + "source": [ + "# 1. 
Combine the data into a single .zarr file" + ] + }, + { + "cell_type": "code", + "execution_count": 244, + "id": "3333ec9c", + "metadata": {}, + "outputs": [], + "source": [ + "# import libraries\n", + "import xarray as xr\n", + "from pystac import Item, Collection\n", + "import pystac\n", + "from datetime import datetime\n", + "from shapely.geometry import box, mapping\n", + "from xstac import xarray_to_stac\n", + "import glob\n", + "import json\n", + "import shapely\n", + "import numpy as np\n", + "import geopandas as gpd\n", + "import pandas as pd\n", + "import os\n", + "import scipy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5bdd14f3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/home/etapin/TĂ©lĂ©chargements/5_zipped/data',\n", + " '/home/etapin/TĂ©lĂ©chargements/5_zipped/metadata']" + ] + }, + "execution_count": 245, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import glob\n", + "datadir = '/mnt/c/Users/krase/Downloads/smart_CH4_d13c_ch4/5_zipped/data/*'\n", + "subdirectories = glob.glob(datadir)\n", + "subdirectories" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "81575763", + "metadata": {}, + "outputs": [], + "source": [ + "agw_files = glob.glob('/mnt/c/Users/krase/Downloads/smart_CH4_d13c_ch4/5_zipped/data/smart_CH4_d13c_ch4-agw' + '/*')\n", + "bb_files = glob.glob('/mnt/c/Users/krase/Downloads/smart_CH4_d13c_ch4/5_zipped/data/smart_CH4_d13c_ch4-bb' + '/*')\n", + "ff_files = glob.glob('/mnt/c/Users/krase/Downloads/smart_CH4_d13c_ch4/5_zipped/data/smart_CH4_d13c_ch4-ff' + '/*')\n", + "nat_files = glob.glob('/mnt/c/Users/krase/Downloads/smart_CH4_d13c_ch4/5_zipped/data/smart_CH4_d13c_ch4-nat' + '/*')\n", + "wet_files = glob.glob('/mnt/c/Users/krase/Downloads/smart_CH4_d13c_ch4/5_zipped/data/smart_CH4_d13c_ch4-wet' + '/*')\n", + "\n", + "short_names = [\"agw\", \"bb\", \"ffg\", \"nat\", \"wet\"]\n", + "long_names = [\n", + " \"agricultural and 
waste\",\n", + " \"biomass burning\",\n", + " \"fossil fuel and geological\",\n", + " \"natural non-wetland\",\n", + " \"wetlands\"\n", + "]\n", + "\n", + "subsector_files = glob.glob('/mnt/c/Users/krase/Downloads/smart_CH4_d13c_ch4/5_zipped/data/smart_CH4_d13c_ch4-subsectors' + '/*')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ac4b6ff", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2022.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2021.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2020.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2002.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2016.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2011.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2017.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2013.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2001.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2008.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2009.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2012.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2000.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.1998.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2006.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2018.1x1.nc', 
'/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2004.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2005.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2010.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2003.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2007.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2014.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2019.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.1999.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-agw/d13c.AGW.2015.1x1.nc']\n", + "['/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2020.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2016.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2010.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2003.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2019.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2014.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2007.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2021.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2005.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2002.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2000.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2017.1x1.nc', 
'/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2009.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2008.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2011.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.1999.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2001.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2013.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2004.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2022.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.1998.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2015.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2012.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2006.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-bb/d13c.BB.2018.1x1.nc']\n", + "['/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2004.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2005.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2001.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2015.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2006.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2021.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2009.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2020.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2008.1x1.nc', 
'/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2014.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2016.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2010.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2019.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.1999.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2017.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2022.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.1998.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2012.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2013.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2003.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2000.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2002.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2018.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2011.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-ff/d13c.FF.2007.1x1.nc']\n", + "['/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2012.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.1998.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2013.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2015.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2008.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2009.1x1.nc', 
'/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2004.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2001.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2021.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2014.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2016.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2006.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2019.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2018.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2010.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2003.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2000.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2022.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2017.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2007.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.1999.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2011.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2020.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2002.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-nat/d13c.NAT.2005.1x1.nc']\n", + "['/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2015.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2003.1x1.nc', 
'/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2018.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2000.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2008.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2006.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2010.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2002.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2014.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2022.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2020.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2004.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.1998.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2001.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2021.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2009.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2019.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2011.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.1999.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2007.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2013.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2016.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2005.1x1.nc', 
'/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2012.1x1.nc', '/home/etapin/TĂ©lĂ©chargements/5_zipped/data/smart_CH4_d13c_ch4-wet/d13c.WET.2017.1x1.nc']\n" + ] + } + ], + "source": [ + "dss = []\n", + "\n", + "for sname,lname,files in zip(short_names, long_names, [agw_files, bb_files, ff_files, nat_files, wet_files]):\n", + " print(files)\n", + " ds = xr.open_mfdataset(\n", + " files,\n", + " concat_dim=\"time\",\n", + " combine=\"nested\",\n", + " compat=\"equals\",\n", + " join=\"exact\"\n", + " )\n", + " # replace acronym with long name\n", + " ds.sign_d13c.attrs['long_name'] = ds.sign_d13c.long_name.replace(sname.upper(), lname)\n", + " # assign all the sigma values to each variable\n", + " ds.sign_d13c.attrs.update({k:v for k,v in ds.attrs.items() if k.startswith('sigma_')})\n", + " # rename sign_d13c to have accronym extension\n", + " ds = ds.rename_vars({'sign_d13c': f'sign_d13c_{sname}'})\n", + "\n", + " ds.attrs = {\n", + " \"title\": \"d13C(CH4) isotopic signature\",\n", + " \"history\": \"Sub-sectors aggregated into sectors (flux-weighted). NaN values filled with flux-weighted global mean.\",\n", + " \"time_period\": \"Monthly data for 1998-2022\",\n", + " \"fluxes_source_for_flux_weighted\": \"AGRICULTURAL_WASTE, BIOFUELS, BIOMASS, COAL, LANDFILLS, LIVESTOCK, OILGASIND, RICE, WASTE_WATER: EDGARv8 (Crippa et al. 2023), GEOLOGICAL, OCEANIC, TERMITES, WETLAND: Prior fluxes provided with the Global Methane budget inversion protocol (Martinez et al. 2024)\",\n", + " \"signature_source\" : \"LIVESTOCK, BIOFUELS, BIOMASS: LAN ET. AL (2021), WASTE_WATER, LANDFILLS, AGRICULTURAL_WASTE, RICE: MENOUD ET AL. (2022), COAL, OILGASIND: LAN ET. AL (2021), MENOUD ET AL. (2022), WETLAND: OH ET AL. (2022), OCEANIC: SANSONE ET AL. (2001), TERMITES: THOMPSON ET AL. (2018), GEOLOGICAL: ETIOPE ET AL. 
(2019)\",\n", + " \"created_by\": \"Emeline Tapin on 29 September 2025\",\n", + " \"creator_email\": \"emeline.tapin@lsce.ipsl.fr\",\n", + " \"institution\": \"LSCE (Laboratoire des Sciences du Climat et de l’Environnement)\",\n", + " \"conventions\": \"CF-1.9\"\n", + " }\n", + " \n", + " dss.append(ds)" + ] + }, + { + "cell_type": "code", + "execution_count": 248, + "id": "9045a7da", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 778MB\n",
+       "Dimensions:        (latitude: 180, longitude: 360, time: 300)\n",
+       "Coordinates:\n",
+       "  * latitude       (latitude) float64 1kB -89.5 -88.5 -87.5 ... 87.5 88.5 89.5\n",
+       "  * longitude      (longitude) float64 3kB -179.5 -178.5 -177.5 ... 178.5 179.5\n",
+       "  * time           (time) datetime64[ns] 2kB 1998-01-01 ... 2022-12-01\n",
+       "Data variables:\n",
+       "    sign_d13c_agw  (time, latitude, longitude) float64 156MB dask.array<chunksize=(12, 180, 360), meta=np.ndarray>\n",
+       "    sign_d13c_bb   (time, latitude, longitude) float64 156MB dask.array<chunksize=(12, 180, 360), meta=np.ndarray>\n",
+       "    sign_d13c_ffg  (time, latitude, longitude) float64 156MB dask.array<chunksize=(12, 180, 360), meta=np.ndarray>\n",
+       "    sign_d13c_nat  (time, latitude, longitude) float64 156MB dask.array<chunksize=(12, 180, 360), meta=np.ndarray>\n",
+       "    sign_d13c_wet  (time, latitude, longitude) float64 156MB dask.array<chunksize=(12, 180, 360), meta=np.ndarray>\n",
+       "Attributes:\n",
+       "    title:                            d13C(CH4) isotopic signature\n",
+       "    history:                          Sub-sectors aggregated into sectors (fl...\n",
+       "    time_period:                      Monthly data for 1998-2022\n",
+       "    fluxes_source_for_flux_weighted:  AGRICULTURAL_WASTE, BIOFUELS, BIOMASS, ...\n",
+       "    signature_source:                 LIVESTOCK, BIOFUELS, BIOMASS: LAN ET. A...\n",
+       "    created_by:                       Emeline Tapin on 29 September 2025\n",
+       "    creator_email:                    emeline.tapin@lsce.ipsl.fr\n",
+       "    institution:                      LSCE (Laboratoire des Sciences du Clima...\n",
+       "    conventions:                      CF-1.9
" + ], + "text/plain": [ + " Size: 778MB\n", + "Dimensions: (latitude: 180, longitude: 360, time: 300)\n", + "Coordinates:\n", + " * latitude (latitude) float64 1kB -89.5 -88.5 -87.5 ... 87.5 88.5 89.5\n", + " * longitude (longitude) float64 3kB -179.5 -178.5 -177.5 ... 178.5 179.5\n", + " * time (time) datetime64[ns] 2kB 1998-01-01 ... 2022-12-01\n", + "Data variables:\n", + " sign_d13c_agw (time, latitude, longitude) float64 156MB dask.array\n", + " sign_d13c_bb (time, latitude, longitude) float64 156MB dask.array\n", + " sign_d13c_ffg (time, latitude, longitude) float64 156MB dask.array\n", + " sign_d13c_nat (time, latitude, longitude) float64 156MB dask.array\n", + " sign_d13c_wet (time, latitude, longitude) float64 156MB dask.array\n", + "Attributes:\n", + " title: d13C(CH4) isotopic signature\n", + " history: Sub-sectors aggregated into sectors (fl...\n", + " time_period: Monthly data for 1998-2022\n", + " fluxes_source_for_flux_weighted: AGRICULTURAL_WASTE, BIOFUELS, BIOMASS, ...\n", + " signature_source: LIVESTOCK, BIOFUELS, BIOMASS: LAN ET. 
A...\n", + " created_by: Emeline Tapin on 29 September 2025\n", + " creator_email: emeline.tapin@lsce.ipsl.fr\n", + " institution: LSCE (Laboratoire des Sciences du Clima...\n", + " conventions: CF-1.9" + ] + }, + "execution_count": 248, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds = xr.merge(dss)\n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": 249, + "id": "9d12443f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing subsector: geological\n", + "Processing subsector: coal\n", + "Processing subsector: termites\n", + "Processing subsector: oceanic\n", + "Processing subsector: rice\n", + "Processing subsector: landfills\n", + "Processing subsector: agricultural_waste\n", + "Processing subsector: livestock\n", + "Processing subsector: wetlands\n", + "Processing subsector: biomassbiofuels\n", + "Processing subsector: waste_water\n", + "Processing subsector: oilandgas\n", + "\n", + "Merging subsector data with main dataset...\n", + "\n", + " Final dataset now contains 17 variables:\n", + " - sign_d13c_agw: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))\n", + " - sign_d13c_bb: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))\n", + " - sign_d13c_ffg: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))\n", + " - sign_d13c_nat: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))\n", + " - sign_d13c_wet: ('time', 'latitude', 'longitude'), 
dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))\n", + " - sign_d13c_sub_sector_geological: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))\n", + " - sign_d13c_sub_sector_coal: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))\n", + " - sign_d13c_sub_sector_termites: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))\n", + " - sign_d13c_sub_sector_oceanic: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))\n", + " - sign_d13c_sub_sector_rice: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))\n", + " - sign_d13c_sub_sector_landfills: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))\n", + " - sign_d13c_sub_sector_agricultural_waste: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))\n", + " - sign_d13c_sub_sector_livestock: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))\n", + " - sign_d13c_sub_sector_wetlands: ('time', 'latitude', 
'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))\n", + " - sign_d13c_sub_sector_biomassbiofuels: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))\n", + " - sign_d13c_sub_sector_waste_water: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))\n", + " - sign_d13c_sub_sector_oilandgas: ('time', 'latitude', 'longitude'), dtype=float64, chunks=((12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), (90, 90), (180, 180))\n" + ] + } + ], + "source": [ + "### Process subsectors\n", + "\n", + "# Load and process all subsector datasets\n", + "subsector_datasets = []\n", + "\n", + "for subsector_file in subsector_files:\n", + " # Extract filename and subsector name\n", + " filename = os.path.basename(subsector_file)\n", + " subsector_name = filename.split('.')[1]\n", + " print(f\"Processing subsector: {subsector_name}\")\n", + "\n", + " subsector_ds = xr.open_dataset(\n", + " subsector_file,\n", + " chunks={\"time\": 12, \"latitude\": 180, \"longitude\": 360}\n", + " )\n", + " # Check and align the time dimension\n", + " if 'time' in subsector_ds.dims:\n", + " if len(subsector_ds.time) == 24: # for annual data\n", + " subsector_ds = subsector_ds.reindex(\n", + " time=ds.time, method=None, fill_value=np.nan\n", + " )\n", + " subsector_ds = subsector_ds.ffill(dim='time')\n", + "\n", + " else: \n", + " subsector_ds = subsector_ds.interp(time=ds.time, method='linear')\n", + " else:\n", + " # Replicate the data along the main dataset time dimension\n", + " subsector_ds_list = [subsector_ds for _ in range(len(ds.time))]\n", + " subsector_ds = 
xr.concat(subsector_ds_list, dim='time')\n", + " subsector_ds = subsector_ds.assign_coords(time=ds.time)\n", + "\n", + " # Reorder dimensions to match (time, latitude, longitude)\n", + " subsector_ds = subsector_ds.transpose(\"time\", \"latitude\", \"longitude\")\n", + "\n", + " # Define standardized variable name\n", + " var_name = f'sign_d13c_sub_sector_{subsector_name}'\n", + " subsector_ds = subsector_ds.rename({'sign_d13c': var_name})\n", + "\n", + " # Harmonize variable attributes\n", + " subsector_ds[var_name].attrs['long_name'] = f\"Monthly ÎŽ13C(CH4) isotopic signature for {subsector_name.replace('_', ' ')}\"\n", + " subsector_ds[var_name].attrs.update({k:v for k,v in subsector_ds.attrs.items() if k.startswith('std_')})\n", + " subsector_datasets.append(subsector_ds)\n", + "\n", + "# Clean and consistent merge with main dataset\n", + "print(\"\\nMerging subsector data with main dataset...\")\n", + "for subsector_ds in subsector_datasets:\n", + " ds = xr.merge([ds, subsector_ds], compat='override', join='exact')\n", + " \n", + "# Rechunk all variables to the same chunk size\n", + "ds = ds.chunk({\"time\": 12, \"latitude\": 90, \"longitude\": 180})\n", + "\n", + "# Final verification\n", + "print(f\"\\n Final dataset now contains {len(ds.data_vars)} variables:\")\n", + "for var in ds.data_vars:\n", + " v = ds[var]\n", + " print(f\" - {var}: {v.dims}, dtype={v.dtype}, chunks={getattr(v.data, 'chunks', None)}\")" + ] + }, + { + "cell_type": "markdown", + "id": "6161e2c3", + "metadata": {}, + "source": [ + "## 2. Generate metadata\n", + "\n", + "The root STAC Collection provides a general description of all project outputs which will be stored on the PRR.\n", + "The PRR STAC Collection template enforces some required fields that you need to provide in order to build its valid description. 
Most of these metadata fields should already be available and can be extracted from your data.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 237, + "id": "4029fcb4-4298-4236-8413-923d6bf64c11", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + " <Collection id=d13c-ch4-signatures-smart-ch4>\n", + "
\n", + "\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 237, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# create the parent collection\n", + "collectionid = \"d13c-ch4-signatures-smart-ch4\"\n", + "\n", + "\n", + "collection = Collection.from_dict(\n", + " \n", + "{\n", + " \"type\": \"Collection\",\n", + " \"id\": collectionid,\n", + " \"stac_version\": \"1.1.0\",\n", + " \"title\": \"Global delta-13C(CH4) Source Signatures\",\n", + " \"description\": \"This dataset provides monthly gridded ÎŽ13C(CH₄) isotopic signatures (‰ vs V-PDB) for five methane emission sectors: Agricultural & Waste (AGW), Fossil Fuel & Geological (FFG), Biomass Burning (BB), Natural non-wetland (NAT), and Wetlands (WET), and their subsequent sub-sectors, at 1°×1° spatial resolution for the period 1998–2022. Each sector file contains monthly flux-weighted ÎŽ13C(CH₄) signatures computed from prior CH₄ emissions used in the Global Methane Budget inversion protocol (Martinez et al., 2024), EDGARv8 (Crippa et al., 2023), and GFED4s (van Wees et al., 2022). Sub-sector ÎŽ13C-CH₄ source values are derived from recent literature (Lan et al., 2021; Menoud et al., 2022; Oh et al., 2022; Thompson et al., 2018; Etiope et al., 2019; Sansone et al., 2001). The dataset also includes uncertainty components: propagated (σprop), aggregation (σagg), and total (σtotal), expressed in ‰ vs V-PDB. 
Data are suitable for use in atmospheric inversion and isotope modeling frameworks.\",\n", + " \"extent\": {\n", + " \"spatial\": {\n", + " \"bbox\": [\n", + " [-180, -90, 180, 90]\n", + " ]\n", + " },\n", + " \"temporal\": {\n", + " \"interval\": [\n", + " [\n", + " \"1998-01-01T00:00:00Z\",\n", + " \"2022-12-31T23:59:59Z\"\n", + " ]\n", + " ]\n", + " }\n", + " },\n", + " \"license\": \"CC-BY-4.0\",\n", + " \"links\": []\n", + "\n", + "}\n", + "\n", + ")\n", + "\n", + "collection # visualise the metadata of your collection " + ] + }, + { + "cell_type": "markdown", + "id": "2ab07efc", + "metadata": {}, + "source": [ + "## Create STAC Items and STAC Assets from original dataset \n", + "\n", + "The second step is to describe the different files as STAC Items and Assets. Take your time to decide how your data should be categorised to improve usability of the data, and ensure intuitive navigation through different items in the collections. There are multiple strategies for doing this, and this tutorial demonstrates one possible way of doing so. Examples of how other ESA projects are doing this are available in the [EarthCODE documentation](https://esa-earthcode.github.io/tutorials/prr-stac-introduction)." + ] + }, + { + "cell_type": "code", + "execution_count": 238, + "id": "f3030914-50f9-493e-abfd-4f7cca4fb1ab", + "metadata": {}, + "outputs": [], + "source": [ + "bbox = [-180, -90, 180, 90]\n", + "geometry = json.loads(json.dumps(shapely.box(*bbox).__geo_interface__))" + ] + }, + { + "cell_type": "code", + "execution_count": 239, + "id": "ee61a0f7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + " <Link rel=item target=<Item id=d13c-ch4-signatures-smart-ch4-zarr>>\n", + "
\n", + "\n", + "
" + ], + "text/plain": [ + ">" + ] + }, + "execution_count": 239, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "template = {\n", + "\n", + " \"id\": f\"{collectionid}-zarr\",\n", + " \"type\": \"Feature\",\n", + " \"stac_version\": \"1.0.0\",\n", + " \"properties\": {\n", + " \"title\": 'd13C(CH4) isotopic signatures',\n", + " \"history\":ds.attrs['history'],\n", + " 'time_period': \"Monthly data from 1998 to 2022\",\n", + " \"fluxes_source_for_flux_weighted\": ds.attrs['fluxes_source_for_flux_weighted'],\n", + " \"signature_source\": ds.attrs['signature_source'],\n", + " \"start_datetime\": \"1998-01-01T00:00:00Z\",\n", + " \"end_datetime\": \"2022-12-31T23:59:59Z\",\n", + " \"license\": \"CC-BY-4.0\",\n", + " 'Conventions': 'CF-1.9',\n", + " \"references\": [\n", + " \"Lan et al. (2021)\",\n", + " \"Menoud et al. (2022)\",\n", + " \"Oh et al. (2022)\",\n", + " \"Thompson et al. (2018)\",\n", + " \"Etiope et al. (2019)\",\n", + " \"Sansone et al. (2001)\",\n", + " \"Crippa et al. (2023)\",\n", + " \"Martinez et al. (2024)\"\n", + " ],\n", + " },\n", + " \"geometry\": geometry,\n", + " \"bbox\": bbox,\n", + " \"assets\": {\n", + " \"data\": {\n", + " \"href\": f\"./{collectionid}/ds.zarr\", # or local path\n", + " \"type\": \"application/vnd+zarr\",\n", + " \"roles\": [\"data\"],\n", + " \"title\": \"Zarr Store of d13C(CH4) signatures\"\n", + " }\n", + " }\n", + "}\n", + "# 3. 
Generate the STAC Item\n", + "item = xarray_to_stac(\n", + " ds,\n", + " template,\n", + " temporal_dimension=\"time\" if 'time' in ds.coords else False,\n", + " x_dimension='longitude',\n", + " y_dimension='latitude',\n", + " reference_system=False\n", + ")\n", + "\n", + "# validate and add the STAC Item to the collection\n", + "item.validate()\n", + "collection.add_item(item)" + ] + }, + { + "cell_type": "code", + "execution_count": 240, + "id": "2c2ee4de-afd1-41fb-9afd-73ccbfe8ef98", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + " <Collection id=d13c-ch4-signatures-smart-ch4>\n", + "
\n", + "\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 240, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "collection" + ] + }, + { + "cell_type": "code", + "execution_count": 241, + "id": "3c358a3e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + " <Item id=d13c-ch4-signatures-smart-ch4-zarr>\n", + "
\n", + "\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 241, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "item" + ] + }, + { + "cell_type": "code", + "execution_count": 242, + "id": "66fe3b40", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 3GB\n",
+       "Dimensions:                                  (latitude: 180, longitude: 360,\n",
+       "                                              time: 300)\n",
+       "Coordinates:\n",
+       "  * latitude                                 (latitude) float64 1kB -89.5 ......\n",
+       "  * longitude                                (longitude) float64 3kB -179.5 ....\n",
+       "  * time                                     (time) datetime64[ns] 2kB 1998-0...\n",
+       "Data variables: (12/17)\n",
+       "    sign_d13c_agw                            (time, latitude, longitude) float64 156MB dask.array<chunksize=(12, 90, 180), meta=np.ndarray>\n",
+       "    sign_d13c_bb                             (time, latitude, longitude) float64 156MB dask.array<chunksize=(12, 90, 180), meta=np.ndarray>\n",
+       "    sign_d13c_ffg                            (time, latitude, longitude) float64 156MB dask.array<chunksize=(12, 90, 180), meta=np.ndarray>\n",
+       "    sign_d13c_nat                            (time, latitude, longitude) float64 156MB dask.array<chunksize=(12, 90, 180), meta=np.ndarray>\n",
+       "    sign_d13c_wet                            (time, latitude, longitude) float64 156MB dask.array<chunksize=(12, 90, 180), meta=np.ndarray>\n",
+       "    sign_d13c_sub_sector_geological          (time, latitude, longitude) float64 156MB dask.array<chunksize=(12, 90, 180), meta=np.ndarray>\n",
+       "    ...                                       ...\n",
+       "    sign_d13c_sub_sector_agricultural_waste  (time, latitude, longitude) float64 156MB dask.array<chunksize=(12, 90, 180), meta=np.ndarray>\n",
+       "    sign_d13c_sub_sector_livestock           (time, latitude, longitude) float64 156MB dask.array<chunksize=(12, 90, 180), meta=np.ndarray>\n",
+       "    sign_d13c_sub_sector_wetlands            (time, latitude, longitude) float64 156MB dask.array<chunksize=(12, 90, 180), meta=np.ndarray>\n",
+       "    sign_d13c_sub_sector_biomassbiofuels     (time, latitude, longitude) float64 156MB dask.array<chunksize=(12, 90, 180), meta=np.ndarray>\n",
+       "    sign_d13c_sub_sector_waste_water         (time, latitude, longitude) float64 156MB dask.array<chunksize=(12, 90, 180), meta=np.ndarray>\n",
+       "    sign_d13c_sub_sector_oilandgas           (time, latitude, longitude) float64 156MB dask.array<chunksize=(12, 90, 180), meta=np.ndarray>\n",
+       "Attributes:\n",
+       "    title:                            d13C(CH4) isotopic signature\n",
+       "    history:                          Sub-sectors aggregated into sectors (fl...\n",
+       "    time_period:                      Monthly data for 1998-2022\n",
+       "    fluxes_source_for_flux_weighted:  AGRICULTURAL_WASTE, BIOFUELS, BIOMASS, ...\n",
+       "    signature_source:                 LIVESTOCK, BIOFUELS, BIOMASS: LAN ET. A...\n",
+       "    created_by:                       Emeline Tapin on 29 September 2025\n",
+       "    creator_email:                    emeline.tapin@lsce.ipsl.fr\n",
+       "    institution:                      LSCE (Laboratoire des Sciences du Clima...\n",
+       "    conventions:                      CF-1.9
" + ], + "text/plain": [ + " Size: 3GB\n", + "Dimensions: (latitude: 180, longitude: 360,\n", + " time: 300)\n", + "Coordinates:\n", + " * latitude (latitude) float64 1kB -89.5 ......\n", + " * longitude (longitude) float64 3kB -179.5 ....\n", + " * time (time) datetime64[ns] 2kB 1998-0...\n", + "Data variables: (12/17)\n", + " sign_d13c_agw (time, latitude, longitude) float64 156MB dask.array\n", + " sign_d13c_bb (time, latitude, longitude) float64 156MB dask.array\n", + " sign_d13c_ffg (time, latitude, longitude) float64 156MB dask.array\n", + " sign_d13c_nat (time, latitude, longitude) float64 156MB dask.array\n", + " sign_d13c_wet (time, latitude, longitude) float64 156MB dask.array\n", + " sign_d13c_sub_sector_geological (time, latitude, longitude) float64 156MB dask.array\n", + " ... ...\n", + " sign_d13c_sub_sector_agricultural_waste (time, latitude, longitude) float64 156MB dask.array\n", + " sign_d13c_sub_sector_livestock (time, latitude, longitude) float64 156MB dask.array\n", + " sign_d13c_sub_sector_wetlands (time, latitude, longitude) float64 156MB dask.array\n", + " sign_d13c_sub_sector_biomassbiofuels (time, latitude, longitude) float64 156MB dask.array\n", + " sign_d13c_sub_sector_waste_water (time, latitude, longitude) float64 156MB dask.array\n", + " sign_d13c_sub_sector_oilandgas (time, latitude, longitude) float64 156MB dask.array\n", + "Attributes:\n", + " title: d13C(CH4) isotopic signature\n", + " history: Sub-sectors aggregated into sectors (fl...\n", + " time_period: Monthly data for 1998-2022\n", + " fluxes_source_for_flux_weighted: AGRICULTURAL_WASTE, BIOFUELS, BIOMASS, ...\n", + " signature_source: LIVESTOCK, BIOFUELS, BIOMASS: LAN ET. 
A...\n", + " created_by: Emeline Tapin on 29 September 2025\n", + " creator_email: emeline.tapin@lsce.ipsl.fr\n", + " institution: LSCE (Laboratoire des Sciences du Clima...\n", + " conventions: CF-1.9" + ] + }, + "execution_count": 242, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0520979f", + "metadata": {}, + "outputs": [], + "source": [ + "# save the full self-contained collection\n", + "collection.normalize_and_save(\n", + " root_href=f'../../prr_preview/{collectionid}',\n", + " catalog_type=pystac.CatalogType.SELF_CONTAINED\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6114f728-d75a-4447-87a9-673bf7b219ef", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + " <Collection id=d13c-ch4-signatures-smart-ch4>\n", + "
\n", + "\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "collection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e530c429", + "metadata": {}, + "outputs": [], + "source": [ + "# verify data is correctly aligned" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb635e98", + "metadata": {}, + "outputs": [], + "source": [ + "assert np.isclose(\n", + " #original data\n", + " xr.open_dataset(ff_files[3]).sign_d13c.sel(time='2001-05-01').values,\n", + " # data from the zarr store\n", + " ds.sign_d13c_ffg.sel(time='2001-05-01').values\n", + ").all()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5b716252", + "metadata": {}, + "outputs": [], + "source": [ + "ds.to_zarr('../../data/d13c-ch4-signatures-smart-ch4.zarr', mode='w')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pangeo", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/myst.yml b/myst.yml index dbcd6e9..4aae667 100644 --- a/myst.yml +++ b/myst.yml @@ -19,6 +19,7 @@ project: file: PRR/index.md children: - file: PRR/PRR_STAC_introduction.ipynb + - file: PRR/smartch4_signatures_modified.ipynb - file: PRR/prr_zarr.ipynb - file: PRR/Creating STAC Catalog_from_PRR_example.ipynb - file: PRR/TCCAS_v2.ipynb