{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "36946ed5accfe23f",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:31.104506100Z",
     "start_time": "2024-02-13T15:01:30.716982800Z"
    },
    "collapsed": false,
    "tags": [
     "remove-cell"
    ]
   },
   "outputs": [],
   "source": [
    "#|hide\n",
    "#|default_exp pyrnet"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d78bebdfa9212994",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "# PyrNet high level data\n",
    "In the following, high-level functions to read and examine PyrNet data are collected."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b973162c8d5f940e",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:32.991656300Z",
     "start_time": "2024-02-13T15:01:30.729509300Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#|export\n",
    "from collections.abc import Iterable\n",
    "from xml.dom import minidom\n",
    "from urllib.request import urlopen\n",
    "import parse\n",
    "import os\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import xarray as xr\n",
    "from scipy.interpolate import interp1d\n",
    "from toolz import valfilter, cons, merge, merge_with\n",
    "#import pkg_resources as pkg_res\n",
    "import importlib.resources\n",
    "import warnings\n",
    "\n",
    "# python -m pip install git+https://github.com/hdeneke/trosat-base.git#egg=trosat-base\n",
    "from trosat import sunpos as sp\n",
    "\n",
    "from pyrnet import utils as pyrutils"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "f98572272d198f36",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.505973200Z",
     "start_time": "2024-02-13T15:01:32.995645800Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# extra imports for demonstration\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib.dates as mdates\n",
    "import datetime as dt"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3674d953e66a80c5",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "## Load Data from Thredds-Server\n",
    "Acquire processed data from server"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e5b666afb1398ea7",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.527999300Z",
     "start_time": "2024-02-13T15:01:33.511956800Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#|export\n",
    "# campaign file name map for hdcp2 data\n",
    "campaign_pfx = {\n",
    "    'eifel': 'hope',\n",
    "    'hope_juelich': 'hope',\n",
    "    'hope_melpitz': 'hopm',\n",
    "    'lindenberg': 'ioprao',\n",
    "    'melcol': 'mcol',\n",
    "}\n",
    "\n",
    "# TROPOS thredds urls templates\n",
    "DATA_URL = \"https://tds.tropos.de/thredds/dodsC/scccJher/{dt:%Y}_{campaign}/\"\n",
    "FNAME_FMT_HDCP2 = '{campaign_pfx}_trop_pyrnet00_l1_rsds_v00_{dt:%Y%m%d}000000.nc'\n",
    "\n",
    "# configuration constants\n",
    "SOLCONST = 1359.0 # Solar constant in Wm-2\n",
    "MAX_MISSING = 1000 # Maximum allowed number of missing records\n",
    "MIN_GOOD = 85400 # Minimum allowed number of good records\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d90ccb72313dc7cd",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "### Thredds lookups"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ca980a19c8c6e3cd",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.570907Z",
     "start_time": "2024-02-13T15:01:33.542893Z"
    },
    "collapsed": false,
    "tags": [
     "hide-input"
    ]
   },
   "outputs": [],
   "source": [
    "#|export\n",
    "#|dropcode\n",
    "def get_elements(url, tag_name='dataset', attribute_name='urlPath'):\n",
    "    \"\"\"\n",
    "    Collect one attribute from every matching element of a remote XML document.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    url: str\n",
    "        Address of the XML document, opened with urllib.request.urlopen.\n",
    "    tag_name: str\n",
    "        Name of the XML elements to search for. The default is 'dataset'.\n",
    "    attribute_name: str\n",
    "        Attribute to read from each element. The default is 'urlPath'.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list of str\n",
    "        One attribute value per matching element.\n",
    "    \"\"\"\n",
    "    # the context manager closes the connection even if parsing fails\n",
    "    with urlopen(url) as usock:\n",
    "        xmldoc = minidom.parse(usock)\n",
    "    tags = xmldoc.getElementsByTagName(tag_name)\n",
    "    return [tag.getAttribute(attribute_name) for tag in tags]\n",
    "\n",
    "def parse_thredds_catalog(url, fname_format):\n",
    "    \"\"\"\n",
    "    Parse a Thredds server catalog and return a pd.DataFrame of file name format variables.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    url: str\n",
    "        Address of the Thredds catalog.xml.\n",
    "    fname_format: str\n",
    "        File name template in `parse` syntax. The date specification \"%Y-%m-%d\"\n",
    "        is replaced by the `parse` type \"ti\" before matching.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        One row per catalog entry whose base name matches the template and one\n",
    "        column per named template field. Rows are in reverse catalog order.\n",
    "        The frame is empty if no entry matches.\n",
    "    \"\"\"\n",
    "    fname_format = fname_format.replace(\"%Y-%m-%d\",\"ti\")\n",
    "\n",
    "    # collect the named fields of every matching file name once (linear in the number of files)\n",
    "    records = []\n",
    "    for fn in get_elements(url):\n",
    "        res = parse.parse(fname_format, os.path.basename(fn))\n",
    "        if res is not None:\n",
    "            records.append(res.named)\n",
    "\n",
    "    if not records:\n",
    "        # nothing matched -> empty table instead of a failing DataFrame constructor\n",
    "        return pd.DataFrame()\n",
    "\n",
    "    fields = list(records[0])\n",
    "    # keep the established row order: last catalog entry first\n",
    "    records.reverse()\n",
    "    return pd.DataFrame.from_dict({k: [rec[k] for rec in records] for k in fields})\n",
    "\n",
    "def lookup_fnames(date, *, station, lvl, campaign, collection):\n",
    "    \"\"\"\n",
    "    Parse Thredds server files and return the urls of files matching the date, station, level, campaign and collection configuration.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    date: any input accepted by pyrutils.to_datetime64\n",
    "        Day to look up.\n",
    "    station: int, iterable of int or None\n",
    "        Station number(s). If None, the merged network file of that day is preferred;\n",
    "        if it cannot be located, all stations listed in the catalog are looked up.\n",
    "    lvl: str\n",
    "        Processing level. Selects the server directory and the \"output_{lvl}\" file name\n",
    "        template from share/pyrnet_config.json.\n",
    "    campaign: str\n",
    "        Campaign identifier used in the server path and in the file names.\n",
    "    collection: int or None\n",
    "        Collection number. If None, the highest collection found in the catalog is used.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list of str\n",
    "        Urls (built from DATA_URL) of the matching files. Missing files are reported\n",
    "        with a warning and skipped, so the list may be empty.\n",
    "    \"\"\"\n",
    "    date = pyrutils.to_datetime64(date)\n",
    "    pdate = pd.to_datetime(date)\n",
    "\n",
    "    fn = os.path.join(importlib.resources.files(\"pyrnet\"), \"share/pyrnet_config.json\")\n",
    "    pyrcfg = pyrutils.read_json(fn)\n",
    "\n",
    "    # construct data and catalog url\n",
    "    data_url = DATA_URL.format(dt=pdate,campaign=campaign)\n",
    "    catalog_url = data_url.replace(\"dodsC\",\"catalog\")\n",
    "    catalog_url += f\"{lvl}/catalog.xml\"\n",
    "    catalog = parse_thredds_catalog(catalog_url, pyrcfg[f\"output_{lvl}\"])\n",
    "\n",
    "    if station is None:\n",
    "        # prefer the merged network file of the requested day\n",
    "        try:\n",
    "            nlvl = f\"{lvl}_network\"\n",
    "            c = parse_thredds_catalog(catalog_url.replace(f\"/{lvl}/\",f\"/{nlvl}/\"),\n",
    "                                      pyrcfg[f\"output_{nlvl}\"])\n",
    "            c = c.query(f\"dt=='{pdate:%Y-%m-%d}'\")\n",
    "            if c.size==0:\n",
    "                raise ValueError\n",
    "            if collection is None:\n",
    "                col = np.nanmax(c['collection'])\n",
    "            else:\n",
    "                col = collection\n",
    "            fnames = [\n",
    "                pyrcfg[f\"output_{nlvl}\"].format(\n",
    "                    dt=pdate,\n",
    "                    campaign=campaign,\n",
    "                    collection=col,\n",
    "                    sfx=\"nc\"\n",
    "                )\n",
    "            ]\n",
    "            return [data_url + f\"{nlvl}/\"+ fn for fn in fnames]\n",
    "        except Exception:\n",
    "            # best effort: any failure of the network lookup falls back to single station files.\n",
    "            # Exception instead of a bare except keeps KeyboardInterrupt/SystemExit working.\n",
    "            pass\n",
    "\n",
    "    if catalog.empty:\n",
    "        warnings.warn(f\"No files found in catalog {catalog_url}.\")\n",
    "        return []\n",
    "\n",
    "    if station is None:\n",
    "        station = np.unique(catalog[\"station\"].values)\n",
    "\n",
    "    if not isinstance(station, Iterable):\n",
    "        station=[station]\n",
    "\n",
    "    fnames = []\n",
    "    for st in station:\n",
    "        c = catalog.query(f'station=={st}').reset_index()\n",
    "        if c.size==0:\n",
    "            warnings.warn(f\"File of station {st} does not exist.\")\n",
    "            continue\n",
    "\n",
    "        if collection is None:\n",
    "            col = np.nanmax(c['collection'])\n",
    "        else:\n",
    "            col = collection\n",
    "\n",
    "        c = c.query(f\"collection=={col}\").reset_index()\n",
    "        if c.size==0:\n",
    "            warnings.warn(f\"File of station {st}, collection {col} does not exist.\")\n",
    "            continue\n",
    "\n",
    "        if lvl=='l1a':\n",
    "            # l1a file names carry a start and end date instead of a single day\n",
    "            c = catalog.query(f'station=={st} & collection=={col}').reset_index()\n",
    "            startdts = c[\"startdt\"]\n",
    "            enddts = c[\"enddt\"]\n",
    "            # get file index with maintenance interval including date\n",
    "            # NOTE(review): the counting below presumably expects the intervals in ascending order - confirm\n",
    "            idate_start = np.sum(date>=startdts)-1\n",
    "            idate_end = np.sum(date>enddts)\n",
    "            if (idate_start==-1) or (idate_end==enddts.size):\n",
    "                warnings.warn(f\"File of station {st}, level {lvl} at date {date} does not exist.\")\n",
    "                continue\n",
    "\n",
    "            fnames.append(\n",
    "                pyrcfg[f\"output_{lvl}\"].format(\n",
    "                    startdt=pd.to_datetime(startdts[idate_end]),\n",
    "                    enddt=pd.to_datetime(enddts[idate_end]),\n",
    "                    campaign=campaign,\n",
    "                    station=st,\n",
    "                    collection=col,\n",
    "                    sfx=\"nc\"\n",
    "                )\n",
    "            )\n",
    "            if idate_end!=idate_start: # date is on a maintenance day -> combine two datasets\n",
    "                fnames.append(\n",
    "                    pyrcfg[f\"output_{lvl}\"].format(\n",
    "                        startdt=pd.to_datetime(startdts[idate_start]),\n",
    "                        enddt=pd.to_datetime(enddts[idate_start]),\n",
    "                        campaign=campaign,\n",
    "                        station=st,\n",
    "                        collection=col,\n",
    "                        sfx=\"nc\"\n",
    "                    )\n",
    "                )\n",
    "\n",
    "        else:\n",
    "            c = c.query(f\"dt=='{pdate:%Y-%m-%d}'\")\n",
    "            if c.size==0:\n",
    "                warnings.warn(f\"File of station {st}, collection {col} at date {date} does not exist.\")\n",
    "                continue\n",
    "            fnames.append(\n",
    "                pyrcfg[f\"output_{lvl}\"].format(\n",
    "                    dt=pdate,\n",
    "                    campaign=campaign,\n",
    "                    station=st,\n",
    "                    collection=col,\n",
    "                    sfx=\"nc\"\n",
    "                )\n",
    "            )\n",
    "    return [data_url + f\"{lvl}/\"+ fn for fn in fnames]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "a7cea8b7fd07680f",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.570907Z",
     "start_time": "2024-02-13T15:01:33.549393400Z"
    },
    "collapsed": false,
    "tags": [
     "hide-output"
    ]
   },
   "outputs": [],
   "source": [
    "#|dropout\n",
    "# date = dt.date(2019,8,7)\n",
    "# campaign='metpvnet'\n",
    "# lvl='l1b'\n",
    "# \n",
    "# # construct catalog url\n",
    "# url = DATA_URL.format(dt=pd.to_datetime(date),campaign=campaign)\n",
    "# url = url.replace(\"dodsC\",\"catalog\")\n",
    "# url += f\"{lvl}/catalog.xml\"\n",
    "# \n",
    "# tfiles = get_elements(url)\n",
    "# tfiles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e335bf65d4d5b31c",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.570907Z",
     "start_time": "2024-02-13T15:01:33.559782100Z"
    },
    "collapsed": false,
    "tags": [
     "hide-output"
    ]
   },
   "outputs": [],
   "source": [
    "#|dropout\n",
    "fn = os.path.join(importlib.resources.files(\"pyrnet\"), \"share/pyrnet_config.json\")\n",
    "pyrcfg = pyrutils.read_json(fn)\n",
    "# catalog = parse_thredds_catalog(url,pyrcfg[\"output_l1b\"])\n",
    "# catalog.query('station==10')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "42cc9c78afd10065",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.624009200Z",
     "start_time": "2024-02-13T15:01:33.568401600Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# catalog.query('dt==\"2019-07-05\"')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "4adb9198633dbd9",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.625006300Z",
     "start_time": "2024-02-13T15:01:33.581291700Z"
    },
    "collapsed": false,
    "tags": [
     "hide-output"
    ]
   },
   "outputs": [],
   "source": [
    "#|dropout\n",
    "lvl='l1a'\n",
    "station=[86,87,88]\n",
    "date = dt.datetime(2019,7,31)\n",
    "campaign = 'metpvnet'\n",
    "collection= None\n",
    "\n",
    "# lookup_fnames(date,station=station,lvl=lvl,campaign=campaign,collection=collection)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2fd7aca29ec85f6f",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "#### Example Reading multiple days and stations from Thredds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "a904d5dff5e0e8c8",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.626204900Z",
     "start_time": "2024-02-13T15:01:33.592177Z"
    },
    "collapsed": false,
    "tags": [
     "hide-input",
     "hide-output"
    ]
   },
   "outputs": [],
   "source": [
    "#|dropout\n",
    "#|dropcode\n",
    "# lvl='l1b'\n",
    "# stations = None\n",
    "# stations = [8,9]\n",
    "# dates = [dt.datetime(2013,4,10),dt.datetime(2013,4,9)]\n",
    "# campaign = 'hope_juelich'\n",
    "# collection= None\n",
    "# \n",
    "# if not isinstance(dates,Iterable):\n",
    "#     dates = [dates]\n",
    "# fnames = []\n",
    "# for date in dates:\n",
    "#     fnames.extend(\n",
    "#         lookup_fnames(\n",
    "#             date=date,\n",
    "#             station=stations,\n",
    "#             lvl=lvl,\n",
    "#             campaign=campaign,\n",
    "#             collection=collection\n",
    "#         )\n",
    "#     )\n",
    "# urls = np.unique(fnames)\n",
    "# urls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "9038e6c5e7674b6c",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.660854800Z",
     "start_time": "2024-02-13T15:01:33.600654200Z"
    },
    "collapsed": false,
    "tags": [
     "hide-input",
     "hide-output"
    ]
   },
   "outputs": [],
   "source": [
    "#|dropout\n",
    "#|dropcode\n",
    "# stations = np.arange(1,101)\n",
    "# for i,url in enumerate(urls):\n",
    "#     # read from thredds server\n",
    "#     dst = xr.open_dataset(url)\n",
    "# \n",
    "#     # drop not needed variables\n",
    "#     # keep_vars = ['ghi','gti','szen']\n",
    "#     # drop_vars = [v for v in dst if v not in keep_vars]\n",
    "#     # dst = dst.drop_vars(drop_vars)\n",
    "# \n",
    "#     # unify time and station dimension to speed up merging\n",
    "#     date = dst.time.values[0].astype(\"datetime64[D]\")\n",
    "#     timeidx = pd.date_range(date, date + np.timedelta64(1, 'D'), freq='1s', inclusive='left')\n",
    "#     dst = dst.interp(time=timeidx)\n",
    "#     dst = dst.reindex({\"station\": stations})\n",
    "# \n",
    "#     # add gti for single stations\n",
    "#     if \"gti\" not in dst:\n",
    "#         dst = dst.assign({\n",
    "#             \"gti\": ((\"time\",\"station\"), np.full(dst.ghi.values.shape,np.nan))\n",
    "#         })\n",
    "# \n",
    "#     # merge\n",
    "#     if i == 0:\n",
    "#         ds = dst.copy()\n",
    "#     else:\n",
    "#         ds = xr.concat((ds,dst),dim='time', data_vars='minimal', coords='minimal', compat='override')\n",
    "# ds = ds.dropna(dim=\"station\",how='all')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "3a2b7b10b40af33c",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.660854800Z",
     "start_time": "2024-02-13T15:01:33.610043400Z"
    },
    "collapsed": false,
    "tags": [
     "hide-input",
     "hide-output"
    ]
   },
   "outputs": [],
   "source": [
    "#|dropout\n",
    "#|dropcode\n",
    "# ds"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "25552cd144e97e61",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "### Data processed by the pyrnet package\n",
    "Data (re-)processed by the pyrnet package is available at the TROPOS thredds server. The following functions grant easy access.\n",
    "\n",
    "1. Filename lookup on thredds server:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "9996a84d72b70649",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.695379100Z",
     "start_time": "2024-02-13T15:01:33.618027Z"
    },
    "collapsed": false,
    "tags": [
     "hide-input"
    ]
   },
   "outputs": [],
   "source": [
    "#|export\n",
    "#|dropcode\n",
    "\n",
    "def read_thredds(dates, *, campaign, stations=None, lvl='l1b', collection=None, freq=\"1s\", drop_vars=None):\n",
    "    \"\"\"\n",
    "    Load PyrNet data (processed with the pyrnet package) from the TROPOS thredds server\n",
    "    and combine all requested dates and stations into one xarray Dataset.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    dates: list, ndarray, or scalar of type float, datetime or datetime64\n",
    "        A representation of time. If float, interpreted as Julian date.\n",
    "    campaign: str\n",
    "        Campaign identifier.\n",
    "    stations: list, ndarray, or scalar of type int or None\n",
    "        PyrNet station numbers. If None, read all stations available.\n",
    "    lvl: str\n",
    "        Data processing level -> 'l1a', 'l1b'. The default is 'l1b'.\n",
    "    collection: int or None\n",
    "        Collection number. If None, the latest available collection is looked up. The default is None.\n",
    "    freq: str\n",
    "        Pandas date frequency description string. The default is '1s'.\n",
    "    drop_vars: list of string or None\n",
    "        Variables to drop from each dataset before merging, to speed up the merging process.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    xarray.Dataset or None\n",
    "        Merged Dataset including all dates and stations specified by the input.\n",
    "        None if no matching file was found.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If lvl is neither 'l1a' nor 'l1b'.\n",
    "    \"\"\"\n",
    "    # the name of the time coordinate depends on the processing level\n",
    "    if lvl not in ('l1a', 'l1b'):\n",
    "        raise ValueError(f\"lvl {lvl} not implemented.\")\n",
    "    timevar = 'gpstime' if lvl=='l1a' else 'time'\n",
    "\n",
    "    if not isinstance(dates, Iterable):\n",
    "        dates = [dates]\n",
    "\n",
    "    # gather the urls of all requested days; np.unique removes duplicates and sorts\n",
    "    found = []\n",
    "    for day in dates:\n",
    "        found += lookup_fnames(date=day, station=stations, lvl=lvl,\n",
    "                               campaign=campaign, collection=collection)\n",
    "    urls = np.unique(found)\n",
    "    if len(urls)==0:\n",
    "        return None\n",
    "\n",
    "    # common station axis all datasets are expanded to before merging\n",
    "    all_stations = np.arange(1,101)\n",
    "    # variables added (filled with a constant) when a file has no gti: new name -> (template variable, fill value)\n",
    "    gti_fill = {\n",
    "        \"gti\": (\"ghi\", np.nan),\n",
    "        \"qc_flag_gti\": (\"qc_flag_ghi\", 0),\n",
    "        \"maintenance_flag_gti\": (\"maintenance_flag_ghi\", 0),\n",
    "    }\n",
    "\n",
    "    ds = None\n",
    "    for url in urls:\n",
    "        # read from thredds server\n",
    "        dst = xr.open_dataset(url)\n",
    "\n",
    "        # drop not needed variables\n",
    "        if drop_vars is not None:\n",
    "            dst = dst.drop_vars(drop_vars)\n",
    "\n",
    "        # unify time and station dimension to speed up merging\n",
    "        first_day = dst[timevar].values[0].astype(\"datetime64[D]\")\n",
    "        timeidx = pd.date_range(first_day, first_day + np.timedelta64(1, 'D'),\n",
    "                                freq=freq, inclusive='left')\n",
    "        dst = dst.reindex({timevar: timeidx}, method='nearest',tolerance=np.timedelta64(1,'ms'))\n",
    "        dst = dst.reindex({\"station\": all_stations})\n",
    "\n",
    "        # add gti for single stations\n",
    "        if \"gti\" not in dst:\n",
    "            dst = dst.assign({\n",
    "                new: (getattr(dst, src).dims, np.full(getattr(dst, src).values.shape, fill))\n",
    "                for new, (src, fill) in gti_fill.items()\n",
    "            })\n",
    "\n",
    "        # the first dataset is the starting point, all others are merged into it\n",
    "        if ds is None:\n",
    "            ds = dst.copy()\n",
    "            continue\n",
    "\n",
    "        # variables without a time dimension are taken from the newly read dataset\n",
    "        overwrite_vars = [v for v in dst if timevar not in dst[v].dims]\n",
    "        ds = ds.merge(dst,\n",
    "                      compat='no_conflicts',\n",
    "                      overwrite_vars=overwrite_vars)\n",
    "        dst.close()\n",
    "\n",
    "    # remove stations without any data\n",
    "    return ds.dropna(dim=\"station\",how='all')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "fa1c333fda95f48a",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.696376Z",
     "start_time": "2024-02-13T15:01:33.634840800Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# ds = xr.open_dataset(urls[0])\n",
    "# for url in urls[1:]:\n",
    "#     dst = xr.open_dataset(url)\n",
    "#     st = dst.station.values[0]\n",
    "#     stime = dst.time\n",
    "#     if st not in ds.station.values:\n",
    "#         ds = xr.concat((ds,dst), dim=\"station\", coords=\"minimal\")\n",
    "#     elif dst.time.values[0] not in ds.time.values:\n",
    "#         ds = xr.concat((ds,dst), dim=\"time\", coords=\"minimal\")\n",
    "#     elif dst.time.values[0] not in ds.sel(station=st).dropna(\"time\").time.values:\n",
    "#         ds = xr.merge((ds,dst))\n",
    "#     else:\n",
    "#         continue\n",
    "# ds"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "56441af6b4b468",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "### Data processed during HDCP2\n",
    "During HDCP2, the raw PyrNet data was processed manually with several IDL scripts. It is a legacy dataset, which can be accessed under the *old* directory on the Thredds server, as follows:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "f01ed473be05541b",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.696376Z",
     "start_time": "2024-02-13T15:01:33.647744100Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#|export\n",
    "def read_hdcp2( dt, fill_gaps=True, campaign='hope_juelich'):\n",
    "    \"\"\"\n",
    "    Read HDCP2-formatted datafiles from the pyranometer network\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    dt: datetime.date\n",
    "        The date of the data to read\n",
    "    fill_gaps: bool\n",
    "        A flag indicating whether gaps should be filled by interpolation\n",
    "    campaign: str\n",
    "        specify campaign ['eifel','hope_juelich','hope_melpitz','lindenberg','melcol']\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    dataset : xarray.Dataset\n",
    "        The pyranometer network observations, reduced to the stations passing the\n",
    "        MAX_MISSING / MIN_GOOD selection. The variables 'rsds' and 'rsds_flag' are\n",
    "        renamed to 'ghi' and 'qaflag'; 'esd', 'szen', 'mu0' and 'gtrans' are added.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    KeyError\n",
    "        If campaign is not a key of campaign_pfx.\n",
    "    \"\"\"\n",
    "    # load dataset\n",
    "    fname = DATA_URL + \"old/nc/\"+ FNAME_FMT_HDCP2\n",
    "    fname = fname.format(dt=dt,\n",
    "                         campaign=campaign,\n",
    "                         campaign_pfx=campaign_pfx[campaign])\n",
    "    ds = xr.open_dataset(fname, mask_and_scale=False)\n",
    "\n",
    "    # select good stations: few records below -900 and enough records flagged 1\n",
    "    igood = (np.sum(ds.rsds.data<-900.0,axis=0)<MAX_MISSING)&(np.sum(ds.rsds_flag==1,axis=0)>MIN_GOOD)\n",
    "    ds = ds.isel(nstations=igood)\n",
    "\n",
    "    # fill gaps if requested\n",
    "    if fill_gaps:\n",
    "        # seconds since the first record, used as interpolation axis\n",
    "        x = (ds.time-ds.time[0])/np.timedelta64(1,'s')\n",
    "        # Dataset.sizes is the supported mapping of dimension name -> length\n",
    "        for i in range(ds.sizes['nstations']):\n",
    "            y = ds.rsds.data[:,i]\n",
    "            m = y>-990.0\n",
    "            if not np.all(m):\n",
    "                # linear interpolation between valid records, extrapolating at the edges\n",
    "                f = interp1d(x[m],y[m],'linear',bounds_error=False,fill_value='extrapolate')\n",
    "                ds.rsds[~m,i]=f(x[~m])\n",
    "    # add additional DataArrays\n",
    "    # days relative to sp.EPOCH_JD2000_0\n",
    "    jd = (ds.time.data-np.datetime64(sp.EPOCH_JD2000_0))/np.timedelta64(1,'D')\n",
    "    ds['esd'] = sp.earth_sun_distance(jd[0]+0.5)\n",
    "    szen = sp.sun_angles(jd[:,None],ds.lat.data[None,:],ds.lon.data[None,:])[0]\n",
    "    ds['szen']    = xr.DataArray(szen,dims=('time','nstations'),coords={'time':ds.time.data})\n",
    "    ds['mu0']     = np.cos(np.deg2rad(ds.szen))\n",
    "    ds['gtrans']  = ds.rsds/ds.esd**2/SOLCONST/ds['mu0']\n",
    "    return ds.rename({'rsds_flag':'qaflag','rsds':'ghi'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "6276d9e2377d4fb0",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.696376Z",
     "start_time": "2024-02-13T15:01:33.655865500Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#|export\n",
    "# read pyrnet data and add coordinates\n",
    "def read_pyrnet(date, campaign):\n",
    "    \"\"\"\n",
    "    Read HDCP2 pyranometer network data of one day and attach station coordinates.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    date: datetime.date\n",
    "        The date of the data to read, passed on to read_hdcp2.\n",
    "    campaign: str\n",
    "        Campaign identifier, passed on to read_hdcp2.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    xarray.Dataset\n",
    "        Result of read_hdcp2 extended by the variables 'x' and 'y' along 'nstations',\n",
    "        computed by pyrutils.get_xy_coords from the station lon/lat.\n",
    "    \"\"\"\n",
    "    dataset = read_hdcp2(date, campaign=campaign)\n",
    "    xcoord, ycoord = pyrutils.get_xy_coords(dataset.lon, dataset.lat)\n",
    "    for name, values in (('x', xcoord), ('y', ycoord)):\n",
    "        dataset[name] = xr.DataArray(values, dims=('nstations'))\n",
    "    return dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "ecc570a67b383d13",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.720900400Z",
     "start_time": "2024-02-13T15:01:33.662244Z"
    },
    "collapsed": false,
    "tags": [
     "hide-output"
    ]
   },
   "outputs": [],
   "source": [
    "#|dropout\n",
    "# date = dt.datetime(2013,7,15)\n",
    "# pyr  = read_pyrnet(date, 'hope_juelich')\n",
    "# pyr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "5e81193ea3077f6b",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.720900400Z",
     "start_time": "2024-02-13T15:01:33.668591500Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# plot pyranometer locations\n",
    "# tstart = int(10.5*3600)\n",
    "# tstop  = tstart+2*3600\n",
    "# tslice = slice(tstart,tstop)\n",
    "# \n",
    "# plt.figure()\n",
    "# p = plt.plot(pyr.x.data,pyr.y.data,'+')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "40b70bdcea924c18",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.721900800Z",
     "start_time": "2024-02-13T15:01:33.674890Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# tstart = int(13.0*3600)\n",
    "# tstop  = tstart+2*3600\n",
    "# tslice = slice(tstart,tstop)\n",
    "# \n",
    "# tlim = [\n",
    "#     mdates.date2num(pyr.time.data[tslice][0]),\n",
    "#     mdates.date2num(pyr.time.data[tslice][-1])\n",
    "# ]\n",
    "# \n",
    "# fig,ax = plt.subplots(1,1)\n",
    "# \n",
    "# i_sy = np.argsort(-pyr.y.data)\n",
    "# im1 = ax.imshow(pyr.gtrans[tslice,i_sy].T, extent=[tlim[0],tlim[1], 1, 1+len(i_sy)], aspect=\"auto\", cmap=\"gray_r\", vmin=0.0, vmax=0.8)\n",
    "# _ = ax.set_ylabel(\"# Station\")\n",
    "# \n",
    "# \n",
    "# date_fmt = mdates.DateFormatter('%H:%M')\n",
    "# xticks = ax.get_xticks()\n",
    "# ax.xaxis_date()\n",
    "# ax.xaxis.set_major_formatter(date_fmt)\n",
    "# _ = ax.set_xlabel(\"Time [UTC]\")\n",
    "# fig.colorbar(im1, ax=ax)\n",
    "# fig.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7d0c08c515a7b066",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "## Read Calibration Files\n",
    "Calibration factors are collected in share/pyrnet_calibration.json. The following function looks up the nearest calibration in time and fills missing values with earlier calibrations."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "9fc1ed1948293e95",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.721900800Z",
     "start_time": "2024-02-13T15:01:33.683869500Z"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Order of calibration lookup\n",
      "2019-05-01 -> ['2017-04' '2019-04']\n",
      "2018-01-01 -> ['2017-04']\n"
     ]
    }
   ],
   "source": [
    "# pyrnet_calibration.json structure\n",
    "calib = {\n",
    "    \"2017-04\":{\n",
    "        \"001\":[7.1,7.2],\n",
    "        \"002\":[7.51,7.61],\n",
    "        \"003\":[6.9,6.91],\n",
    "        \"CC\": [-2,1,-2,2] # cosine correction coefficients\n",
    "    },\n",
    "    \"2019-04\": {\n",
    "        \"001\":[7,None],\n",
    "        \"002\":[7.5,7.6]\n",
    "    }\n",
    "}\n",
    "\n",
    "date1 = np.datetime64(\"2019-05-01\") # closer to \"2019-04\" calibration\n",
    "date2 = np.datetime64(\"2018-01-01\") # closer to \"2017-04\" calibration\n",
    "\n",
    "# parse calibration dates\n",
    "cdates = pd.to_datetime(list(calib.keys())).values\n",
    "\n",
    "# sort calib keys beginning with nearest\n",
    "# skeys1 = np.array(list(calib.keys()))[np.argsort(np.abs(date1 - cdates))][::-1]\n",
    "# skeys2 = np.array(list(calib.keys()))[np.argsort(np.abs(date2 - cdates))][::-1]\n",
    "\n",
    "# lookup recents\n",
    "isort = np.argsort(cdates)\n",
    "skeys1 = np.array(list(calib.keys()))[isort][:np.sum(date1>cdates)]\n",
    "skeys2 = np.array(list(calib.keys()))[isort][:np.sum(date2>cdates)]\n",
    "\n",
    "print(\"Order of calibration lookup\")\n",
    "print(date1, '->' ,skeys1)\n",
    "print(date2, '->' ,skeys2)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b6ae83a2634aae20",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "Lookup calibration, update with the most recent calibration but fill with earlier calibration if necessary."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "eff303a7e4acfe44",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.829353900Z",
     "start_time": "2024-02-13T15:01:33.696376Z"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'001': [7, 7.2], '002': [7.5, 7.6], '003': [6.9, 6.91], 'CC': [-2, 1, -2, 2]}"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# example for date1\n",
    "for i, key in enumerate(skeys1):\n",
    "    if i==0:\n",
    "        c = calib[key].copy()\n",
    "        continue\n",
    "\n",
    "    isNone = lambda x: np.any([xi is None for xi in x])\n",
    "    isNotNone = lambda x: np.all([xi is not None for xi in x])\n",
    "    # update with newer calibrations which not include None values\n",
    "    c.update(valfilter(isNotNone, calib[key]))\n",
    "\n",
    "    # update only not None values\n",
    "    for k,v in valfilter(isNone, calib[key]).items():\n",
    "        newv = [c[k][i] if vi is None else vi for i,vi in enumerate(v)]\n",
    "        c.update({k:newv})\n",
    "c"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "3898818704cb65f",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.830351600Z",
     "start_time": "2024-02-13T15:01:33.708871600Z"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'001': [7.1, 7.2],\n",
       " '002': [7.51, 7.61],\n",
       " '003': [6.9, 6.91],\n",
       " 'CC': [-2, 1, -2, 2]}"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# example for date2\n",
    "for i, key in enumerate(skeys2[::-1]):\n",
    "    if i==0:\n",
    "        c = calib[key].copy()\n",
    "        continue\n",
    "\n",
    "    isNone = lambda x: np.any([xi is None for xi in x])\n",
    "    isNotNone = lambda x: np.all([xi is not None for xi in x])\n",
    "    # update with newer calibrations which not include None values\n",
    "    c.update(valfilter(isNotNone, calib[key]))\n",
    "\n",
    "    # update only not None values\n",
    "    for k,v in valfilter(isNone, calib[key]).items():\n",
    "        newv = [c[k][i] if vi is None else vi for i,vi in enumerate(v)]\n",
    "        c.update({k:newv})\n",
    "c"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "b9ee1b114a4a2729",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.847813100Z",
     "start_time": "2024-02-13T15:01:33.716874300Z"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'2017-04': {'001': [7.1, 7.2],\n",
       "  '002': [7.51, 7.61],\n",
       "  '003': [6.9, 6.91],\n",
       "  'CC': [-2, 1, -2, 2]},\n",
       " '2019-04': {'001': [7, None], '002': [7.5, 7.6]}}"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "calib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "79d3e0f70fe8397d",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.847813100Z",
     "start_time": "2024-02-13T15:01:33.737986300Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#|export\n",
    "def read_calibration(cfile:str, cdate):\n",
    "    \"\"\"\n",
    "    Parse calibration json file\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    cfile: str\n",
    "        Path of the calibration.json\n",
    "    cdate: list, ndarray, or scalar of type float, datetime or datetime64\n",
    "        A representation of time. If float, interpreted as Julian date.\n",
    "    Returns\n",
    "    -------\n",
    "    dict\n",
    "        Calibration dictionary sorted by box number.\n",
    "    \"\"\"\n",
    "    cdate = pyrutils.to_datetime64(cdate)\n",
    "    calib = pyrutils.read_json(cfile)\n",
    "    # parse calibration dates\n",
    "    cdates = pd.to_datetime(list(calib.keys()), yearfirst=True).values\n",
    "\n",
    "    # sort calib keys beginning with nearest\n",
    "    # skeys = np.array(list(calib.keys()))[np.argsort(np.abs(cdate - cdates))][::-1]\n",
    "    # lookup most recent key\n",
    "    isort = np.argsort(cdates)\n",
    "    skeys = np.array(list(calib.keys()))[isort][:np.sum(cdate>cdates)]\n",
    "    # lookup calibration factors\n",
    "    for i, key in enumerate(skeys):\n",
    "        if i==0:\n",
    "            c = calib[key].copy()\n",
    "            continue\n",
    "        isNone = lambda x: np.any([xi is None for xi in x])\n",
    "        isNotNone = lambda x: np.all([xi is not None for xi in x])\n",
    "        # update with newer calibrations which not include None values\n",
    "        c.update(valfilter(isNotNone, calib[key]))\n",
    "        # update only not None values\n",
    "        for k,v in valfilter(isNone, calib[key]).items():\n",
    "            newv = [c[k][i] if vi is None else vi for i,vi in enumerate(v)]\n",
    "            c.update({k:newv})\n",
    "    return c"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "40c927728666f998",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "Example using the package default pyrnet_calibration.json:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dd11b3fe16d09539",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.848810300Z",
     "start_time": "2024-02-13T15:01:33.744568600Z"
    },
    "collapsed": false,
    "tags": [
     "hide-output"
    ]
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'001': [7.73, 6.98],\n",
       " '002': [7.41, None],\n",
       " '003': [7.51, None],\n",
       " '004': [7.62, 8.23],\n",
       " '005': [7.48, 6.37],\n",
       " '006': [6.71, None],\n",
       " '007': [7.52, 7.2],\n",
       " '008': [7.58, None],\n",
       " '009': [7.59, 6.85],\n",
       " '010': [7.6, 6.74],\n",
       " '011': [7.32, None],\n",
       " '012': [7.6, 6.87],\n",
       " '013': [7.16, None],\n",
       " '014': [7.71, 7.3],\n",
       " '015': [7.59, 6.77],\n",
       " '016': [7.62, None],\n",
       " '017': [7.28, None],\n",
       " '018': [7.57, 6.9],\n",
       " '019': [7.57, None],\n",
       " '020': [7.32, None],\n",
       " '021': [7.4, None],\n",
       " '022': [7.43, None],\n",
       " '023': [7.46, None],\n",
       " '024': [7.73, 6.59],\n",
       " '025': [7.46, 7.14],\n",
       " '026': [7.64, 7.05],\n",
       " '027': [None, None],\n",
       " '028': [7.21, None],\n",
       " '029': [7.61, None],\n",
       " '030': [7.3, None],\n",
       " '031': [7.23, None],\n",
       " '032': [7.7, 6.94],\n",
       " '033': [7.74, 6.89],\n",
       " '034': [6.8, None],\n",
       " '035': [7.62, 7.1],\n",
       " '036': [None, None],\n",
       " '037': [7.72, 6.73],\n",
       " '038': [7.62, 6.93],\n",
       " '039': [7.27, None],\n",
       " '040': [7.15, None],\n",
       " '041': [7.38, None],\n",
       " '042': [7.15, None],\n",
       " '043': [7.41, 7.2],\n",
       " '044': [7.26, 7.05],\n",
       " '045': [7.52, None],\n",
       " '046': [7.7, 7.13],\n",
       " '047': [7.47, 7.4],\n",
       " '048': [7.1, None],\n",
       " '049': [7.74, 7.32],\n",
       " '050': [7.51, None],\n",
       " '051': [7.67, None],\n",
       " '052': [7.46, None],\n",
       " '053': [7.63, 6.48],\n",
       " '054': [7.58, 7.22],\n",
       " '055': [7.41, 6.51],\n",
       " '056': [7, None],\n",
       " '057': [6.95, None],\n",
       " '058': [7.62, 6.9],\n",
       " '059': [7.07, None],\n",
       " '060': [7.64, 6.62],\n",
       " '061': [7.53, None],\n",
       " '062': [7.59, None],\n",
       " '063': [7.63, None],\n",
       " '064': [7.38, None],\n",
       " '065': [7.47, None],\n",
       " '066': [7.48, None],\n",
       " '067': [7.46, None],\n",
       " '068': [7.01, None],\n",
       " '069': [7.33, None],\n",
       " '070': [6.88, None],\n",
       " '071': [7.6, None],\n",
       " '072': [7.58, None],\n",
       " '073': [7.41, None],\n",
       " '074': [7.61, None],\n",
       " '075': [6.91, None],\n",
       " '076': [7.63, None],\n",
       " '077': [7.51, None],\n",
       " '078': [6.93, None],\n",
       " '079': [7.62, None],\n",
       " '080': [7.3, None],\n",
       " '081': [7.64, None],\n",
       " '082': [None, None],\n",
       " '083': [7.44, None],\n",
       " '084': [7.46, None],\n",
       " '085': [7.67, None],\n",
       " '086': [7.45, 6.69],\n",
       " '087': [7.42, 7.11],\n",
       " '088': [7, None],\n",
       " '089': [7.59, None],\n",
       " '090': [7.64, None],\n",
       " '091': [7.47, None],\n",
       " '092': [7.35, None],\n",
       " '093': [7.51, None],\n",
       " '094': [6.95, None],\n",
       " '095': [7.47, None],\n",
       " '096': [7.65, None],\n",
       " '097': [6.9, None],\n",
       " '098': [None, None],\n",
       " '099': [7.51, None],\n",
       " '100': [7.32, None],\n",
       " 'CC': [1.45, -3.04, 5.59, -3.01]}"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#|dropout\n",
    "fn = os.path.join(importlib.resources.files(\"pyrnet\"), \"share/pyrnet_calibration.json\")\n",
    "read_calibration(fn,cdate=np.datetime64(\"2018-09-10\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "378f9088c2a848b4",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "## Read Box Serial numbers\n",
    "Similar to reading the calibration, pyranometers attached to each box are stored in .json format. Reassigning of pyranometers to certain boxes might happen. Different to the calibration we will look up the most recent entry (not in the future)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "6375f01b07a796f4",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.848810300Z",
     "start_time": "2024-02-13T15:01:33.761575600Z"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Order of serial lookup\n",
      "2019-03-01 -> ['2017-04']\n",
      "2019-05-01 -> ['2017-04' '2019-04']\n",
      "Serials map:\n",
      "2019-03-01 -> {'001': ['S11', 'S21'], '002': ['S21', 'S22'], '003': ['S31', None]}\n",
      "2019-05-01 -> {'001': ['S11', None], '002': ['S21', 'S33'], '003': ['S31', None]}\n"
     ]
    }
   ],
   "source": [
    "# pyrnet_calibration.json structure\n",
    "pyrnetmap = {\n",
    "    \"2019-04\": {\n",
    "        \"001\":[\"S11\",None],\n",
    "        \"002\":[\"S21\",\"S33\"]\n",
    "    },\n",
    "    \"2017-04\":{\n",
    "        \"001\":[\"S11\",\"S21\"],\n",
    "        \"002\":[\"S21\",\"S22\"],\n",
    "        \"003\":[\"S31\",None]\n",
    "    },\n",
    "}\n",
    "\n",
    "date1 = np.datetime64(\"2019-03-01\") # before \"2019-04\"\n",
    "date2 = np.datetime64(\"2019-05-01\") # after \"2019-04\"\n",
    "\n",
    "# parse key dates\n",
    "# require sort for lookup later\n",
    "cdates = pd.to_datetime(list(pyrnetmap.keys())).values\n",
    "isort = np.argsort(cdates)\n",
    "\n",
    "# lookup most recent key\n",
    "skeys1 = np.array(list(pyrnetmap.keys()))[isort][:np.sum(date1>cdates)]\n",
    "skeys2 = np.array(list(pyrnetmap.keys()))[isort][:np.sum(date2>cdates)]\n",
    "print(\"Order of serial lookup\")\n",
    "print(date1, '->' ,skeys1)\n",
    "print(date2, '->' ,skeys2)\n",
    "\n",
    "print(\"Serials map:\")\n",
    "\n",
    "print(date1, '->', merge([pyrnetmap[key] for key in skeys1]))\n",
    "print(date2, '->', merge([pyrnetmap[key] for key in skeys2]))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fca4e8265724ffa3",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "Now we only have to look it up"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "3f1da6f2d52240f5",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.848810300Z",
     "start_time": "2024-02-13T15:01:33.772288400Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#|export\n",
    "def get_pyrnet_mapping(fn:str, date):\n",
    "    \"\"\"\n",
    "    Parse box - serial number mapping  json file\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    fn: str\n",
    "        Path of the mapping.json\n",
    "    date: list, ndarray, or scalar of type float, datetime or datetime64\n",
    "        A representation of time. If float, interpreted as Julian date.\n",
    "    Returns\n",
    "    -------\n",
    "    dict\n",
    "        Calibration dictionary sorted by box number.\n",
    "    \"\"\"\n",
    "    date = pyrutils.to_datetime64(date)\n",
    "    pyrnetmap = pyrutils.read_json(fn)\n",
    "    # parse key dates\n",
    "    # require sort for lookup later\n",
    "    cdates = pd.to_datetime(list(pyrnetmap.keys()), yearfirst=True).values\n",
    "    isort = np.argsort(cdates)\n",
    "\n",
    "    # lookup most recent key\n",
    "    skeys = np.array(list(pyrnetmap.keys()))[isort][:np.sum(date>cdates)]\n",
    "\n",
    "    # merge and update with the most recent map\n",
    "    return  merge([pyrnetmap[key] for key in skeys])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7c0c248823ee154d",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "## Lookup Serial, Boxnumber, calibration at certain date\n",
    "Utility to lookup  box metadata for a certain date.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "425a33a949919bfb",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.849808100Z",
     "start_time": "2024-02-13T15:01:33.780098800Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#|export\n",
    "def meta_lookup(date,*,serial=None,box=None,cfile=None, mapfile=None):\n",
    "    if cfile is None:\n",
    "        cfile = os.path.join(importlib.resources.files(\"pyrnet\"), \"share/pyrnet_calibration.json\")\n",
    "    if mapfile is None:\n",
    "        mapfile = os.path.join(importlib.resources.files(\"pyrnet\"), \"share/pyrnet_station_map.json\")\n",
    "\n",
    "    map = get_pyrnet_mapping(mapfile,date)\n",
    "    calib = read_calibration(cfile,date)\n",
    "    \n",
    "    CC = calib[\"CC\"] if \"CC\" in calib else [1]\n",
    "\n",
    "    if serial is None and box is not None:\n",
    "        box=int(box)\n",
    "        return f\"{box:03d}\", map[f\"{box:03d}\"], calib[f\"{box:03d}\"], CC\n",
    "    elif serial is not None and box is None:\n",
    "        res = valfilter(lambda x: serial in x, map)\n",
    "        box = list(res.keys())[0]\n",
    "        serial = res[box]\n",
    "        return box,serial,calib[box], CC\n",
    "    else:\n",
    "        raise ValueError(\"At least one of [station,box] have to be specified.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "d49c529b4af4793e",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:33.862772Z",
     "start_time": "2024-02-13T15:01:33.787082800Z"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('006', ['S12078.061', ''], [6.71, None], [1.45, -3.04, 5.59, -3.01])\n",
      "('010', ['S12128.010', 'S12137.019'], [7.6, 6.74], [1.45, -3.04, 5.59, -3.01])\n"
     ]
    }
   ],
   "source": [
    "date = np.datetime64(\"2018-10-01\")\n",
    "print(meta_lookup(date,serial=\"S12078.061\"))\n",
    "print(meta_lookup(date,box=10))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "d0dd3c3cfe24dc98",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-02-13T15:01:34.584653400Z",
     "start_time": "2024-02-13T15:01:33.808885900Z"
    },
    "collapsed": false,
    "tags": [
     "remove-cell",
     "hide-input",
     "hide-output"
    ]
   },
   "outputs": [],
   "source": [
    "#|hide\n",
    "import nbdev.export\n",
    "import nbformat as nbf\n",
    "name = \"pyrnet\"\n",
    "\n",
    "# Export python module\n",
    "nbdev.export.nb_export( f\"{name}.ipynb\" ,f\"../../src/pyrnet\")\n",
    "\n",
    "# Export to docs\n",
    "ntbk = nbf.read(f\"{name}.ipynb\", nbf.NO_CONVERT)\n",
    "\n",
    "text_search_dict = {\n",
    "    \"#|hide\": \"remove-cell\",  # Remove the whole cell\n",
    "    \"#|dropcode\": \"hide-input\",  # Hide the input w/ a button to show\n",
    "    \"#|dropout\": \"hide-output\"  # Hide the output w/ a button to show\n",
    "}\n",
    "for cell in ntbk.cells:\n",
    "    cell_tags = cell.get('metadata', {}).get('tags', [])\n",
    "    for key, val in text_search_dict.items():\n",
    "            if key in cell['source']:\n",
    "                if val not in cell_tags:\n",
    "                    cell_tags.append(val)\n",
    "    if len(cell_tags) > 0:\n",
    "        cell['metadata']['tags'] = cell_tags\n",
    "    nbf.write(ntbk, f\"../../docs/source/nbs/{name}.ipynb\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "pyrnet",
   "language": "python",
   "name": "pyrnet"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}