{ "cells": [ { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "# Utils\n", "Utility functions for PyrNet" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "tags": [ "remove-cell" ] }, "outputs": [], "source": [ "#|hide\n", "#|default_exp utils" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#|export\n", "from numpy.typing import ArrayLike, NDArray\n", "import numpy as np\n", "from scipy.signal.windows import gaussian\n", "import jstyleson as json\n", "from addict import Dict as adict\n", "from operator import itemgetter\n", "from toolz import keyfilter\n", "\n", "# python -m pip install git+https://github.com/hdeneke/trosat-base.git#egg=trosat-base\n", "import trosat.sunpos as sp" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# extra imports for demonstration\n", "import sys\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import datetime as dt\n", "import pkg_resources as pkg_res\n", "\n", "import pyrnet" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "## Representation of Time\n", "Unifying various representations of time to numpy.datetime64 comes in handy when handling user inputs." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "ename": "NameError", "evalue": "name 'np' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[1], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m#|export\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m EPOCH_JD_2000_0 \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mdatetime64(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2000-01-01T12:00\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mto_datetime64\u001b[39m(time, epoch\u001b[38;5;241m=\u001b[39mEPOCH_JD_2000_0):\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124;03m Convert various representations of time to datetime64.\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;124;03m datetime64 or ndarray of datetime64\u001b[39;00m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n", "\u001b[0;31mNameError\u001b[0m: name 'np' is not defined" ] } ], "source": [ "#|export\n", "EPOCH_JD_2000_0 = np.datetime64(\"2000-01-01T12:00\")\n", "def to_datetime64(time, epoch=EPOCH_JD_2000_0):\n", " \"\"\"\n", " Convert various representations of time to datetime64.\n", "\n", " Parameters\n", " ----------\n", " time : list, ndarray, or scalar of type float, datetime or datetime64\n", " A representation of time. If float, interpreted as Julian date.\n", " epoch : np.datetime64, default JD2000.0\n", " The epoch to use for the calculation\n", "\n", " Returns\n", " -------\n", " datetime64 or ndarray of datetime64\n", " \"\"\"\n", " jd = sp.to_julday(time, epoch=epoch)\n", " jdms = np.int64(86_400_000*jd)\n", " return epoch + jdms.astype('timedelta64[ms]')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# testing to_datetime64\n", "date_jd = 5203.5 # 2014-04-01T00:00\n", "date_dt = dt.datetime(2014,4,1,12,10)\n", "date_pd = pd.date_range(\"2014-04-01\",\"2014-04-03\",freq='1d')\n", "date_list = [dt.date(2014,4,1), dt.date(2014,4,2)]\n", "assert np.datetime64(\"2014-04-01T00:00\")==to_datetime64(date_jd)\n", "assert np.datetime64(\"2014-04-01T12:10\")==to_datetime64(date_dt)\n", "assert np.array_equal(\n", " np.array([np.datetime64(\"2014-04-01\"),\n", " np.datetime64(\"2014-04-02\"),\n", " np.datetime64(\"2014-04-03\")]\n", " ).astype(\"datetime64[ms]\"),\n", " to_datetime64(date_pd))\n", "assert np.array_equal(\n", " np.array([np.datetime64(\"2014-04-01\"),\n", " np.datetime64(\"2014-04-02\")]\n", " ).astype(\"datetime64[ms]\"),\n", " to_datetime64(date_list))" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "## Json netcdf config tools\n", "The netCDF attributes and encoding variables are stored in [CF-Compliance](https://cfconventions.org/) json files. The following utility functions are used to parse the json and return attribute and encoding dictionary's to be used with xarray." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#|export\n", "def read_json(fpath: str, *, object_hook: type = adict, cls = None) -> dict:\n", " \"\"\" Parse json file to python dict.\n", " \"\"\"\n", " with open(fpath,\"r\") as f:\n", " js = json.load(f, object_hook=object_hook, cls=cls)\n", " return js\n", "\n", "def pick(whitelist: list[str], d: dict) -> dict:\n", " \"\"\" Keep only whitelisted keys from input dict.\n", " \"\"\"\n", " return keyfilter(lambda k: k in whitelist, d)\n", "\n", "def omit(blacklist: list[str], d: dict) -> dict:\n", " \"\"\" Omit blacklisted keys from input dict.\n", " \"\"\"\n", " return keyfilter(lambda k: k not in blacklist, d)\n", "\n", "def get_var_attrs(d: dict) -> dict:\n", " \"\"\"\n", " Parse cf-compliance dictionary.\n", "\n", " Parameters\n", " ----------\n", " d: dict\n", " Dict parsed from cf-meta json.\n", "\n", " Returns\n", " -------\n", " dict\n", " Dict with netcdf attributes for each variable.\n", " \"\"\"\n", " get_vars = itemgetter(\"variables\")\n", " get_attrs = itemgetter(\"attributes\")\n", " vattrs = {k: get_attrs(v) for k,v in get_vars(d).items()}\n", " for k,v in get_vars(d).items():\n", " vattrs[k].update({\n", " \"dtype\": v[\"type\"],\n", " \"gzip\":True,\n", " \"complevel\":6\n", " })\n", " return vattrs\n", "\n", "def get_attrs_enc(d : dict) -> (dict,dict):\n", " \"\"\" Split variable attributes in attributes and encoding-attributes.\n", " \"\"\"\n", " _enc_attrs = {\n", " \"scale_factor\",\n", " \"add_offset\",\n", " \"_FillValue\",\n", " \"dtype\",\n", " \"zlib\",\n", " \"gzip\",\n", " \"complevel\",\n", " \"calendar\",\n", " }\n", " # extract variable attributes\n", " vattrs = {k: omit(_enc_attrs, v) for k, v in d.items()}\n", " # extract variable encoding\n", " vencode = {k: pick(_enc_attrs, v) for k, v in d.items()}\n", " return vattrs, vencode" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "### Usage:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "tags": [ "hide-output" ] }, "outputs": [], "source": [ "#|dropout\n", "fn = pkg_res.resource_filename(\"pyrnet\", \"share/pyrnet_cfmeta.json\")\n", "\n", "config = dict(\n", " contributor_name = \"Jon Doe; Roger Rogers\",\n", " contributor_role = \"Set-up; Tear-down\",\n", " project = \"Notebook Example\",\n", " creator_name = \"Adam Alpha\",\n", " dt = np.datetime64(\"now\").item(),\n", " sdate = dt.datetime(2014,4,1,12,0),\n", " edate = dt.datetime(2014,4,2,18,0),\n", " notes = \"This is just an example.\",\n", ")\n", "\n", "# parse the json file\n", "cfdict = read_json(fn)\n", "\n", "# get global attributes:\n", "gattrs = cfdict['attributes']\n", "\n", "# apply config\n", "gattrs = {k:v.format_map(config) for k,v in gattrs.items()}\n", "\n", "gattrs" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "tags": [ "hide-output" ] }, "outputs": [], "source": [ "#|dropout\n", "# get variable attributes\n", "d = get_var_attrs(cfdict)\n", "\n", "# split encoding attributes\n", "vattrs, vencode = get_attrs_enc(d)\n", "\n", "vattrs, vencode" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "## Euclidian distances of stations" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#|export\n", "def pairwise_distance_matrix( x: ArrayLike, y: ArrayLike ) -> NDArray:\n", " \"\"\"\n", " Get square matrix with Euclidian distances of stations\n", "\n", " Parameters\n", " ----------\n", " x: array_like\n", " X coordinates\n", " y: array_like\n", " Y coordinates\n", "\n", " Returns\n", " -------\n", " ndarray\n", " A square matrix with Euclidian distances of stations\n", " \"\"\"\n", " x = np.array(x)\n", " y = np.array(y)\n", " return np.sqrt( (x[None,:]-x[:,None])**2+(y[None,:]-y[:,None])**2 )" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "x = [0, 1, 2]\n", "y = [0, 0, 0]\n", "dist = pairwise_distance_matrix(x,y)\n", "print(dist)\n", "assert dist[0,0]==dist[1,1]==dist[2,2]==0\n", "assert dist[0,1]==dist[1,0]==1\n", "assert dist[0,2]==dist[2,0]==2" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "## Fourier utility\n", "\n", "### Create a gaussian window\n", "Convert a scale parameter *J* to FWHM of a normal distribution\n", "\n", "$\\mathrm{FWHM} = 2 \\sqrt{2 \\ln 2} \\sigma = f \\sigma$\n", "\n", "define FWHM with scale parameter *J*, so that :\n", "$\\mathrm{FWHM} = 60*2^J$\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "for J in range(-2,4):\n", " print(f\"J = {J:2d} -> FWHM(J) = {60.*2**J}\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "J=2\n", "f = 2.0*np.sqrt(2*np.log(2))\n", "sig = (60.0/f)*2**J\n", "\n", "# FWHM from scale parameter\n", "FWHM = 60*2**J\n", "\n", "# normalized gaussian distribution\n", "g = gaussian(3*FWHM, sig, sym=False)\n", "# distribution density (sums up to 1)\n", "gd = g/(np.sqrt(2*np.pi)*sig)\n", "print(f\"sum of distribution density: {np.sum(gd):.3f}\")\n", "\n", "x0 = np.argmax(gd)\n", "x1, x2 = x0-.5*FWHM, x0+.5*FWHM\n", "plt.figure()\n", "_ = plt.plot(g/np.sqrt(2*np.pi)/sig, label='gaussian distribution density')\n", "_ = plt.hlines(.5*np.max(gd),x1,x2,color='k', label=f'FWHM={FWHM}')\n", "_ = plt.grid()\n", "_ = plt.legend()\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# shift array, so that it starts with the maxima\n", "gd_rolled = np.roll(gd, np.floor_divide(3*FWHM, 2))\n", "\n", "# calculate the frequency response of the Gaussian window\n", "gd_fft = np.fft.fft(gd_rolled)\n", "fft_freq = np.fft.fftfreq(gd_rolled.shape[0])\n", "\n", "fig, axs = plt.subplots(2,1)\n", "_ = axs[0].set_title(\"Gaussian distribution density\")\n", "_ = axs[0].plot(gd_rolled)\n", "_ = axs[0].grid(True)\n", "_ = axs[0].set_xlabel('Sample')\n", "_ = axs[0].set_ylabel('Amplitude')\n", "\n", "_ = axs[1].set_title(\"Frequency response of the Gaussian window\")\n", "_ = axs[1].plot(fft_freq,gd_fft,'b.')\n", "_ = axs[1].set_xlabel('normalized frequency (cycles per sample)')\n", "_ = axs[1].set_ylabel('normalized magnitude (dB)')\n", "_ = axs[1].set_xlim([-.05,.05])\n", "fig.tight_layout()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#|export\n", "def gauss_fwin_fwhm(fwhm: float, N: int = 86400) -> NDArray:\n", " \"\"\"\n", " Convert scale parameter to FWHM of Normal distribution see\n", " [https://en.wikipedia.org/wiki/Full_width_at_half_maximum#Normal_distribution]\n", "\n", " Parameters\n", " ----------\n", " fwhm: float\n", " FWHM of the gaussian distribution\n", " N: int\n", " Number of points in the output window. If zero, an empty array is returned. An exception is thrown when it is negative.\n", " The default is 86400 (seconds per day).\n", "\n", " Returns\n", " -------\n", " ndarray\n", " Frequency response of the gaussian window\n", " \"\"\"\n", " f = 2.0*np.sqrt(2*np.log(2))\n", " sig = fwhm/f\n", " g = gaussian(N, sig, sym=False)/np.sqrt(2*np.pi)/sig\n", " return np.fft.fft(np.roll(g, np.floor_divide(N, 2)))\n", "\n", "def gauss_fwin(J: float, N: int=86400) -> NDArray:\n", " \"\"\"\n", " Convert scale parameter to FWHM of Normal distribution see\n", " [https://en.wikipedia.org/wiki/Full_width_at_half_maximum#Normal_distribution]\n", "\n", " Parameters\n", " ----------\n", " J: float\n", " scale parameter for FWHM, with FWHM=60*2**J\n", " N: int\n", " Number of points in the output window. If zero, an empty array is returned. An exception is thrown when it is negative.\n", " The default is 86400 (seconds per day).\n", "\n", " Returns\n", " -------\n", " ndarray\n", " Frequency response of the gaussian window\n", " \"\"\"\n", " fwhm = 60.*2**J\n", " return gauss_fwin_fwhm(fwhm, N=N)\n", "\n" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "### Smoothing Data by convolution" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#|export\n", "def smooth_fwhm(y: ArrayLike, fwhm: float, axis: int = 0) -> NDArray:\n", " \"\"\"\n", " Smooth data with gaussian window by convolution\n", "\n", " Parameters\n", " ----------\n", " y: array_like\n", " Input array.\n", " fwhm: float\n", " FWHM of the gaussian window to be convolved with the input array.\n", " axis: int, optional\n", " Axis over which to smoothing is applied.\n", "\n", " Returns\n", " -------\n", " ndarray\n", " Smoothed array of the same shape as the input array `y`.\n", " \"\"\"\n", " Y = np.fft.fft(y, axis=axis)\n", " W = gauss_fwin_fwhm(fwhm, y.shape[axis])\n", " if y.ndim > 1:\n", " W = np.expand_dims(W, axis=axis).T\n", " return np.fft.ifft(Y * W, axis=axis).real\n", "\n", "def smooth(y: ArrayLike, J: float, axis: int = 0) -> NDArray:\n", " \"\"\"\n", " Smooth data with gaussian window by convolution\n", "\n", " Parameters\n", " ----------\n", " y: array_like\n", " Input array.\n", " J: float\n", " Scale parameter for the FWHM (FWHM=60*2**J) of the\n", " gaussian window to be convolved with the input array.\n", " axis: int, optional\n", " Axis over which to smoothing is applied.\n", "\n", " Returns\n", " -------\n", " ndarray\n", " Smoothed array of the same shape as the input array `y`.\n", " \"\"\"\n", " fwhm = 60.*2**J\n", " return smooth_fwhm(y, fwhm, axis=axis)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "axis=0\n", "sig = np.repeat([0., 1., 0.], 100)\n", "sig2 = np.vstack((sig*2,sig*4))\n", "\n", "fwhm = 10\n", "filtered = smooth_fwhm(sig, fwhm, axis=0)\n", "filtered2 = smooth_fwhm(sig2, fwhm, axis=1)\n", "\n", "\n", "fig, ax = plt.subplots(1,1)\n", "_ = ax.plot(sig, label='signal')\n", "_ = ax.plot(sig2.T, label='signal')\n", "_ = ax.plot(filtered, label='smoothed signal')\n", "_ = ax.plot(filtered2.T, label='smoothed signal')\n", "_ = ax.legend()\n", "fig.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from pyrnet import pyrnet\n", "date = dt.datetime(2013,7,15)\n", "pyr = pyrnet.read_pyrnet(date, 'hope_juelich')\n", "\n", "z0 = pyr.ghi.data[:,0]\n", "z1 = smooth(pyr.ghi.data, 0, axis=0)\n", "z2 = smooth(pyr.ghi.data.T, 5, axis=-1)\n", "\n", "fig, ax = plt.subplots(1,1)\n", "_ = ax.plot(z0,'k')\n", "_ = ax.plot(z1[:,0],'g')\n", "_ = ax.plot(z2[0,:],'r')\n", "_ = ax.grid(True)\n", "fig.show()" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "tags": [ "remove-cell", "hide-input", "hide-output" ] }, "outputs": [], "source": [ "#|hide\n", "# Export module\n", "# Requires *nbdev* to export and update the *../lib/utils.py* module\n", "import nbdev.export\n", "import nbformat as nbf\n", "name = \"utils\"\n", "\n", "# Export python module\n", "nbdev.export.nb_export( f\"{name}.ipynb\" ,f\"../../src/pyrnet\")\n", "\n", "# Export to docs\n", "ntbk = nbf.read(f\"{name}.ipynb\", nbf.NO_CONVERT)\n", "\n", "text_search_dict = {\n", " \"#|hide\": \"remove-cell\", # Remove the whole cell\n", " \"#|dropcode\": \"hide-input\", # Hide the input w/ a button to show\n", " \"#|dropout\": \"hide-output\" # Hide the output w/ a button to show\n", "}\n", "for cell in ntbk.cells:\n", " cell_tags = cell.get('metadata', {}).get('tags', [])\n", " for key, val in text_search_dict.items():\n", " if key in cell['source']:\n", " if val not in cell_tags:\n", " cell_tags.append(val)\n", " if len(cell_tags) > 0:\n", " cell['metadata']['tags'] = cell_tags\n", " nbf.write(ntbk, f\"../../docs/source/nbs/{name}.ipynb\")" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 0 }