2. PyrNet high level data

In the following high-level functions to read and examine PyrNet data are collected.

#|export
from collections.abc import Iterable
from xml.dom import minidom
from urllib.request import urlopen
import parse
import os
import numpy as np
import pandas as pd
import xarray as xr
from scipy.interpolate import interp1d
from toolz import valfilter, cons, merge, merge_with
#import pkg_resources as pkg_res
import importlib.resources
import warnings

# python -m pip install git+https://github.com/hdeneke/trosat-base.git#egg=trosat-base
from trosat import sunpos as sp

from pyrnet import utils as pyrutils
# extra imports for demonstration
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime as dt

2.1. Load Data from Thredds-Server

Acquire processed data from server

#|export
# campaign file name map for hdcp2 data
campaign_pfx = {
    'eifel': 'hope',
    'hope_juelich': 'hope',
    'hope_melpitz': 'hopm',
    'lindenberg': 'ioprao',
    'melcol': 'mcol',
}

# TROPOS thredds urls templates
DATA_URL = "https://tds.tropos.de/thredds/dodsC/scccJher/{dt:%Y}_{campaign}/"
FNAME_FMT_HDCP2 = '{campaign_pfx}_trop_pyrnet00_l1_rsds_v00_{dt:%Y%m%d}000000.nc'

# configuration constants
SOLCONST = 1359.0 # Solar constant in Wm-2
MAX_MISSING = 1000 # Maximum allowed number of missing records
MIN_GOOD = 85400 # Minimum allowed number of good records

2.1.1. Thredds lookups

Hide code cell source

#|export
#|dropcode
def get_elements(url, tag_name='dataset', attribute_name='urlPath'):
  """Get elements from an XML file"""
  # usock = urllib2.urlopen(url)
  usock = urlopen(url)
  xmldoc = minidom.parse(usock)
  usock.close()
  tags = xmldoc.getElementsByTagName(tag_name)
  attributes=[]
  for tag in tags:
    attribute = tag.getAttribute(attribute_name)
    attributes.append(attribute)
  return attributes

def parse_thredds_catalog(url, fname_format):
    """Parse Thredds server catalog and return pd.Dataframe of file name format variables."""
    fname_format = fname_format.replace("%Y-%m-%d","ti")
    tfiles = get_elements(url)
    results = False
    for fn in tfiles:
        fn = os.path.basename(fn)
        res = parse.parse(fname_format, fn)
        if res is None:
            continue
        if not results:
            results = {k:[v] for k,v in res.named.items()}
        else:
            results = merge_with(lambda x: list(cons(x[1],x[0])), results, res.named)
    return pd.DataFrame.from_dict(results)

def lookup_fnames(date, *, station, lvl, campaign, collection):
    """Parse Thredds server files and return list of filenames matching the date, station, campaign and collection configuration."""
    date = pyrutils.to_datetime64(date)

    fn = os.path.join(importlib.resources.files("pyrnet"), "share/pyrnet_config.json")
    pyrcfg = pyrutils.read_json(fn)

    # construct catalog url
    catalog_url = DATA_URL.format(dt=pd.to_datetime(date),campaign=campaign)
    catalog_url = catalog_url.replace("dodsC","catalog")
    catalog_url += f"{lvl}/catalog.xml"
    catalog = parse_thredds_catalog(catalog_url, pyrcfg[f"output_{lvl}"])

    if station is None:
        try:
            nlvl = f"{lvl}_network"
            c = parse_thredds_catalog(catalog_url.replace(f"/{lvl}/",f"/{nlvl}/"),
                                      pyrcfg[f"output_{nlvl}"])
            c = c.query(f"dt=='{pd.to_datetime(date):%Y-%m-%d}'")
            if c.size==0:
                raise ValueError
            if collection is None:
                col = np.nanmax(c['collection'])
            else:
                col = collection
            fnames = [
                pyrcfg[f"output_{nlvl}"].format(
                    dt=pd.to_datetime(date),
                    campaign=campaign,
                    collection=col,
                    sfx="nc"
                )
            ]
            url = DATA_URL.format(dt=pd.to_datetime(date),campaign=campaign)
            fnames = [url + f"{nlvl}/"+ fn for fn in fnames]
            return fnames
        except:
            station = np.unique(catalog["station"].values)

    if not isinstance(station, Iterable):
        station=[station]

    # file name blueprint

    fnames = []
    for st in station:
        c = catalog.query(f'station=={st}').reset_index()
        if c.size==0:
            warnings.warn(f"File of station {st} does not exist.")
            continue

        if collection is None:
            col = np.nanmax(c['collection'])
        else:
            col = collection

        c = c.query(f"collection=={col}").reset_index()
        if c.size==0:
            warnings.warn(f"File of station {st}, collection {col} does not exist.")
            continue

        if lvl=='l1a':
            c = catalog.query(f'station=={st} & collection=={col}').reset_index()
            startdts = c["startdt"]
            enddts = c["enddt"]
            # get file index with maintenance interval including date
            idate_start = np.sum(date>=startdts)-1
            idate_end = np.sum(date>enddts)
            if (idate_start==-1) or (idate_end==enddts.size):
                warnings.warn(f"File of station {st}, level {lvl} at date {date} does not exist.")
                continue

            fnames.append(
                pyrcfg[f"output_{lvl}"].format(
                    startdt=pd.to_datetime(startdts[idate_end]),
                    enddt=pd.to_datetime(enddts[idate_end]),
                    campaign=campaign,
                    station=st,
                    collection=col,
                    sfx="nc"
                )
            )
            if idate_end!=idate_start: # date is on a maintenance day -> combine two datasets
                fnames.append(
                    pyrcfg[f"output_{lvl}"].format(
                        startdt=pd.to_datetime(startdts[idate_start]),
                        enddt=pd.to_datetime(enddts[idate_start]),
                        campaign=campaign,
                        station=st,
                        collection=col,
                        sfx="nc"
                    )
                )

        else:
            c = c.query(f"dt=='{pd.to_datetime(date):%Y-%m-%d}'")
            if c.size==0:
                warnings.warn(f"File of station {st}, collection {col} at date {date} does not exist.")
                continue
            fnames.append(
                pyrcfg[f"output_{lvl}"].format(
                    dt=pd.to_datetime(date),
                    campaign=campaign,
                    station=st,
                    collection=col,
                    sfx="nc"
                )
            )
    url = DATA_URL.format(dt=pd.to_datetime(date),campaign=campaign)
    fnames = [url + f"{lvl}/"+ fn for fn in fnames]
    return fnames
#|dropout
# date = dt.date(2019,8,7)
# campaign='metpvnet'
# lvl='l1b'
# 
# # construct catalog url
# url = DATA_URL.format(dt=pd.to_datetime(date),campaign=campaign)
# url = url.replace("dodsC","catalog")
# url += f"{lvl}/catalog.xml"
# 
# tfiles = get_elements(url)
# tfiles
#|dropout
fn = os.path.join(importlib.resources.files("pyrnet"), "share/pyrnet_config.json")
pyrcfg = pyrutils.read_json(fn)
# catalog = parse_thredds_catalog(url,pyrcfg["output_l1b"])
# catalog.query('station==10')
# catalog.query('dt=="2019-07-05"')
#|dropout
lvl='l1a'
station=[86,87,88]
date = dt.datetime(2019,7,31)
campaign = 'metpvnet'
collection= None

# lookup_fnames(date,station=station,lvl=lvl,campaign=campaign,collection=collection)

2.1.1.1. Example Reading multiple days and stations from Thredds

Hide code cell source

#|dropout
#|dropcode
# lvl='l1b'
# stations = None
# stations = [8,9]
# dates = [dt.datetime(2013,4,10),dt.datetime(2013,4,9)]
# campaign = 'hope_juelich'
# collection= None
# 
# if not isinstance(dates,Iterable):
#     dates = [dates]
# fnames = []
# for date in dates:
#     fnames.extend(
#         lookup_fnames(
#             date=date,
#             station=stations,
#             lvl=lvl,
#             campaign=campaign,
#             collection=collection
#         )
#     )
# urls = np.unique(fnames)
# urls

Hide code cell source

#|dropout
#|dropcode
# stations = np.arange(1,101)
# for i,url in enumerate(urls):
#     # read from thredds server
#     dst = xr.open_dataset(url)
# 
#     # drop not needed variables
#     # keep_vars = ['ghi','gti','szen']
#     # drop_vars = [v for v in dst if v not in keep_vars]
#     # dst = dst.drop_vars(drop_vars)
# 
#     # unify time and station dimension to speed up merging
#     date = dst.time.values[0].astype("datetime64[D]")
#     timeidx = pd.date_range(date, date + np.timedelta64(1, 'D'), freq='1s', inclusive='left')
#     dst = dst.interp(time=timeidx)
#     dst = dst.reindex({"station": stations})
# 
#     # add gti for single stations
#     if "gti" not in dst:
#         dst = dst.assign({
#             "gti": (("time","station"), np.full(dst.ghi.values.shape,np.nan))
#         })
# 
#     # merge
#     if i == 0:
#         ds = dst.copy()
#     else:
#         ds = xr.concat((ds,dst),dim='time', data_vars='minimal', coords='minimal', compat='override')
# ds = ds.dropna(dim="station",how='all')

Hide code cell source

#|dropout
#|dropcode
# ds

2.1.2. Data processed by the pyrnet package

Data (re-)processed by the pyrnet package is available at the TROPOS thredds server. The following function grant easy access.

  1. Filename lookup on thredds server:

Hide code cell source

#|export
#|dropcode

def read_thredds(dates, *, campaign, stations=None, lvl='l1b', collection=None, freq="1s", drop_vars=None):
    """
    Read PyrNet data (processed with pyrnet package) from the TROPOS thredds server. Returns one xarray Dataset merged to match the dates and stations input.
    Parameters
    ----------
    dates: list, ndarray, or scalar of type float, datetime or datetime64
        A representation of time. If float, interpreted as Julian date.
    campaign: str
        Campaign identifier.
    stations: list, ndarray, or scalar of type int or None
        PyrNet station numbers. If None, read all stations available.
    lvl: str
        Data processing level -> 'l1a', 'l1b'. The default is 'l1b'.
    collection: int or None
        Collection number. If None, the latest available collection is looked up. The default is None.
    freq: str
        Pandas date frequencey description string. The default is '1s'.
    drop_vars: list of string or None
        List of variables to drop from datasets to speed up merging process.

    Returns
    -------
    xarray.Dataset
        Merged Dataset including all dates and stations specified by the input.
    """

    if not isinstance(dates, Iterable):
        dates = [dates]

    if lvl=='l1a':
        timevar = 'gpstime'
    elif lvl=='l1b':
        timevar = 'time'
    else:
        raise ValueError(f"lvl {lvl} not implemented.")

    fnames = []
    for date in dates:
        fnames.extend(
            lookup_fnames(
                date=date,
                station=stations,
                lvl=lvl,
                campaign=campaign,
                collection=collection
            )
        )
    urls = np.unique(fnames)

    if len(urls)==0:
        return None

    stations = np.arange(1,101)
    for i,url in enumerate(urls):
        # read from thredds server
        dst = xr.open_dataset(url)

        # drop not needed variables
        if drop_vars is not None:
            dst = dst.drop_vars(drop_vars)

        # unify time and station dimension to speed up merging
        date = dst[timevar].values[0].astype("datetime64[D]")
        timeidx = pd.date_range(date, date + np.timedelta64(1, 'D'), freq=freq, inclusive='left')
        dst = dst.reindex({timevar: timeidx}, method='nearest',tolerance=np.timedelta64(1,'ms'))
        dst = dst.reindex({"station": stations})

        # add gti for single stations
        if "gti" not in dst:
            dst = dst.assign({
                "gti": (dst.ghi.dims, np.full(dst.ghi.values.shape,np.nan)),
                "qc_flag_gti": (dst.qc_flag_ghi.dims, np.full(dst.qc_flag_ghi.values.shape,0)),
                "maintenance_flag_gti": (dst.maintenance_flag_ghi.dims, np.full(dst.maintenance_flag_ghi.values.shape,0))
            })

        # merge
        if i == 0:
            ds = dst.copy()
        else:
            overwrite_vars = [v for v in dst if timevar not in dst[v].dims]
            # ds = ds.concat(dst,dim='time',
            #                data_vars='minimal',
            #                coords='minimal',
            #                overwrite_vars=overwrite_vars)
            ds = ds.merge(dst,
                          compat='no_conflicts',
                          overwrite_vars=overwrite_vars)
            dst.close()
    ds = ds.dropna(dim="station",how='all')
    return ds
# ds = xr.open_dataset(urls[0])
# for url in urls[1:]:
#     dst = xr.open_dataset(url)
#     st = dst.station.values[0]
#     stime = dst.time
#     if st not in ds.station.values:
#         ds = xr.concat((ds,dst), dim="station", coords="minimal")
#     elif dst.time.values[0] not in ds.time.values:
#         ds = xr.concat((ds,dst), dim="time", coords="minimal")
#     elif dst.time.values[0] not in ds.sel(station=st).dropna("time").time.values:
#         ds = xr.merge((ds,dst))
#     else:
#         continue
# ds

2.1.3. Data processed during HDCP2

During HDCP2 the raw Pyrnet data was processed with several IDL scripts manually. It is a legacy dataset, which can be accessed under the old directory on the Thredds server, as follows:

#|export
def read_hdcp2( dt, fill_gaps=True, campaign='hope_juelich'):
    """
    Read HDCP2-formatted datafiles from the pyranometer network

    Parameters
    ----------
    dt: datetime.date
        The date of the data to read
    fill_gaps: bool
        A flag indicating whether gaps should be filled by interpolation
    campaign: str
        specify campaign ['eifel','hope_juelich','hope_melpitz','lindenberg','melcol']

    Returns
    -------
    dataset : xarray.Dataset
        The pyranometer network observations
    """
    # load dataset
    fname = DATA_URL + "old/nc/"+ FNAME_FMT_HDCP2
    fname = fname.format(dt=dt,
                         campaign=campaign,
                         campaign_pfx=campaign_pfx[campaign])
    ds = xr.open_dataset(fname, mask_and_scale=False)

    # select good stations
    igood = (np.sum(ds.rsds.data<-900.0,axis=0)<MAX_MISSING)&(np.sum(ds.rsds_flag==1,axis=0)>MIN_GOOD)
    ds = ds.isel(nstations=igood)

    # fill gaps if requested
    if fill_gaps==True:
        x = (ds.time-ds.time[0])/np.timedelta64(1,'s')
        for i in np.arange(ds.dims['nstations']):
            y = ds.rsds.data[:,i]
            m = y>-990.0
            if not np.all(m):
                f = interp1d(x[m],y[m],'linear',bounds_error=False,fill_value='extrapolate')
                ds.rsds[~m,i]=f(x[~m])
    # add additional DataArrays
    jd = (ds.time.data-np.datetime64(sp.EPOCH_JD2000_0))/np.timedelta64(1,'D')
    ds['esd'] = sp.earth_sun_distance(jd[0]+0.5)
    szen = sp.sun_angles(jd[:,None],ds.lat.data[None,:],ds.lon.data[None,:])[0]
    ds['szen']    = xr.DataArray(szen,dims=('time','nstations'),coords={'time':ds.time.data})
    ds['mu0']     = np.cos(np.deg2rad(ds.szen))
    ds['gtrans']  = ds.rsds/ds.esd**2/SOLCONST/ds['mu0']
    return ds.rename({'rsds_flag':'qaflag','rsds':'ghi'})
#|export
# read pyrnet data and add coordinates
def read_pyrnet(date, campaign):
    """ Read pyrnet data and add coordinates
    """
    pyr = read_hdcp2(date, campaign=campaign)
    x,y = pyrutils.get_xy_coords(pyr.lon,pyr.lat)
    pyr['x'] = xr.DataArray(x,dims=('nstations'))
    pyr['y'] = xr.DataArray(y,dims=('nstations'))
    return pyr
#|dropout
# date = dt.datetime(2013,7,15)
# pyr  = read_pyrnet(date, 'hope_juelich')
# pyr
# plot pyranometer locations
# tstart = int(10.5*3600)
# tstop  = tstart+2*3600
# tslice = slice(tstart,tstop)
# 
# plt.figure()
# p = plt.plot(pyr.x.data,pyr.y.data,'+')
# tstart = int(13.0*3600)
# tstop  = tstart+2*3600
# tslice = slice(tstart,tstop)
# 
# tlim = [
#     mdates.date2num(pyr.time.data[tslice][0]),
#     mdates.date2num(pyr.time.data[tslice][-1])
# ]
# 
# fig,ax = plt.subplots(1,1)
# 
# i_sy = np.argsort(-pyr.y.data)
# im1 = ax.imshow(pyr.gtrans[tslice,i_sy].T, extent=[tlim[0],tlim[1], 1, 1+len(i_sy)], aspect="auto", cmap="gray_r", vmin=0.0, vmax=0.8)
# _ = ax.set_ylabel("# Station")
# 
# 
# date_fmt = mdates.DateFormatter('%H:%M')
# xticks = ax.get_xticks()
# ax.xaxis_date()
# ax.xaxis.set_major_formatter(date_fmt)
# _ = ax.set_xlabel("Time [UTC]")
# fig.colorbar(im1, ax=ax)
# fig.show()

2.2. Read Calibration Files

Calibration factors are collected in share/pyrnet_calibration.json. The following function looks up the nearest calibration in time and fill missing values with earlier calibrations

# pyrnet_calibration.json structure
calib = {
    "2017-04":{
        "001":[7.1,7.2],
        "002":[7.51,7.61],
        "003":[6.9,6.91],
        "CC": [-2,1,-2,2] # cosine correction coefficients
    },
    "2019-04": {
        "001":[7,None],
        "002":[7.5,7.6]
    }
}

date1 = np.datetime64("2019-05-01") # closer to "2019-04" calibration
date2 = np.datetime64("2018-01-01") # closer to "2017-04" calibration

# parse calibration dates
cdates = pd.to_datetime(list(calib.keys())).values

# sort calib keys beginning with nearest
# skeys1 = np.array(list(calib.keys()))[np.argsort(np.abs(date1 - cdates))][::-1]
# skeys2 = np.array(list(calib.keys()))[np.argsort(np.abs(date2 - cdates))][::-1]

# lookup recents
isort = np.argsort(cdates)
skeys1 = np.array(list(calib.keys()))[isort][:np.sum(date1>cdates)]
skeys2 = np.array(list(calib.keys()))[isort][:np.sum(date2>cdates)]

print("Order of calibration lookup")
print(date1, '->' ,skeys1)
print(date2, '->' ,skeys2)
Order of calibration lookup
2019-05-01 -> ['2017-04' '2019-04']
2018-01-01 -> ['2017-04']

Lookup calibration, update with the most recent calibration but fill with earlier calibration if necessary.

# example for date1
for i, key in enumerate(skeys1):
    if i==0:
        c = calib[key].copy()
        continue

    isNone = lambda x: np.any([xi is None for xi in x])
    isNotNone = lambda x: np.all([xi is not None for xi in x])
    # update with newer calibrations which not include None values
    c.update(valfilter(isNotNone, calib[key]))

    # update only not None values
    for k,v in valfilter(isNone, calib[key]).items():
        newv = [c[k][i] if vi is None else vi for i,vi in enumerate(v)]
        c.update({k:newv})
c
{'001': [7, 7.2], '002': [7.5, 7.6], '003': [6.9, 6.91], 'CC': [-2, 1, -2, 2]}
# example for date2
for i, key in enumerate(skeys2[::-1]):
    if i==0:
        c = calib[key].copy()
        continue

    isNone = lambda x: np.any([xi is None for xi in x])
    isNotNone = lambda x: np.all([xi is not None for xi in x])
    # update with newer calibrations which not include None values
    c.update(valfilter(isNotNone, calib[key]))

    # update only not None values
    for k,v in valfilter(isNone, calib[key]).items():
        newv = [c[k][i] if vi is None else vi for i,vi in enumerate(v)]
        c.update({k:newv})
c
{'001': [7.1, 7.2],
 '002': [7.51, 7.61],
 '003': [6.9, 6.91],
 'CC': [-2, 1, -2, 2]}
calib
{'2017-04': {'001': [7.1, 7.2],
  '002': [7.51, 7.61],
  '003': [6.9, 6.91],
  'CC': [-2, 1, -2, 2]},
 '2019-04': {'001': [7, None], '002': [7.5, 7.6]}}
#|export
def read_calibration(cfile:str, cdate):
    """
    Parse calibration json file

    Parameters
    ----------
    cfile: str
        Path of the calibration.json
    cdate: list, ndarray, or scalar of type float, datetime or datetime64
        A representation of time. If float, interpreted as Julian date.
    Returns
    -------
    dict
        Calibration dictionary sorted by box number.
    """
    cdate = pyrutils.to_datetime64(cdate)
    calib = pyrutils.read_json(cfile)
    # parse calibration dates
    cdates = pd.to_datetime(list(calib.keys()), yearfirst=True).values

    # sort calib keys beginning with nearest
    # skeys = np.array(list(calib.keys()))[np.argsort(np.abs(cdate - cdates))][::-1]
    # lookup most recent key
    isort = np.argsort(cdates)
    skeys = np.array(list(calib.keys()))[isort][:np.sum(cdate>cdates)]
    # lookup calibration factors
    for i, key in enumerate(skeys):
        if i==0:
            c = calib[key].copy()
            continue
        isNone = lambda x: np.any([xi is None for xi in x])
        isNotNone = lambda x: np.all([xi is not None for xi in x])
        # update with newer calibrations which not include None values
        c.update(valfilter(isNotNone, calib[key]))
        # update only not None values
        for k,v in valfilter(isNone, calib[key]).items():
            newv = [c[k][i] if vi is None else vi for i,vi in enumerate(v)]
            c.update({k:newv})
    return c

Example using the package default pyrnet_calibration.json:

#|dropout
fn = os.path.join(importlib.resources.files("pyrnet"), "share/pyrnet_calibration.json")
read_calibration(fn,cdate=np.datetime64("2018-09-10"))

Hide code cell output

{'001': [7.73, 6.98],
 '002': [7.41, None],
 '003': [7.51, None],
 '004': [7.62, 8.23],
 '005': [7.48, 6.37],
 '006': [6.71, None],
 '007': [7.52, 7.2],
 '008': [7.58, None],
 '009': [7.59, 6.85],
 '010': [7.6, 6.74],
 '011': [7.32, None],
 '012': [7.6, 6.87],
 '013': [7.16, None],
 '014': [7.71, 7.3],
 '015': [7.59, 6.77],
 '016': [7.62, None],
 '017': [7.28, None],
 '018': [7.57, 6.9],
 '019': [7.57, None],
 '020': [7.32, None],
 '021': [7.4, None],
 '022': [7.43, None],
 '023': [7.46, None],
 '024': [7.73, 6.59],
 '025': [7.46, 7.14],
 '026': [7.64, 7.05],
 '027': [None, None],
 '028': [7.21, None],
 '029': [7.61, None],
 '030': [7.3, None],
 '031': [7.23, None],
 '032': [7.7, 6.94],
 '033': [7.74, 6.89],
 '034': [6.8, None],
 '035': [7.62, 7.1],
 '036': [None, None],
 '037': [7.72, 6.73],
 '038': [7.62, 6.93],
 '039': [7.27, None],
 '040': [7.15, None],
 '041': [7.38, None],
 '042': [7.15, None],
 '043': [7.41, 7.2],
 '044': [7.26, 7.05],
 '045': [7.52, None],
 '046': [7.7, 7.13],
 '047': [7.47, 7.4],
 '048': [7.1, None],
 '049': [7.74, 7.32],
 '050': [7.51, None],
 '051': [7.67, None],
 '052': [7.46, None],
 '053': [7.63, 6.48],
 '054': [7.58, 7.22],
 '055': [7.41, 6.51],
 '056': [7, None],
 '057': [6.95, None],
 '058': [7.62, 6.9],
 '059': [7.07, None],
 '060': [7.64, 6.62],
 '061': [7.53, None],
 '062': [7.59, None],
 '063': [7.63, None],
 '064': [7.38, None],
 '065': [7.47, None],
 '066': [7.48, None],
 '067': [7.46, None],
 '068': [7.01, None],
 '069': [7.33, None],
 '070': [6.88, None],
 '071': [7.6, None],
 '072': [7.58, None],
 '073': [7.41, None],
 '074': [7.61, None],
 '075': [6.91, None],
 '076': [7.63, None],
 '077': [7.51, None],
 '078': [6.93, None],
 '079': [7.62, None],
 '080': [7.3, None],
 '081': [7.64, None],
 '082': [None, None],
 '083': [7.44, None],
 '084': [7.46, None],
 '085': [7.67, None],
 '086': [7.45, 6.69],
 '087': [7.42, 7.11],
 '088': [7, None],
 '089': [7.59, None],
 '090': [7.64, None],
 '091': [7.47, None],
 '092': [7.35, None],
 '093': [7.51, None],
 '094': [6.95, None],
 '095': [7.47, None],
 '096': [7.65, None],
 '097': [6.9, None],
 '098': [None, None],
 '099': [7.51, None],
 '100': [7.32, None],
 'CC': [1.45, -3.04, 5.59, -3.01]}

2.3. Read Box Serial numbers

Similar to reading the calibration, pyranometers attached to each box are stored in .json format. Reassigning of pyranometers to certain boxes might happen. Different to the calibration we will look up the most recent entry (not in the future)

# pyrnet_calibration.json structure
pyrnetmap = {
    "2019-04": {
        "001":["S11",None],
        "002":["S21","S33"]
    },
    "2017-04":{
        "001":["S11","S21"],
        "002":["S21","S22"],
        "003":["S31",None]
    },
}

date1 = np.datetime64("2019-03-01") # before "2019-04"
date2 = np.datetime64("2019-05-01") # after "2019-04"

# parse key dates
# require sort for lookup later
cdates = pd.to_datetime(list(pyrnetmap.keys())).values
isort = np.argsort(cdates)

# lookup most recent key
skeys1 = np.array(list(pyrnetmap.keys()))[isort][:np.sum(date1>cdates)]
skeys2 = np.array(list(pyrnetmap.keys()))[isort][:np.sum(date2>cdates)]
print("Order of serial lookup")
print(date1, '->' ,skeys1)
print(date2, '->' ,skeys2)

print("Serials map:")

print(date1, '->', merge([pyrnetmap[key] for key in skeys1]))
print(date2, '->', merge([pyrnetmap[key] for key in skeys2]))
Order of serial lookup
2019-03-01 -> ['2017-04']
2019-05-01 -> ['2017-04' '2019-04']
Serials map:
2019-03-01 -> {'001': ['S11', 'S21'], '002': ['S21', 'S22'], '003': ['S31', None]}
2019-05-01 -> {'001': ['S11', None], '002': ['S21', 'S33'], '003': ['S31', None]}

Now we only have to look it up

#|export
def get_pyrnet_mapping(fn:str, date):
    """
    Parse box - serial number mapping  json file

    Parameters
    ----------
    fn: str
        Path of the mapping.json
    date: list, ndarray, or scalar of type float, datetime or datetime64
        A representation of time. If float, interpreted as Julian date.
    Returns
    -------
    dict
        Calibration dictionary sorted by box number.
    """
    date = pyrutils.to_datetime64(date)
    pyrnetmap = pyrutils.read_json(fn)
    # parse key dates
    # require sort for lookup later
    cdates = pd.to_datetime(list(pyrnetmap.keys()), yearfirst=True).values
    isort = np.argsort(cdates)

    # lookup most recent key
    skeys = np.array(list(pyrnetmap.keys()))[isort][:np.sum(date>cdates)]

    # merge and update with the most recent map
    return  merge([pyrnetmap[key] for key in skeys])

2.4. Lookup Serial, Boxnumber, calibration at certain date

Utility to lookup box metadata for a certain date.

#|export
def meta_lookup(date,*,serial=None,box=None,cfile=None, mapfile=None):
    if cfile is None:
        cfile = os.path.join(importlib.resources.files("pyrnet"), "share/pyrnet_calibration.json")
    if mapfile is None:
        mapfile = os.path.join(importlib.resources.files("pyrnet"), "share/pyrnet_station_map.json")

    map = get_pyrnet_mapping(mapfile,date)
    calib = read_calibration(cfile,date)
    
    CC = calib["CC"] if "CC" in calib else [1]

    if serial is None and box is not None:
        box=int(box)
        return f"{box:03d}", map[f"{box:03d}"], calib[f"{box:03d}"], CC
    elif serial is not None and box is None:
        res = valfilter(lambda x: serial in x, map)
        box = list(res.keys())[0]
        serial = res[box]
        return box,serial,calib[box], CC
    else:
        raise ValueError("At least one of [station,box] have to be specified.")
date = np.datetime64("2018-10-01")
print(meta_lookup(date,serial="S12078.061"))
print(meta_lookup(date,box=10))
('006', ['S12078.061', ''], [6.71, None], [1.45, -3.04, 5.59, -3.01])
('010', ['S12128.010', 'S12137.019'], [7.6, 6.74], [1.45, -3.04, 5.59, -3.01])