2. PyrNet high level data¶
In the following high-level functions to read and examine PyrNet data are collected.
#|export
from collections.abc import Iterable
from xml.dom import minidom
from urllib.request import urlopen
import parse
import os
import numpy as np
import pandas as pd
import xarray as xr
from scipy.interpolate import interp1d
from toolz import valfilter, cons, merge, merge_with
#import pkg_resources as pkg_res
import importlib.resources
import warnings
# python -m pip install git+https://github.com/hdeneke/trosat-base.git#egg=trosat-base
from trosat import sunpos as sp
from pyrnet import utils as pyrutils
# extra imports for demonstration
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime as dt
2.1. Load Data from Thredds-Server¶
Acquire processed data from server
#|export
# campaign file name map for hdcp2 data
campaign_pfx = {
'eifel': 'hope',
'hope_juelich': 'hope',
'hope_melpitz': 'hopm',
'lindenberg': 'ioprao',
'melcol': 'mcol',
}
# TROPOS thredds urls templates
DATA_URL = "https://tds.tropos.de/thredds/dodsC/scccJher/{dt:%Y}_{campaign}/"
FNAME_FMT_HDCP2 = '{campaign_pfx}_trop_pyrnet00_l1_rsds_v00_{dt:%Y%m%d}000000.nc'
# configuration constants
SOLCONST = 1359.0 # Solar constant in Wm-2
MAX_MISSING = 1000 # Maximum allowed number of missing records
MIN_GOOD = 85400 # Minimum allowed number of good records
2.1.1. Thredds lookups¶
#|dropout
# date = dt.date(2019,8,7)
# campaign='metpvnet'
# lvl='l1b'
#
# # construct catalog url
# url = DATA_URL.format(dt=pd.to_datetime(date),campaign=campaign)
# url = url.replace("dodsC","catalog")
# url += f"{lvl}/catalog.xml"
#
# tfiles = get_elements(url)
# tfiles
#|dropout
fn = os.path.join(importlib.resources.files("pyrnet"), "share/pyrnet_config.json")
pyrcfg = pyrutils.read_json(fn)
# catalog = parse_thredds_catalog(url,pyrcfg["output_l1b"])
# catalog.query('station==10')
# catalog.query('dt=="2019-07-05"')
#|dropout
lvl='l1a'
station=[86,87,88]
date = dt.datetime(2019,7,31)
campaign = 'metpvnet'
collection= None
# lookup_fnames(date,station=station,lvl=lvl,campaign=campaign,collection=collection)
2.1.1.1. Example Reading multiple days and stations from Thredds¶
2.1.2. Data processed by the pyrnet package¶
Data (re-)processed by the pyrnet package is available at the TROPOS thredds server. The following function grant easy access.
Filename lookup on thredds server:
# ds = xr.open_dataset(urls[0])
# for url in urls[1:]:
# dst = xr.open_dataset(url)
# st = dst.station.values[0]
# stime = dst.time
# if st not in ds.station.values:
# ds = xr.concat((ds,dst), dim="station", coords="minimal")
# elif dst.time.values[0] not in ds.time.values:
# ds = xr.concat((ds,dst), dim="time", coords="minimal")
# elif dst.time.values[0] not in ds.sel(station=st).dropna("time").time.values:
# ds = xr.merge((ds,dst))
# else:
# continue
# ds
2.1.3. Data processed during HDCP2¶
During HDCP2 the raw Pyrnet data was processed with several IDL scripts manually. It is a legacy dataset, which can be accessed under the old directory on the Thredds server, as follows:
#|export
def read_hdcp2( dt, fill_gaps=True, campaign='hope_juelich'):
"""
Read HDCP2-formatted datafiles from the pyranometer network
Parameters
----------
dt: datetime.date
The date of the data to read
fill_gaps: bool
A flag indicating whether gaps should be filled by interpolation
campaign: str
specify campaign ['eifel','hope_juelich','hope_melpitz','lindenberg','melcol']
Returns
-------
dataset : xarray.Dataset
The pyranometer network observations
"""
# load dataset
fname = DATA_URL + "old/nc/"+ FNAME_FMT_HDCP2
fname = fname.format(dt=dt,
campaign=campaign,
campaign_pfx=campaign_pfx[campaign])
ds = xr.open_dataset(fname, mask_and_scale=False)
# select good stations
igood = (np.sum(ds.rsds.data<-900.0,axis=0)<MAX_MISSING)&(np.sum(ds.rsds_flag==1,axis=0)>MIN_GOOD)
ds = ds.isel(nstations=igood)
# fill gaps if requested
if fill_gaps==True:
x = (ds.time-ds.time[0])/np.timedelta64(1,'s')
for i in np.arange(ds.dims['nstations']):
y = ds.rsds.data[:,i]
m = y>-990.0
if not np.all(m):
f = interp1d(x[m],y[m],'linear',bounds_error=False,fill_value='extrapolate')
ds.rsds[~m,i]=f(x[~m])
# add additional DataArrays
jd = (ds.time.data-np.datetime64(sp.EPOCH_JD2000_0))/np.timedelta64(1,'D')
ds['esd'] = sp.earth_sun_distance(jd[0]+0.5)
szen = sp.sun_angles(jd[:,None],ds.lat.data[None,:],ds.lon.data[None,:])[0]
ds['szen'] = xr.DataArray(szen,dims=('time','nstations'),coords={'time':ds.time.data})
ds['mu0'] = np.cos(np.deg2rad(ds.szen))
ds['gtrans'] = ds.rsds/ds.esd**2/SOLCONST/ds['mu0']
return ds.rename({'rsds_flag':'qaflag','rsds':'ghi'})
#|export
# read pyrnet data and add coordinates
def read_pyrnet(date, campaign):
""" Read pyrnet data and add coordinates
"""
pyr = read_hdcp2(date, campaign=campaign)
x,y = pyrutils.get_xy_coords(pyr.lon,pyr.lat)
pyr['x'] = xr.DataArray(x,dims=('nstations'))
pyr['y'] = xr.DataArray(y,dims=('nstations'))
return pyr
#|dropout
# date = dt.datetime(2013,7,15)
# pyr = read_pyrnet(date, 'hope_juelich')
# pyr
# plot pyranometer locations
# tstart = int(10.5*3600)
# tstop = tstart+2*3600
# tslice = slice(tstart,tstop)
#
# plt.figure()
# p = plt.plot(pyr.x.data,pyr.y.data,'+')
# tstart = int(13.0*3600)
# tstop = tstart+2*3600
# tslice = slice(tstart,tstop)
#
# tlim = [
# mdates.date2num(pyr.time.data[tslice][0]),
# mdates.date2num(pyr.time.data[tslice][-1])
# ]
#
# fig,ax = plt.subplots(1,1)
#
# i_sy = np.argsort(-pyr.y.data)
# im1 = ax.imshow(pyr.gtrans[tslice,i_sy].T, extent=[tlim[0],tlim[1], 1, 1+len(i_sy)], aspect="auto", cmap="gray_r", vmin=0.0, vmax=0.8)
# _ = ax.set_ylabel("# Station")
#
#
# date_fmt = mdates.DateFormatter('%H:%M')
# xticks = ax.get_xticks()
# ax.xaxis_date()
# ax.xaxis.set_major_formatter(date_fmt)
# _ = ax.set_xlabel("Time [UTC]")
# fig.colorbar(im1, ax=ax)
# fig.show()
2.2. Read Calibration Files¶
Calibration factors are collected in share/pyrnet_calibration.json. The following function looks up the nearest calibration in time and fill missing values with earlier calibrations
# pyrnet_calibration.json structure
calib = {
"2017-04":{
"001":[7.1,7.2],
"002":[7.51,7.61],
"003":[6.9,6.91],
"CC": [-2,1,-2,2] # cosine correction coefficients
},
"2019-04": {
"001":[7,None],
"002":[7.5,7.6]
}
}
date1 = np.datetime64("2019-05-01") # closer to "2019-04" calibration
date2 = np.datetime64("2018-01-01") # closer to "2017-04" calibration
# parse calibration dates
cdates = pd.to_datetime(list(calib.keys())).values
# sort calib keys beginning with nearest
# skeys1 = np.array(list(calib.keys()))[np.argsort(np.abs(date1 - cdates))][::-1]
# skeys2 = np.array(list(calib.keys()))[np.argsort(np.abs(date2 - cdates))][::-1]
# lookup recents
isort = np.argsort(cdates)
skeys1 = np.array(list(calib.keys()))[isort][:np.sum(date1>cdates)]
skeys2 = np.array(list(calib.keys()))[isort][:np.sum(date2>cdates)]
print("Order of calibration lookup")
print(date1, '->' ,skeys1)
print(date2, '->' ,skeys2)
Order of calibration lookup
2019-05-01 -> ['2017-04' '2019-04']
2018-01-01 -> ['2017-04']
Lookup calibration, update with the most recent calibration but fill with earlier calibration if necessary.
# example for date1
for i, key in enumerate(skeys1):
if i==0:
c = calib[key].copy()
continue
isNone = lambda x: np.any([xi is None for xi in x])
isNotNone = lambda x: np.all([xi is not None for xi in x])
# update with newer calibrations which not include None values
c.update(valfilter(isNotNone, calib[key]))
# update only not None values
for k,v in valfilter(isNone, calib[key]).items():
newv = [c[k][i] if vi is None else vi for i,vi in enumerate(v)]
c.update({k:newv})
c
{'001': [7, 7.2], '002': [7.5, 7.6], '003': [6.9, 6.91], 'CC': [-2, 1, -2, 2]}
# example for date2
for i, key in enumerate(skeys2[::-1]):
if i==0:
c = calib[key].copy()
continue
isNone = lambda x: np.any([xi is None for xi in x])
isNotNone = lambda x: np.all([xi is not None for xi in x])
# update with newer calibrations which not include None values
c.update(valfilter(isNotNone, calib[key]))
# update only not None values
for k,v in valfilter(isNone, calib[key]).items():
newv = [c[k][i] if vi is None else vi for i,vi in enumerate(v)]
c.update({k:newv})
c
{'001': [7.1, 7.2],
'002': [7.51, 7.61],
'003': [6.9, 6.91],
'CC': [-2, 1, -2, 2]}
calib
{'2017-04': {'001': [7.1, 7.2],
'002': [7.51, 7.61],
'003': [6.9, 6.91],
'CC': [-2, 1, -2, 2]},
'2019-04': {'001': [7, None], '002': [7.5, 7.6]}}
#|export
def read_calibration(cfile:str, cdate):
"""
Parse calibration json file
Parameters
----------
cfile: str
Path of the calibration.json
cdate: list, ndarray, or scalar of type float, datetime or datetime64
A representation of time. If float, interpreted as Julian date.
Returns
-------
dict
Calibration dictionary sorted by box number.
"""
cdate = pyrutils.to_datetime64(cdate)
calib = pyrutils.read_json(cfile)
# parse calibration dates
cdates = pd.to_datetime(list(calib.keys()), yearfirst=True).values
# sort calib keys beginning with nearest
# skeys = np.array(list(calib.keys()))[np.argsort(np.abs(cdate - cdates))][::-1]
# lookup most recent key
isort = np.argsort(cdates)
skeys = np.array(list(calib.keys()))[isort][:np.sum(cdate>cdates)]
# lookup calibration factors
for i, key in enumerate(skeys):
if i==0:
c = calib[key].copy()
continue
isNone = lambda x: np.any([xi is None for xi in x])
isNotNone = lambda x: np.all([xi is not None for xi in x])
# update with newer calibrations which not include None values
c.update(valfilter(isNotNone, calib[key]))
# update only not None values
for k,v in valfilter(isNone, calib[key]).items():
newv = [c[k][i] if vi is None else vi for i,vi in enumerate(v)]
c.update({k:newv})
return c
Example using the package default pyrnet_calibration.json:
#|dropout
fn = os.path.join(importlib.resources.files("pyrnet"), "share/pyrnet_calibration.json")
read_calibration(fn,cdate=np.datetime64("2018-09-10"))
2.3. Read Box Serial numbers¶
Similar to reading the calibration, pyranometers attached to each box are stored in .json format. Reassigning of pyranometers to certain boxes might happen. Different to the calibration we will look up the most recent entry (not in the future)
# pyrnet_calibration.json structure
pyrnetmap = {
"2019-04": {
"001":["S11",None],
"002":["S21","S33"]
},
"2017-04":{
"001":["S11","S21"],
"002":["S21","S22"],
"003":["S31",None]
},
}
date1 = np.datetime64("2019-03-01") # before "2019-04"
date2 = np.datetime64("2019-05-01") # after "2019-04"
# parse key dates
# require sort for lookup later
cdates = pd.to_datetime(list(pyrnetmap.keys())).values
isort = np.argsort(cdates)
# lookup most recent key
skeys1 = np.array(list(pyrnetmap.keys()))[isort][:np.sum(date1>cdates)]
skeys2 = np.array(list(pyrnetmap.keys()))[isort][:np.sum(date2>cdates)]
print("Order of serial lookup")
print(date1, '->' ,skeys1)
print(date2, '->' ,skeys2)
print("Serials map:")
print(date1, '->', merge([pyrnetmap[key] for key in skeys1]))
print(date2, '->', merge([pyrnetmap[key] for key in skeys2]))
Order of serial lookup
2019-03-01 -> ['2017-04']
2019-05-01 -> ['2017-04' '2019-04']
Serials map:
2019-03-01 -> {'001': ['S11', 'S21'], '002': ['S21', 'S22'], '003': ['S31', None]}
2019-05-01 -> {'001': ['S11', None], '002': ['S21', 'S33'], '003': ['S31', None]}
Now we only have to look it up
#|export
def get_pyrnet_mapping(fn:str, date):
"""
Parse box - serial number mapping json file
Parameters
----------
fn: str
Path of the mapping.json
date: list, ndarray, or scalar of type float, datetime or datetime64
A representation of time. If float, interpreted as Julian date.
Returns
-------
dict
Calibration dictionary sorted by box number.
"""
date = pyrutils.to_datetime64(date)
pyrnetmap = pyrutils.read_json(fn)
# parse key dates
# require sort for lookup later
cdates = pd.to_datetime(list(pyrnetmap.keys()), yearfirst=True).values
isort = np.argsort(cdates)
# lookup most recent key
skeys = np.array(list(pyrnetmap.keys()))[isort][:np.sum(date>cdates)]
# merge and update with the most recent map
return merge([pyrnetmap[key] for key in skeys])
2.4. Lookup Serial, Boxnumber, calibration at certain date¶
Utility to lookup box metadata for a certain date.
#|export
def meta_lookup(date,*,serial=None,box=None,cfile=None, mapfile=None):
if cfile is None:
cfile = os.path.join(importlib.resources.files("pyrnet"), "share/pyrnet_calibration.json")
if mapfile is None:
mapfile = os.path.join(importlib.resources.files("pyrnet"), "share/pyrnet_station_map.json")
map = get_pyrnet_mapping(mapfile,date)
calib = read_calibration(cfile,date)
CC = calib["CC"] if "CC" in calib else [1]
if serial is None and box is not None:
box=int(box)
return f"{box:03d}", map[f"{box:03d}"], calib[f"{box:03d}"], CC
elif serial is not None and box is None:
res = valfilter(lambda x: serial in x, map)
box = list(res.keys())[0]
serial = res[box]
return box,serial,calib[box], CC
else:
raise ValueError("At least one of [station,box] have to be specified.")
date = np.datetime64("2018-10-01")
print(meta_lookup(date,serial="S12078.061"))
print(meta_lookup(date,box=10))
('006', ['S12078.061', ''], [6.71, None], [1.45, -3.04, 5.59, -3.01])
('010', ['S12128.010', 'S12137.019'], [7.6, 6.74], [1.45, -3.04, 5.59, -3.01])