6. Maintenance Reports

Parse Maintenance reports from LimeSurvey and legacy spreadsheet formats.

#|export
import pandas as pd
from pandas._typing import (
    FilePath,
    ReadCsvBuffer,
)
import datetime as dt
import numpy as np
from toolz import assoc_in

from pyrnet import utils

6.1. Survey Export

In the following, the functions are designed to work with the survey response export in the .csv format:

  • Field separator: “Semicolon”

  • Responses: “Answer codes”

  • Headings: “Question code”

The responses can be exported manually from the website …

fn = "../../example_data/results-survey224783.csv"

df = pd.read_csv(fn, sep=';')
df = df.fillna("None")
df
id submitdate lastpage startlanguage seed startdate datestamp Q00 Q01 MainQ01 ... interviewtime groupTime57 Q00Time Q01Time groupTime59 MainQ01Time MainQ02Time groupTime58 ExtraQ01Time ExtraQ02Time
0 1 2023-05-01 16:09:06 1 en 632878730 2023-05-01 16:08:46 2023-05-01 16:09:06 1 222 AO02 ... 19.89 None None None None None None None None None
1 2 2023-05-08 15:13:33 1 en 1428982518 2023-05-08 15:02:53 2023-05-08 15:13:33 1 this is a test AO01 ... 837.45 None None None None None None None None None
2 3 2023-05-08 16:08:20 1 en 852861659 2023-05-08 16:08:09 2023-05-08 16:08:20 2 222 AO03 ... 11.13 None None None None None None None None None
3 4 2023-05-08 16:09:06 1 en 632878730 2023-05-08 16:08:46 2023-05-08 16:09:06 1 222 AO02 ... 19.89 None None None None None None None None None
4 5 2023-05-18 16:09:06 1 en 632878730 2023-05-18 16:08:46 2023-05-18 16:09:06 1 222 AO02 ... 19.89 None None None None None None None None None

5 rows × 27 columns

… or via the limepy python package.

# import limepy
# import getpass
# from io import StringIO
# 
# pwd = getpass.getpass("LimeSurvey BeRichter Password: ")
# if pwd != '':
#     url = "https://lgs-car.limesurvey.net/admin/remotecontrol"
#     csv = limepy.download.get_responses(
#         base_url=url,
#         user_name="BeRichter",
#         password=pwd,
#         user_id=1,
#         sid=224783
#     )
#     df = pd.read_csv(StringIO(csv), sep=';')
#     df = df[~df.submitdate.isnull()].reset_index()
#     df = df.fillna("None")
# 
# else:
#     print("No password, no data.")
#|dropout
# df
#|export
def get_responses(
        *,
        fn: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str]|None = None,
        online: dict|None = None
) -> pd.DataFrame:
    """
    Get LimeSurvey responses as pandas Dataframe providing a file or online download information.

    Parameters
    ----------
    fn: str, path object or file-like object
        Any pandas readable representation of the LimeSurvey response export file
        (.csv, sep=;, answer and question codes).
    online: dict
        Dictionary of information required to download the responses via limepy:
            * base_url -> limesurvey remote_control url
            * user_name -> account name
            * password
            * user_id -> ID of account user (usually 1)
            * sid -> Survey ID

        Minimal information stored in *online* is the base_url, other information will then be filled via user input promt.

    Returns
    -------
    pd.Dataframe
        parsed responses csv file
    """
    if fn is not None:
        # legacy support:
        if fn.endswith(".xls") or fn.endswith(".xlsx"):
            return parse_legacy_logbook(fn)
        filepath_or_buffer = fn
    elif online is not None:
        import limepy
        import getpass
        from io import StringIO
        if "base_url" not in online:
            raise ValueError
        if "user_name" not in online:
            online.update({'user_name': input("LimeSurvey Account Name: ")})
        if "password" not in online:
            online.update({'password': getpass.getpass("LimeSurvey Password: ")})
        if "user_id" not in online:
            online.update({"user_id": input("LimeSurvey User ID: ")})
        if "sid" not in online:
            online.update({'sid': input("LimeSurvey Survey ID: ")})

        csv = limepy.download.get_responses(**online)
        filepath_or_buffer = StringIO(csv)
    else:
        raise ValueError
    df = pd.read_csv(filepath_or_buffer, sep=';')
    df = df[~df.submitdate.isnull()].reset_index(drop=True)
    df = df.fillna("None")
    return df
# url = "https://lgs-car.limesurvey.net/admin/remotecontrol"
# get_responses(
#     online=dict(
#         base_url=url,
#         user_id=1,
#         sid=224783
#     )
# )

6.1.1. Support legacy xls or xlsx logbook file

Prior to LimeSurvey, the maintenance notes and marks are collected within a spreadsheet file. The following functions add legacy support, transfromig the xls notation into a pandas Dataframe ready to be parsed with

#|export
def read_logbook(lfile):
    '''
    Load logbook file and store it as dictionary of rec arrays with stID keys.

    Parameters
    ----------
    cfile: string
        path and filename of loogbook file -> should be .xls file
        each sheet represent a maintenance periode.
        First row of .xls file have to include column names
        *box*, *date*, *clean*, *clean_tilt*, *level*, *level_tilt*, *Hangle*, *Vangle*, *notes*

    Returns
    -------
    logbook: dict
        dict of recarray for each station ID including quality flags from each maintenance cicle.
    '''
    dtype_log =[
        ('box',         np.uint8),
        ('site',        'U50'),
        ('serial_pyr',  'U50'),
        ('serial_pyr_tilt', 'U50'),
        ('user',          'U50'),
        ('campaign',    'U50'),
        ('date',        'datetime64[ms]' ),
        ('clean',       np.uint8),
        ('clean_tilt',  np.uint8),
        ('level',       np.uint8),
        ('level_tilt',  np.uint8),
        ('Hangle',      'f8'),
        ('Vangle',      'f8'),
        ('notes',       'U50')
    ]
    def _parse_name(name):
        key=name.lower().strip()
        if key in ['date']:
            return 'date'
        elif key in ['clean','cleanliness','clean(pyr1)','clean1']:
            return 'clean'
        elif key in ['clean_tilt','clean2','clean(pyr2)']:
            return 'clean_tilt'
        elif key in ['level','level(pyr1)','level1']:
            return 'level'
        elif key in ['level_tilt','level(pyr2)','level2']:
            return 'level_tilt'
        elif key in ['box','station','id','pyrbox','pyranometerbox']:
            return 'box'
        elif key in ['hangle','azimuth','azi','horizontal_angle']:
            return 'Hangle'
        elif key in ['vangle','zenith','zen','vertical_angle']:
            return 'Vangle'
        elif key in ['notes','note','description']:
            return 'notes'
        elif key in ['serial','serial1','serial_pyr','pyranometerID','pyrID']:
            return 'serial_pyr'
        elif key in ['serial2','serial_pyr2','serial_pyr_tilt']:
            return 'serial_pyr_tilt'
        elif key in ['site','location']:
            return 'site'
        elif key in ['user','author']:
            return 'user'
        elif key in ['campaign']:
            return 'campaign'
        elif key == 'index':
            return False
        else:
            return False
    def _hstack2(arrays):
        return arrays[0].__array_wrap__(np.hstack(arrays))

    logbook={}
    df=pd.read_excel(lfile,sheet_name=None)#,engine='openpyxl')
    for sheet in df.keys():# read all sheets from xls file
        sh = df[sheet].dropna(axis=0,how='all',subset=['date']) #remove empty lines
        sh = sh.dropna(axis=1,how='all') # remove empty columns
        for row in sh.itertuples(index=True,name='Pandas'):
            A=np.zeros(1,dtype=dtype_log).view(np.recarray)
            for name,value in row._asdict().items():
                key=_parse_name(name)
                if key:
                    if dict(dtype_log)[key]==np.uint8 and np.isnan(value):
                        value=9
                    A[key]=value
            if str(A.box[0]) in logbook.keys():
                logbook.update({str(A.box[0]):_hstack2([logbook[str(A.box[0])],A])})
            else:
                logbook.update({str(A.box[0]):A})
    return logbook
#|export
def parse_legacy_logbook(fn):
    df = None
    lb = read_logbook(fn)
    for box in lb:
        lbb = lb[box]
        N = lbb['date'].shape[0]
        faccept = [1,2,3,4]
        dfb = pd.DataFrame(
            {"datestamp": lbb['date'],
             "Q00": int(box),
             "Q01": lbb['notes'],
             "MainQ01[comment]": np.repeat("",N),
             "MainQ02[comment]": np.repeat("",N),
             "ExtraQ01[comment]": np.repeat("",N),
             "ExtraQ02[comment]": np.repeat("",N),
             "MainQ01": [f"AO0{f}" if f in faccept else "None" for f in lbb['clean']],
             "MainQ02": [f"AO0{f}" if f in faccept else "None" for f in lbb['level']],
             "ExtraQ01": [f"AO0{f}" if f in faccept else "None" for f in lbb['clean_tilt']],
             "ExtraQ02": [f"AO0{f}" if f in faccept else "None" for f in lbb['level_tilt']],
             }
        )
        if df is None:
            df = dfb.copy()
        else:
            df = pd.concat((df,dfb),ignore_index=True)
    df = df.fillna("None")
    return df
#|dropout
fn_lb = "../../example_data/legacy_logbook.xls"

lb = read_logbook(fn_lb)
lb

Hide code cell output

{'1': rec.array([(1, 'TROPOS-roof', 'S12128.001', 'S12137.049', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '4': rec.array([(4, 'TROPOS-roof', 'S12128.004', 'S12137.001', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'moisture in horizontal pyranometer'),
            (4, 'TROPOS-roof', 'S12128.004', 'S12137.001', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T13:00:00.000', 2, 2, 2, 2, 180., 0., 'additional note')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '5': rec.array([(5, 'TROPOS-roof', 'S12128.005', 'S12137.047', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '7': rec.array([(7, 'TROPOS-roof', 'S12128.007', 'S12128.031', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '9': rec.array([(9, 'TROPOS-roof', 'S12128.009', 'S12137.012', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '10': rec.array([(10, 'TROPOS-roof', 'S12128.010', 'S12137.019', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '12': rec.array([(12, 'TROPOS-roof', 'S12128.012', 'S12137.050', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '14': rec.array([(14, 'TROPOS-roof', 'S12128.014', 'S12137.024', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '15': rec.array([(15, 'TROPOS-roof', 'S12128.015', 'S12128.040', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '18': rec.array([(18, 'TROPOS-roof', 'S12128.018', 'S12137.045', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '24': rec.array([(24, 'TROPOS-roof', 'S12128.024', 'S12137.044', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '25': rec.array([(25, 'TROPOS-roof', 'S12128.025', 'S12137.046', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '26': rec.array([(26, 'TROPOS-roof', 'S12128.026', 'S12137.040', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '32': rec.array([(32, 'TROPOS-roof', 'S12128.032', 'S12137.042', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '33': rec.array([(33, 'TROPOS-roof', 'S12128.033', 'S12137.048', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '35': rec.array([(35, 'TROPOS-roof', 'S12128.035', 'S12128.042', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '37': rec.array([(37, 'TROPOS-roof', 'S12128.037', 'S12128.034', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '38': rec.array([(38, 'TROPOS-roof', 'S12128.038', 'S12128.039', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '43': rec.array([(43, 'TROPOS-roof', 'S12128.043', 'S12128.030', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '44': rec.array([(44, 'TROPOS-roof', 'S12128.044', 'S12137.043', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '46': rec.array([(46, 'TROPOS-roof', 'S12128.046', 'S12128.050', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '47': rec.array([(47, 'TROPOS-roof', 'S12128.047', 'S12128.036', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '49': rec.array([(49, 'TROPOS-roof', 'S12128.049', 'S12128.045', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'scratches on horizontal pyranometer')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '53': rec.array([(53, 'TROPOS-roof', 'S12137.003', 'S12137.007', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '54': rec.array([(54, 'TROPOS-roof', 'S12137.004', 'S12128.048', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '55': rec.array([(55, 'TROPOS-roof', 'S12137.005', 'S12137.006', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '58': rec.array([(58, 'TROPOS-roof', 'S12137.008', 'S12137.002', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'scratches on tiltet pyranometer')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '60': rec.array([(60, 'TROPOS-roof', 'S12137.010', 'S12137.009', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '86': rec.array([(86, 'TROPOS-roof', 'S12137.036', 'S12137.038', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),
 '87': rec.array([(87, 'TROPOS-roof', 'S12137.037', 'S12137.041', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],
           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')])}
#|dropout
parse_legacy_logbook(fn_lb)

Hide code cell output

datestamp Q00 Q01 MainQ01[comment] MainQ02[comment] ExtraQ01[comment] ExtraQ02[comment] MainQ01 MainQ02 ExtraQ01 ExtraQ02
0 2019-06-17 12:00:00 1 nan AO01 AO01 AO01 AO01
1 2019-06-17 12:00:00 4 moisture in horizontal pyranometer AO01 AO01 AO01 AO01
2 2019-06-17 13:00:00 4 additional note AO02 AO02 AO02 AO02
3 2019-06-17 12:00:00 5 nan AO01 AO01 AO01 AO01
4 2019-06-17 12:00:00 7 nan AO01 AO01 AO01 AO01
5 2019-06-17 12:00:00 9 nan AO01 AO01 AO01 AO01
6 2019-06-17 12:00:00 10 nan AO01 AO01 AO01 AO01
7 2019-06-17 12:00:00 12 nan AO01 AO01 AO01 AO01
8 2019-06-17 12:00:00 14 nan AO01 AO01 AO01 AO01
9 2019-06-17 12:00:00 15 nan AO01 AO01 AO01 AO01
10 2019-06-17 12:00:00 18 nan AO01 AO01 AO01 AO01
11 2019-06-17 12:00:00 24 nan AO01 AO01 AO01 AO01
12 2019-06-17 12:00:00 25 nan AO01 AO01 AO01 AO01
13 2019-06-17 12:00:00 26 nan AO01 AO01 AO01 AO01
14 2019-06-17 12:00:00 32 nan AO01 AO01 AO01 AO01
15 2019-06-17 12:00:00 33 nan AO01 AO01 AO01 AO01
16 2019-06-17 12:00:00 35 nan AO01 AO01 AO01 AO01
17 2019-06-17 12:00:00 37 nan AO01 AO01 AO01 AO01
18 2019-06-17 12:00:00 38 nan AO01 AO01 AO01 AO01
19 2019-06-17 12:00:00 43 nan AO01 AO01 AO01 AO01
20 2019-06-17 12:00:00 44 nan AO01 AO01 AO01 AO01
21 2019-06-17 12:00:00 46 nan AO01 AO01 AO01 AO01
22 2019-06-17 12:00:00 47 nan AO01 AO01 AO01 AO01
23 2019-06-17 12:00:00 49 scratches on horizontal pyranometer AO01 AO01 AO01 AO01
24 2019-06-17 12:00:00 53 nan AO01 AO01 AO01 AO01
25 2019-06-17 12:00:00 54 nan AO01 AO01 AO01 AO01
26 2019-06-17 12:00:00 55 nan AO01 AO01 AO01 AO01
27 2019-06-17 12:00:00 58 scratches on tiltet pyranometer AO01 AO01 AO01 AO01
28 2019-06-17 12:00:00 60 nan AO01 AO01 AO01 AO01
29 2019-06-17 12:00:00 86 nan AO01 AO01 AO01 AO01
30 2019-06-17 12:00:00 87 nan AO01 AO01 AO01 AO01

6.2. Parse Responses to dict

Maintenance Flags shall be parsed to a dictionary sorted by the PyrNet box number. The Survey reports are collected over the entire campaign. Therefore, consider only reports within a certain time interval around maintenance time (2 days) for quality flagging the measurement periode.

Reports within +-2 days around maintenance time are considered, giving the opportunity for corrections within this time frame by issuing another response (or via insert in the website interface). For example, the first report at 1PM includes the quality marks and some notes. Later, if one want to add notes for this station or correct marks, another report can be filled. The Values will be updated by the parsing function: * Valid Marks (not None) of the latest report within +-2days * Notes of multiple reports are attached (separated by “;”) starting with the oldest report notes.

#|export
_pollution_marks = {
    "None":4,
    "AO01":0,
    "AO02":1,
    "AO03":2,
    "AO04":3,
}
_alignment_marks = {
    "None":4,
    "AO01":0,
    "AO02":1,
    "AO03":2,
}
_note_keys = {
    "note_general": "Q01",
    "note_align": "MainQ01[comment]",
    "note_clean": "MainQ02[comment]",
    "note_align2": "ExtraQ01[comment]",
    "note_clean2": "ExtraQ02[comment]",
}
_mark_keys = {
    "clean": "MainQ01",
    "align": "MainQ02",
    "clean2": "ExtraQ01",
    "align2": "ExtraQ02",
}

def parse_report(
        df:  pd.DataFrame,
        date_of_maintenance: float | dt.datetime | np.datetime64 | None,
        stations: list | int | None = None
) -> dict:
    """
    Use pandas.read_csv (sep=;) to parse the survey report.

    Parameters
    ----------
    df: Dataframe
        LimeSurvey response parsed as pandas Dataframe.
    date_of_maintenance: float, datetime, datetime64 or None
        A rough date of maintenance (at least day resolution).
        If float, interpreted as Julian day from 2000-01-01T12:00.
        If None, the most recent logbook entries will be parsed.
    stations: list, int or None
        Selection of station (box) numbers to parse the report for.
        If None, parse all available stations. The default is None.

    Returns
    -------
    dict
        Dictionary storing maintenance flags and notes by PyrNet box number.
    """
    if date_of_maintenance is not None:
        date_of_maintenance = utils.to_datetime64(date_of_maintenance)

    # Dataframe polishing
    # drop only where station info is None
    df = df.fillna("None")
    df = df.mask(df["Q00"].eq("None")).dropna()
    df = df.reset_index()
    
    # Iterable station selection
    if stations is None:
        stations = np.unique(df['Q00'].values)
    if isinstance(stations, str):
        stations = [int(stations)]
    try:
        iter(stations)
    except:
        stations = [stations]
    
    # Find reports to consider per station
    idxs = []
    for station in stations:
        dfq = df.query(f"Q00=={station}")
        # find next report within -1 to 10 days
        report_dates = dfq["datestamp"].values.astype("datetime64[ns]")
        dtime = report_dates - date_of_maintenance
        mask = dtime < np.timedelta64(10,'D')
        mask *= dtime > np.timedelta64(-1,'h')
        if np.all(~mask): # no reports within time interval
            continue
        idx = np.argwhere(mask).ravel()[0]
        next_report_date = report_dates[idx]
        
        # find reports around 2 days of next report for merging
        dtime = report_dates - next_report_date
        mask = dtime < np.timedelta64(2,'D')
        mask *= dtime >= np.timedelta64(0,'D') # include "next_report_date"
        idx = np.argwhere(mask).ravel()
        idxs += list(dfq.index[idx])

    results = {}
    for i in idxs:
        if df["Q00"].values[i] == "None":
            continue
        box = int(df["Q00"].values[i])
        key = f"{box:03d}"
        mdate = pd.to_datetime(df['datestamp'][i])

        # store report in dictionary
        if key not in results:
            # initialize marks
            for mkey in _mark_keys:
                results = assoc_in(results, [key,mkey], 4)
            # initialize notes
            for nkey in _note_keys:
                results = assoc_in(results, [key,nkey], "")
            # initialize maintenancetime
            results = assoc_in(results, [key,"maintenancetime"], mdate)

        # merge notes if multiple reports exist
        for nkey in _note_keys:
            new_note = df[_note_keys[nkey]].values[i]
            if new_note=="None":
                continue
            # update_note = (results[key][nkey]+'; '+new_note).strip('; ')
            results = assoc_in(results, [key,nkey], new_note) # update_note)

        # update marks with most recent report if not None
        for mkey in _mark_keys:
            new_mark = df[_mark_keys[mkey]][i]
            if new_mark=="None":
                continue
            if mkey.startswith("clean"):
                new_mark = _pollution_marks[new_mark]
            else:
                new_mark = _alignment_marks[new_mark]
            results = assoc_in(results, [key,mkey], new_mark)
            # update associated maintenance date
            results = assoc_in(results, [key,"maintenancetime"], mdate)
        
    return results
#|dropout
# Parse LimeSurvey report
fn = "../../example_data/results-survey224783.csv"
#fn = "../../testnb/results-survey224783.csv"

df = pd.read_csv(fn, sep=';')

parse_report(df, np.datetime64("2023-05-08T01:58"))

Hide code cell output

{'001': {'clean': 1,
  'align': 2,
  'clean2': 3,
  'align2': 1,
  'note_general': '222',
  'note_align': 'test',
  'note_clean': 'testing notes',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2023-05-08 16:09:06')},
 '002': {'clean': 2,
  'align': 4,
  'clean2': 4,
  'align2': 4,
  'note_general': '222',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2023-05-08 16:08:20')}}
#|dropout
fn_lb = "../../example_data/legacy_logbook.xls"
# Parse legacy xls notebook
parse_report(parse_legacy_logbook(fn_lb), np.datetime64("2019-06-17T12:00"))

Hide code cell output

{'001': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '004': {'clean': 1,
  'align': 1,
  'clean2': 1,
  'align2': 1,
  'note_general': 'additional note',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 13:00:00')},
 '005': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '007': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '009': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '010': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '012': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '014': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '015': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '018': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '024': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '025': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '026': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '032': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '033': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '035': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '037': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '038': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '043': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '044': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '046': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '047': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '049': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'scratches on horizontal pyranometer',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '053': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '054': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '055': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '058': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'scratches on tiltet pyranometer',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '060': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '086': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')},
 '087': {'clean': 0,
  'align': 0,
  'clean2': 0,
  'align2': 0,
  'note_general': 'nan',
  'note_align': '',
  'note_clean': '',
  'note_align2': '',
  'note_clean2': '',
  'maintenancetime': Timestamp('2019-06-17 12:00:00')}}

6.3. Make aggregated quality flags

Aggregate quality marks to a binary number according to CF-Convention section 3.5 for quality flags.

QC flag binary representation, bits: XXYY with:

  • XX - level - [00,01,10] - good, slight out of level, bad out of level

  • YY - clean - [00,01,10,11] good, slight-, moderate-, strong covered

flag_mask = '3b,3b,3b, 12b, 12b'
flag_values = '1b, 2b, 3b, 4b, 8b '
flag_meanings = "
    soiling_light
    soiling_moderate
    soiling_heavy
    level_problematic
    level_bad"
#|export
def get_qcflag(qc_clean, qc_level):
    """
    Aggregate quality flags.

    Parameters
    ----------
    qc_clean: int
        [0,1,2,3] [clean, slight-, moderate-, strong covered]
    qc_level: int
        [0,1,2] [good, slight misalignment, strong misalignment]

    Returns
    -------
    int
        aggregated quality flagg [0-11]
    """
    qc = (qc_level<<2) + qc_clean
    return qc