{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-24T08:11:52.108373800Z",
     "start_time": "2024-05-24T08:11:51.773425200Z"
    },
    "collapsed": false,
    "tags": [
     "remove-cell"
    ]
   },
   "outputs": [],
   "source": [
    "#|hide\n",
    "#|default_exp reports"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "# Maintenance Reports\n",
    "Parse Maintenance reports from LimeSurvey and legacy spreadsheet formats.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-24T08:11:55.691434500Z",
     "start_time": "2024-05-24T08:11:51.783947Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#|export\n",
    "import pandas as pd\n",
    "from pandas._typing import (\n",
    "    FilePath,\n",
    "    ReadCsvBuffer,\n",
    ")\n",
    "import datetime as dt\n",
    "import numpy as np\n",
    "from toolz import assoc_in\n",
    "\n",
    "from pyrnet import utils"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "\n",
    "## Survey Export\n",
    "In the following, the functions are designed to work with the survey response export in the .csv format:\n",
    "* Field separator: \"Semicolon\"\n",
    "* Responses: \"Answer codes\"\n",
    "* Headings: \"Question code\"\n",
    "\n",
    "The responses can be exported manually from the website ..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-24T08:11:55.765230Z",
     "start_time": "2024-05-24T08:11:55.693427100Z"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>submitdate</th>\n",
       "      <th>lastpage</th>\n",
       "      <th>startlanguage</th>\n",
       "      <th>seed</th>\n",
       "      <th>startdate</th>\n",
       "      <th>datestamp</th>\n",
       "      <th>Q00</th>\n",
       "      <th>Q01</th>\n",
       "      <th>MainQ01</th>\n",
       "      <th>...</th>\n",
       "      <th>interviewtime</th>\n",
       "      <th>groupTime57</th>\n",
       "      <th>Q00Time</th>\n",
       "      <th>Q01Time</th>\n",
       "      <th>groupTime59</th>\n",
       "      <th>MainQ01Time</th>\n",
       "      <th>MainQ02Time</th>\n",
       "      <th>groupTime58</th>\n",
       "      <th>ExtraQ01Time</th>\n",
       "      <th>ExtraQ02Time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>2023-05-01 16:09:06</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "      <td>632878730</td>\n",
       "      <td>2023-05-01 16:08:46</td>\n",
       "      <td>2023-05-01 16:09:06</td>\n",
       "      <td>1</td>\n",
       "      <td>222</td>\n",
       "      <td>AO02</td>\n",
       "      <td>...</td>\n",
       "      <td>19.89</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>2023-05-08 15:13:33</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "      <td>1428982518</td>\n",
       "      <td>2023-05-08 15:02:53</td>\n",
       "      <td>2023-05-08 15:13:33</td>\n",
       "      <td>1</td>\n",
       "      <td>this is a test</td>\n",
       "      <td>AO01</td>\n",
       "      <td>...</td>\n",
       "      <td>837.45</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>2023-05-08 16:08:20</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "      <td>852861659</td>\n",
       "      <td>2023-05-08 16:08:09</td>\n",
       "      <td>2023-05-08 16:08:20</td>\n",
       "      <td>2</td>\n",
       "      <td>222</td>\n",
       "      <td>AO03</td>\n",
       "      <td>...</td>\n",
       "      <td>11.13</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>2023-05-08 16:09:06</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "      <td>632878730</td>\n",
       "      <td>2023-05-08 16:08:46</td>\n",
       "      <td>2023-05-08 16:09:06</td>\n",
       "      <td>1</td>\n",
       "      <td>222</td>\n",
       "      <td>AO02</td>\n",
       "      <td>...</td>\n",
       "      <td>19.89</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>2023-05-18 16:09:06</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "      <td>632878730</td>\n",
       "      <td>2023-05-18 16:08:46</td>\n",
       "      <td>2023-05-18 16:09:06</td>\n",
       "      <td>1</td>\n",
       "      <td>222</td>\n",
       "      <td>AO02</td>\n",
       "      <td>...</td>\n",
       "      <td>19.89</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 27 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   id           submitdate  lastpage startlanguage        seed  \\\n",
       "0   1  2023-05-01 16:09:06         1            en   632878730   \n",
       "1   2  2023-05-08 15:13:33         1            en  1428982518   \n",
       "2   3  2023-05-08 16:08:20         1            en   852861659   \n",
       "3   4  2023-05-08 16:09:06         1            en   632878730   \n",
       "4   5  2023-05-18 16:09:06         1            en   632878730   \n",
       "\n",
       "             startdate            datestamp  Q00             Q01 MainQ01  ...  \\\n",
       "0  2023-05-01 16:08:46  2023-05-01 16:09:06    1             222    AO02  ...   \n",
       "1  2023-05-08 15:02:53  2023-05-08 15:13:33    1  this is a test    AO01  ...   \n",
       "2  2023-05-08 16:08:09  2023-05-08 16:08:20    2             222    AO03  ...   \n",
       "3  2023-05-08 16:08:46  2023-05-08 16:09:06    1             222    AO02  ...   \n",
       "4  2023-05-18 16:08:46  2023-05-18 16:09:06    1             222    AO02  ...   \n",
       "\n",
       "  interviewtime groupTime57 Q00Time Q01Time groupTime59 MainQ01Time  \\\n",
       "0         19.89        None    None    None        None        None   \n",
       "1        837.45        None    None    None        None        None   \n",
       "2         11.13        None    None    None        None        None   \n",
       "3         19.89        None    None    None        None        None   \n",
       "4         19.89        None    None    None        None        None   \n",
       "\n",
       "  MainQ02Time  groupTime58 ExtraQ01Time ExtraQ02Time  \n",
       "0        None         None         None         None  \n",
       "1        None         None         None         None  \n",
       "2        None         None         None         None  \n",
       "3        None         None         None         None  \n",
       "4        None         None         None         None  \n",
       "\n",
       "[5 rows x 27 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fn = \"../../example_data/results-survey224783.csv\"\n",
    "\n",
    "# Read the semicolon-separated export; missing values become the string \"None\".\n",
    "df = pd.read_csv(fn, sep=';').fillna(\"None\")\n",
    "df\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "... or via the *limepy* python package."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-24T08:11:55.769243600Z",
     "start_time": "2024-05-24T08:11:55.740696Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# import limepy\n",
    "# import getpass\n",
    "# from io import StringIO\n",
    "# \n",
    "# pwd = getpass.getpass(\"LimeSurvey BeRichter Password: \")\n",
    "# if pwd != '':\n",
    "#     url = \"https://lgs-car.limesurvey.net/admin/remotecontrol\"\n",
    "#     csv = limepy.download.get_responses(\n",
    "#         base_url=url,\n",
    "#         user_name=\"BeRichter\",\n",
    "#         password=pwd,\n",
    "#         user_id=1,\n",
    "#         sid=224783\n",
    "#     )\n",
    "#     df = pd.read_csv(StringIO(csv), sep=';')\n",
    "#     df = df[~df.submitdate.isnull()].reset_index()\n",
    "#     df = df.fillna(\"None\")\n",
    "# \n",
    "# else:\n",
    "#     print(\"No password, no data.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-24T08:11:55.769243600Z",
     "start_time": "2024-05-24T08:11:55.750835900Z"
    },
    "collapsed": false,
    "tags": [
     "hide-output"
    ]
   },
   "outputs": [],
   "source": [
    "#|dropout\n",
    "# df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-24T08:11:55.830184200Z",
     "start_time": "2024-05-24T08:11:55.764880200Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#|export\n",
    "def get_responses(\n",
    "        *,\n",
    "        fn: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str]|None = None,\n",
    "        online: dict|None = None\n",
    ") -> pd.DataFrame:\n",
    "    \"\"\"\n",
    "    Get LimeSurvey responses as pandas Dataframe providing a file or online download information.\n",
    "\n",
    "    If both arguments are given, *fn* takes precedence and *online* is ignored.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    fn: str, path object or file-like object\n",
    "        Any pandas readable representation of the LimeSurvey response export file\n",
    "        (.csv, sep=;, answer and question codes).\n",
    "        A str or path object ending in .xls or .xlsx (case-insensitive) is treated\n",
    "        as legacy logbook and handed to `parse_legacy_logbook` instead.\n",
    "    online: dict\n",
    "        Dictionary of information required to download the responses via limepy:\n",
    "            * base_url -> limesurvey remote_control url\n",
    "            * user_name -> account name\n",
    "            * password\n",
    "            * user_id -> ID of account user (usually 1)\n",
    "            * sid -> Survey ID\n",
    "\n",
    "        Minimal information stored in *online* is the base_url, other information will then be filled via user input prompt.\n",
    "        Prompted values are written into the passed dictionary.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pd.Dataframe\n",
    "        Parsed responses. For csv and online input, rows without a submitdate are\n",
    "        dropped and missing values are replaced by the string \"None\".\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If neither *fn* nor *online* is given, or if *online* has no base_url entry.\n",
    "    \"\"\"\n",
    "    import os\n",
    "\n",
    "    if fn is not None:\n",
    "        # legacy support:\n",
    "        # only str / path-like inputs carry a file name; file-like buffers have\n",
    "        # no `endswith` and are always read as csv.\n",
    "        if isinstance(fn, (str, os.PathLike)):\n",
    "            if str(os.fspath(fn)).lower().endswith((\".xls\", \".xlsx\")):\n",
    "                return parse_legacy_logbook(fn)\n",
    "        filepath_or_buffer = fn\n",
    "    elif online is not None:\n",
    "        import limepy\n",
    "        import getpass\n",
    "        from io import StringIO\n",
    "        if \"base_url\" not in online:\n",
    "            raise ValueError(\"online requires at least the 'base_url' entry.\")\n",
    "        if \"user_name\" not in online:\n",
    "            online.update({'user_name': input(\"LimeSurvey Account Name: \")})\n",
    "        if \"password\" not in online:\n",
    "            online.update({'password': getpass.getpass(\"LimeSurvey Password: \")})\n",
    "        if \"user_id\" not in online:\n",
    "            online.update({\"user_id\": input(\"LimeSurvey User ID: \")})\n",
    "        if \"sid\" not in online:\n",
    "            online.update({'sid': input(\"LimeSurvey Survey ID: \")})\n",
    "\n",
    "        csv = limepy.download.get_responses(**online)\n",
    "        filepath_or_buffer = StringIO(csv)\n",
    "    else:\n",
    "        raise ValueError(\"Either fn or online has to be provided.\")\n",
    "    df = pd.read_csv(filepath_or_buffer, sep=';')\n",
    "    # drop rows without a submit date\n",
    "    df = df[df[\"submitdate\"].notna()].reset_index(drop=True)\n",
    "    df = df.fillna(\"None\")\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-24T08:11:55.831186Z",
     "start_time": "2024-05-24T08:11:55.773253100Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# url = \"https://lgs-car.limesurvey.net/admin/remotecontrol\"\n",
    "# get_responses(\n",
    "#     online=dict(\n",
    "#         base_url=url,\n",
    "#         user_id=1,\n",
    "#         sid=224783\n",
    "#     )\n",
    "# )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "### Support legacy xls or xlsx logbook file\n",
    "Prior to LimeSurvey, the maintenance notes and marks were collected within a spreadsheet file. The following functions add legacy support, transforming the xls notation into a pandas DataFrame ready to be parsed like a LimeSurvey response export."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-24T08:11:55.847170600Z",
     "start_time": "2024-05-24T08:11:55.782743500Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#|export\n",
    "def read_logbook(lfile):\n",
    "    '''\n",
    "    Load logbook file and store it as dictionary of rec arrays with stID keys.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    lfile: string\n",
    "        path and filename of logbook file -> should be .xls or .xlsx file,\n",
    "        each sheet represents a maintenance period.\n",
    "        First row of each sheet has to include the column names, e.g.\n",
    "        *box*, *date*, *clean*, *clean_tilt*, *level*, *level_tilt*, *Hangle*, *Vangle*, *notes*.\n",
    "        Column names are matched case-insensitively against a list of known\n",
    "        aliases, unknown columns are ignored.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    logbook: dict\n",
    "        dict of recarray for each station ID including quality flags from each maintenance cycle.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    KeyError\n",
    "        If a sheet has no column that maps to *date*.\n",
    "    '''\n",
    "    # Text fields are fixed width: numpy truncates longer values to 50 characters.\n",
    "    dtype_log =[\n",
    "        ('box',         np.uint8),\n",
    "        ('site',        'U50'),\n",
    "        ('serial_pyr',  'U50'),\n",
    "        ('serial_pyr_tilt', 'U50'),\n",
    "        ('user',          'U50'),\n",
    "        ('campaign',    'U50'),\n",
    "        ('date',        'datetime64[ms]' ),\n",
    "        ('clean',       np.uint8),\n",
    "        ('clean_tilt',  np.uint8),\n",
    "        ('level',       np.uint8),\n",
    "        ('level_tilt',  np.uint8),\n",
    "        ('Hangle',      'f8'),\n",
    "        ('Vangle',      'f8'),\n",
    "        ('notes',       'U50')\n",
    "    ]\n",
    "    field_types = dict(dtype_log)\n",
    "    # Accepted column headers per record field. Headers are lower-cased and\n",
    "    # stripped before the lookup, so every alias has to be lower case.\n",
    "    aliases = {\n",
    "        'date': ('date',),\n",
    "        'clean': ('clean', 'cleanliness', 'clean(pyr1)', 'clean1'),\n",
    "        'clean_tilt': ('clean_tilt', 'clean2', 'clean(pyr2)'),\n",
    "        'level': ('level', 'level(pyr1)', 'level1'),\n",
    "        'level_tilt': ('level_tilt', 'level(pyr2)', 'level2'),\n",
    "        'box': ('box', 'station', 'id', 'pyrbox', 'pyranometerbox'),\n",
    "        'Hangle': ('hangle', 'azimuth', 'azi', 'horizontal_angle'),\n",
    "        'Vangle': ('vangle', 'zenith', 'zen', 'vertical_angle'),\n",
    "        'notes': ('notes', 'note', 'description'),\n",
    "        'serial_pyr': ('serial', 'serial1', 'serial_pyr', 'pyranometerid', 'pyrid'),\n",
    "        'serial_pyr_tilt': ('serial2', 'serial_pyr2', 'serial_pyr_tilt'),\n",
    "        'site': ('site', 'location'),\n",
    "        'user': ('user', 'author'),\n",
    "        'campaign': ('campaign',),\n",
    "    }\n",
    "    field_of = {alias: field for field, names in aliases.items() for alias in names}\n",
    "\n",
    "    def _parse_name(name):\n",
    "        # Map a column header to its record field, or False if it is unknown.\n",
    "        return field_of.get(str(name).lower().strip(), False)\n",
    "\n",
    "    rows_by_box = {}\n",
    "    sheets = pd.read_excel(lfile, sheet_name=None)  # dict: sheet name -> DataFrame\n",
    "    for sheet_name, sheet in sheets.items():  # read all sheets from xls file\n",
    "        fields = {column: _parse_name(column) for column in sheet.columns}\n",
    "        date_columns = [column for column, field in fields.items() if field == 'date']\n",
    "        if not date_columns:\n",
    "            raise KeyError(f\"no 'date' column found in logbook sheet {sheet_name!r}\")\n",
    "        sh = sheet.dropna(axis=0, how='all', subset=date_columns)  # remove lines without date\n",
    "        sh = sh.dropna(axis=1, how='all')  # remove empty columns\n",
    "        # Iterate over plain dicts instead of itertuples(): namedtuples rename\n",
    "        # headers that are not valid identifiers (e.g. 'clean(pyr1)') to '_N',\n",
    "        # which would hide them from the alias lookup.\n",
    "        for row in sh.to_dict(orient='records'):\n",
    "            A = np.zeros(1, dtype=dtype_log).view(np.recarray)\n",
    "            for name, value in row.items():\n",
    "                key = fields[name]\n",
    "                if not key:\n",
    "                    continue\n",
    "                # Missing values of uint8 fields are stored as 9; pd.isna also\n",
    "                # copes with non-float cell values, unlike np.isnan.\n",
    "                if field_types[key] == np.uint8 and pd.isna(value):\n",
    "                    value = 9\n",
    "                A[key] = value\n",
    "            rows_by_box.setdefault(str(A.box[0]), []).append(A)\n",
    "    # Join the single-row records once per box; dict order follows first appearance.\n",
    "    return {\n",
    "        box: np.concatenate(rows).view(np.recarray)\n",
    "        for box, rows in rows_by_box.items()\n",
    "    }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-24T08:11:55.891911100Z",
     "start_time": "2024-05-24T08:11:55.802567Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#|export\n",
    "def parse_legacy_logbook(fn):\n",
    "    \"\"\"\n",
    "    Convert a legacy logbook spreadsheet into the LimeSurvey response layout.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    fn: str or path object\n",
    "        Legacy logbook file, read via `read_logbook`.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pd.Dataframe\n",
    "        One row per logbook entry with the columns *datestamp* (date), *Q00* (box number),\n",
    "        *Q01* (notes), the answer codes *MainQ01*, *MainQ02*, *ExtraQ01*, *ExtraQ02*\n",
    "        (from clean, level, clean_tilt, level_tilt) and empty *[comment]* columns.\n",
    "        Flags 1 to 4 become \"AO01\" to \"AO04\", any other flag and missing values\n",
    "        become the string \"None\". The frame is empty if the logbook has no entries.\n",
    "    \"\"\"\n",
    "    columns = [\n",
    "        \"datestamp\", \"Q00\", \"Q01\",\n",
    "        \"MainQ01[comment]\", \"MainQ02[comment]\",\n",
    "        \"ExtraQ01[comment]\", \"ExtraQ02[comment]\",\n",
    "        \"MainQ01\", \"MainQ02\", \"ExtraQ01\", \"ExtraQ02\",\n",
    "    ]\n",
    "    faccept = (1, 2, 3, 4)\n",
    "\n",
    "    def _answer_codes(flags):\n",
    "        # Translate numeric quality flags to answer codes.\n",
    "        return [f\"AO0{f}\" if f in faccept else \"None\" for f in flags]\n",
    "\n",
    "    frames = []\n",
    "    for box, lbb in read_logbook(fn).items():\n",
    "        notes = lbb['notes']\n",
    "        frames.append(pd.DataFrame(\n",
    "            {\"datestamp\": lbb['date'],\n",
    "             \"Q00\": int(box),\n",
    "             # read_logbook stores empty note cells as the literal text 'nan';\n",
    "             # map them to the same \"None\" marker used for missing values.\n",
    "             \"Q01\": np.where(notes == 'nan', \"None\", notes),\n",
    "             \"MainQ01[comment]\": \"\",\n",
    "             \"MainQ02[comment]\": \"\",\n",
    "             \"ExtraQ01[comment]\": \"\",\n",
    "             \"ExtraQ02[comment]\": \"\",\n",
    "             \"MainQ01\": _answer_codes(lbb['clean']),\n",
    "             \"MainQ02\": _answer_codes(lbb['level']),\n",
    "             \"ExtraQ01\": _answer_codes(lbb['clean_tilt']),\n",
    "             \"ExtraQ02\": _answer_codes(lbb['level_tilt']),\n",
    "             }\n",
    "        ))\n",
    "    if not frames:\n",
    "        # no entries at all: return an empty frame instead of failing on None\n",
    "        return pd.DataFrame(columns=columns)\n",
    "    # concatenate once instead of growing the frame inside the loop\n",
    "    df = pd.concat(frames, ignore_index=True)\n",
    "    df = df.fillna(\"None\")\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-24T08:11:56.139347600Z",
     "start_time": "2024-05-24T08:11:55.813701600Z"
    },
    "collapsed": false,
    "tags": [
     "hide-output"
    ]
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'1': rec.array([(1, 'TROPOS-roof', 'S12128.001', 'S12137.049', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '4': rec.array([(4, 'TROPOS-roof', 'S12128.004', 'S12137.001', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'moisture in horizontal pyranometer'),\n",
       "            (4, 'TROPOS-roof', 'S12128.004', 'S12137.001', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T13:00:00.000', 2, 2, 2, 2, 180., 0., 'additional note')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '5': rec.array([(5, 'TROPOS-roof', 'S12128.005', 'S12137.047', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '7': rec.array([(7, 'TROPOS-roof', 'S12128.007', 'S12128.031', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '9': rec.array([(9, 'TROPOS-roof', 'S12128.009', 'S12137.012', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '10': rec.array([(10, 'TROPOS-roof', 'S12128.010', 'S12137.019', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '12': rec.array([(12, 'TROPOS-roof', 'S12128.012', 'S12137.050', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '14': rec.array([(14, 'TROPOS-roof', 'S12128.014', 'S12137.024', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '15': rec.array([(15, 'TROPOS-roof', 'S12128.015', 'S12128.040', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '18': rec.array([(18, 'TROPOS-roof', 'S12128.018', 'S12137.045', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '24': rec.array([(24, 'TROPOS-roof', 'S12128.024', 'S12137.044', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '25': rec.array([(25, 'TROPOS-roof', 'S12128.025', 'S12137.046', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '26': rec.array([(26, 'TROPOS-roof', 'S12128.026', 'S12137.040', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '32': rec.array([(32, 'TROPOS-roof', 'S12128.032', 'S12137.042', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '33': rec.array([(33, 'TROPOS-roof', 'S12128.033', 'S12137.048', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '35': rec.array([(35, 'TROPOS-roof', 'S12128.035', 'S12128.042', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '37': rec.array([(37, 'TROPOS-roof', 'S12128.037', 'S12128.034', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '38': rec.array([(38, 'TROPOS-roof', 'S12128.038', 'S12128.039', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '43': rec.array([(43, 'TROPOS-roof', 'S12128.043', 'S12128.030', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '44': rec.array([(44, 'TROPOS-roof', 'S12128.044', 'S12137.043', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '46': rec.array([(46, 'TROPOS-roof', 'S12128.046', 'S12128.050', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '47': rec.array([(47, 'TROPOS-roof', 'S12128.047', 'S12128.036', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '49': rec.array([(49, 'TROPOS-roof', 'S12128.049', 'S12128.045', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'scratches on horizontal pyranometer')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '53': rec.array([(53, 'TROPOS-roof', 'S12137.003', 'S12137.007', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '54': rec.array([(54, 'TROPOS-roof', 'S12137.004', 'S12128.048', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '55': rec.array([(55, 'TROPOS-roof', 'S12137.005', 'S12137.006', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '58': rec.array([(58, 'TROPOS-roof', 'S12137.008', 'S12137.002', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'scratches on tiltet pyranometer')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '60': rec.array([(60, 'TROPOS-roof', 'S12137.010', 'S12137.009', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '86': rec.array([(86, 'TROPOS-roof', 'S12137.036', 'S12137.038', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')]),\n",
       " '87': rec.array([(87, 'TROPOS-roof', 'S12137.037', 'S12137.041', 'Jonas Witthuhn', 'TROPOS-calibration', '2019-06-17T12:00:00.000', 1, 1, 1, 1, 180., 0., 'nan')],\n",
       "           dtype=[('box', 'u1'), ('site', '<U50'), ('serial_pyr', '<U50'), ('serial_pyr_tilt', '<U50'), ('user', '<U50'), ('campaign', '<U50'), ('date', '<M8[ms]'), ('clean', 'u1'), ('clean_tilt', 'u1'), ('level', 'u1'), ('level_tilt', 'u1'), ('Hangle', '<f8'), ('Vangle', '<f8'), ('notes', '<U50')])}"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#|dropout\n",
    "fn_lb = \"../../example_data/legacy_logbook.xls\"\n",
    "\n",
    "lb = read_logbook(fn_lb)\n",
    "lb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-24T08:11:56.149246900Z",
     "start_time": "2024-05-24T08:11:55.893906700Z"
    },
    "collapsed": false,
    "tags": [
     "hide-output"
    ]
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>datestamp</th>\n",
       "      <th>Q00</th>\n",
       "      <th>Q01</th>\n",
       "      <th>MainQ01[comment]</th>\n",
       "      <th>MainQ02[comment]</th>\n",
       "      <th>ExtraQ01[comment]</th>\n",
       "      <th>ExtraQ02[comment]</th>\n",
       "      <th>MainQ01</th>\n",
       "      <th>MainQ02</th>\n",
       "      <th>ExtraQ01</th>\n",
       "      <th>ExtraQ02</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>1</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>4</td>\n",
       "      <td>moisture in horizontal pyranometer</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2019-06-17 13:00:00</td>\n",
       "      <td>4</td>\n",
       "      <td>additional note</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO02</td>\n",
       "      <td>AO02</td>\n",
       "      <td>AO02</td>\n",
       "      <td>AO02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>5</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>7</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>9</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>10</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>12</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>14</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>15</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>18</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>24</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>25</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>26</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>32</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>33</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>35</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>37</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>38</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>43</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>44</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>46</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>47</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>49</td>\n",
       "      <td>scratches on horizontal pyranometer</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>53</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>54</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>55</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>58</td>\n",
       "      <td>scratches on tiltet pyranometer</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>60</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>86</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>2019-06-17 12:00:00</td>\n",
       "      <td>87</td>\n",
       "      <td>nan</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "      <td>AO01</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             datestamp  Q00                                  Q01  \\\n",
       "0  2019-06-17 12:00:00    1                                  nan   \n",
       "1  2019-06-17 12:00:00    4   moisture in horizontal pyranometer   \n",
       "2  2019-06-17 13:00:00    4                      additional note   \n",
       "3  2019-06-17 12:00:00    5                                  nan   \n",
       "4  2019-06-17 12:00:00    7                                  nan   \n",
       "5  2019-06-17 12:00:00    9                                  nan   \n",
       "6  2019-06-17 12:00:00   10                                  nan   \n",
       "7  2019-06-17 12:00:00   12                                  nan   \n",
       "8  2019-06-17 12:00:00   14                                  nan   \n",
       "9  2019-06-17 12:00:00   15                                  nan   \n",
       "10 2019-06-17 12:00:00   18                                  nan   \n",
       "11 2019-06-17 12:00:00   24                                  nan   \n",
       "12 2019-06-17 12:00:00   25                                  nan   \n",
       "13 2019-06-17 12:00:00   26                                  nan   \n",
       "14 2019-06-17 12:00:00   32                                  nan   \n",
       "15 2019-06-17 12:00:00   33                                  nan   \n",
       "16 2019-06-17 12:00:00   35                                  nan   \n",
       "17 2019-06-17 12:00:00   37                                  nan   \n",
       "18 2019-06-17 12:00:00   38                                  nan   \n",
       "19 2019-06-17 12:00:00   43                                  nan   \n",
       "20 2019-06-17 12:00:00   44                                  nan   \n",
       "21 2019-06-17 12:00:00   46                                  nan   \n",
       "22 2019-06-17 12:00:00   47                                  nan   \n",
       "23 2019-06-17 12:00:00   49  scratches on horizontal pyranometer   \n",
       "24 2019-06-17 12:00:00   53                                  nan   \n",
       "25 2019-06-17 12:00:00   54                                  nan   \n",
       "26 2019-06-17 12:00:00   55                                  nan   \n",
       "27 2019-06-17 12:00:00   58      scratches on tiltet pyranometer   \n",
       "28 2019-06-17 12:00:00   60                                  nan   \n",
       "29 2019-06-17 12:00:00   86                                  nan   \n",
       "30 2019-06-17 12:00:00   87                                  nan   \n",
       "\n",
       "   MainQ01[comment] MainQ02[comment] ExtraQ01[comment] ExtraQ02[comment]  \\\n",
       "0                                                                          \n",
       "1                                                                          \n",
       "2                                                                          \n",
       "3                                                                          \n",
       "4                                                                          \n",
       "5                                                                          \n",
       "6                                                                          \n",
       "7                                                                          \n",
       "8                                                                          \n",
       "9                                                                          \n",
       "10                                                                         \n",
       "11                                                                         \n",
       "12                                                                         \n",
       "13                                                                         \n",
       "14                                                                         \n",
       "15                                                                         \n",
       "16                                                                         \n",
       "17                                                                         \n",
       "18                                                                         \n",
       "19                                                                         \n",
       "20                                                                         \n",
       "21                                                                         \n",
       "22                                                                         \n",
       "23                                                                         \n",
       "24                                                                         \n",
       "25                                                                         \n",
       "26                                                                         \n",
       "27                                                                         \n",
       "28                                                                         \n",
       "29                                                                         \n",
       "30                                                                         \n",
       "\n",
       "   MainQ01 MainQ02 ExtraQ01 ExtraQ02  \n",
       "0     AO01    AO01     AO01     AO01  \n",
       "1     AO01    AO01     AO01     AO01  \n",
       "2     AO02    AO02     AO02     AO02  \n",
       "3     AO01    AO01     AO01     AO01  \n",
       "4     AO01    AO01     AO01     AO01  \n",
       "5     AO01    AO01     AO01     AO01  \n",
       "6     AO01    AO01     AO01     AO01  \n",
       "7     AO01    AO01     AO01     AO01  \n",
       "8     AO01    AO01     AO01     AO01  \n",
       "9     AO01    AO01     AO01     AO01  \n",
       "10    AO01    AO01     AO01     AO01  \n",
       "11    AO01    AO01     AO01     AO01  \n",
       "12    AO01    AO01     AO01     AO01  \n",
       "13    AO01    AO01     AO01     AO01  \n",
       "14    AO01    AO01     AO01     AO01  \n",
       "15    AO01    AO01     AO01     AO01  \n",
       "16    AO01    AO01     AO01     AO01  \n",
       "17    AO01    AO01     AO01     AO01  \n",
       "18    AO01    AO01     AO01     AO01  \n",
       "19    AO01    AO01     AO01     AO01  \n",
       "20    AO01    AO01     AO01     AO01  \n",
       "21    AO01    AO01     AO01     AO01  \n",
       "22    AO01    AO01     AO01     AO01  \n",
       "23    AO01    AO01     AO01     AO01  \n",
       "24    AO01    AO01     AO01     AO01  \n",
       "25    AO01    AO01     AO01     AO01  \n",
       "26    AO01    AO01     AO01     AO01  \n",
       "27    AO01    AO01     AO01     AO01  \n",
       "28    AO01    AO01     AO01     AO01  \n",
       "29    AO01    AO01     AO01     AO01  \n",
       "30    AO01    AO01     AO01     AO01  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#|dropout\n",
    "parse_legacy_logbook(fn_lb)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "## Parse Responses to dict\n",
    "Maintenance Flags shall be parsed to a dictionary sorted by the PyrNet box number. The Survey reports are collected over the entire campaign. Therefore, consider only reports within a certain time interval around maintenance time (2 days) for quality flagging the measurement periode.\n",
    "\n",
    "Reports within +-2 days around maintenance time are considered, giving the opportunity for corrections within this time frame by issuing another response (or via insert in the website interface). For example, the first report at 1PM includes the quality marks and some notes. Later, if one want to add notes for this station or correct marks, another report can be filled. The Values will be updated by the parsing function:\n",
    "    * Valid Marks (not None) of the latest report within +-2days\n",
    "    * Notes of multiple reports are attached (separated by \";\") starting with the oldest report notes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-24T08:11:56.150261Z",
     "start_time": "2024-05-24T08:11:55.981907400Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#|export\n",
    "_pollution_marks = {\n",
    "    \"None\":4,\n",
    "    \"AO01\":0,\n",
    "    \"AO02\":1,\n",
    "    \"AO03\":2,\n",
    "    \"AO04\":3,\n",
    "}\n",
    "_alignment_marks = {\n",
    "    \"None\":4,\n",
    "    \"AO01\":0,\n",
    "    \"AO02\":1,\n",
    "    \"AO03\":2,\n",
    "}\n",
    "_note_keys = {\n",
    "    \"note_general\": \"Q01\",\n",
    "    \"note_align\": \"MainQ01[comment]\",\n",
    "    \"note_clean\": \"MainQ02[comment]\",\n",
    "    \"note_align2\": \"ExtraQ01[comment]\",\n",
    "    \"note_clean2\": \"ExtraQ02[comment]\",\n",
    "}\n",
    "_mark_keys = {\n",
    "    \"clean\": \"MainQ01\",\n",
    "    \"align\": \"MainQ02\",\n",
    "    \"clean2\": \"ExtraQ01\",\n",
    "    \"align2\": \"ExtraQ02\",\n",
    "}\n",
    "\n",
    "def parse_report(\n",
    "        df:  pd.DataFrame,\n",
    "        date_of_maintenance: float | dt.datetime | np.datetime64 | None,\n",
    "        stations: list | int | None = None\n",
    ") -> dict:\n",
    "    \"\"\"\n",
    "    Use pandas.read_csv (sep=;) to parse the survey report.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df: Dataframe\n",
    "        LimeSurvey response parsed as pandas Dataframe.\n",
    "    date_of_maintenance: float, datetime, datetime64 or None\n",
    "        A rough date of maintenance (at least day resolution).\n",
    "        If float, interpreted as Julian day from 2000-01-01T12:00.\n",
    "        If None, the most recent logbook entries will be parsed.\n",
    "    stations: list, int or None\n",
    "        Selection of station (box) numbers to parse the report for.\n",
    "        If None, parse all available stations. The default is None.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    dict\n",
    "        Dictionary storing maintenance flags and notes by PyrNet box number.\n",
    "    \"\"\"\n",
    "    if date_of_maintenance is not None:\n",
    "        date_of_maintenance = utils.to_datetime64(date_of_maintenance)\n",
    "\n",
    "    # Dataframe polishing\n",
    "    # drop only where station info is None\n",
    "    df = df.fillna(\"None\")\n",
    "    df = df.mask(df[\"Q00\"].eq(\"None\")).dropna()\n",
    "    df = df.reset_index()\n",
    "    \n",
    "    # Iterable station selection\n",
    "    if stations is None:\n",
    "        stations = np.unique(df['Q00'].values)\n",
    "    if isinstance(stations, str):\n",
    "        stations = [int(stations)]\n",
    "    try:\n",
    "        iter(stations)\n",
    "    except:\n",
    "        stations = [stations]\n",
    "    \n",
    "    # Find reports to consider per station\n",
    "    idxs = []\n",
    "    for station in stations:\n",
    "        dfq = df.query(f\"Q00=={station}\")\n",
    "        # find next report within -1 to 10 days\n",
    "        report_dates = dfq[\"datestamp\"].values.astype(\"datetime64[ns]\")\n",
    "        dtime = report_dates - date_of_maintenance\n",
    "        mask = dtime < np.timedelta64(10,'D')\n",
    "        mask *= dtime > np.timedelta64(-1,'h')\n",
    "        if np.all(~mask): # no reports within time interval\n",
    "            continue\n",
    "        idx = np.argwhere(mask).ravel()[0]\n",
    "        next_report_date = report_dates[idx]\n",
    "        \n",
    "        # find reports around 2 days of next report for merging\n",
    "        dtime = report_dates - next_report_date\n",
    "        mask = dtime < np.timedelta64(2,'D')\n",
    "        mask *= dtime >= np.timedelta64(0,'D') # include \"next_report_date\"\n",
    "        idx = np.argwhere(mask).ravel()\n",
    "        idxs += list(dfq.index[idx])\n",
    "\n",
    "    results = {}\n",
    "    for i in idxs:\n",
    "        if df[\"Q00\"].values[i] == \"None\":\n",
    "            continue\n",
    "        box = int(df[\"Q00\"].values[i])\n",
    "        key = f\"{box:03d}\"\n",
    "        mdate = pd.to_datetime(df['datestamp'][i])\n",
    "\n",
    "        # store report in dictionary\n",
    "        if key not in results:\n",
    "            # initialize marks\n",
    "            for mkey in _mark_keys:\n",
    "                results = assoc_in(results, [key,mkey], 4)\n",
    "            # initialize notes\n",
    "            for nkey in _note_keys:\n",
    "                results = assoc_in(results, [key,nkey], \"\")\n",
    "            # initialize maintenancetime\n",
    "            results = assoc_in(results, [key,\"maintenancetime\"], mdate)\n",
    "\n",
    "        # merge notes if multiple reports exist\n",
    "        for nkey in _note_keys:\n",
    "            new_note = df[_note_keys[nkey]].values[i]\n",
    "            if new_note==\"None\":\n",
    "                continue\n",
    "            # update_note = (results[key][nkey]+'; '+new_note).strip('; ')\n",
    "            results = assoc_in(results, [key,nkey], new_note) # update_note)\n",
    "\n",
    "        # update marks with most recent report if not None\n",
    "        for mkey in _mark_keys:\n",
    "            new_mark = df[_mark_keys[mkey]][i]\n",
    "            if new_mark==\"None\":\n",
    "                continue\n",
    "            if mkey.startswith(\"clean\"):\n",
    "                new_mark = _pollution_marks[new_mark]\n",
    "            else:\n",
    "                new_mark = _alignment_marks[new_mark]\n",
    "            results = assoc_in(results, [key,mkey], new_mark)\n",
    "            # update associated maintenance date\n",
    "            results = assoc_in(results, [key,\"maintenancetime\"], mdate)\n",
    "        \n",
    "    return results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-24T08:11:56.239233500Z",
     "start_time": "2024-05-24T08:11:55.992646200Z"
    },
    "collapsed": false,
    "tags": [
     "hide-output"
    ]
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'001': {'clean': 1,\n",
       "  'align': 2,\n",
       "  'clean2': 3,\n",
       "  'align2': 1,\n",
       "  'note_general': '222',\n",
       "  'note_align': 'test',\n",
       "  'note_clean': 'testing notes',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2023-05-08 16:09:06')},\n",
       " '002': {'clean': 2,\n",
       "  'align': 4,\n",
       "  'clean2': 4,\n",
       "  'align2': 4,\n",
       "  'note_general': '222',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2023-05-08 16:08:20')}}"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#|dropout\n",
    "# Parse LimeSurvey report\n",
    "fn = \"../../example_data/results-survey224783.csv\"\n",
    "#fn = \"../../testnb/results-survey224783.csv\"\n",
    "\n",
    "df = pd.read_csv(fn, sep=';')\n",
    "\n",
    "parse_report(df, np.datetime64(\"2023-05-08T01:58\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-24T08:11:56.389288500Z",
     "start_time": "2024-05-24T08:11:56.037737300Z"
    },
    "collapsed": false,
    "tags": [
     "hide-output"
    ]
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'001': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '004': {'clean': 1,\n",
       "  'align': 1,\n",
       "  'clean2': 1,\n",
       "  'align2': 1,\n",
       "  'note_general': 'additional note',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 13:00:00')},\n",
       " '005': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '007': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '009': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '010': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '012': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '014': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '015': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '018': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '024': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '025': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '026': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '032': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '033': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '035': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '037': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '038': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '043': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '044': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '046': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '047': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '049': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'scratches on horizontal pyranometer',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '053': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '054': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '055': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '058': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'scratches on tiltet pyranometer',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '060': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '086': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')},\n",
       " '087': {'clean': 0,\n",
       "  'align': 0,\n",
       "  'clean2': 0,\n",
       "  'align2': 0,\n",
       "  'note_general': 'nan',\n",
       "  'note_align': '',\n",
       "  'note_clean': '',\n",
       "  'note_align2': '',\n",
       "  'note_clean2': '',\n",
       "  'maintenancetime': Timestamp('2019-06-17 12:00:00')}}"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#|dropout\n",
    "fn_lb = \"../../example_data/legacy_logbook.xls\"\n",
    "# Parse legacy xls notebook\n",
    "parse_report(parse_legacy_logbook(fn_lb), np.datetime64(\"2019-06-17T12:00\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "## Make aggregated quality flags\n",
    "Aggregate quality marks to a binary number according to CF-Convention section 3.5 for quality flags.\n",
    "\n",
    "QC flag binary representation, bits: XXYY  with:\n",
    "* XX - level - [00,01,10] - good, slight out of level, bad out of level\n",
    "* YY - clean - [00,01,10,11] good, slight-, moderate-, strong covered\n",
    "```\n",
    "flag_mask = '3b,3b,3b, 12b, 12b'\n",
    "flag_values = '1b, 2b, 3b, 4b, 8b '\n",
    "flag_meanings = \"\n",
    "    soiling_light\n",
    "    soiling_moderate\n",
    "    soiling_heavy\n",
    "    level_problematic\n",
    "    level_bad\"\n",
    "```\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-24T08:11:56.389288500Z",
     "start_time": "2024-05-24T08:11:56.196029Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#|export\n",
    "def get_qcflag(qc_clean, qc_level):\n",
    "    \"\"\"\n",
    "    Aggregate quality flags.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    qc_clean: int\n",
    "        [0,1,2,3] [clean, slight-, moderate-, strong covered]\n",
    "    qc_level: int\n",
    "        [0,1,2] [good, slight misalignment, strong misalignment]\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    int\n",
    "        aggregated quality flagg [0-11]\n",
    "    \"\"\"\n",
    "    qc = (qc_level<<2) + qc_clean\n",
    "    return qc\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-05-24T08:11:57.713919700Z",
     "start_time": "2024-05-24T08:11:56.208840800Z"
    },
    "collapsed": false,
    "tags": [
     "remove-cell",
     "hide-input",
     "hide-output"
    ]
   },
   "outputs": [],
   "source": [
    "#|hide\n",
    "# Export module\n",
    "# Requires *nbdev* to export and update the *reports.py* module\n",
    "import nbdev.export\n",
    "import nbformat as nbf\n",
    "name = \"reports\"\n",
    "\n",
    "# Export python module\n",
    "nbdev.export.nb_export( f\"{name}.ipynb\" ,f\"../../src/pyrnet\")\n",
    "\n",
    "# Export to docs\n",
    "ntbk = nbf.read(f\"{name}.ipynb\", nbf.NO_CONVERT)\n",
    "\n",
    "text_search_dict = {\n",
    "    \"#|hide\": \"remove-cell\",  # Remove the whole cell\n",
    "    \"#|dropcode\": \"hide-input\",  # Hide the input w/ a button to show\n",
    "    \"#|dropout\": \"hide-output\"  # Hide the output w/ a button to show\n",
    "}\n",
    "for cell in ntbk.cells:\n",
    "    cell_tags = cell.get('metadata', {}).get('tags', [])\n",
    "    for key, val in text_search_dict.items():\n",
    "            if key in cell['source']:\n",
    "                if val not in cell_tags:\n",
    "                    cell_tags.append(val)\n",
    "    if len(cell_tags) > 0:\n",
    "        cell['metadata']['tags'] = cell_tags\n",
    "    nbf.write(ntbk, f\"../../docs/source/nbs/{name}.ipynb\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}