{ "cells": [ { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "# Parse Logger Files\n", "Parse raw logger data and utility to handle raw data." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "is_executing": true, "tags": [ "remove-cell" ] }, "outputs": [], "source": [ "#|hide\n", "#|default_exp logger" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2023-08-31T12:54:23.382489600Z", "start_time": "2023-08-31T12:54:15.661534300Z" }, "collapsed": false }, "outputs": [], "source": [ "#|export\n", "from numpy.typing import NDArray,ArrayLike\n", "import re\n", "import gzip\n", "import numpy as np\n", "import pandas as pd\n", "from scipy.stats import linregress\n", "import logging\n", "\n", "from pyrnet import utils\n", "\n", "# logging setup\n", "logging.basicConfig(\n", " filename='pyrnet.log',\n", " encoding='utf-8',\n", " level=logging.DEBUG,\n", " format='%(asctime)s %(name)s %(levelname)s:%(message)s'\n", ")\n", "logger = logging.getLogger(__name__)\n", "\n", "\n", "# Local variables\n", "_nat = np.datetime64('NaT','ms')\n", "_re_gprmc = re.compile('^(\\d+,\\d+ \\d) \\$GPRMC,(.*)(\\*\\w{2})$')\n", "_re_adc = re.compile('^(\\d+)(\\s\\d+)+$')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2023-08-31T12:54:24.858973200Z", "start_time": "2023-08-31T12:54:23.394459500Z" }, "collapsed": false }, "outputs": [], "source": [ "import matplotlib.pyplot as plt" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "## Operations on the raw logger file\n", "Open and read the binary files from the logger. These files have a header of 8 lines.\n", "\n", "If the Logger receives power, it immediately starts to write data to the file. The first voltage measurements are good, but the GPS signal needs some time to establish.\n", "\n", "If Logger writes data item per item, so if turned off, the file ends in an incomplete line most of the time. Sometimes, the last information is a GPS line. These events have to be handled.\n", "\n", "### Reading the raw logger file:" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2023-08-31T12:54:24.888059800Z", "start_time": "2023-08-31T12:54:24.869559700Z" }, "collapsed": false, "tags": [ "hide-output" ] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "=====Header and Start of Data=====:\n", "# - TROPOS - Pyranometer Network BOX 9\n", "#FIRMWARE: Logomatic Kwan v1.1 (modified Witthuhn 201803) Aug 23 2018 10:43:35\n", "# MAM setup: MAMCR=2, MAMTIM=4\n", "# SD Card Setup: result=3, MID=3, OID='SD', PNM='SU02G', PRV=128, PSN=321248437\n", "# IDs: TROPOS_ID: A201300109 ; BOX: 9 ; WTS: 9\n", "# Pyr9 - serial:S12128.009 ; Pyr62 - serial:S12137.012\n", "# Calibration: Pyr9: 7.460000 (201504) ; Pyr62: 7.590000 (201504)\n", "# time [ms] , [counts]...\n", "891 836 322 484 194 1000 180\n", "991 836 322 484 194 998 180\n", "91 836 322 484 194 999 180\n", "20800108,112102 0 $GPRMC,112102.067,V,,,,,0.00,0.00,080180,,,N*4C\n", "191 836 322 485 194 1000 180\n", "291 837 324 485 194 998 181\n", "391 836 324 485 194 998 180\n", "\n", "...\n", "\n", "==========End of Data=============:\n", "791 834 322 488 194 1000 180\n", "891 835 323 488 194 998 180\n", "991 834 322 486 194 998 180\n", "91 835 322 486 194 997 180\n", "191 838 322 486 194 1000 180\n", "20030114,112109 0 $GPRMC,112109.000,A,5123.4125,N,01153.1148,E,0.21,0.00,140103,,,A*6D\n", "291 836 322 487 194 1002 180\n", "391 837 323 488 194 1002 181\n", "491 838 324 488 194 1000 180\n", "591 838 324\n" ] } ], "source": [ "#|dropout\n", "fname = \"../../example_data/Pyr9_000.bin\"\n", "if fname[-3:]=='.gz':\n", " f = gzip.open(fname,'rt',errors='ignore') # open in text mode, ignore non UTF8 characters\n", "else:\n", " f = open(fname,'r',errors='ignore')\n", "lines =[l.rstrip() for l in f.readlines()]\n", "f.close()\n", "\n", "print(\"=====Header and Start of Data=====:\")\n", "print(*lines[:15], sep='\\n')\n", "print('\\n...\\n')\n", "print(\"==========End of Data=============:\")\n", "print(*lines[-10:], sep='\\n')" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "### Cleanup\n", "* Skip almost empty files (data lines <20)\n", "* remove the last line as it is probably incomplete\n", "* check again if now a GPS line is the last line and remove it if so" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2023-08-31T12:54:25.012212500Z", "start_time": "2023-08-31T12:54:24.891022Z" }, "collapsed": false }, "outputs": [], "source": [ "# remove last line -> mostly damaged or empty\n", "lines=lines[:-1]\n", "# remove gps line at the end -> else processing issues\n", "if _re_gprmc.match(lines[-1]):\n", " lines=lines[:-1]" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "### Divide GPS and ADC records\n", "GPS and ADC lines have to be parsed separately" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2023-08-31T12:54:25.049520300Z", "start_time": "2023-08-31T12:54:24.914375700Z" }, "collapsed": false, "tags": [ "hide-output" ] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GPS lines:\n", "20800108,112102 0 $GPRMC,112102.067,V,,,,,0.00,0.00,080180,,,N*4C\n", "20800108,112103 0 $GPRMC,112103.067,V,,,,,0.00,0.00,080180,,,N*4D\n", "20030114,112104 0 $GPRMC,112104.065,A,5123.4127,N,01153.1153,E,0.06,0.00,140103,,,A*6E\n", "20030114,112105 0 $GPRMC,112105.065,A,5123.4127,N,01153.1154,E,0.05,0.00,140103,,,A*6B\n", "20030114,112107 0 $GPRMC,112107.000,A,5123.4126,N,01153.1151,E,0.14,0.00,140103,,,A*6E\n", "20030114,112108 0 $GPRMC,112108.000,A,5123.4126,N,01153.1150,E,0.20,0.00,140103,,,A*67\n", "20030114,112109 0 $GPRMC,112109.000,A,5123.4125,N,01153.1148,E,0.21,0.00,140103,,,A*6D\n", "\n", "ADC lines:\n", "891 836 322 484 194 1000 180\n", "991 836 322 484 194 998 180\n", "91 836 322 484 194 999 180\n", "191 836 322 485 194 1000 180\n", "291 837 324 485 194 998 181\n", "391 836 324 485 194 998 180\n", "491 836 322 484 194 1000 181\n", "591 836 322 484 194 999 180\n", "691 837 322 484 194 998 180\n", "791 829 322 484 194 1000 180\n", "...\n" ] } ], "source": [ "#|dropout\n", "gpslines = []\n", "adclines = []\n", "for i,l in enumerate(lines):\n", " m = _re_gprmc.match(l)\n", " if m:\n", " gpslines.append(l)\n", " # -> send to parse_gps function\n", " elif _re_adc.match(l):\n", " adclines.append(l)\n", " # -> send to parse_adc function\n", " else:\n", " # unhandled record...\n", " pass\n", "\n", "print(\"GPS lines:\")\n", "print(*gpslines, sep=\"\\n\")\n", "print()\n", "print(\"ADC lines:\")\n", "print(*adclines[:10], sep=\"\\n\")\n", "print(\"...\")" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "### Parsing GPS\n", "Workflow:\n", "1. Look if GPS is available (Status:*A*) if not return nan\n", "2. Read lat,lon and time information\n", "3. Account for GPS rollover\n", "\n", "The GPS chip in use has no automatic patch to account for GPS week integer rollover, which happend at 2019-04-06. Measurements taken after this date, have to add 1024 weeks to the date the GPS provides. Therefore this parsing function requires the measurement date (to process older campaigns)." ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2023-08-31T12:54:25.051522300Z", "start_time": "2023-08-31T12:54:24.940817300Z" }, "collapsed": false }, "outputs": [], "source": [ "#|export\n", "def parse_gprmc(s, date_of_measure=np.datetime64('now')):\n", " '''\n", " Parse a string with a GPRMC GPS record\n", "\n", " Parameters\n", " ----------\n", " s: string\n", " A GPRMC record\n", " date_of_measure: datetime or datetime64\n", " A rough time, when the measurements happen to account for GPS rollover.\n", " Precise datetime is only necessary for the period of 2019-05-06 to 2019-08-17.\n", " Otherwise, providing a year is sufficient.\n", " The default is np.datetime64(\"now\"), which is sufficient for all measurements conducted later than 2019-08-17.\n", "\n", " Returns\n", " -------\n", " gprmc : tuple\n", " A tuple with datetime64,status,lat,lon\n", " '''\n", " date_of_measure = utils.to_datetime64(date_of_measure)\n", " # split fields of GPRMC record\n", " f = s.split(',')\n", " status = f[1]\n", " if status=='A':\n", " try:\n", " # parse latitude\n", " lat = float(f[2][:2])+float(f[2][2:])/60\n", " if f[3]=='S':\n", " lat *= -1.0\n", " # parse longitude\n", " lon = float(f[4][:3])+float(f[4][3:])/60\n", " if f[5]=='W':\n", " lon *= -1.0\n", " if date_of_measure>np.datetime64(\"2019-04-06\") and date_of_measurenp.datetime64(\"2019-04-06\"): #account for gps week rollover -> date jump 1024 weeks back at 2019-04-06\n", " dt= dt+np.timedelta64(1024,'W')\n", " r = (dt,status,lat,lon)\n", " except:\n", " return (_nat,'V',np.nan,np.nan)\n", " else:\n", " r = (_nat,'V',np.nan,np.nan)\n", " return r" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "### Parse ADC\n", "The ADC output are simple tab separated voltage values of the 5 analog input pins of the Logger.\n", "#### ADC (3.3V, 10bit) Analog In Pins:\n", "* [0] Internal Battery measure\n", "* [1] Temperature sensor (0-5V, measured with voltage split)\n", "* [2] Humidity sensor (0-5V, measured with voltage split)\n", "* [3] Pyranometer main (0 -$\\mathrm{gain}*\\mathrm{F}*\\mathrm{C}$ V)\n", "* [4] Battery (0-6V, measured with voltage split (same resistors))\n", "* [5] None\n", "* [6] Pyranometer secondary platform (0 - $\\mathrm{gain}*\\mathrm{F}*\\mathrm{C}$ V)\n", "* [7] None\n", "* [8] None\n", "\n", "with $\\mathrm{gain}=300$, surface irradiance $\\mathrm{F}$ of 0-1500 Wm-2, and calibration factor $\\mathrm{C} < 8e-6 V\\,Wm-2$, the voltage range of the pyranometer sensors spans the full available range (0-3.3V) depending on the calibration factor.\n", "\n", "> *None* column won't appear in raw output file\n", "\n", "> The first column is always milliseconds of processor runtime." ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2023-08-31T12:54:25.051522300Z", "start_time": "2023-08-31T12:54:24.957713300Z" }, "collapsed": false }, "outputs": [], "source": [ "#|export\n", "def parse_adc(s):\n", " '''\n", " Parse an ADC record\n", "\n", " Parameters\n", " ----------\n", " s: string\n", " The ADC record\n", "\n", " Returns\n", " -------\n", " t: tuple\n", " A tuple of digital counts of the ADC\n", " '''\n", " return tuple(map(int,s.split()))" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "### Putt all together\n", "This function reads the logger file, separates GPS and ADC records, wich will be parsed by the functions introduced above. Then the data is collected" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2023-08-31T12:54:25.052970Z", "start_time": "2023-08-31T12:54:24.969512600Z" }, "collapsed": false }, "outputs": [], "source": [ "#|export\n", "dtype_gprmc = [\n", " ( 'time', 'datetime64[ms]' ),\n", " ( 'status', 'S1' ),\n", " ( 'lat', 'f8' ),\n", " ( 'lon', 'f8' ),\n", " ( 'iadc', 'u4' )\n", "]\n", "\n", "def read_records(fname: str,\n", " date_of_measure: np.datetime64 = np.datetime64('now')) -> (NDArray, NDArray):\n", " '''\n", " Read the GPRMC and ADC records from the pyranometer logger files\n", "\n", " Parameters\n", " ----------\n", " fname: string\n", " The filename of the logger file\n", " date_of_measure: numpy.datetime64\n", " Date of measurement to account for gps rollover\n", "\n", " Returns\n", " -------\n", " rec_adc: ndarray\n", " The 10bit ADC readings\n", " rec_gprmc: recarray\n", " The GPRMC GPS records\n", " '''\n", " logger.info(f\"Start reading records from file: {fname}\")\n", " date_of_measure = utils.to_datetime64(date_of_measure)\n", " # Read file, use errors='ignore' to skip non UTF-8 characters\n", " # non UTF-8 characters may arise in broken GPS strings from time to time\n", " if fname[-3:]=='.gz':\n", " f = gzip.open(fname,'rt',errors='ignore') # open in text mode, ignore non UTF8 characters\n", " else:\n", " f = open(fname,'r',errors='ignore')\n", " lines =[l.rstrip() for l in f.readlines()]\n", " f.close()\n", "\n", " ##- skip almost empty files\n", " if len(lines)<20:\n", " logger.info(\"Skip file, as number of records is < 20.\")\n", " return False,False\n", "\n", " # remove last line -> mostly damaged or empty\n", " lines=lines[:-1]\n", " # remove gps line at the end -> else processing issues\n", " if _re_gprmc.match(lines[-1]):\n", " lines=lines[:-1]\n", "\n", " rec_gprmc = []\n", " rec_adc = []\n", " iadc = 0\n", " for i,l in enumerate(lines):\n", " m = _re_gprmc.match(l)\n", " if m:\n", " r = parse_gprmc(m.group(2), date_of_measure)\n", " if not np.isnat(r[0]):\n", " # add number of adc values before GPS line\n", " rec_gprmc.append(r+(iadc,))\n", " elif _re_adc.match(l):\n", " r = parse_adc(l)\n", " if iadc==0:\n", " adc_len=len(r)\n", " # if record line is incomplete (due to power cut off)\n", " # the line is dropped\n", " if len(r)==adc_len:\n", " rec_adc.append(r)\n", " iadc += 1\n", " else:\n", " # unhandled record...\n", " pass\n", " rec_adc = np.array(rec_adc,dtype=np.uint16)\n", " rec_gprmc = np.array(rec_gprmc,dtype=dtype_gprmc).view(np.recarray)\n", " logger.info(\"Done reading records from raw file.\")\n", " return rec_adc, rec_gprmc" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2023-08-31T12:54:25.066867400Z", "start_time": "2023-08-31T12:54:25.012212500Z" }, "collapsed": false }, "outputs": [], "source": [ "rec_adc, rec_gprmc = read_records(fname)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "ExecuteTime": { "end_time": "2023-08-31T12:54:25.200822200Z", "start_time": "2023-08-31T12:54:25.031273100Z" }, "collapsed": false, "tags": [ "hide-output" ] }, "outputs": [ { "data": { "text/plain": [ "rec.array([('2022-08-30T11:21:04.065', b'A', 51.39021167, 11.885255 , 26),\n", " ('2022-08-30T11:21:05.065', b'A', 51.39021167, 11.88525667, 35),\n", " ('2022-08-30T11:21:07.000', b'A', 51.39021 , 11.88525167, 54),\n", " ('2022-08-30T11:21:08.000', b'A', 51.39021 , 11.88525 , 64),\n", " ('2022-08-30T11:21:09.000', b'A', 51.39020833, 11.88524667, 74)],\n", " dtype=[('time', '" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# get millisecond part\n", "ta = get_adc_time(rec_adc)\n", "### 2. Get logger clock for GPRMC records\n", "# get time of previous ADC record\n", "t1 = ta[rec_gprmc.iadc]/np.timedelta64(1,'ms')\n", "# time of GPS records from GPRMC record\n", "t2 = (rec_gprmc.time-rec_gprmc.time[0])/np.timedelta64(1,'ms')\n", "a,b = linregress(t1,t2)[:2]\n", "t = rec_gprmc.time[0]+ta*a+b.astype('timedelta64[ms]')\n", "\n", "fig,ax = plt.subplots(1,1)\n", "_ = ax.plot(ta/np.timedelta64(1,'ms'),\n", " a*ta/np.timedelta64(1,'ms')+b,\n", " 'r.', label='ADC records')\n", "_ = ax.plot(t1,t2,'k.', label='GPS records')\n", "_ = ax.grid(True)\n", "_ = ax.legend()\n", "_ = ax.set_ylabel(\"GPS milliseconds from first GPS record\")\n", "_ = ax.set_xlabel(\"ADC milliseconds from first GPS record\")\n", "fig.show()\n" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "ExecuteTime": { "end_time": "2023-08-31T12:54:26.076441400Z", "start_time": "2023-08-31T12:54:25.898233400Z" }, "collapsed": false }, "outputs": [], "source": [ "#|export\n", "def sync_adc_time(adctime, gpstime, iadc, check_results=True):\n", " '''\n", " Synchronize the ADC time to the GPS records\n", "\n", " Parameters\n", " ----------\n", " adctime: ndarray\n", " Milliseconds from start of the ADC measurement.\n", " gpstime: ndarray\n", " GPS time\n", " iadc: ndarray of int\n", " Index of the last ADC sample before a GPS record has been stored.\n", " check_results: bool\n", " If True, check plausibility of fitted slope and offset (abs(slope)<10s/day; abs(offset)<2s).\n", " The default is True\n", "\n", " Returns\n", " -------\n", " time: ndarray(datetime64[ms])\n", " The time of the ADC records\n", " '''\n", " # assure milliseconds\n", " ta = adctime.astype('timedelta64[ms]')\n", " # assure int type\n", " iadc = np.array(iadc).astype(int)\n", " # get GPStime of the previous ADC record\n", " # here we assume, that the last ADC sample stored before a GPS record\n", " # has the same time as the GPS timestamp\n", " t1 = ta[iadc]/np.timedelta64(1,'ms')\n", " # time of GPS records from GPRMC record\n", " t2 = (gpstime-gpstime[0])/np.timedelta64(1,'ms')\n", " a,b = linregress(t1,t2)[:2]\n", " t = gpstime[0]+ta*a+b.astype('timedelta64[ms]')\n", "\n", " drift = (1/a-1)*86400\n", " logger.info('Sync ADC time to GPS Fit Summary:')\n", " logger.info('|-- Drift : {0:7.2f} [s/day]'.format(drift))\n", " logger.info('|-- Slope : {0:13.8f}'.format(a))\n", " logger.info('|-- Offset : {0:7.2f} [s]'.format(b/1000))\n", " logger.info('|-- Jitter : {0:7.2f} [ms]'.format(np.std(t2-(a*t1+b))))\n", " \n", " if check_results:\n", " if np.abs(drift)>10:\n", " logger.warning(\"Absolute ADC drift larger than 10 s/day.\")\n", " return None\n", " return t" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "ExecuteTime": { "end_time": "2023-08-31T12:54:26.078646800Z", "start_time": "2023-08-31T12:54:25.928316200Z" }, "collapsed": false, "tags": [ "hide-output" ] }, "outputs": [ { "data": { "text/plain": [ "array(['2022-08-30T11:21:01.443', '2022-08-30T11:21:01.545',\n", " '2022-08-30T11:21:01.647', '2022-08-30T11:21:01.750',\n", " '2022-08-30T11:21:01.852', '2022-08-30T11:21:01.955',\n", " '2022-08-30T11:21:02.057', '2022-08-30T11:21:02.160',\n", " '2022-08-30T11:21:02.262', '2022-08-30T11:21:02.364',\n", " '2022-08-30T11:21:02.467', '2022-08-30T11:21:02.569',\n", " '2022-08-30T11:21:02.672', '2022-08-30T11:21:02.774',\n", " '2022-08-30T11:21:02.877', '2022-08-30T11:21:02.979',\n", " '2022-08-30T11:21:03.081', '2022-08-30T11:21:03.184',\n", " '2022-08-30T11:21:03.286', '2022-08-30T11:21:03.389',\n", " '2022-08-30T11:21:03.491', '2022-08-30T11:21:03.594',\n", " '2022-08-30T11:21:03.696', '2022-08-30T11:21:03.799',\n", " '2022-08-30T11:21:03.901', '2022-08-30T11:21:04.003',\n", " '2022-08-30T11:21:04.106', '2022-08-30T11:21:04.208',\n", " '2022-08-30T11:21:04.311', '2022-08-30T11:21:04.413',\n", " '2022-08-30T11:21:04.516', '2022-08-30T11:21:04.618',\n", " '2022-08-30T11:21:04.720', '2022-08-30T11:21:04.823',\n", " '2022-08-30T11:21:04.925', '2022-08-30T11:21:05.028',\n", " '2022-08-30T11:21:05.130', '2022-08-30T11:21:05.233',\n", " '2022-08-30T11:21:05.335', '2022-08-30T11:21:05.438',\n", " '2022-08-30T11:21:05.540', '2022-08-30T11:21:05.642',\n", " '2022-08-30T11:21:05.745', '2022-08-30T11:21:05.847',\n", " '2022-08-30T11:21:05.950', '2022-08-30T11:21:06.052',\n", " '2022-08-30T11:21:06.155', '2022-08-30T11:21:06.257',\n", " '2022-08-30T11:21:06.359', '2022-08-30T11:21:06.462',\n", " '2022-08-30T11:21:06.564', '2022-08-30T11:21:06.667',\n", " '2022-08-30T11:21:06.769', '2022-08-30T11:21:06.872',\n", " '2022-08-30T11:21:06.974', '2022-08-30T11:21:07.077',\n", " '2022-08-30T11:21:07.179', '2022-08-30T11:21:07.281',\n", " '2022-08-30T11:21:07.384', '2022-08-30T11:21:07.486',\n", " '2022-08-30T11:21:07.589', '2022-08-30T11:21:07.691',\n", " '2022-08-30T11:21:07.794', '2022-08-30T11:21:07.896',\n", " '2022-08-30T11:21:07.998', '2022-08-30T11:21:08.101',\n", " '2022-08-30T11:21:08.203', '2022-08-30T11:21:08.306',\n", " '2022-08-30T11:21:08.408', '2022-08-30T11:21:08.511',\n", " '2022-08-30T11:21:08.613', '2022-08-30T11:21:08.715',\n", " '2022-08-30T11:21:08.818', '2022-08-30T11:21:08.920',\n", " '2022-08-30T11:21:09.023', '2022-08-30T11:21:09.125',\n", " '2022-08-30T11:21:09.228'], dtype='datetime64[ms]')" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#|dropout\n", "adctime = get_adc_time(rec_adc)\n", "time = sync_adc_time(\n", " adctime=adctime,\n", " gpstime=rec_gprmc.time,\n", " iadc=rec_gprmc.iadc,\n", " check_results=False\n", ")\n", "time" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "## Binning of ADC values\n", "The original time resolution is about 10Hz. Here we apply binning of ADC samples to a regular spaced time grid.\n", "\n", "We declare *bins* as the desired number of bins per day, e.g. ```bins = 86400``` one bin every second." ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "ExecuteTime": { "end_time": "2023-08-31T12:54:26.117219300Z", "start_time": "2023-08-31T12:54:25.968026700Z" }, "collapsed": false }, "outputs": [], "source": [ "# Desired bins per day\n", "bins = 86400" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "Now we can calculate the bin number each sample is assigned to." ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "ExecuteTime": { "end_time": "2023-08-31T12:54:26.118487800Z", "start_time": "2023-08-31T12:54:26.002083900Z" }, "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([40861, 40861, 40861, 40861, 40861, 40861, 40862, 40862, 40862,\n", " 40862, 40862, 40862, 40862, 40862, 40862, 40862, 40863, 40863,\n", " 40863, 40863, 40863, 40863, 40863, 40863, 40863, 40864, 40864,\n", " 40864, 40864, 40864, 40864, 40864, 40864, 40864, 40864, 40865,\n", " 40865, 40865, 40865, 40865, 40865, 40865, 40865, 40865, 40865,\n", " 40866, 40866, 40866, 40866, 40866, 40866, 40866, 40866, 40866,\n", " 40866, 40867, 40867, 40867, 40867, 40867, 40867, 40867, 40867,\n", " 40867, 40867, 40868, 40868, 40868, 40868, 40868, 40868, 40868,\n", " 40868, 40868, 40869, 40869, 40869])" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# starting day\n", "t0 = time[0].astype('datetime64[D]')\n", "# convert time to 'days from t0'\n", "dday = (time-t0)/np.timedelta64(1,'D')\n", "# calculate the bin number assigned to every sample\n", "it = np.int64(dday*bins)\n", "it" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "Calculate number of samples per bins (*cnt*) and a bin index starting with the first bin (*inv_idx*)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "ExecuteTime": { "end_time": "2023-08-31T12:54:26.119495700Z", "start_time": "2023-08-31T12:54:26.050743200Z" }, "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(array([40861, 40862, 40863, 40864, 40865, 40866, 40867, 40868, 40869]),\n", " array([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,\n", " 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4,\n", " 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,\n", " 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8]),\n", " array([ 6, 10, 9, 10, 10, 10, 10, 9, 3]))" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "uval,inv_idx,cnt = np.unique(it,return_inverse=True,return_counts=True)\n", "uval,inv_idx,cnt" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "Now [np.bincount](https://numpy.org/doc/stable/reference/generated/numpy.bincount.html) can be used to calculate the average value of the samples in every bin." ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "ExecuteTime": { "end_time": "2023-08-31T12:54:26.151571Z", "start_time": "2023-08-31T12:54:26.074518600Z" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "rec_adc values of the fist bin, 3rd column\n", "[322 322 322 322 324 324]\n", "Average calculated with bincount:\n", "322.6666666666667\n" ] } ], "source": [ "binvalues = np.bincount(inv_idx,weights=rec_adc[:,2])/cnt\n", "print('rec_adc values of the fist bin, 3rd column')\n", "print(rec_adc[inv_idx==0,2])\n", "print(\"Average calculated with bincount:\")\n", "print(binvalues[0])" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "Time values for each bin are assigned as the starting boundary of the sample times within the bin." ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "ExecuteTime": { "end_time": "2023-08-31T12:54:26.172619700Z", "start_time": "2023-08-31T12:54:26.094134200Z" }, "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "sample time for the fist bin:\n", "['2022-08-30T11:21:01.443' '2022-08-30T11:21:01.545'\n", " '2022-08-30T11:21:01.647' '2022-08-30T11:21:01.750'\n", " '2022-08-30T11:21:01.852' '2022-08-30T11:21:01.955']\n", "Assigned bin time:\n", "2022-08-30T11:21:01.000\n" ] } ], "source": [ "# assign bin time\n", "bintime = t0+ np.timedelta64(86400000,'ms')*uval.astype(np.float64)/bins\n", "print('sample time for the fist bin:')\n", "print(time[inv_idx==0])\n", "print(\"Assigned bin time:\")\n", "print(bintime[0])" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "ExecuteTime": { "end_time": "2023-08-31T12:54:26.394789500Z", "start_time": "2023-08-31T12:54:26.113015800Z" }, "collapsed": false }, "outputs": [], "source": [ "#|export\n", "def adc_binning(rec_adc, time, bins=86400):\n", " \"\"\"\n", " Binning and averaging of ADC samples\n", "\n", " Parameters\n", " ----------\n", " rec_adc: ndarray\n", " The ADC records parsed from the logger.\n", " time: ndarray of time objects\n", " Sample time of ADC records.\n", " bins: int\n", " Number of desired bins per day. The default is 86400, which result in\n", " mean values of 1 second steps per day. Maximum resolution is 86400000.\n", "\n", " Returns\n", " -------\n", " ndarray, ndarray(datetime64)\n", " Binned ADC records and corresponding time.\n", " \"\"\"\n", " time = utils.to_datetime64(time)\n", " # starting day\n", " t0 = time[0].astype('datetime64[D]')\n", " # convert time to 'days from t0'\n", " dday = (time-t0)/np.timedelta64(1,'D')\n", " # calculate time bins of output dataset\n", " it = np.int64(dday*bins)\n", " # index for unique bins (inv_idx) and count of samples per bin (cnt)\n", " uval, inv_idx, cnt = np.unique(it,\n", " return_inverse=True,\n", " return_counts=True)\n", " logger.info(f\"ADC records fill {len(uval)} bins of data.\")\n", " # Calculate average of sample values per bin\n", " # The first two columns of rec_adc will be omitted as they store the\n", " # internal measures for timing and battery (first two columns)\n", " V = np.zeros((len(uval),rec_adc.shape[1]-2))\n", " for i in range(V.shape[1]):\n", " V[:,i] = np.bincount(inv_idx,weights=rec_adc[:,i+2])/cnt\n", " bintime = t0+ np.timedelta64(86400000,'ms')*uval.astype(np.float64)/bins\n", " logger.info(f\"ADC records span a time period from {bintime[0]} to {bintime[-1]}.\")\n", " return V, bintime" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ " In a similar way we can do it for a dataset:" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "ExecuteTime": { "end_time": "2023-08-31T12:54:26.396783100Z", "start_time": "2023-08-31T12:54:26.133238600Z" }, "collapsed": false }, "outputs": [], "source": [ "#|export\n", "def resample_mean(ds,freq='1s'):\n", "\n", " # start and end bin time\n", " start_time = np.datetime64(\n", " pd.to_datetime(ds.time.values[0]).floor(freq)\n", " )\n", " end_time = np.datetime64(\n", " pd.to_datetime(ds.time.values[-1]).floor(freq)\n", " )\n", "\n", " # bin time\n", " bintime = pd.date_range(\n", " start_time,\n", " end_time,\n", " freq=freq\n", " ).floor(freq)\n", "\n", " ds_r = ds.assign_coords(\n", " {\n", " \"time_resampled\": (\"time_resampled\", bintime)\n", " }\n", " )\n", "\n", " # calculate bin index of output dataset\n", " it = np.int64(\n", " (ds.time.values - start_time)/pd.Timedelta(freq)\n", " )\n", "\n", " # index for unique bins (inv_idx) and count of samples per bin (cnt)\n", " uval, inv_idx, cnt = np.unique(it,\n", " return_inverse=True,\n", " return_counts=True)\n", "\n", " # apply to all time dependent variables\n", " for var in ds:\n", " if 'time' in ds[var].dims:\n", " # replace time dimension with time_resampled\n", " vardims = ds[var].dims\n", " newdims = [d if d!='time' else 'time_resampled' for d in vardims]\n", " # if only time dimension, take the shortcut\n", " if len(vardims)==1:\n", " newval = np.bincount(inv_idx,weights=ds[var].values)/cnt\n", " ds_r = ds_r.assign( {var: (newdims, newval)})\n", " elif len(vardims)==2: # more than the time dimension, assume 2\n", " N = ds[var].shape[1]\n", " newval = np.zeros((bintime.size,N))\n", " for i in range(N):\n", " newval[:,i] = np.bincount(inv_idx,weights=ds[var].values[:,i])/cnt\n", " ds_r = ds_r.assign( {var: (newdims, newval)})\n", " else:\n", " raise ValueError(\"logger.resample is implemented for 2dims only.\")\n", " # add attributes again\n", " ds_r[var].attrs.update(ds[var].attrs)\n", " ds_r[var].encoding.update(ds[var].encoding)\n", "\n", " # drop original time and rename\n", " ds_r = ds_r.drop_dims(\"time\").rename({\"time_resampled\":\"time\"})\n", " # add time encoding\n", " ds_r.time.encoding.update({\n", " \"units\": f\"seconds since {np.datetime_as_string(ds_r.time.data[0], unit='D')}T00:00Z\",\n", " })\n", "\n", " return ds_r\n" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "## Interpolate GPS coordinates" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "ExecuteTime": { "end_time": "2023-08-31T12:54:26.427699700Z", "start_time": "2023-08-31T12:54:26.146843900Z" }, "collapsed": false }, "outputs": [], "source": [ "#|export\n", "def interpolate_coords(rec_gprmc, time):\n", " \"\"\"\n", " Interpolate lat and lon from gps records\n", "\n", " Parameters\n", " ----------\n", " rec_gprmc: recarray\n", " The GPRMC records from the logger file\n", " time: list or array of time objects\n", "\n", " Returns\n", " -------\n", " ndarray, ndarray\n", " lat and lon interpolated from GPS records to `time`\n", " \"\"\"\n", " time = utils.to_datetime64(time)\n", " t0 = time[0].astype('datetime64[D]')\n", " # coordinate variables\n", " x1 = (time-t0)/np.timedelta64(1,'ms')\n", " x2 = (rec_gprmc.time-t0)/np.timedelta64(1,'ms')\n", " lat = np.interp(x1,x2,rec_gprmc.lat)\n", " lon = np.interp(x1,x2,rec_gprmc.lon)\n", " return lat, lon" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "ExecuteTime": { "end_time": "2023-08-31T12:54:29.060939800Z", "start_time": "2023-08-31T12:54:26.162882100Z" }, "collapsed": false, "tags": [ "remove-cell", "hide-input", "hide-output" ] }, "outputs": [], "source": [ "#|hide\n", "# Export module\n", "# Requires *nbdev* to export and update the *../lib/logger.py* module\n", "import nbdev.export\n", "import nbformat as nbf\n", "name = \"logger\"\n", "\n", "# Export python module\n", "nbdev.export.nb_export( f\"{name}.ipynb\" ,f\"../../src/pyrnet\")\n", "\n", "# Export to docs\n", "ntbk = nbf.read(f\"{name}.ipynb\", nbf.NO_CONVERT)\n", "\n", "text_search_dict = {\n", " \"#|hide\": \"remove-cell\", # Remove the whole cell\n", " \"#|dropcode\": \"hide-input\", # Hide the input w/ a button to show\n", " \"#|dropout\": \"hide-output\" # Hide the output w/ a button to show\n", "}\n", "for cell in ntbk.cells:\n", " cell_tags = cell.get('metadata', {}).get('tags', [])\n", " for key, val in text_search_dict.items():\n", " if key in cell['source']:\n", " if val not in cell_tags:\n", " cell_tags.append(val)\n", " if len(cell_tags) > 0:\n", " cell['metadata']['tags'] = cell_tags\n", " nbf.write(ntbk, f\"../../docs/source/nbs/{name}.ipynb\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 0 }