"""
A formatting-friendly convenience class for 1-D sample data.
"""
import locale
import math
import re
from dataclasses import dataclass
from typing import Sequence
import numpy as np
from GTC import type_a
from GTC import ureal
from GTC.lib import UncertainReal
# Regular expression that parses a format specification (format_spec).
# It accepts the fields of the builtin format-specification mini-language
# followed by optional, single-character custom fields (mode, style, si).
#
# format_spec ::= [[fill]align][sign][#][0][width][grouping][.precision][type][mode][style][si]
# https://docs.python.org/3/library/string.html#format-specification-mini-language
_format_spec_regex = re.compile(
    # the builtin grammar fields
    # (a fill character is only recognised when an align character follows it)
    r'((?P<fill>.)(?=[<>=^]))?'
    r'(?P<align>[<>=^])?'
    r'(?P<sign>[ +-])?'
    r'(?P<hash>#)?'
    r'(?P<zero>0)?'
    r'(?P<width>\d+)?'
    r'(?P<grouping>[_,])?'
    r'((\.)(?P<precision>\d+))?'
    r'(?P<type>[bcdeEfFgGnosxX%])?'

    # Bracket or Plus-minus
    # NOTE: these characters cannot be in <type>
    r'(?P<mode>[BP])?'

    # Latex or Unicode
    # NOTE: these characters cannot be in <type> nor <mode>
    r'(?P<style>[LU])?'

    # SI prefix
    # NOTE: this character cannot be in <type>, <mode> nor <style>
    r'(?P<si>S)?'

    # the regex must match until the end of the string
    r'$'
)
# Matches an exponent term such as e+03 or E-12 (the sign is mandatory).
_exponent_regex = re.compile(r'[eE][+-]\d+')

# Maps an exponent (every multiple of 3 from -30 to 30) to its SI prefix
# symbol. The entry for 0 is a single space.
_si_map = dict(zip(range(-30, 31, 3), 'qryzafpnum kMGTPEZYRQ'))

# Translation table, for str.translate(), that converts a sign or a digit
# into the equivalent Unicode superscript character.
_unicode_superscripts = {
    ord(plain): superscript
    for plain, superscript in zip(
        '+-0123456789',
        '\u207A\u207B\u2070\u00B9\u00B2\u00B3\u2074\u2075\u2076\u2077\u2078\u2079',
    )
}
[docs]
def order_of_magnitude(value: float) -> int:
    """Return the power of ten of the leading digit of `value`.

    The sign of `value` is ignored. Zero has no leading digit, so 0 is
    returned for it.
    """
    if value == 0:
        return 0
    magnitude = math.fabs(value)
    exponent = math.floor(math.log10(magnitude))
    return int(exponent)
[docs]
def parse(format_spec: str) -> dict[str, str]:
    """Split a format specification into its named grammar fields.

    Args:
        format_spec: The format specification to parse.

    Returns:
        The named groups of the format-specification regular expression.
        A field that is absent from `format_spec` has the value None.

    Raises:
        ValueError: If `format_spec` does not match the grammar.
    """
    if (fields := _format_spec_regex.match(format_spec)) is not None:
        return fields.groupdict()
    raise ValueError(f'Invalid format specifier {format_spec!r}')
[docs]
def si_prefix_factor(exponent: int) -> tuple[str, float]:
    """Returns the SI prefix and scaling factor.

    The prefix is the one for the largest multiple of 3 that does not
    exceed `exponent` and the factor is the remaining power of ten. An
    exponent beyond the defined prefixes uses the smallest ('q') or the
    largest ('Q') prefix and the factor absorbs the difference.

    Args:
        exponent: The exponent, e.g., 10 ** exponent

    Returns:
        The prefix (an empty string if 0 <= exponent < 3) and the factor.
    """
    remainder = exponent % 3
    prefix = _si_map.get(exponent - remainder)

    if prefix is None:
        # outside the range of the defined SI prefixes
        if exponent < 0:
            return 'q', 10. ** (exponent + 30)
        if not (0 <= exponent < 3):
            return 'Q', 10. ** (exponent - 30)

    if 0 <= exponent < 3:
        # no prefix is needed for 10**0, 10**1 and 10**2
        prefix = ''
    return prefix, 10. ** remainder
[docs]
@dataclass
class Rounded:
    """Container for the result of rounding a value for display."""

    # the rounded (and possibly rescaled) number
    value: float
    # the precision to use when formatting `value`
    precision: int
    # the presentation type to use when formatting `value`
    type: str
    # the exponent (order of magnitude) that was used for the rounding
    exponent: int
    # text to append after the formatted number
    suffix: str
[docs]
class Samples:
def __init__(self,
samples: str | Sequence[str | int | float] | np.ndarray = None,
*,
mean: float = None,
stdev: float = None,
size: int = None,
overload: float | None = 1e30) -> None:
"""Convenience class for a 1-D array of data samples.
Calculates the mean, standard deviation, variance, relative standard
deviation and standard deviation of the mean of the samples.
Args:
samples: The samples. If a string then in CSV format.
mean: If specified, then it is not calculated from the `samples`.
stdev: If specified, then it is not calculated from the `samples`.
size: If specified, then it is not determined from the `samples`.
overload: For some devices, like a DMM, if the input signal is greater
than the present range can measure, the device returns a large
value (e.g., 9.9E+37) to indicate a measurement overload. If the
absolute value of the mean is greater than `overload` then the
mean and standard deviation become NaN. Setting `overload` to
:data:`None` disables this check.
"""
if samples is not None and any(a is not None for a in (mean, stdev, size)):
raise ValueError('Cannot specify samples and the mean, stdev or size')
if isinstance(samples, str):
stripped = samples.rstrip()
if stripped:
self._samples = np.array(stripped.split(','), dtype=float)
else:
self._samples = np.empty(0)
elif isinstance(samples, np.ndarray):
self._samples = samples
elif samples is None:
self._samples = np.empty(0)
else:
self._samples = np.asarray(list(map(float, samples))) # noqa: samples cannot be None
if self._samples.ndim != 1:
raise ValueError('only 1D arrays are allowed')
self._size = self._samples.size if size is None else size
self._overload = overload
self._stdev = stdev
if mean is not None:
self._mean = self._check_overload(mean)
else:
self._mean = None
def __iter__(self):
return iter((self.mean, self.stdev))
def __format__(self, format_spec) -> str:
fmt = Format(**parse(format_spec))
fmt.update(self.stdom)
return fmt.result(_stylize(self._to_string(fmt), fmt))
[docs]
def __getattr__(self, item):
"""Pass all other attributes to the ndarray."""
return getattr(self._samples, item)
def __repr__(self) -> str:
return f'Samples(mean={self.mean}, stdev={self.stdev}, size={self.size})'
def __getitem__(self, item) -> float:
return self._samples[item]
def __len__(self) -> int:
return self._samples.size
def _check_overload(self, mean: float) -> float:
if self._overload is None:
return mean
if math.isfinite(mean) and abs(mean) > self._overload:
self._stdev = math.nan
return math.nan
return mean
def _to_string(self, fmt: Format) -> str:
"""Convert to a formatted string."""
x, u = self.mean, self.stdom
if u == 0:
if fmt.si:
fmt.update(x)
r = _round(x, fmt)
x_str = fmt.value(r.value, precision=r.precision, type=r.type)
v_str = f'{x_str}{r.suffix}'
else:
v_str = fmt.value(x)
return fmt.result(v_str)
u_finite = math.isfinite(u)
x_finite = math.isfinite(x)
if not (u_finite and x_finite):
si_prefix = ''
if fmt.si and x_finite:
fmt.update(x)
r = _round(x, fmt)
si_prefix = r.suffix
x_str = fmt.value(r.value, precision=r.precision, type=r.type)
else:
x_str = fmt.value(x)
u_str = fmt.uncertainty(u, type=None)
if fmt.mode == 'B':
result = f'{x_str}({u_str}){si_prefix}'
else:
result = f'{x_str}+/-{u_str}{si_prefix}'
# move an exponential term (if it exists) to the end of the string
exp = _exponent_regex.search(result)
if exp:
start, end = exp.span()
s1, s2, s3 = result[:start], result[end:], exp.group()
if fmt.mode == 'B':
result = f'{s1}{s2}{s3}'
else:
result = f'({s1}{s2}){s3}'
return result
x_rounded, u_rounded = _round_samples(x, u, fmt)
u_r = u_rounded.value
precision = x_rounded.precision
x_str = fmt.value(x_rounded.value, precision=precision, type=x_rounded.type)
if fmt.mode == 'P': # Plus-minus mode
u_str = fmt.uncertainty(u_r, precision=precision)
x_u_str = f'{x_str}+/-{u_str}'
if x_rounded.suffix:
return f'({x_u_str}){x_rounded.suffix}'
return x_u_str
# Bracket mode
oom = order_of_magnitude(u_r)
if precision > 0 and oom >= 0:
# the uncertainty straddles the decimal point so
# keep the decimal point in the result
u_str = fmt.uncertainty(u_r, precision=precision, type=u_rounded.type)
else:
hash_, type_ = None, u_rounded.type
if oom < 0:
if fmt.hash:
hash_ = ''
else:
type_ = 'f'
u_str = fmt.uncertainty(round(u_r * 10. ** precision),
precision=0, type=type_, hash=hash_)
return f'{x_str}({u_str}){x_rounded.suffix}'
@property
def mean(self) -> float:
"""Returns the mean."""
if self._mean is not None:
return self._mean
mean = float(np.mean(self._samples)) if self._size > 0 else math.nan
self._mean = self._check_overload(mean)
return self._mean
@property
def overload(self) -> float | None:
"""Returns the overload value."""
return self._overload
@property
def relative_stdev(self) -> float:
"""Returns the relative standard deviation."""
try:
return 100.0 * (self.stdev / self.mean)
except ZeroDivisionError:
return math.nan
@property
def relative_stdom(self) -> float:
"""Returns the relative standard deviation of the mean."""
try:
return 100.0 * (self.stdom / self.mean)
except ZeroDivisionError:
return math.nan
@property
def samples(self) -> np.ndarray:
"""Returns the samples."""
return self._samples
@property
def size(self) -> int:
"""Returns the number of samples."""
return self._size
@property
def stdev(self) -> float:
"""Returns the sample standard deviation."""
if self._stdev is not None:
return self._stdev
self._stdev = float(np.std(self._samples, ddof=1)) if self._size > 1 else math.nan
return self._stdev
@property
def stdom(self) -> float:
"""Returns the standard deviation of the mean."""
try:
return self.stdev / math.sqrt(self._size)
except ZeroDivisionError:
return math.nan
[docs]
def to_json(self) -> dict[str, float]:
"""Allows for this class to be JSON serializable with msl-network."""
return {
'mean': self.mean,
'stdev': self.stdev,
'size': self._size,
'overload': self._overload
}
[docs]
def to_ureal(self,
*,
label: str = None,
delta: float = None,
truncated: bool = False) -> UncertainReal:
"""Convert to an uncertain-real number.
Args:
label: The label to associate with the uncertain number.
delta: The digitization step size (only valid if the samples are digitized).
truncated: Whether the digitized samples were truncated or rounded.
Only used if `delta` is not :data:`None`.
Returns:
The samples as an uncertain-real number.
"""
if delta is not None:
return type_a.estimate_digitized(
self._samples, delta, label=label, truncate=truncated)
if self._samples.size > 0:
return type_a.estimate(self._samples, label=label)
return ureal(self.mean, self.stdom, df=self._size-1, label=label,
independent=True)
@property
def variance(self) -> float:
"""Returns the sample variance."""
return self.stdev * self.stdev
def _round(value: float, fmt: Format, exponent: int = None) -> Rounded:
    """Round `value` to the appropriate number of significant digits.

    Args:
        value: The number to round.
        fmt: The format to use. The position of the last digit that is kept
            comes from `fmt.u_exponent`.
        exponent: The exponent to use for the scaling. If not specified
            then the order of magnitude of `value` is used.

    Returns:
        The rounded value together with the precision, presentation type,
        exponent and suffix to use when formatting it.
    """
    use_si = fmt.si
    if not use_si and not (fmt.nonzero_and_finite or math.isfinite(value)):
        # nothing to round, return the value as is
        return Rounded(value=value, precision=fmt.precision,
                       type=fmt.type, exponent=0, suffix='')

    if exponent is None:
        exponent = order_of_magnitude(value)

    kind = fmt.type

    # fixed-point notation is used for the f (or F) type and also for
    # the g, G and n types when the exponent is in the appropriate range
    if kind in 'fF':
        fixed_point = True
    else:
        fixed_point = kind in 'gGn' and -4 <= exponent < exponent - fmt.u_exponent

    if fixed_point:
        factor, suffix = 1.0, ''
        digits = -fmt.u_exponent
        precision = max(digits, 0)
    elif kind == '%':
        factor, suffix = 0.01, '%'
        digits = -fmt.u_exponent - 2
        precision = max(digits, 0)
    else:
        factor = 10. ** exponent
        digits = precision = max(exponent - fmt.u_exponent, 0)
        # format the factor and drop the leading digit to get the exponent text
        suffix = f'{factor:.0{kind}}'[1:]

    # the scaling is done here so the number itself is formatted as fixed point
    if kind in 'eg%':
        kind = 'f'
    elif kind in 'EG':
        kind = 'F'

    if use_si:
        prefix, si_factor = si_prefix_factor(exponent)
        shift = order_of_magnitude(si_factor)
        precision = max(0, precision - shift)
        scaled = round(value * si_factor / factor, digits - shift)
        suffix = f' {prefix}' if prefix else ''
    else:
        scaled = round(value / factor, digits)

    return Rounded(value=scaled, precision=precision, type=kind,
                   exponent=exponent, suffix=suffix)
def _round_samples(x: float, u: float, fmt: Format) -> tuple[Rounded, Rounded]:
    """Round the value `x` and its uncertainty `u`.

    The exponent is determined from the larger of abs(x) and u (after it
    has been rounded) and that exponent is then used to round both x and u,
    which ensures that both get scaled by the same factor.

    Returns:
        The rounded x and the rounded u.
    """
    largest = max(math.fabs(x), u)
    reference = _round(round(largest, -fmt.u_exponent), fmt)
    shared_exponent = reference.exponent
    return (_round(x, fmt, exponent=shared_exponent),
            _round(u, fmt, exponent=shared_exponent))
def _stylize(text: str, fmt: Format) -> str:
    """Apply the formatting style to `text`.

    Args:
        text: The text to stylize.
        fmt: The format. If `fmt.style` is 'U' the text is converted to use
            Unicode characters and if it is 'L' to LaTeX markup.

    Returns:
        The stylized text, or `text` unchanged if there is no style to
        apply or if `text` is empty.

    Raises:
        AssertionError: If `fmt.style` is set to an unsupported value.
    """
    if not fmt.style or not text:
        return text

    exponent = ''
    exp_number = None
    exp_match = _exponent_regex.search(text)
    if exp_match:
        # don't care whether it starts with e or E and
        # don't want to include the + symbol
        exp_number = int(exp_match.group()[1:])

    if fmt.style == 'U':
        if exp_match and exp_number != 0:
            translated = f'{exp_number}'.translate(_unicode_superscripts)
            exponent = f'\u00D710{translated}'

        replacements = [
            ('+/-', '\u00B1'),
            ('u', '\u00B5')
        ]
    elif fmt.style == 'L':
        if exp_match and exp_number != 0:
            exponent = fr'\times10^{{{exp_number}}}'

        replacements = [
            ('(', r'\left('),
            (')', r'\right)'),
            ('nan', r'\mathrm{NaN}'),
            ('NAN', r'\mathrm{NaN}'),
            ('inf', r'\infty'),  # must come before 'INF'
            ('INF', r'\infty'),
            ('%', r'\%'),
        ]
    else:
        # Raise explicitly instead of using an assert statement. An assert
        # is removed when Python runs with -O, after which the loop below
        # would fail because `replacements` was never assigned.
        raise AssertionError('should not get here')

    if exp_match:
        # substitute the stylized exponent (which is an empty string for
        # an exponent of zero) for the original exponent term
        start, end = exp_match.span()
        text = f'{text[:start]}{exponent}{text[end:]}'

    for old, new in replacements:
        text = text.replace(old, new)

    return text