# Source code for photons.samples

"""
A formatting-friendly convenience class for 1-D sample data.
"""
import locale
import math
import re
from dataclasses import dataclass
from typing import Sequence

import numpy as np
from GTC import type_a
from GTC import ureal
from GTC.lib import UncertainReal

# The regular expression to parse a format specification (format_spec)
# with additional (and optional) characters at the end for custom fields.
#
# format_spec ::= [[fill]align][sign][#][0][width][grouping][.precision][type][mode][style][si]
# https://docs.python.org/3/library/string.html#format-specification-mini-language
#
# Every field is optional, so a field that is absent from the input is
# reported as None for its named group. The three custom fields are single
# upper-case letters that follow the builtin fields: mode (B or P),
# style (L or U) and si (S).
_format_spec_regex = re.compile(
    # the builtin grammar fields
    # <fill> is any single character, but it is only consumed when an
    # <align> character immediately follows it (lookahead)
    r'((?P<fill>.)(?=[<>=^]))?'
    r'(?P<align>[<>=^])?'
    r'(?P<sign>[ +-])?'
    r'(?P<hash>#)?'
    r'(?P<zero>0)?'
    r'(?P<width>\d+)?'
    r'(?P<grouping>[_,])?'
    # only the digits after the decimal point are captured as <precision>
    r'((\.)(?P<precision>\d+))?'
    r'(?P<type>[bcdeEfFgGnosxX%])?'

    # Bracket or Plus-minus
    # NOTE: these characters cannot be in <type>
    r'(?P<mode>[BP])?'

    # Latex or Unicode
    # NOTE: these characters cannot be in <type> nor <mode>
    r'(?P<style>[LU])?'

    # SI prefix
    # NOTE: this character cannot be in <type>, <mode> nor <style>
    r'(?P<si>S)?'

    # the regex must match until the end of the string
    r'$'
)

# Matches an exponential term such as e+05 or E-12 (the sign is mandatory).
_exponent_regex = re.compile(r'[eE][+-]\d+')

# Maps a power-of-ten exponent (a multiple of 3 from -30 to +30) to its
# SI prefix. The entry for an exponent of 0 is a single space.
_si_map = dict(zip(range(-30, 31, 3), 'qryzafpnum kMGTPEZYRQ'))

# Translation table (for str.translate) that converts the characters of a
# signed integer to their Unicode superscript equivalents.
_unicode_superscripts = {
    ord(plain): raised
    for plain, raised in zip(
        '+-0123456789',
        '\u207A\u207B\u2070\u00B9\u00B2\u00B3\u2074\u2075\u2076\u2077\u2078\u2079',
    )
}


def order_of_magnitude(value: float) -> int:
    """Returns the order of magnitude of `value`.

    The order of magnitude is the floor of the base-10 logarithm of the
    absolute value. A `value` of zero is defined to have an order of
    magnitude of 0.
    """
    if value != 0:
        magnitude = math.fabs(value)
        return int(math.floor(math.log10(magnitude)))
    return 0
def parse(format_spec: str) -> dict[str, str]:
    """Parse a format specification into its grammar fields.

    Args:
        format_spec: The format specification to parse.

    Returns:
        The named groups of the format-specification regular expression.
        A field that is not present in `format_spec` has a value of None.

    Raises:
        ValueError: If `format_spec` does not match the grammar.
    """
    if (match := _format_spec_regex.match(format_spec)) is None:
        raise ValueError(f'Invalid format specifier {format_spec!r}')
    return match.groupdict()
def si_prefix_factor(exponent: int) -> tuple[str, float]:
    """Returns the SI prefix and scaling factor.

    Args:
        exponent: The exponent, e.g., 10 ** exponent

    Returns:
        The SI prefix (an empty string if `exponent` is 0, 1 or 2) and the
        factor that the value must be multiplied by when that prefix is used.
    """
    remainder = exponent % 3
    prefix = _si_map.get(exponent - remainder)

    if prefix is None:
        # The exponent is outside the range of the prefix table, so clamp to
        # the smallest (q) or largest (Q) prefix and let the factor absorb
        # the difference. An exponent of 0, 1 or 2 never reaches this branch
        # because 0 is a key in the table.
        if exponent < 0:
            return 'q', 10. ** (exponent + 30)
        return 'Q', 10. ** (exponent - 30)

    if 0 <= exponent < 3:
        # the table stores a space for "no prefix"
        prefix = ''
    return prefix, 10. ** remainder
@dataclass
class Rounded:
    """Represents a rounded value."""

    # the number after it has been scaled and rounded
    value: float

    # the precision to use when the value is formatted
    precision: int

    # the presentation type to use when the value is formatted
    type: str

    # the order of magnitude that was used to scale the value
    exponent: int

    # the text to append after the formatted value
    suffix: str
[docs] class Format: def __init__(self, **kwargs) -> None: """Format specification.""" # builtin grammar fields self.fill: str = kwargs['fill'] or '' self.align: str = kwargs['align'] or '' self.sign: str = kwargs['sign'] or '' self.hash: str = kwargs['hash'] or '' self.zero: str = kwargs['zero'] or '' self.width: str = kwargs['width'] or '' self.grouping: str = kwargs['grouping'] or '' self.precision = int(kwargs['precision'] or 2) self.type: str = kwargs['type'] or 'f' if self.type == 'n' and self.grouping: raise ValueError(f"Cannot use 'n' and grouping={self.grouping!r}") # custom grammar fields self.mode: str = kwargs['mode'] or 'B' self.style: str = kwargs['style'] or '' self.si: str = kwargs['si'] or '' if self.si: self.type = 'e' # these attributes are used when rounding self.digits = self.precision self.u_exponent = 0 # keeps a record of whether the Format was created for # an uncertain number with an uncertainty of 0, NaN or INF self.nonzero_and_finite = True def __repr__(self) -> str: # Use .digits instead of .precision in the result spec = f'{self.fill}{self.align}{self.sign}{self.hash}{self.zero}' \ f'{self.width}{self.grouping}.{self.digits}{self.type}' \ f'{self.mode}{self.style}{self.si}' return f'Format(format_spec={spec!r})'
[docs] def result(self, text: str) -> str: """Format `text` using the fill, align, zero and width fields.""" fmt = f'{self.fill}{self.align}{self.zero}{self.width}' return f'{text:{fmt}s}'
[docs] def uncertainty(self, uncertainty: float, *, hash: str = None, # noqa: Shadows built-in name 'hash' type: str | None = 'f', # noqa: Shadows built-in name 'type' precision: int = None) -> str: """Format `uncertainty` using the hash, grouping, precision and type fields. Args: uncertainty: The uncertainty to format. hash: Can be either # or '' (an empty string) type: Can be one of: e, E, f, F, g, G, n precision: Indicates how many digits should be displayed after the decimal point for presentation types f and F, or before and after the decimal point for presentation types g or G. Returns: The `uncertainty` formatted. """ return self.value( uncertainty, hash=hash, type=type, sign='', precision=precision)
[docs] def update(self, std: float) -> None: """Update the `precision` and `u_exponent` attributes. Args: std: The standard uncertainty of the samples. """ if std == 0 or not math.isfinite(std): self.nonzero_and_finite = False return exponent = order_of_magnitude(std) if exponent - self.precision + 1 >= 0: self.precision = 0 else: self.precision = int(self.precision - exponent + 1) u_exponent = exponent - self.digits + 1 # edge case, for example, if 0.099 then round to 0.1 rounded = round(std, -u_exponent) e_rounded = order_of_magnitude(rounded) if e_rounded > exponent: u_exponent += 1 self.u_exponent = u_exponent
[docs] def value(self, value: float, *, hash: str = None, # noqa: Shadows built-in name 'hash' type: str = None, # noqa: Shadows built-in name 'type' sign: str = None, precision: int = None) -> str: """Format `value` using the sign, hash, grouping, precision and type fields. Args: value: The value to format. hash: Can be either # or '' (an empty string) type: Can be one of: e, E, f, F, g, G, n sign: Can be one of: +, -, ' ' (a space) precision: Indicates how many digits should be displayed after the decimal point for presentation types f and F, or before and after the decimal point for presentation types g or G. Returns: The `value` formatted. """ if sign is None: sign = self.sign if precision is None: precision = self.precision if type is None: type = self.type # noqa: Shadows built-in name 'type' if hash is None: hash = self.hash # noqa: Shadows built-in name 'hash' if type == 'n': fmt = f'%{sign}{hash}.{precision}f' return locale.format_string(fmt, value, grouping=True) return f'{value:{sign}{hash}{self.grouping}.{precision}{type}}'
[docs] class Samples: def __init__(self, samples: str | Sequence[str | int | float] | np.ndarray = None, *, mean: float = None, stdev: float = None, size: int = None, overload: float | None = 1e30) -> None: """Convenience class for a 1-D array of data samples. Calculates the mean, standard deviation, variance, relative standard deviation and standard deviation of the mean of the samples. Args: samples: The samples. If a string then in CSV format. mean: If specified, then it is not calculated from the `samples`. stdev: If specified, then it is not calculated from the `samples`. size: If specified, then it is not determined from the `samples`. overload: For some devices, like a DMM, if the input signal is greater than the present range can measure, the device returns a large value (e.g., 9.9E+37) to indicate a measurement overload. If the absolute value of the mean is greater than `overload` then the mean and standard deviation become NaN. Setting `overload` to :data:`None` disables this check. """ if samples is not None and any(a is not None for a in (mean, stdev, size)): raise ValueError('Cannot specify samples and the mean, stdev or size') if isinstance(samples, str): stripped = samples.rstrip() if stripped: self._samples = np.array(stripped.split(','), dtype=float) else: self._samples = np.empty(0) elif isinstance(samples, np.ndarray): self._samples = samples elif samples is None: self._samples = np.empty(0) else: self._samples = np.asarray(list(map(float, samples))) # noqa: samples cannot be None if self._samples.ndim != 1: raise ValueError('only 1D arrays are allowed') self._size = self._samples.size if size is None else size self._overload = overload self._stdev = stdev if mean is not None: self._mean = self._check_overload(mean) else: self._mean = None def __iter__(self): return iter((self.mean, self.stdev)) def __format__(self, format_spec) -> str: fmt = Format(**parse(format_spec)) fmt.update(self.stdom) return fmt.result(_stylize(self._to_string(fmt), fmt))
[docs] def __getattr__(self, item): """Pass all other attributes to the ndarray.""" return getattr(self._samples, item)
def __repr__(self) -> str: return f'Samples(mean={self.mean}, stdev={self.stdev}, size={self.size})' def __getitem__(self, item) -> float: return self._samples[item] def __len__(self) -> int: return self._samples.size def _check_overload(self, mean: float) -> float: if self._overload is None: return mean if math.isfinite(mean) and abs(mean) > self._overload: self._stdev = math.nan return math.nan return mean def _to_string(self, fmt: Format) -> str: """Convert to a formatted string.""" x, u = self.mean, self.stdom if u == 0: if fmt.si: fmt.update(x) r = _round(x, fmt) x_str = fmt.value(r.value, precision=r.precision, type=r.type) v_str = f'{x_str}{r.suffix}' else: v_str = fmt.value(x) return fmt.result(v_str) u_finite = math.isfinite(u) x_finite = math.isfinite(x) if not (u_finite and x_finite): si_prefix = '' if fmt.si and x_finite: fmt.update(x) r = _round(x, fmt) si_prefix = r.suffix x_str = fmt.value(r.value, precision=r.precision, type=r.type) else: x_str = fmt.value(x) u_str = fmt.uncertainty(u, type=None) if fmt.mode == 'B': result = f'{x_str}({u_str}){si_prefix}' else: result = f'{x_str}+/-{u_str}{si_prefix}' # move an exponential term (if it exists) to the end of the string exp = _exponent_regex.search(result) if exp: start, end = exp.span() s1, s2, s3 = result[:start], result[end:], exp.group() if fmt.mode == 'B': result = f'{s1}{s2}{s3}' else: result = f'({s1}{s2}){s3}' return result x_rounded, u_rounded = _round_samples(x, u, fmt) u_r = u_rounded.value precision = x_rounded.precision x_str = fmt.value(x_rounded.value, precision=precision, type=x_rounded.type) if fmt.mode == 'P': # Plus-minus mode u_str = fmt.uncertainty(u_r, precision=precision) x_u_str = f'{x_str}+/-{u_str}' if x_rounded.suffix: return f'({x_u_str}){x_rounded.suffix}' return x_u_str # Bracket mode oom = order_of_magnitude(u_r) if precision > 0 and oom >= 0: # the uncertainty straddles the decimal point so # keep the decimal point in the result u_str = fmt.uncertainty(u_r, 
precision=precision, type=u_rounded.type) else: hash_, type_ = None, u_rounded.type if oom < 0: if fmt.hash: hash_ = '' else: type_ = 'f' u_str = fmt.uncertainty(round(u_r * 10. ** precision), precision=0, type=type_, hash=hash_) return f'{x_str}({u_str}){x_rounded.suffix}' @property def mean(self) -> float: """Returns the mean.""" if self._mean is not None: return self._mean mean = float(np.mean(self._samples)) if self._size > 0 else math.nan self._mean = self._check_overload(mean) return self._mean @property def overload(self) -> float | None: """Returns the overload value.""" return self._overload @property def relative_stdev(self) -> float: """Returns the relative standard deviation.""" try: return 100.0 * (self.stdev / self.mean) except ZeroDivisionError: return math.nan @property def relative_stdom(self) -> float: """Returns the relative standard deviation of the mean.""" try: return 100.0 * (self.stdom / self.mean) except ZeroDivisionError: return math.nan @property def samples(self) -> np.ndarray: """Returns the samples.""" return self._samples @property def size(self) -> int: """Returns the number of samples.""" return self._size @property def stdev(self) -> float: """Returns the sample standard deviation.""" if self._stdev is not None: return self._stdev self._stdev = float(np.std(self._samples, ddof=1)) if self._size > 1 else math.nan return self._stdev @property def stdom(self) -> float: """Returns the standard deviation of the mean.""" try: return self.stdev / math.sqrt(self._size) except ZeroDivisionError: return math.nan
[docs] def to_json(self) -> dict[str, float]: """Allows for this class to be JSON serializable with msl-network.""" return { 'mean': self.mean, 'stdev': self.stdev, 'size': self._size, 'overload': self._overload }
[docs] def to_ureal(self, *, label: str = None, delta: float = None, truncated: bool = False) -> UncertainReal: """Convert to an uncertain-real number. Args: label: The label to associate with the uncertain number. delta: The digitization step size (only valid if the samples are digitized). truncated: Whether the digitized samples were truncated or rounded. Only used if `delta` is not :data:`None`. Returns: The samples as an uncertain-real number. """ if delta is not None: return type_a.estimate_digitized( self._samples, delta, label=label, truncate=truncated) if self._samples.size > 0: return type_a.estimate(self._samples, label=label) return ureal(self.mean, self.stdom, df=self._size-1, label=label, independent=True)
@property def variance(self) -> float: """Returns the sample variance.""" return self.stdev * self.stdev
def _round(value: float, fmt: Format, exponent: int = None) -> Rounded:
    """Round `value` to the appropriate number of significant digits.

    Args:
        value: The value to round.
        fmt: The format. Its `u_exponent`, `type`, `si`, `precision` and
            `nonzero_and_finite` attributes are read.
        exponent: The order of magnitude to scale `value` by when an
            exponential representation is used. If None, the order of
            magnitude of `value` is used.

    Returns:
        The rounded value together with the precision, presentation type
        and suffix to use when formatting it.
    """
    # Return the value unchanged when no SI prefix is requested, the format
    # was flagged as having a zero or non-finite uncertainty, and the value
    # itself is not finite.
    if not fmt.si and not (fmt.nonzero_and_finite or math.isfinite(value)):
        return Rounded(value=value, precision=fmt.precision, type=fmt.type,
                       exponent=0, suffix='')

    if exponent is None:
        exponent = order_of_magnitude(value)

    _type = fmt.type

    # Use a fixed-point representation for the f and F types, and for the
    # g, G and n types when the exponent is at least -4 and the second
    # inequality holds (it reduces to fmt.u_exponent < 0).
    f_or_g_as_f = (_type in 'fF') or \
                  ((_type in 'gGn') and (-4 <= exponent < exponent - fmt.u_exponent))

    if f_or_g_as_f:
        factor = 1.0
        digits = -fmt.u_exponent
        precision = max(digits, 0)
        suffix = ''
    elif _type == '%':
        factor = 0.01
        digits = -fmt.u_exponent - 2
        precision = max(digits, 0)
        suffix = '%'
    else:
        # exponential representation: the value is divided by a power of ten
        factor = 10. ** exponent
        digits = max(exponent - fmt.u_exponent, 0)
        precision = digits
        # format the power of ten with zero precision and drop the first
        # character, which leaves the exponent text (if there is any)
        suffix = f'{factor:.0{_type}}'[1:]

    # the scaled value is always formatted as fixed point
    # (the n type is left unchanged)
    if _type in 'eg%':
        _type = 'f'
    elif _type in 'EG':
        _type = 'F'

    if fmt.si:
        prefix, si_factor = si_prefix_factor(exponent)
        # the SI factor moves the decimal point to the right by n digits,
        # so n fewer decimal places are required
        n = order_of_magnitude(si_factor)
        precision = max(0, precision - n)
        val = round(value * si_factor / factor, digits - n)
        # the SI prefix (preceded by a space) replaces the exponent suffix
        suffix = f' {prefix}' if prefix else ''
    else:
        val = round(value / factor, digits)

    return Rounded(value=val, precision=precision, type=_type,
                   exponent=exponent, suffix=suffix)


def _round_samples(x: float, u: float, fmt: Format) -> tuple[Rounded, Rounded]:
    """Round the samples.

    This function ensures that both x and u get scaled by the same factor.

    Args:
        x: The mean.
        u: The uncertainty.
        fmt: The format to use for rounding.

    Returns:
        The rounded `x` and the rounded `u`.
    """
    # the larger of |x| and u (after rounding) determines the exponent
    # that is shared by both values
    maximum = round(max(math.fabs(x), u), -fmt.u_exponent)
    rounded = _round(maximum, fmt)
    x_rounded = _round(x, fmt, exponent=rounded.exponent)
    u_rounded = _round(u, fmt, exponent=rounded.exponent)
    return x_rounded, u_rounded


def _stylize(text: str, fmt: Format) -> str:
    """Apply the formatting style to `text`.

    Args:
        text: The text to stylize.
        fmt: The format. Only the `style` attribute is read.

    Returns:
        The `text` unchanged if no style was requested or if `text` is
        empty, otherwise the `text` with the exponential term and certain
        substrings replaced by their Unicode or LaTeX representation.
    """
    if not fmt.style or not text:
        return text

    exponent = ''
    exp_number = None
    exp_match = _exponent_regex.search(text)
    if exp_match:
        # don't care whether it starts with e or E and
        # don't want to include the + symbol
        group = exp_match.group()
        exp_number = int(group[1:])

    if fmt.style == 'U':
        # an exponent of 0 is removed (exponent remains an empty string)
        if exp_match and exp_number != 0:
            e = f'{exp_number}'
            translated = e.translate(_unicode_superscripts)
            exponent = f'\u00D710{translated}'

        # NOTE: every occurrence of the letter u in the text is replaced
        replacements = [
            ('+/-', '\u00B1'),
            ('u', '\u00B5')
        ]

    elif fmt.style == 'L':
        # an exponent of 0 is removed (exponent remains an empty string)
        if exp_match and exp_number != 0:
            exponent = fr'\times10^{{{exp_number}}}'

        replacements = [
            ('(', r'\left('),
            (')', r'\right)'),
            ('nan', r'\mathrm{NaN}'),
            ('NAN', r'\mathrm{NaN}'),
            ('inf', r'\infty'),  # must come before 'INF'
            ('INF', r'\infty'),
            ('%', r'\%'),
        ]
    else:
        assert False, 'should not get here'

    if exp_match:
        # substitute the stylized exponent for the matched exponential term
        start, end = exp_match.span()
        s1, s2, s3 = text[:start], exponent, text[end:]
        text = f'{s1}{s2}{s3}'

    for old, new in replacements:
        text = text.replace(old, new)

    return text