def choose_missing_integer_placeholder(x: Sequence[int], max_dtype: type = numpy.int32) -> Optional[numpy.generic]:
    """Pick an integer value that can stand in for missing entries of ``x``.

    When ``x`` is a NumPy array whose item size is smaller than that of
    ``max_dtype``, a placeholder is first sought within ``x``'s own type;
    only if that search comes back empty is ``max_dtype`` searched instead.

    Args:
        x:
            Sequence of integers, possibly containing masked or None values.

        max_dtype:
            Integer NumPy type that is guaranteed to faithfully represent
            all (non-None, non-masked) values of ``x``.

    Returns:
        The placeholder value, guaranteed to fit into ``max_dtype``. Its type
        may differ from ``x.dtype`` when ``x`` is a NumPy array, so callers
        may need to cast before substituting it for missing values. None is
        returned if no suitable placeholder can be found.
    """
    # The ``and`` short-circuits, so ``max_dtype()`` is only instantiated
    # when ``x`` really is a NumPy array.
    narrower_array = isinstance(x, numpy.ndarray) and (x.itemsize < max_dtype().itemsize)
    if not narrower_array:
        return _scan_for_integer_placeholder(x, max_dtype)

    # Prefer a placeholder in the array's own (smaller) type.
    own_type_choice = _scan_for_integer_placeholder(x, x.dtype.type)
    if own_type_choice is not None:
        return own_type_choice

    # Nothing available in the smaller type; widen the search.
    return _scan_for_integer_placeholder(x, max_dtype)
def choose_missing_float_placeholder(x: Sequence[float], dtype: type = numpy.float64) -> Optional[numpy.generic]:
    """Pick a floating-point value, absent from ``x``, to represent missing entries.

    Candidates are tried in a fixed order and the first one not present in
    ``x`` wins: NaN, positive infinity, negative infinity, the most negative
    and most positive finite values of the type, zero, and finally a midpoint
    between two neighbouring finite values of ``x``.

    Args:
        x:
            Sequence of floats, possibly containing masked or None values.

        dtype:
            Floating-point NumPy type to use for the placeholder. Ignored if
            ``x`` is already a NumPy floating-point array, in which case the
            array's own ``x.dtype`` is used.

    Returns:
        The placeholder value. If ``x`` is a NumPy floating-point array, this
        is of the same type as ``x.dtype``. None is returned if no suitable
        placeholder can be found.
    """
    # A floating-point array dictates the placeholder type.
    if isinstance(x, numpy.ndarray) and numpy.issubdtype(x.dtype, numpy.floating):
        dtype = x.dtype.type

    def _is_observed(value) -> bool:
        # None and masked entries are the missing values themselves.
        return value is not None and not numpy.ma.is_masked(value)

    # NaN is usable as long as no observed value is already NaN.
    if not any(_is_observed(value) and numpy.isnan(value) for value in x):
        return dtype(numpy.nan)

    for special in (numpy.inf, -numpy.inf):
        if special not in x:
            return dtype(special)

    # Next in line: the extremes of the type's finite range, then zero.
    limits = numpy.finfo(dtype)
    for special in (limits.min, limits.max, 0):
        if special not in x:
            return dtype(special)

    # Last resort: look for a representable gap between adjacent distinct
    # finite values. An existing set is used as-is; anything else is
    # deduplicated with the missing entries dropped.
    if isinstance(x, set):
        distinct = x
    else:
        distinct = {value for value in x if _is_observed(value)}

    finite = sorted(value for value in distinct if value is not None and numpy.isfinite(value))
    for lower, upper in zip(finite, finite[1:]):
        halfway = lower + (upper - lower) / dtype(2)
        # Rounding can collapse the midpoint onto either neighbour.
        if halfway != lower and halfway != upper:
            return halfway

    # Highly unlikely that we'll get to this point.
    return None
def choose_missing_string_placeholder(x: Sequence[str]) -> str:
    """Pick a string, absent from ``x``, to represent missing entries.

    The search starts at ``"NA"`` and appends one underscore at a time until
    the candidate is no longer found in ``x``.

    Args:
        x:
            Sequence of strings, possibly containing missing or None values.

    Returns:
        String to use as the placeholder. This may be longer than the maximum
        string length in ``x`` (for fixed-length-string arrays), so some
        casting may be required.
    """
    underscores = 0
    while True:
        candidate = "NA" + "_" * underscores
        if candidate not in x:
            return candidate
        underscores += 1