import numpy as np
import pandas as pd
from geost.utils import series
# [docs]
def get_layer_top(
    data: pd.DataFrame,
    column: str,
    value: int | float | str | list[str] | slice,
    min_thickness: float | None = None,
    min_fraction: float | None = None,
) -> pd.Series:
    """
    Find the top depth in individual survey ids where a column in a Pandas DataFrame
    contains a specified search value or values, or falls within a specified range.

    Parameters
    ----------
    data : pd.DataFrame
        Pandas DataFrame containing the data. The DataFrame must contain columns
        specifying depth intervals, such as "top" and "bottom" or "depth" and
        "thickness". See GeostFrame.has_depth_columns for more information.
    column : str
        Name of the column to search for the specified value or values.
    value : int | float | str | list[str] | slice
        Value or values to search for in the specified column. If a slice is provided,
        the function will search for values within the specified range.
    min_thickness : float, optional
        Minimum thickness of the layer to consider; must be greater than zero. Layers
        thinner than this value are ignored. The thickness of a layer is the difference
        between the uppermost top and the lowermost bottom of consecutive elements that
        meet the value criteria. If None, no minimum thickness is applied, which
        returns the first encountered layer.
    min_fraction : float, optional
        Whether or not to allow for disturbing layers: layers that do not meet the
        value criteria in between. The minimum fraction is the minimal fraction of the
        'min_thickness' that must meet the value criteria. If None, the entire layer
        must meet the criteria. Note that 'min_fraction' is only applied when
        'min_thickness' is specified.

    Returns
    -------
    pd.Series
        Series containing the top depth of the layers that meet the specified criteria
        for each survey id as the index.

    Raises
    ------
    ValueError
        - If the input DataFrame does not contain columns specifying depth intervals
        - If min_thickness is zero or below
        - If min_fraction is not between 0 and 1

    """
    if not data.gst.has_depth_columns:
        raise ValueError(
            "Data must contain columns specifying depth intervals. See "
            "GeostFrame.has_depth_columns for more information."
        )

    # Validate the options before any data is selected or modified so that invalid
    # arguments fail fast. Zero is rejected as well: `_find_top` divides by
    # 'min_thickness'.
    if min_thickness is not None and min_thickness <= 0:
        raise ValueError("'min_thickness' must be greater than zero.")
    if min_fraction is not None and not (0 <= min_fraction <= 1):
        raise ValueError("'min_fraction' must be between 0 and 1.")

    data = data.gst.select_by_values(column, value)
    # NOTE(review): the helper columns below are written onto the frame returned by
    # select_by_values; presumably that is a new object and not the caller's frame
    # -- confirm.
    data["values_mask"] = series.mask(value, data[column])

    if data.gst._top is None:
        # If we have discrete data, we need to calculate top depths because depth
        # indicates bottom depths
        data["thickness"] = data.gst.calculate_thickness()
        data["top"] = data[data.gst._bottom] - data["thickness"]

    if min_thickness is not None:
        # Collapse runs of consecutive elements with the same mask value into single
        # layers so that the thickness refers to whole layers.
        data["layer_nrs"] = series.label_consecutive_elements(data["values_mask"])
        data = _get_layer_top_bottom(data)
        data["thickness"] = data.gst.calculate_thickness()

    return _get_layer_top(data, min_thickness, min_fraction)
# [docs]
def get_layer_base(
    data: pd.DataFrame,
    column: str,
    value: int | float | str | list[str] | slice,
    min_thickness: float = None,
    min_fraction: float = None,
) -> pd.Series:
    """
    Check the input and select the surveys for a layer base search.

    NOTE(review): this function looks unfinished -- presumably it is meant to mirror
    `get_layer_top` for base depths. At the moment it only validates the depth columns,
    runs the value selection and returns None; 'min_thickness' and 'min_fraction' are
    not used.

    Parameters
    ----------
    data : pd.DataFrame
        Pandas DataFrame containing the data. The DataFrame must contain columns
        specifying depth intervals. See GeostFrame.has_depth_columns for more
        information.
    column : str
        Name of the column to search for the specified value or values.
    value : int | float | str | list[str] | slice
        Value or values passed on to the selection.
    min_thickness : float, optional
        Currently unused.
    min_fraction : float, optional
        Currently unused.

    Returns
    -------
    None
        No result is computed yet, despite the annotated return type.

    Raises
    ------
    ValueError
        If the input DataFrame does not contain columns specifying depth intervals.

    """
    if data.gst.has_depth_columns:
        # The selection result is not used yet; the call is kept so that any error it
        # raises still surfaces.
        data.gst.select_by_values(column, value)
        return None

    raise ValueError(
        "Data must contain columns specifying depth intervals. See "
        "GeostFrame.has_depth_columns for more information."
    )
def _get_layer_top_bottom(data: pd.DataFrame) -> pd.DataFrame:
    """
    Helper for get_layer_top and get_layer_base that collapses labelled data into one
    row per layer.

    Rows are grouped on the "nr" and "layer_nrs" columns. Per group the first "surface"
    and "values_mask" values are kept, together with the minimum of the top column and
    the maximum of the bottom column.

    """
    # How each retained column is reduced within a ("nr", "layer_nrs") group.
    reducers = {
        "surface": "first",
        data.gst._top: "min",
        data.gst._bottom: "max",
        "values_mask": "first",
    }
    per_layer = data.groupby(["nr", "layer_nrs"], as_index=False)
    return per_layer.agg(reducers)
def _get_layer_top(
    data: pd.DataFrame,
    min_thickness: float = None,
    min_fraction: float = None,
) -> pd.Series:
    """
    Helper for get_layer_top that derives the top depth per "nr" group, depending on
    which of the options 'min_thickness' and 'min_fraction' are given.

    - No 'min_thickness': the minimum top of all rows where "values_mask" is True.
    - Only 'min_thickness': the minimum top of the rows where "values_mask" is True and
      "thickness" is at least 'min_thickness'.
    - Both: `_find_top` is applied to every "nr" group and NaN results are dropped.

    """
    top_col = data.gst._top

    if min_thickness is None:
        matching = data[data["values_mask"]]
        return matching.groupby("nr")[top_col].min()

    if min_fraction is not None:

        def _top_in_survey(survey):
            # Hand the raw arrays of a single survey to the search routine.
            return _find_top(
                survey["values_mask"].values,
                survey[top_col].values,
                survey[data.gst._bottom].values,
                min_thickness,
                min_fraction,
            )

        found = data.groupby("nr").apply(_top_in_survey)
        return found.dropna()

    is_match = data["values_mask"]
    thick_enough = data["thickness"] >= min_thickness
    matching = data[is_match & thick_enough]
    return matching.groupby("nr")[top_col].min()
def _find_top(
    valid, top, bottom, min_thickness: float, min_fraction: float
) -> float:
    """
    Helper function to find the top depth of a layer in a single data survey when
    `min_fraction` is used in `get_layer_top`. The 'min_fraction' option allows for
    disturbing layers: invalid elements in between the valid elements.

    Every valid element is tried as a candidate top. A window of 'min_thickness' is
    opened at the candidate's top and the summed length of the valid elements inside
    the window is divided by 'min_thickness'. The first candidate whose fraction
    reaches 'min_fraction' (within floating point tolerance) is returned.

    Parameters
    ----------
    valid : array-like
        Flags marking the elements that meet the value criteria. Converted to a
        boolean array, so 0/1 integers are accepted as well.
    top : array-like
        Top depths of the elements.
    bottom : array-like
        Bottom depths of the elements.
    min_thickness : float
        Length of the search window. NOTE(review): expected to be greater than zero;
        this is validated by the caller, not here.
    min_fraction : float
        Minimal fraction of the window that must be valid.

    Returns
    -------
    float
        Top depth of the first candidate that meets the criteria, or NaN if there is
        none.

    Notes
    -----
    Only the last element inside a window is clipped to the window, which assumes the
    elements are ordered by increasing depth -- TODO confirm with callers.

    """
    # A boolean mask is required: an integer 0/1 array would be interpreted as indices
    # when selecting the valid lengths below.
    valid = np.asarray(valid, dtype=bool)
    top = np.asarray(top)
    # Float so that clipping to a fractional search depth is not truncated when integer
    # depths are passed in.
    bottom = np.asarray(bottom, dtype=float)

    for idx in np.flatnonzero(valid):
        t_idx = top[idx]
        search_depth = t_idx + min_thickness
        search_mask = (top >= t_idx) & (top < search_depth)

        # Boolean indexing returns new arrays, so clipping does not touch the input.
        tmp_top = top[search_mask]
        tmp_bottom = bottom[search_mask]
        if tmp_bottom[-1] > search_depth:
            tmp_bottom[-1] = search_depth

        length = tmp_bottom - tmp_top
        fraction = length[valid[search_mask]].sum() / min_thickness

        if fraction > min_fraction or np.isclose(fraction, min_fraction):
            return t_idx

    return np.nan
def find_top_sand(
    lith: np.ndarray,
    top: np.ndarray,
    bottom: np.ndarray,
    min_sand_frac: float,
    min_sand_thickness: float,
) -> float:
    """
    Find the top of sand depth in a borehole described in NEN5104 format. The top of
    sand is defined by the first layer of a specified thickness that contains a minimum
    fraction of sand.

    Layers with lithology "Z" or "G" count as sand. When a sand layer is detected, a
    window of 'min_sand_thickness' below its top is scanned. If at least
    'min_sand_frac' of that window (within floating point tolerance) consists of sand
    layers, the top of the detected layer is the top of sand. If not, the search
    continues downward with the next sand layer.

    Parameters
    ----------
    lith : ndarray
        Numpy array containing the lithology of the borehole.
    top : ndarray
        Numpy array containing the top depth of the layers of the borehole.
    bottom : ndarray
        Numpy array containing the bottom depth of the layers of the borehole.
    min_sand_frac : float
        Minimum fraction (0-1) of the scanned window required to be sand.
    min_sand_thickness : float
        Thickness of the window to scan below a detected sand layer.

    Returns
    -------
    top_sand : float
        Top depth of the sand layer that meets the requirements, or NaN if no layer
        does.

    Notes
    -----
    Only the last layer inside a window is clipped to the window, which assumes the
    layers are ordered by increasing depth -- TODO confirm with callers.

    """
    # Accept plain sequences as well: comparing a list with a string would yield a
    # single False instead of an element-wise mask.
    lith = np.asarray(lith)
    top = np.asarray(top)
    # Float so that clipping to a fractional search depth is not truncated when integer
    # depths are passed in.
    bottom = np.asarray(bottom, dtype=float)

    is_sand = (lith == "Z") | (lith == "G")

    for idx in np.flatnonzero(is_sand):
        top_sand = top[idx]
        search_depth = top_sand + min_sand_thickness
        search_mask = (top >= top_sand) & (top < search_depth)

        # Boolean indexing returns new arrays, so clipping does not touch the input.
        tmp_top = top[search_mask]
        tmp_bottom = bottom[search_mask]
        if tmp_bottom[-1] > search_depth:
            tmp_bottom[-1] = search_depth

        length = tmp_bottom - tmp_top
        sand_frac = length[is_sand[search_mask]].sum() / min_sand_thickness

        # Tolerant comparison: summed float lengths can fall a rounding error short of
        # the threshold (e.g. 0.7 - 0.2 < 0.5), same approach as in `_find_top`.
        if sand_frac > min_sand_frac or np.isclose(sand_frac, min_sand_frac):
            return top_sand

    return np.nan
def top_of_sand(
    boreholes: pd.DataFrame,
    ids: str = "nr",
    min_sand_frac: float = 0.5,
    min_sand_thickness: int | float = 1,
):
    """
    Determine the top of sand depth for every borehole in a table of boreholes in
    NEN5104 format, by running `find_top_sand` on each group of rows sharing an id.

    Parameters
    ----------
    boreholes : pd.DataFrame
        Boreholes in NEN5104 format with "lith", "top" and "bottom" columns.
    ids : str, optional
        Column specifying the borehole IDs. The default is "nr".
    min_sand_frac : float, optional
        Minimum fraction of sand in the sand layer. The default is 0.5 (=50%).
    min_sand_thickness : int | float, optional
        Minimum thickness of the sand layer to find the top of. The default is 1.

    Returns
    -------
    pd.DataFrame
        DataFrame with one row per borehole: the borehole ID in column "nr" (the column
        is named "nr" regardless of `ids`) and the top of sand depth in column "top".

    """
    records = [
        (
            borehole_id,
            find_top_sand(
                layers["lith"].values,
                layers["top"].values,
                layers["bottom"].values,
                min_sand_frac,
                min_sand_thickness,
            ),
        )
        for borehole_id, layers in boreholes.groupby(ids)
    ]
    return pd.DataFrame(records, columns=["nr", "top"])
def cumulative_thickness():
    """
    Placeholder without an implementation: takes no arguments, does nothing and
    returns None.
    """
    return None