# Source code for geost.analysis.layers

import numpy as np
import pandas as pd

from geost.utils import series


[docs] def get_layer_top( data: pd.DataFrame, column: str, value: int | float | str | list[str] | slice, min_thickness: float = None, min_fraction: float = None, ) -> pd.Series: """ Find the top depth in individual survey ids where a column in a Pandas DataFrame contains specified search value or values, or falls within a specified range. Parameters ---------- data : pd.DataFrame Pandas DataFrame containing the data. The DataFrame must contain columns specifying depth intervals, such as "top" and "bottom" or "depth" and "thickness". See GeostFrame.has_depth_columns for more information. column : str Name of the column to search for the specified value or values. value : int | float | str | list[str] | slice Value or values to search for in the specified column. If a slice is provided, the function will search for values within the specified range. min_thickness : float, optional Minimum thickness of the layer to consider. Layers thinner than this value will be ignored. The thickness of a layer is calculated as the difference uppermost top and the lowermost bottom of consecutive elements that meet the value criteria. If None, no minimum thickness is applied which returns the first encountered layer. min_fraction : float, optional Whether or not to allow for disturbing layers: layers that do not meet the value criteria in between. The minimum fraction is the minimal fraction of the 'min_thickness' that must meet the value criteria. If None, the entire layer must meet the criteria. Note that 'min_fraction' is only applied when 'min_thickness' is specified. Returns ------- pd.Series Series containing the top depth of the layers that meet the specified criteria for each survey id as the index. 
Raises ------ ValueError - If the input DataFrame does not contain columns specifying depth intervals - If min_thickness is below zero - If min_fraction is not between 0 and 1 """ if not data.gst.has_depth_columns: raise ValueError( "Data must contain columns specifying depth intervals. See " "GeostFrame.has_depth_columns for more information." ) data = data.gst.select_by_values(column, value) data["values_mask"] = series.mask(value, data[column]) if data.gst._top is None: # If we have discrete data, we need to calculate top depths because depth indicates bottom depths data["thickness"] = data.gst.calculate_thickness() data["top"] = data[data.gst._bottom] - data["thickness"] if min_thickness is not None: if min_thickness <= 0: raise ValueError("'min_thickness' cannot be below zero.") data["layer_nrs"] = series.label_consecutive_elements(data["values_mask"]) data = _get_layer_top_bottom(data) data["thickness"] = data.gst.calculate_thickness() if min_fraction is not None and not (0 <= min_fraction <= 1): raise ValueError("'min_fraction' must be between 0 and 1.") return _get_layer_top(data, min_thickness, min_fraction)
def get_layer_base(
    data: pd.DataFrame,
    column: str,
    value: int | float | str | list[str] | slice,
    min_thickness: float = None,
    min_fraction: float = None,
) -> pd.Series:
    """
    Counterpart of `get_layer_top` intended to return layer base depths.

    NOTE(review): this function is an unfinished stub. It validates that depth columns
    are present, selects the data by `column` and `value`, and then returns None.
    `min_thickness` and `min_fraction` are accepted but not used yet.

    Parameters
    ----------
    data : pd.DataFrame
        Pandas DataFrame containing the data, including depth interval columns.
    column : str
        Name of the column to search for the specified value or values.
    value : int | float | str | list[str] | slice
        Value or values to search for in the specified column.
    min_thickness : float, optional
        Currently unused.
    min_fraction : float, optional
        Currently unused.

    Raises
    ------
    ValueError
        If the input DataFrame does not contain columns specifying depth intervals.

    """
    depth_columns_present = data.gst.has_depth_columns
    if depth_columns_present:
        # The selection is computed but not used yet; kept so that any error raised
        # by the selection itself still surfaces to the caller.
        selected = data.gst.select_by_values(column, value)  # noqa: F841
        return None

    message = (
        "Data must contain columns specifying depth intervals. See "
        "GeostFrame.has_depth_columns for more information."
    )
    raise ValueError(message)
def _get_layer_top_bottom(data: pd.DataFrame) -> pd.DataFrame: """ Helper for get_layer_top and get_layer_base to find the top and bottom depths of data that have been labelled in terms of layers of consecutive array elements. """ top_col = data.gst._top bottom_col = data.gst._bottom top_bottom = data.groupby(["nr", "layer_nrs"], as_index=False).agg( {"surface": "first", top_col: "min", bottom_col: "max", "values_mask": "first"} ) return top_bottom def _get_layer_top( data: pd.DataFrame, min_thickness: float = None, min_fraction: float = None, ) -> pd.Series: """ Helper for get_layer_top to find the top depth of layers in different ways using the options 'min_thickness' and 'min_fraction'. """ top_col = data.gst._top if min_thickness is not None: if min_fraction is not None: tops = data.groupby("nr").apply( lambda df: _find_top( df["values_mask"].values, df[top_col].values, df[data.gst._bottom].values, min_thickness, min_fraction, ) ) return tops.dropna() selection = data[data["values_mask"] & (data["thickness"] >= min_thickness)] else: selection = data[data["values_mask"]] tops = selection.groupby("nr")[top_col].min() return tops def _find_top( valid, top, bottom, min_thickness: float, min_fraction: float ) -> pd.DataFrame: """ Helper function to find the top depth of a layer in a single data survey when `min_fraction` is used in `get_layer_top`. The 'min_fraction' option allows for disturbing layers: invalid elements in between the valid elements. 
""" idx_valid = np.flatnonzero(valid) for idx in idx_valid: t_idx = top[idx] search_depth = t_idx + min_thickness search_mask = (top >= t_idx) & (top < search_depth) tmp_top = top[search_mask].copy() tmp_bottom = bottom[search_mask].copy() if tmp_bottom[-1] > search_depth: tmp_bottom[-1] = search_depth length = tmp_bottom - tmp_top fraction = length[valid[search_mask]].sum() / min_thickness if fraction > min_fraction or np.isclose(fraction, min_fraction): return t_idx else: return np.nan def find_top_sand( lith: np.ndarray, top: np.ndarray, bottom: np.ndarray, min_sand_frac: float, min_sand_thickness: float, ) -> float: """ Find the top of sand depth in a borehole described in NEN5104 format. The top of sand is defined by the first layer of a specified thickness that contains a minimum percentage of sand. By default: when the first layer of sand is detected, the next 1 meter is scanned. Within this meter, if more than 50% of the length has a main lithology of sand, the initially detected layer of sand is regarded as the top of sand. If not, continue downward until the next layer of sand is detected and repeat. Parameters ---------- lith : ndarray Numpy array containing the lithology of the borehole. top : ndarray Numpy array containing the top depth of the layers of the borehole. bottom : ndarray Numpy array containing the bottom depth of the layers of the borehole. min_sand_frac : float Minimum percentage required to be sand. min_sand_thickness : float Minimum thickness of the sand to search for. Returns ------- top_sand : float Top depth of the sand layer that meets the requirements. 
""" is_sand = ("Z" == lith) + ("G" == lith) found_sand = False if np.any(is_sand): idx_sand = np.flatnonzero(is_sand) for idx in idx_sand: top_sand = top[idx] search_depth = top_sand + min_sand_thickness search_mask = (top >= top_sand) & (top < search_depth) tmp_top = top[search_mask].copy() tmp_bottom = bottom[search_mask].copy() if tmp_bottom[-1] > search_depth: tmp_bottom[-1] = search_depth length = tmp_bottom - tmp_top sand_frac = length[is_sand[search_mask]].sum() / min_sand_thickness if sand_frac >= min_sand_frac: found_sand = True break if not found_sand: top_sand = np.nan return top_sand def top_of_sand( boreholes: pd.DataFrame, ids: str = "nr", min_sand_frac: float = 0.5, min_sand_thickness: int | float = 1, ): """ Find the top of sand depth in a borehole described in NEN5104 format. The top of sand is defined by the first layer of a specified thickness that contains a minimum fraction of sand. Parameters ---------- boreholes : pd.DataFrame Boreholes in NEN5104 format with "lith", "top" and "bottom" columns. ids : str, optional Column specifying the borehole IDs. The default is "nr". min_sand_frac : float, optional Minimum percentage of sand in the sand layer. The default is 0.5 (=50%). min_sand_thickness : int | float, optional Minimum thickness of the sand layer to find the top of. The default is 1. Returns ------ pd.DataFrame DataFrame containing the borehole IDs and the top of sand depths. """ groupby = boreholes.groupby(ids) result = [] for nr, df in groupby: lith = df["lith"].values top = df["top"].values bottom = df["bottom"].values top_sand = find_top_sand(lith, top, bottom, min_sand_frac, min_sand_thickness) result.append((nr, top_sand)) return pd.DataFrame(result, columns=["nr", "top"]) def cumulative_thickness(): pass