lactationcurve

Lactation curve modelling and fitting package.

 1"""Lactation curve modelling and fitting package."""
 2
 3
 4from .characteristics import (
 5    calculate_characteristic,
 6    lactation_curve_characteristic_function,
 7    numeric_cumulative_yield,
 8    numeric_peak_yield,
 9    numeric_time_to_peak,
10    persistency_fitted_curve,
11    persistency_milkbot,
12    persistency_wood,
13    test_interval_method,
14)
15from .fitting import (
16    ali_schaeffer_model,
17    bayesian_fit_milkbot_single_lactation,
18    brody_model,
19    dhanoa_model,
20    dijkstra_model,
21    emmans_model,
22    fischer_model,
23    fit_lactation_curve,
24    get_chen_priors,
25    get_lc_parameters,
26    get_lc_parameters_least_squares,
27    hayashi_model,
28    milkbot_model,
29    nelder_model,
30    prasad_model,
31    rook_model,
32    sikka_model,
33    wilmink_model,
34    wood_model,
35)
36from .preprocessing import (
37    PreparedInputs,
38    standardize_lactation_columns,
39    validate_and_prepare_inputs,
40)
41
42__all__ = [
43    # Preprocessing
44    "PreparedInputs",
45    "standardize_lactation_columns",
46    "validate_and_prepare_inputs",
47    # Fitting
48    "ali_schaeffer_model",
49    "bayesian_fit_milkbot_single_lactation",
50    "brody_model",
51    "dhanoa_model",
52    "dijkstra_model",
53    "emmans_model",
54    "fischer_model",
55    "fit_lactation_curve",
56    "get_chen_priors",
57    "get_lc_parameters",
58    "get_lc_parameters_least_squares",
59    "hayashi_model",
60    "milkbot_model",
61    "nelder_model",
62    "prasad_model",
63    "rook_model",
64    "sikka_model",
65    "wilmink_model",
66    "wood_model",
67    # Characteristics
68    "calculate_characteristic",
69    "lactation_curve_characteristic_function",
70    "numeric_cumulative_yield",
71    "numeric_peak_yield",
72    "numeric_time_to_peak",
73    "persistency_fitted_curve",
74    "persistency_milkbot",
75    "persistency_wood",
76    "test_interval_method",
77]
@dataclass
class PreparedInputs:
42@dataclass
43class PreparedInputs:
44    """Normalized, ready‑to‑fit inputs.
45
46    This container is returned by `validate_and_prepare_inputs` and is the single
47    hand‑off object expected by the fitting routines. Arrays are finite and 1‑dimensional;
48    categorical fields are lower/upper‑cased as appropriate and may be `None` if omitted.
49
50    Attributes:
51        dim: 1D NumPy array of day‑in‑milk values (finite; same length as `milkrecordings`).
52        milkrecordings: 1D NumPy array of test‑day milk yields aligned to `dim`.
53        model: Lowercased model identifier or `None` if not provided.
54        fitting: `"frequentist"` or `"bayesian"` (lowercased) or `None`.
55        breed: `"H"` or `"J"` or `None`.
56        parity: Lactation number as `int`, if provided; otherwise `None`.
57        continent: Prior source flag for Bayesian flows (`"USA"`, `"EU"`, `"CHEN"`), or `None`.
58        persistency_method: Either `"derived"` or `"literature"`, or `None`.
59        lactation_length: Integer horizon (e.g., 305), the string `"max"`, or `None`.
60    """
61    dim: np.ndarray
62    milkrecordings: np.ndarray
63    model: str | None = None
64    fitting: str | None = None
65    breed: str | None = None
66    parity: int | None = None
67    continent: str | None = None
68    persistency_method: str | None = None
69    lactation_length: int | str | None = None

Normalized, ready‑to‑fit inputs.

This container is returned by validate_and_prepare_inputs and is the single hand‑off object expected by the fitting routines. Arrays are finite and 1‑dimensional; categorical fields are lower/upper‑cased as appropriate and may be None if omitted.

Attributes: dim: 1D NumPy array of day‑in‑milk values (finite; same length as milkrecordings). milkrecordings: 1D NumPy array of test‑day milk yields aligned to dim. model: Lowercased model identifier or None if not provided. fitting: "frequentist" or "bayesian" (lowercased) or None. breed: "H" or "J" or None. parity: Lactation number as int, if provided; otherwise None. continent: Prior source flag for Bayesian flows ("USA", "EU", "CHEN"), or None. persistency_method: Either "derived" or "literature", or None. lactation_length: Integer horizon (e.g., 305), the string "max", or None.

PreparedInputs( dim: numpy.ndarray, milkrecordings: numpy.ndarray, model: str | None = None, fitting: str | None = None, breed: str | None = None, parity: int | None = None, continent: str | None = None, persistency_method: str | None = None, lactation_length: int | str | None = None)
dim: numpy.ndarray
milkrecordings: numpy.ndarray
model: str | None = None
fitting: str | None = None
breed: str | None = None
parity: int | None = None
continent: str | None = None
persistency_method: str | None = None
lactation_length: int | str | None = None
def standardize_lactation_columns( df: pandas.core.frame.DataFrame, *, days_in_milk_col: str | None = None, milking_yield_col: str | None = None, test_id_col: str | None = None, default_test_id=0, max_dim: int = 305):
195def standardize_lactation_columns(
196    df: pd.DataFrame,
197    *,
198    days_in_milk_col: str | None = None,
199    milking_yield_col: str | None = None,
200    test_id_col: str | None = None,
201    default_test_id=0,
202    max_dim: int = 305,
203):
204    """
205    Standardize column names and structure for lactation data.
206
207    Returns
208    -------
209    df_out : pd.DataFrame
210        Copy of df with standardized columns:
211        - DaysInMilk
212        - MilkingYield
213        - TestId
214    """
215
216    df = df.copy()
217
218    # Accepted aliases (case-insensitive)
219    aliases = {
220        "DaysInMilk": ["daysinmilk", "dim", "testday"],
221        "MilkingYield": [
222            "milkingyield",
223            "testdaymilkyield",
224            "milkyield",
225            "yield",
226            "milkproduction",
227            "milk_yield",
228        ],
229        "TestId": ["testid", "animalid", "id"],
230    }
231
232    # Lowercase lookup → actual column name
233    col_lookup = {col.lower(): col for col in df.columns}
234
235    def resolve_col(override, possible_names):
236        if override:
237            return col_lookup.get(override.lower())
238        for name in possible_names:
239            if name in col_lookup:
240                return col_lookup[name]
241        return None
242
243    # Resolve columns
244    dim_col = resolve_col(days_in_milk_col, aliases["DaysInMilk"])
245    if not dim_col:
246        raise ValueError("No DaysInMilk column found.")
247
248    yield_col = resolve_col(milking_yield_col, aliases["MilkingYield"])
249    if not yield_col:
250        raise ValueError("No MilkingYield column found.")
251
252    id_col = resolve_col(test_id_col, aliases["TestId"])
253
254    # Create TestId if missing
255    if not id_col:
256        df["TestId"] = default_test_id
257        id_col = "TestId"
258
259    # Rename to standardized names
260    df = df.rename(
261        columns={
262            dim_col: "DaysInMilk",
263            yield_col: "MilkingYield",
264            id_col: "TestId",
265        }
266    )
267
268    # Filter DIM
269    df = df[df["DaysInMilk"] <= max_dim]
270
271    return df

Standardize column names and structure for lactation data.

Returns

df_out : pd.DataFrame Copy of df with standardized columns: - DaysInMilk - MilkingYield - TestId

def validate_and_prepare_inputs( dim, milkrecordings, model=None, fitting=None, *, breed=None, parity=None, continent=None, persistency_method=None, lactation_length=None) -> PreparedInputs:
 72def validate_and_prepare_inputs(
 73    dim,
 74    milkrecordings,
 75    model=None,
 76    fitting=None,
 77    *,
 78    breed=None,
 79    parity=None,
 80    continent=None,
 81    persistency_method=None,
 82    lactation_length=None,
 83) -> PreparedInputs:
 84    """
 85    Validate, normalize, and clean input data for lactation curve fitting.
 86
 87    This function performs basic consistency checks on the provided
 88    days-in-milk (DIM) and milk recording data, normalizes optional
 89    categorical parameters, and removes observations with missing or
 90    non-finite values. The cleaned and validated inputs are returned
 91    in a structured :class:`PreparedInputs` object.
 92
 93    Parameters
 94    ----------
 95    dim : array-like
 96        Days in milk corresponding to each milk recording.
 97    milkrecordings : array-like
 98        Milk yield measurements corresponding to `dim`.
 99    model : str or None, optional
100        Name of the lactation curve model. If provided, the name is
101        stripped of whitespace and converted to lowercase.
102    fitting : str or None, optional
103        Fitting approach to be used. Must be either ``"frequentist"``
104        or ``"bayesian"`` if provided.
105    breed : str or None, optional
106        Cow breed identifier. Must be ``"H"`` (Holstein) or ``"J"``
107        (Jersey) if provided. Case-insensitive.
108    parity : int or None, optional
109        Lactation number (parity). If provided, it is coerced to an
110        integer.
111    continent : str or None, optional
112        Geographic region identifier. Must be one of ``"USA"``,
113        ``"EU"``, or ``"CHEN"`` if provided. Case-insensitive.
114
115    Extra input for persistency calculation:
116        persistency_method (String): way of calculating persistency, options: 'derived' which gives the average slope of the lactation after the peak until the end of lactation (default) or 'literature' for the wood and milkbot model.
117        Lactation_length: string or int: length of the lactation in days to calculate persistency over, options: 305 = default or 'max'  uses the maximum DIM in the data, or an integer value to set the desired lactation length.
118
119    Returns
120    -------
121    PreparedInputs
122        A dataclass containing the cleaned numeric arrays (`dim`,
123        `milkrecordings`) and the normalized optional parameters.
124
125    Raises
126    ------
127    ValueError
128        If input arrays have different lengths, contain insufficient
129        valid observations, or if categorical parameters are invalid.
130
131    Notes
132    -----
133    Observations with missing or non-finite values in either `dim` or
134    `milkrecordings` are removed prior to model fitting. At least two
135    valid observations are required to proceed.
136    """
137    if len(dim) != len(milkrecordings):
138        raise ValueError("dim and milkrecordings must have the same length")
139
140    model = (model or "").strip().lower()
141
142    if parity is not None:
143        parity = int(parity)
144
145    if fitting is not None:
146        fitting = fitting.lower()
147        if fitting not in {"frequentist", "bayesian"}:
148            raise ValueError("Fitting method must be either frequentist or bayesian")
149
150    if breed is not None:
151        breed = breed.upper()
152        if breed not in {"H", "J"}:
153            raise ValueError("Breed must be either Holstein = 'H' or Jersey 'J'")
154
155    if continent is not None:
156        continent = continent.upper()
157        if continent not in {"USA", "EU", "CHEN"}:
158            raise ValueError("continent must be 'USA', 'EU', or 'CHEN'")
159
160    dim = np.asarray(dim, dtype=float)
161    milkrecordings = np.asarray(milkrecordings, dtype=float)
162
163    mask = np.isfinite(dim) & np.isfinite(milkrecordings)
164    dim = dim[mask]
165    milkrecordings = milkrecordings[mask]
166
167    if len(dim) < 2:
168        raise ValueError("At least two non missing points are required to fit a lactation curve")
169
170    if persistency_method is not None:
171        persistency_method = persistency_method.lower()
172        if persistency_method not in {"derived", "literature"}:
173            raise ValueError("persistency_method must be either 'derived' or 'literature'")
174
175    if lactation_length is not None:
176        if isinstance(lactation_length, str):
177            if lactation_length.lower() != "max":
178                raise ValueError("lactation_length string option must be 'max'")
179        else:
180            lactation_length = int(lactation_length)
181
182    return PreparedInputs(
183        dim=dim,
184        milkrecordings=milkrecordings,
185        model=model or None,
186        fitting=fitting,
187        breed=breed,
188        parity=parity,
189        continent=continent,
190        persistency_method=persistency_method,
191        lactation_length=lactation_length,
192    )

Validate, normalize, and clean input data for lactation curve fitting.

This function performs basic consistency checks on the provided days-in-milk (DIM) and milk recording data, normalizes optional categorical parameters, and removes observations with missing or non-finite values. The cleaned and validated inputs are returned in a structured PreparedInputs object.

Parameters

dim : array-like Days in milk corresponding to each milk recording. milkrecordings : array-like Milk yield measurements corresponding to dim. model : str or None, optional Name of the lactation curve model. If provided, the name is stripped of whitespace and converted to lowercase. fitting : str or None, optional Fitting approach to be used. Must be either "frequentist" or "bayesian" if provided. breed : str or None, optional Cow breed identifier. Must be "H" (Holstein) or "J" (Jersey) if provided. Case-insensitive. parity : int or None, optional Lactation number (parity). If provided, it is coerced to an integer. continent : str or None, optional Geographic region identifier. Must be one of "USA", "EU", or "CHEN" if provided. Case-insensitive.

Extra input for persistency calculation: persistency_method (String): way of calculating persistency, options: 'derived' which gives the average slope of the lactation after the peak until the end of lactation (default) or 'literature' for the wood and milkbot model. Lactation_length: string or int: length of the lactation in days to calculate persistency over, options: 305 = default or 'max' uses the maximum DIM in the data, or an integer value to set the desired lactation length.

Returns

PreparedInputs A dataclass containing the cleaned numeric arrays (dim, milkrecordings) and the normalized optional parameters.

Raises

ValueError If input arrays have different lengths, contain insufficient valid observations, or if categorical parameters are invalid.

Notes

Observations with missing or non-finite values in either dim or milkrecordings are removed prior to model fitting. At least two valid observations are required to proceed.

def ali_schaeffer_model(t, a, b, c, d, k):
128def ali_schaeffer_model(t, a, b, c, d, k):
129    """Ali & Schaeffer lactation curve model.
130
131    Args:
132        t: Time since calving in days (DIM). Use `t >= 1` to avoid `log(0)`.
133        a: Intercept-like parameter (numerical).
134        b: Linear coefficient on scaled time `t/340` (numerical).
135        c: Quadratic coefficient on scaled time `t/340` (numerical).
136        d: Coefficient on `log(340/t)` (numerical).
137        k: Coefficient on `[log(340/t)]^2` (numerical).
138
139    Returns:
140        Predicted milk yield at `t`.
141
142    Notes:
143        Uses `t_scaled = t / 340` and `log_term = ln(340 / t)`.
144    """
145    t_scaled = t / 340
146    log_term = np.log(340 / t)
147    return a + b * t_scaled + c * (t_scaled**2) + d * log_term + k * (log_term**2)

Ali & Schaeffer lactation curve model.

Args: t: Time since calving in days (DIM). Use t >= 1 to avoid log(0). a: Intercept-like parameter (numerical). b: Linear coefficient on scaled time t/340 (numerical). c: Quadratic coefficient on scaled time t/340 (numerical). d: Coefficient on log(340/t) (numerical). k: Coefficient on [log(340/t)]^2 (numerical).

Returns: Predicted milk yield at t.

Notes: Uses t_scaled = t / 340 and log_term = ln(340 / t).

def bayesian_fit_milkbot_single_lactation( dim, milkrecordings, key: str, parity=3, breed='H', continent='USA') -> dict:
687def bayesian_fit_milkbot_single_lactation(
688    dim, milkrecordings, key: str, parity=3, breed="H", continent="USA"
689) -> dict:
690    """
691    Fit a single lactation using the MilkBot API.
692
693    Args:
694        dim: List/array of DIM values.
695        milkrecordings: List/array of milk recordings (kg).
696        key: API key for MilkBot.
697        parity: Lactation number; values >= 3 are treated as one group in priors.
698        breed: "H" (Holstein) or "J" (Jersey).
699        continent: Prior source:
700            - "USA"   → MilkBot USA priors
701            - "EU"    → MilkBot EU priors
702            - "CHEN"  → Chen et al. published priors
703
704    Returns:
705        Dictionary with fitted parameters and metadata:
706            {
707                "scale": float,
708                "ramp": float,
709                "decay": float,
710                "offset": float,
711                "nPoints": int
712            }
713
714    Raises:
715        requests.HTTPError: For unsuccessful HTTP response codes.
716        RuntimeError: If the response format is unexpected.
717
718    Notes:
719        - When `continent == "CHEN"`, Chen et al. priors are included in the request payload.
720        - EU calls use the GCP EU endpoint; others use `milkbot.com`.
721    """
722    # check and prepare input
723    inputs = validate_and_prepare_inputs(
724        dim, milkrecordings, breed=breed, parity=parity, continent=continent
725    )
726
727    dim = inputs.dim
728    milkrecordings = inputs.milkrecordings
729    breed = inputs.breed
730    parity = inputs.parity
731    continent = inputs.continent
732
733    # -----------------------------
734    # Select server (USA vs EU)
735    # -----------------------------
736    if continent == "EU":
737        base_url = "https://europe-west1-numeric-analogy-337601.cloudfunctions.net/milkBot-fitter"
738    else:
739        base_url = "https://milkbot.com"
740
741    # -----------------------------
742    # Prepare headers
743    # -----------------------------
744    headers = {"Content-Type": "application/json", "X-API-KEY": key}
745
746    # -----------------------------
747    # Prepare milk points
748    # -----------------------------
749    points = sorted(
750        ({"dim": int(d), "milk": float(m)} for d, m in zip(dim, milkrecordings)),
751        key=lambda p: p["dim"],
752    )
753
754    # -----------------------------
755    # Lactation metadata
756    # -----------------------------
757    payload = {
758        "lactation": {
759            "lacKey": "single_lactation_fit",
760            "breed": breed,
761            "parity": parity,
762            "points": points,
763        },
764        "options": {
765            "returnInputData": False,
766            "returnPath": False,
767            "returnDiscriminatorPath": False,
768            # "fitEngine": "AnnealingFitter@2.0", #comment out to use the default fitter 
769            # "fitObjective": "MB2@2.0",
770            "preferredMilkUnit": "kg",
771        },
772    }
773
774    # -----------------------------
775    # Add priors only if Chen
776    # -----------------------------
777    if continent == "CHEN":
778        payload["priors"] = get_chen_priors(parity)
779
780    # -----------------------------
781    # Call API
782    # -----------------------------
783    response = requests.post(f"{base_url}/fitLactation", headers=headers, json=payload)
784    response.raise_for_status()
785    res = response.json()
786
787    # -----------------------------
788    # Normalize response (USA vs EU)
789    # -----------------------------
790    if "fittedParams" in res:
791        # USA-style response
792        fitted = res["fittedParams"]
793    elif "params" in res:
794        fitted = res["params"]
795    else:
796        raise RuntimeError(f"Unexpected MilkBot response format: {res}")
797
798    # -----------------------------
799    # Parse result
800    # -----------------------------
801
802    return {
803        "scale": fitted["scale"],
804        "ramp": fitted["ramp"],
805        "decay": fitted["decay"],
806        "offset": fitted["offset"],
807        "nPoints": len(points),
808    }

Fit a single lactation using the MilkBot API.

Args: dim: List/array of DIM values. milkrecordings: List/array of milk recordings (kg). key: API key for MilkBot. parity: Lactation number; values >= 3 are treated as one group in priors. breed: "H" (Holstein) or "J" (Jersey). continent: Prior source: - "USA" → MilkBot USA priors - "EU" → MilkBot EU priors - "CHEN" → Chen et al. published priors

Returns: Dictionary with fitted parameters and metadata: { "scale": float, "ramp": float, "decay": float, "offset": float, "nPoints": int }

Raises: requests.HTTPError: For unsuccessful HTTP response codes. RuntimeError: If the response format is unexpected.

Notes: - When continent == "CHEN", Chen et al. priors are included in the request payload. - EU calls use the GCP EU endpoint; others use milkbot.com.

def brody_model(t, a, k):
165def brody_model(t, a, k):
166    """Brody lactation curve model.
167
168    Args:
169        t: Time since calving in days (DIM).
170        a: Scale parameter (numerical).
171        k: Decay parameter (numerical).
172
173    Returns:
174        Predicted milk yield at `t`.
175    """
176    return a * np.exp(-k * t)

Brody lactation curve model.

Args: t: Time since calving in days (DIM). a: Scale parameter (numerical). k: Decay parameter (numerical).

Returns: Predicted milk yield at t.

def dhanoa_model(t, a, b, c):
212def dhanoa_model(t, a, b, c):
213    """Dhanoa lactation curve model.
214
215    Args:
216        t: Time since calving in days (DIM).
217        a: Scale parameter (numerical).
218        b: Shape parameter (numerical).
219        c: Decay parameter (numerical).
220
221    Returns:
222        Predicted milk yield at `t`.
223
224    Notes:
225        Formula: `y(t) = a * t ** (b * c) * exp(-c * t)`.
226    """
227    return a * t ** (b * c) * np.exp(-c * t)

Dhanoa lactation curve model.

Args: t: Time since calving in days (DIM). a: Scale parameter (numerical). b: Shape parameter (numerical). c: Decay parameter (numerical).

Returns: Predicted milk yield at t.

Notes: Formula: y(t) = a * t ** (b * c) * exp(-c * t).

def dijkstra_model(t, a, b, c, d):
287def dijkstra_model(t, a, b, c, d):
288    """Dijkstra lactation curve model.
289
290    Args:
291        t: Time since calving in days (DIM).
292        a: Scale parameter (numerical).
293        b: Growth parameter (numerical).
294        c: Saturation rate parameter (numerical).
295        d: Decay parameter (numerical).
296
297    Returns:
298        Predicted milk yield at `t`.
299
300    Notes:
301        Formula: `y(t) = a * exp((b * (1 - exp(-c * t)) / c) - d * t)`.
302    """
303    return a * np.exp((b * (1 - np.exp(-c * t)) / c) - d * t)

Dijkstra lactation curve model.

Args: t: Time since calving in days (DIM). a: Scale parameter (numerical). b: Growth parameter (numerical). c: Saturation rate parameter (numerical). d: Decay parameter (numerical).

Returns: Predicted milk yield at t.

Notes: Formula: y(t) = a * exp((b * (1 - exp(-c * t)) / c) - d * t).

def emmans_model(t, a, b, c, d):
230def emmans_model(t, a, b, c, d):
231    """Emmans lactation curve model.
232
233    Args:
234        t: Time since calving in days (DIM).
235        a: Scale parameter (numerical).
236        b: Growth parameter (numerical).
237        c: Decay parameter (numerical).
238        d: Location parameter in nested exponential (numerical).
239
240    Returns:
241        Predicted milk yield at `t`.
242
243    Notes:
244        Formula: `y(t) = a * exp(-exp(d - b*t)) * exp(-c*t)`.
245    """
246    return a * np.exp(-np.exp(d - b * t)) * np.exp(-c * t)

Emmans lactation curve model.

Args: t: Time since calving in days (DIM). a: Scale parameter (numerical). b: Growth parameter (numerical). c: Decay parameter (numerical). d: Location parameter in nested exponential (numerical).

Returns: Predicted milk yield at t.

Notes: Formula: y(t) = a * exp(-exp(d - b*t)) * exp(-c*t).

def fischer_model(t, a, b, c):
150def fischer_model(t, a, b, c):
151    """Fischer lactation curve model.
152
153    Args:
154        t: Time since calving in days (DIM).
155        a: Scale parameter (numerical).
156        b: Linear decline parameter (numerical).
157        c: Exponential decay parameter (numerical).
158
159    Returns:
160        Predicted milk yield at `t`.
161    """
162    return a - b * t - a * np.exp(-c * t)

Fischer lactation curve model.

Args: t: Time since calving in days (DIM). a: Scale parameter (numerical). b: Linear decline parameter (numerical). c: Exponential decay parameter (numerical).

Returns: Predicted milk yield at t.

def fit_lactation_curve( dim, milkrecordings, model='wood', fitting='frequentist', breed='H', parity=3, continent='USA', key=None):
367def fit_lactation_curve(
368    dim,
369    milkrecordings,
370    model="wood",
371    fitting="frequentist",
372    breed="H",
373    parity=3,
374    continent="USA",
375    key=None,
376):
377    """Fit lactation data to a lactation curve model and return predictions.
378
379    Depending on `fitting`:
380    - **frequentist**: Fits parameters using `minimize` and/or `curve_fit`
381      for the specified `model`, then predicts over DIM 1–305 (or up to `max(dim)` if greater).
382    - **bayesian**: (MilkBot only) Calls the MilkBot Bayesian fitting API and
383      returns predictions using the fitted parameters.
384
385    Args:
386        dim (Int): List/array of days in milk (DIM).
387        milkrecordings (Float): List/array of milk recordings (kg).
388        model (Str): Model name (lowercase), default "wood".
389            Supported for frequentist: "wood", "wilmink", "ali_schaeffer", "fischer", "milkbot".
390        fitting (Str): "frequentist" (default) or "bayesian".
391            Bayesian fitting is currently implemented only for "milkbot".
392        breed (Str): "H" (Holstein, default) or "J" (Jersey). Only used for Bayesian.
393        parity (Int): Lactation number; all parities >= 3 considered one group in priors (Bayesian).
394        continent (Str): Prior source for Bayesian, "USA" (default), "EU", or "CHEN".
395        key (Str | None): API key for MilkBot (required when `fitting == "bayesian"`).
396
397    Returns:
398        List/array of predicted milk yield for DIM 1–305 (or up to the maximum DIM if > 305).
399
400    Raises:
401        Exception: If an unknown model is requested (frequentist),
402            or Bayesian is requested for a non-MilkBot model,
403            or `key` is missing when Bayesian fitting is requested.
404
405    Notes:
406        Uses `validate_and_prepare_inputs` for input checking and normalization.
407    """
408    # check and prepare input
409    inputs = validate_and_prepare_inputs(
410        dim,
411        milkrecordings,
412        model=model,
413        fitting=fitting,
414        breed=breed,
415        parity=parity,
416        continent=continent,
417    )
418
419    dim = inputs.dim
420    milkrecordings = inputs.milkrecordings
421    model = inputs.model
422    fitting = inputs.fitting
423    breed = inputs.breed
424    parity = inputs.parity
425    continent = inputs.continent
426
427    if fitting == "frequentist":
428        if model == "wood":
429            a_w, b_w, c_w = get_lc_parameters(dim, milkrecordings, model)
430            if max(dim) > 305:
431                t_range = np.arange(1, (max(dim) + 1))
432                y_w = wood_model(t_range, a_w, b_w, c_w)
433            else:
434                t_range = np.arange(1, 306)
435                y_w = wood_model(t_range, a_w, b_w, c_w)
436            return y_w
437
438        elif model == "wilmink":
439            a_wil, b_wil, c_wil, k_wil = get_lc_parameters(dim, milkrecordings, model)
440            if max(dim) > 305:
441                t_range = np.arange(1, (max(dim) + 1))
442                y_wil = wilmink_model(t_range, a_wil, b_wil, c_wil, k_wil)
443            else:
444                t_range = np.arange(1, 306)
445                y_wil = wilmink_model(t_range, a_wil, b_wil, c_wil, k_wil)
446            return y_wil
447
448        elif model == "ali_schaeffer":
449            a_as, b_as, c_as, d_as, k_as = get_lc_parameters(dim, milkrecordings, model)
450            if max(dim) > 305:
451                t_range = np.arange(1, (max(dim) + 1))
452                y_as = ali_schaeffer_model(t_range, a_as, b_as, c_as, d_as, k_as)
453            else:
454                t_range = np.arange(1, 306)
455                y_as = ali_schaeffer_model(t_range, a_as, b_as, c_as, d_as, k_as)
456            return y_as
457
458        elif model == "fischer":
459            a_f, b_f, c_f = get_lc_parameters(dim, milkrecordings, model)
460            if max(dim) > 305:
461                t_range = np.arange(1, (max(dim) + 1))
462                y_f = fischer_model(t_range, a_f, b_f, c_f)
463            else:
464                t_range = np.arange(1, 306)
465                y_f = fischer_model(t_range, a_f, b_f, c_f)
466            return y_f
467
468        elif model == "milkbot":
469            a_mb, b_mb, c_mb, d_mb = get_lc_parameters(dim, milkrecordings, model)
470            if max(dim) > 305:
471                t_range = np.arange(1, (max(dim) + 1))
472            else:
473                t_range = np.arange(1, 306)
474
475            y_mb = milkbot_model(t_range, a_mb, b_mb, c_mb, d_mb)
476            return y_mb
477
478        else:
479            raise Exception("Unknown model")
480    else:
481        if model == "milkbot":
482            if key == None:
483                raise Exception("Key needed to use Bayesian fitting engine milkbot")
484            else:
485                parameters = bayesian_fit_milkbot_single_lactation(
486                    dim, milkrecordings, key, parity, breed, continent
487                )
488                if max(dim) > 305:
489                    t_range = np.arange(1, (max(dim) + 1))
490                    y_mb_bay = milkbot_model(
491                        t_range,
492                        parameters["scale"],
493                        parameters["ramp"],
494                        parameters["offset"],
495                        parameters["decay"],
496                    )
497                else:
498                    t_range = np.arange(1, 306)
499                    y_mb_bay = milkbot_model(
500                        t_range,
501                        parameters["scale"],
502                        parameters["ramp"],
503                        parameters["offset"],
504                        parameters["decay"],
505                    )
506                return y_mb_bay
507        else:
508            raise Exception("Bayesian fitting is currently only implemented for milkbot models")

Fit lactation data to a lactation curve model and return predictions.

Depending on fitting:

  • frequentist: Fits parameters using minimize and/or curve_fit for the specified model, then predicts over DIM 1–305 (or up to max(dim) if greater).
  • bayesian: (MilkBot only) Calls the MilkBot Bayesian fitting API and returns predictions using the fitted parameters.

Args: dim (Int): List/array of days in milk (DIM). milkrecordings (Float): List/array of milk recordings (kg). model (Str): Model name (lowercase), default "wood". Supported for frequentist: "wood", "wilmink", "ali_schaeffer", "fischer", "milkbot". fitting (Str): "frequentist" (default) or "bayesian". Bayesian fitting is currently implemented only for "milkbot". breed (Str): "H" (Holstein, default) or "J" (Jersey). Only used for Bayesian. parity (Int): Lactation number; all parities >= 3 considered one group in priors (Bayesian). continent (Str): Prior source for Bayesian, "USA" (default), "EU", or "CHEN". key (Str | None): API key for MilkBot (required when fitting == "bayesian").

Returns: List/array of predicted milk yield for DIM 1–305 (or up to the maximum DIM if > 305).

Raises: Exception: If an unknown model is requested (frequentist), or Bayesian is requested for a non-MilkBot model, or key is missing when Bayesian fitting is requested.

Notes: Uses validate_and_prepare_inputs for input checking and normalization.

def get_chen_priors(parity: int) -> dict:
640def get_chen_priors(parity: int) -> dict:
641    """
642    Return Chen et al. priors in MilkBot format.
643
644    Args:
645        parity: Lactation number (1, 2, or >= 3).
646
647    Returns:
648        Dictionary with parameter priors:
649        - "scale": {"mean", "sd"}
650        - "ramp": {"mean", "sd"}
651        - "decay": {"mean", "sd"}
652        - "offset": {"mean", "sd"}
653        - "seMilk": Standard error of milk measurement.
654        - "milkUnit": Unit string (e.g., "kg").
655    """
656    if parity == 1:
657        return {
658            "scale": {"mean": 34.11, "sd": 7},
659            "ramp": {"mean": 29.96, "sd": 3},
660            "decay": {"mean": 0.001835, "sd": 0.000738},
661            "offset": {"mean": -0.5, "sd": 0.02},
662            "seMilk": 4,
663            "milkUnit": "kg",
664        }
665
666    if parity == 2:
667        return {
668            "scale": {"mean": 44.26, "sd": 9.57},
669            "ramp": {"mean": 22.52, "sd": 3},
670            "decay": {"mean": 0.002745, "sd": 0.000979},
671            "offset": {"mean": -0.78, "sd": 0.07},
672            "seMilk": 4,
673            "milkUnit": "kg",
674        }
675
676    # parity >= 3
677    return {
678        "scale": {"mean": 48.41, "sd": 10.66},
679        "ramp": {"mean": 22.54, "sd": 8.724},
680        "decay": {"mean": 0.002997, "sd": 0.000972},
681        "offset": {"mean": 0.0, "sd": 0.03},
682        "seMilk": 4,
683        "milkUnit": "kg",
684    }

Return Chen et al. priors in MilkBot format.

Args: parity: Lactation number (1, 2, or >= 3).

Returns: Dictionary with parameter priors: - "scale": {"mean", "sd"} - "ramp": {"mean", "sd"} - "decay": {"mean", "sd"} - "offset": {"mean", "sd"} - "seMilk": Standard error of milk measurement. - "milkUnit": Unit string (e.g., "kg").

def get_lc_parameters(dim, milkrecordings, model='wood'):
569def get_lc_parameters(dim, milkrecordings, model="wood"):
570    """Fit lactation data to a model and return fitted parameters (frequentist).
571
572    Depending on `model`, this uses `scipy.optimize.minimize` and/or
573    `scipy.optimize.curve_fit` with model-specific starting values and bounds.
574
575    Args:
576        dim (int): List/array of DIM values.
577        milkrecordings (float): List/array of milk recordings (kg).
578        model (str): One of "wood", "wilmink", "ali_schaeffer", "fischer", "milkbot".
579
580    Returns:
581        Fitted parameters as floats, in alphabetical order by parameter name:
582            - wood: (a, b, c)
583            - wilmink: (a, b, c, k) with k fixed at -0.05
584            - ali_schaeffer: (a, b, c, d, k)
585            - fischer: (a, b, c)
586            - milkbot: (a, b, c, d)
587    """
588    # check and prepare input
589    inputs = validate_and_prepare_inputs(dim, milkrecordings, model=model)
590
591    dim = inputs.dim
592    milkrecordings = inputs.milkrecordings
593    model = inputs.model
594
595    if model == "wood":
596        wood_guess = [30, 0.2, 0.01]
597        wood_bounds = [(1, 100), (0.01, 1.5), (0.0001, 0.1)]
598        wood_res = minimize(
599            wood_objective, wood_guess, args=(dim, milkrecordings), bounds=wood_bounds
600        )
601        a_w, b_w, c_w = wood_res.x
602        return a_w, b_w, c_w
603
604    elif model == "wilmink":
605        wil_guess = [10, 0.1, 30]
606        wil_params, _ = curve_fit(wilmink_model, dim, milkrecordings, p0=wil_guess)
607        a_wil, b_wil, c_wil = wil_params
608        k_wil = -0.05  # set fixed
609        return a_wil, b_wil, c_wil, k_wil
610
611    elif model == "ali_schaeffer":
612        ali_schaeffer_guess = [10, 10, -5, 1, 1]
613        ali_schaeffer_params, _ = curve_fit(
614            ali_schaeffer_model, dim, milkrecordings, p0=ali_schaeffer_guess
615        )
616        a_as, b_as, c_as, d_as, k_as = ali_schaeffer_params
617        return a_as, b_as, c_as, d_as, k_as
618
619    elif model == "fischer":
620        fischer_guess = [max(milkrecordings), 0.01, 0.01]
621        fischer_bounds = [(0, 100), (0, 1), (0.0001, 1)]
622        fischer_params, _ = curve_fit(
623            fischer_model,
624            dim,
625            milkrecordings,
626            p0=fischer_guess,
627            bounds=np.transpose(fischer_bounds),
628        )
629        a_f, b_f, c_f = fischer_params
630        return a_f, b_f, c_f
631
632    elif model == "milkbot":
633        mb_guess = [max(milkrecordings), 20.0, -0.7, 0.022]
634        mb_bounds = [(1, 100), (1, 100), (-600, 300), (0.0001, 0.1)]
635        mb_res = minimize(milkbot_objective, mb_guess, args=(dim, milkrecordings), bounds=mb_bounds)
636        a_mb, b_mb, c_mb, d_mb = mb_res.x
637        return a_mb, b_mb, c_mb, d_mb

Fit lactation data to a model and return fitted parameters (frequentist).

Depending on model, this uses scipy.optimize.minimize and/or scipy.optimize.curve_fit with model-specific starting values and bounds.

Args: dim (int): List/array of DIM values. milkrecordings (float): List/array of milk recordings (kg). model (str): One of "wood", "wilmink", "ali_schaeffer", "fischer", "milkbot".

Returns: Fitted parameters as floats, in alphabetical order by parameter name: - wood: (a, b, c) - wilmink: (a, b, c, k) with k fixed at -0.05 - ali_schaeffer: (a, b, c, d, k) - fischer: (a, b, c) - milkbot: (a, b, c, d)

def get_lc_parameters_least_squares(dim, milkrecordings, model='milkbot'):
511def get_lc_parameters_least_squares(dim, milkrecordings, model="milkbot"):
512    """Fit lactation data and return model parameters (least squares; frequentist).
513
514    This helper uses `scipy.optimize.least_squares` to fit the MilkBot model with bounds,
515    and returns the fitted parameters. 
516    Currently implemented only for the MilkBot model, as it is more complex and benefits from the robust optimization approach. 
517    Other models can be fitted using `get_lc_parameters` with numerical optimisation, which is generally faster for simpler models.
518
519    Args:
520        dim (int): List/array of DIM values.
521        milkrecordings (float): List/array of milk recordings (kg).
522        model (str): Pre-defined model name; currently used with "milkbot".
523
524    Returns:
525        Parameters `(a, b, c, d)` as `np.float` in alphabetic order.
526
527    """
528    # check and prepare input
529    inputs = validate_and_prepare_inputs(dim, milkrecordings, model=model)
530
531    dim = inputs.dim
532    milkrecordings = inputs.milkrecordings
533    model = inputs.model
534
535    # ------------------------------
536    # Initial guess
537    # ------------------------------
538    a0 = np.max(milkrecordings)
539    b0 = 50.0
540    c0 = 30.0
541    d0 = 0.01
542    p0 = [a0, b0, c0, d0]
543
544    # ------------------------------
545    # Parameter bounds
546    # ------------------------------
547    lower = [np.max(milkrecordings) * 0.5, 1.0, -300.0, 1e-6]
548    upper = [np.max(milkrecordings) * 8.0, 400.0, 300.0, 1.0]
549
550    # ------------------------------
551    # Fit using least-squares
552    # ------------------------------
553    res = least_squares(
554        residuals_milkbot,
555        p0,
556        args=(dim, milkrecordings),
557        bounds=(lower, upper),
558        method="trf",  # trust region reflective, works well with bounds
559    )
560
561    # ------------------------------
562    # Extract parameters
563    # ------------------------------
564    a_mb, b_mb, c_mb, d_mb = res.x
565
566    return a_mb, b_mb, c_mb, d_mb

Fit lactation data and return model parameters (least squares; frequentist).

This helper uses scipy.optimize.least_squares to fit the MilkBot model with bounds, and returns the fitted parameters. Currently implemented only for the MilkBot model, as it is more complex and benefits from the robust optimization approach. Other models can be fitted using get_lc_parameters with numerical optimisation, which is generally faster for simpler models.

Args: dim (int): List/array of DIM values. milkrecordings (float): List/array of milk recordings (kg). model (str): Pre-defined model name; currently used with "milkbot".

Returns: Parameters (a, b, c, d) as np.float in alphabetic order.

def hayashi_model(t, a, b, c, d):
249def hayashi_model(t, a, b, c, d):
250    """Hayashi lactation curve model.
251
252    Args:
253        t: Time since calving in days (DIM).
254        a: Ratio parameter (> 0) (numerical).
255        b: Scale parameter (numerical).
256        c: Time constant for the first exponential term (numerical).
257        d: Parameter retained for compatibility with literature (unused in this expression).
258
259    Returns:
260        Predicted milk yield at `t`.
261
262    Notes:
263        Formula: `y(t) = b * (exp(-t / c) - exp(-t / (a * c)))`.
264    """
265    return b * (np.exp(-t / c) - np.exp(-t / (a * c)))

Hayashi lactation curve model.

Args: t: Time since calving in days (DIM). a: Ratio parameter (> 0) (numerical). b: Scale parameter (numerical). c: Time constant for the first exponential term (numerical). d: Parameter retained for compatibility with literature (unused in this expression).

Returns: Predicted milk yield at t.

Notes: Formula: y(t) = b * (exp(-t / c) - exp(-t / (a * c))).

def milkbot_model(t, a, b, c, d):
71def milkbot_model(t, a, b, c, d):
72    """MilkBot lactation curve model.
73
74    Args:
75        t: Time since calving in days (DIM), scalar or array-like.
76        a: Scale; overall level of milk production.
77        b: Ramp; governs the rate of rise in early lactation.
78        c: Offset; small (usually minor) correction around the theoretical start of lactation.
79        d: Decay; exponential decline rate, evident in late lactation.
80
81    Returns:
82        Predicted milk yield at `t` (same shape as `t`).
83
84    Notes:
85        Formula: `y(t) = a * (1 - exp((c - t) / b) / 2) * exp(-d * t)`.
86    """
87    return a * (1 - np.exp((c - t) / b) / 2) * np.exp(-d * t)

MilkBot lactation curve model.

Args: t: Time since calving in days (DIM), scalar or array-like. a: Scale; overall level of milk production. b: Ramp; governs the rate of rise in early lactation. c: Offset; small (usually minor) correction around the theoretical start of lactation. d: Decay; exponential decline rate, evident in late lactation.

Returns: Predicted milk yield at t (same shape as t).

Notes: Formula: y(t) = a * (1 - exp((c - t) / b) / 2) * exp(-d * t).

def nelder_model(t, a, b, c):
194def nelder_model(t, a, b, c):
195    """Nelder lactation curve model.
196
197    Args:
198        t: Time since calving in days (DIM).
199        a: Denominator intercept (numerical).
200        b: Denominator linear coefficient (numerical).
201        c: Denominator quadratic coefficient (numerical).
202
203    Returns:
204        Predicted milk yield at `t`.
205
206    Notes:
207        Formula: `y(t) = t / (a + b*t + c*t**2)`.
208    """
209    return t / (a + b * t + c * t**2)

Nelder lactation curve model.

Args: t: Time since calving in days (DIM). a: Denominator intercept (numerical). b: Denominator linear coefficient (numerical). c: Denominator quadratic coefficient (numerical).

Returns: Predicted milk yield at t.

Notes: Formula: y(t) = t / (a + b*t + c*t**2).

def prasad_model(t, a, b, c, d):
306def prasad_model(t, a, b, c, d):
307    """Prasad lactation curve model.
308
309    Args:
310        t: Time since calving in days (DIM).
311        a: Intercept-like parameter (numerical).
312        b: Linear coefficient (numerical).
313        c: Quadratic coefficient (numerical).
314        d: Inverse-time coefficient (numerical).
315
316    Returns:
317        Predicted milk yield at `t`.
318
319    Notes:
320        Formula: `y(t) = a + b*t + c*t**2 + d/t`.
321    """
322    return a + b * t + c * t**2 + d / t

Prasad lactation curve model.

Args: t: Time since calving in days (DIM). a: Intercept-like parameter (numerical). b: Linear coefficient (numerical). c: Quadratic coefficient (numerical). d: Inverse-time coefficient (numerical).

Returns: Predicted milk yield at t.

Notes: Formula: y(t) = a + b*t + c*t**2 + d/t.

def rook_model(t, a, b, c, d):
268def rook_model(t, a, b, c, d):
269    """Rook lactation curve model.
270
271    Args:
272        t: Time since calving in days (DIM).
273        a: Scale parameter (numerical).
274        b: Shape parameter in rational term (numerical).
275        c: Offset parameter in rational term (numerical).
276        d: Exponential decay parameter (numerical).
277
278    Returns:
279        Predicted milk yield at `t`.
280
281    Notes:
282        Formula: `y(t) = a * (1 / (1 + b / (c + t))) * exp(-d * t)`.
283    """
284    return a * (1 / (1 + b / (c + t))) * np.exp(-d * t)

Rook lactation curve model.

Args: t: Time since calving in days (DIM). a: Scale parameter (numerical). b: Shape parameter in rational term (numerical). c: Offset parameter in rational term (numerical). d: Exponential decay parameter (numerical).

Returns: Predicted milk yield at t.

Notes: Formula: y(t) = a * (1 / (1 + b / (c + t))) * exp(-d * t).

def sikka_model(t, a, b, c):
179def sikka_model(t, a, b, c):
180    """Sikka lactation curve model.
181
182    Args:
183        t: Time since calving in days (DIM).
184        a: Scale parameter (numerical).
185        b: Growth parameter (numerical).
186        c: Quadratic decay parameter (numerical).
187
188    Returns:
189        Predicted milk yield at `t`.
190    """
191    return a * np.exp(b * t - c * t**2)

Sikka lactation curve model.

Args: t: Time since calving in days (DIM). a: Scale parameter (numerical). b: Growth parameter (numerical). c: Quadratic decay parameter (numerical).

Returns: Predicted milk yield at t.

def wilmink_model(t, a, b, c, k=-0.05):
108def wilmink_model(t, a, b, c, k=-0.05):
109    """Wilmink lactation curve model.
110
111    Args:
112        t: Time since calving in days (DIM), scalar or array-like.
113        a: Intercept-like parameter (numerical).
114        b: Linear trend coefficient (numerical).
115        c: Exponential-term scale (numerical).
116        k: Fixed exponential rate (numerical), default -0.05.
117
118    Returns:
119        Predicted milk yield at `t`.
120
121    Notes:
122        Formula: `y(t) = a + b * t + c * exp(k * t)`.
123    """
124    t = np.asarray(t)
125    return a + b * t + c * np.exp(k * t)

Wilmink lactation curve model.

Args: t: Time since calving in days (DIM), scalar or array-like. a: Intercept-like parameter (numerical). b: Linear trend coefficient (numerical). c: Exponential-term scale (numerical). k: Fixed exponential rate (numerical), default -0.05.

Returns: Predicted milk yield at t.

Notes: Formula: y(t) = a + b * t + c * exp(k * t).

def wood_model(t, a, b, c):
 90def wood_model(t, a, b, c):
 91    """Wood lactation curve model.
 92
 93    Args:
 94        t: Time since calving in days (DIM), scalar or array-like.
 95        a: Scale parameter (numerical).
 96        b: Shape parameter controlling rise (numerical).
 97        c: Decay parameter (numerical).
 98
 99    Returns:
100        Predicted milk yield at `t`.
101
102    Notes:
103        Formula: `y(t) = a * t**b * exp(-c * t)`.
104    """
105    return a * (t**b) * np.exp(-c * t)

Wood lactation curve model.

Args: t: Time since calving in days (DIM), scalar or array-like. a: Scale parameter (numerical). b: Shape parameter controlling rise (numerical). c: Decay parameter (numerical).

Returns: Predicted milk yield at t.

Notes: Formula: y(t) = a * t**b * exp(-c * t).

def calculate_characteristic( dim, milkrecordings, model='wood', characteristic='cumulative_milk_yield', fitting='frequentist', key=None, parity=3, breed='H', continent='USA', persistency_method='derived', lactation_length=305):
292def calculate_characteristic(dim, milkrecordings, model='wood', characteristic='cumulative_milk_yield',
293                             fitting='frequentist', key=None, parity=3, breed='H', continent='USA',
294                             persistency_method='derived', lactation_length=305):
295    """Evaluate a lactation curve characteristic from observed test-day data.
296
297    This function fits the requested model (frequentist or Bayesian via MilkBot),
298    retrieves model parameters, and evaluates the requested characteristic using the
299    symbolic expression (if available), falling back to numeric methods when needed.
300
301    Args:
302        dim (Int): Days in milk (DIM).
303        milkrecordings (Float): Milk recordings (kg or lbs) for each DIM.
304        model (str): Model name. Supported for this function:
305            'milkbot', 'wood', 'wilmink', 'ali_schaeffer', 'fischer'.
306        characteristic (str): One of 'time_to_peak', 'peak_yield', 'cumulative_milk_yield', 'persistency'.
307        fitting (str): 'frequentist' (default) or 'bayesian'.
308        key (str | None): API key for MilkBot Bayesian fitting.
309        parity (Int): Parity of the cow; values above 3 are considered as 3 (Bayesian).
310        breed (str): 'H' (Holstein) or 'J' (Jersey) (Bayesian).
311        continent (str): 'USA', 'EU', or 'CHEN' (Bayesian).
312        persistency_method (str): 'derived' (average slope after peak; default) or 'literature'
313            (only for Wood and MilkBot).
314        lactation_length (Int | str): Horizon for persistency calculation:
315            305 (default), 'max' (use max DIM in data), or an integer.
316
317    Returns:
318        float: The requested characteristic value.
319
320    Raises:
321        Exception: If inputs are invalid, the model/characteristic is unsupported,
322            an API key is missing for Bayesian fitting, or the characteristic cannot be computed.
323    """
324    # check and prepare input
325    inputs = validate_and_prepare_inputs(
326        dim,
327        milkrecordings,
328        model=model,
329        fitting=fitting,
330        breed=breed,
331        parity=parity,
332        continent=continent,
333        persistency_method=persistency_method,
334        lactation_length=lactation_length
335    )
336
337    dim: ndarray = inputs.dim
338    milkrecordings: ndarray = inputs.milkrecordings
339    model: str | None = inputs.model
340    fitting: str | None = inputs.fitting
341    breed: str | None = inputs.breed
342    parity: int | None = inputs.parity
343    continent: str | None = inputs.continent
344    persistency_method: str | None = inputs.persistency_method
345    lactation_length: int | str | None = inputs.lactation_length
346
347    if model not in ['milkbot', 'wood', 'wilmink', 'ali_schaeffer', 'fischer']:
348        raise Exception('this function currently only works for the milkbot, wood, wilmink, ali_schaeffer and fischer models')
349
350    characteristic_options: list[str] = ["time_to_peak", "peak_yield", "cumulative_milk_yield", "persistency"]
351
352    if characteristic in characteristic_options:
353        if fitting == 'frequentist':
354
355            # Get fitted parameters from your fitting function
356            fitted_params = get_lc_parameters(dim, milkrecordings, model)
357
358            if characteristic != 'persistency':
359                # Try symbolic formula first
360                expr, params, fn = lactation_curve_characteristic_function(model, characteristic, lactation_length)
361                with np.errstate(divide='ignore', invalid='ignore'):  # get rid of warnings for invalid operations
362                    value = fn(*fitted_params)
363
364                # If symbolic formula fails or is invalid (use numeric approach)
365                if value is None or not np.isfinite(value) or (characteristic == 'time_to_peak' and value <= 0):
366                    if characteristic == 'time_to_peak':
367                        value = numeric_time_to_peak(dim, milkrecordings, model, fitting=fitting,
368                                                     key=key, parity=parity, breed=breed, continent=continent)
369                    elif characteristic == 'peak_yield':
370                        value = numeric_peak_yield(dim, milkrecordings, model, fitting=fitting,
371                                                   key=key, parity=parity, breed=breed, continent=continent)
372                    elif characteristic == 'cumulative_milk_yield':
373                        value = numeric_cumulative_yield(dim, milkrecordings, model, fitting=fitting,
374                                                         lactation_length=lactation_length,
375                                                         key=key, parity=parity, breed=breed, continent=continent)
376
377            else:
378                if persistency_method == 'derived':
379                    # find lactation length from data
380                    if lactation_length == 'max':
381                        lactation_length = max(dim)
382                    elif isinstance(lactation_length, int):
383                        lactation_length = lactation_length
384                    else:
385                        lactation_length = 305
386                    value = persistency_fitted_curve(dim, milkrecordings, model, fitting='frequentist', lactation_length=lactation_length)
387
388                else:
389                    if model == 'wood':
390                        value = persistency_wood(fitted_params[1], fitted_params[2])
391
392                    elif model == 'milkbot':
393                        value = persistency_milkbot(fitted_params[3])
394
395                    else:
396                        raise Exception(
397                            'Currently only the Wood model and MilkBot model have a separate model function from the literature integrated for persistency. if persistency="derived" is selected, persistency can be calculated for every model as the average slope of the lactation after the peak.')
398            try:
399                return float(value)
400            except ValueError:
401                raise Exception('Could not compute characteristic, possibly due to invalid fitted parameters')
402
403        else:
404            if model == 'milkbot':
405                if key is None:
406                    raise Exception('Key needed to use Bayesian fitting engine MilkBot')
407                else:
408                    fitted_params_bayes = bayesian_fit_milkbot_single_lactation(dim, milkrecordings, key, parity, breed, continent)
409                    fitted_params_bayes = fitted_params_bayes['scale'], fitted_params_bayes['ramp'], fitted_params_bayes['offset'], fitted_params_bayes['decay']
410
411                    if characteristic != 'persistency':
412                        # Get the symbolic expression and model parameters
413                        expr, param_symbols, fn = lactation_curve_characteristic_function(model, characteristic)
414                        value = fn(*fitted_params_bayes)
415
416                    else:
417                        if persistency_method == 'derived':
418                            # find lactation length from data
419                            if lactation_length == 'max':
420                                lactation_length = max(dim)
421                            elif isinstance(lactation_length, int):
422                                lactation_length = lactation_length
423                            else:
424                                lactation_length = 305
425                            value = persistency_fitted_curve(dim, milkrecordings, model, fitting='bayesian',
426                                                             key=key, parity=parity, breed=breed, continent=continent,
427                                                             lactation_length=lactation_length)
428
429                        else:
430                            value = persistency_milkbot(fitted_params_bayes[3])
431
432                    return float(value)
433            else:
434                raise Exception('Bayesian fitting is currently only implemented for MilkBot models')
435
436    else:
437        raise Exception('Unknown characteristic')

Evaluate a lactation curve characteristic from observed test-day data.

This function fits the requested model (frequentist or Bayesian via MilkBot), retrieves model parameters, and evaluates the requested characteristic using the symbolic expression (if available), falling back to numeric methods when needed.

Args: dim (Int): Days in milk (DIM). milkrecordings (Float): Milk recordings (kg or lbs) for each DIM. model (str): Model name. Supported for this function: 'milkbot', 'wood', 'wilmink', 'ali_schaeffer', 'fischer'. characteristic (str): One of 'time_to_peak', 'peak_yield', 'cumulative_milk_yield', 'persistency'. fitting (str): 'frequentist' (default) or 'bayesian'. key (str | None): API key for MilkBot Bayesian fitting. parity (Int): Parity of the cow; values above 3 are considered as 3 (Bayesian). breed (str): 'H' (Holstein) or 'J' (Jersey) (Bayesian). continent (str): 'USA', 'EU', or 'CHEN' (Bayesian). persistency_method (str): 'derived' (average slope after peak; default) or 'literature' (only for Wood and MilkBot). lactation_length (Int | str): Horizon for persistency calculation: 305 (default), 'max' (use max DIM in data), or an integer.

Returns: float: The requested characteristic value.

Raises: Exception: If inputs are invalid, the model/characteristic is unsupported, an API key is missing for Bayesian fitting, or the characteristic cannot be computed.

def lactation_curve_characteristic_function(model='wood', characteristic=None, lactation_length=305):
 94def lactation_curve_characteristic_function(model='wood', characteristic=None, lactation_length=305):
 95    """Build (or fetch from cache) a symbolic expression and fast numeric function for an LCC.
 96
 97    This function derives the requested **lactation curve characteristic** for a given
 98    model using SymPy (derivative / root finding / integration). It returns the symbolic
 99    expression, the tuple of parameter symbols (argument order), and a lambdified
100    numeric function that can be evaluated with numerical parameters.
101
102    The symbolic derivation and integration are done only once per (model, characteristic)
103    and then **cached** for reuse.
104
105    Args:
106        model (str): Model name. Options:
107            'milkbot', 'wood', 'wilmink', 'ali_schaeffer', 'fischer',
108            'brody', 'sikka', 'nelder', 'dhanoa', 'emmans', 'hayashi',
109            'rook', 'dijkstra', 'prasad'.
110        characteristic (str | None): Desired characteristic. Options:
111            'time_to_peak', 'peak_yield', 'cumulative_milk_yield', 'persistency'.
112            If `None` or unrecognized, a dict of all available characteristics is returned
113            (with `persistency` possibly `None` if derivation is not feasible).
114        lactation_length (int): Length of lactation in days used in persistency
115            computation (default 305).
116
117    Returns:
118        tuple:
119            expr: SymPy expression (or dict of expressions if `characteristic` is None).
120            params: Tuple of SymPy symbols for model parameters (argument order).
121            func: Lambdified numeric function `f(*params)` (or dict of functions).
122
123    Raises:
124        Exception: If the model is unknown, or if no positive real solution for
125            peak timing/yield exists where required.
126    """
127    # check the cache
128    storage = (model, characteristic)
129    if storage in _LCC_CACHE:
130        return (_LCC_CACHE[storage]["expr"],
131                _LCC_CACHE[storage]["params"],
132                _LCC_CACHE[storage]["func"])
133
134    # make sure model is all lowercase
135    model = model.lower()
136
137    # define functions
138    if model == 'brody':
139        # === BRODY 1 ===
140        a, b, k1, k2, t = symbols('a b k1 k2 t', real=True, positive=True)
141        function = a * exp(-k1 * t) - b * exp(-k2 * t)
142
143    elif model == 'sikka':
144        # === SIKKA ===
145        a, b, c, t = symbols('a b c t', real=True, positive=True)
146        function = a * exp(b * t - c * t ** 2)
147
148    elif model == 'fischer':
149        # === FISCHER ===
150        a, b, c, t = symbols('a b c t', real=True, positive=True)
151        function = a - b * t - a * exp(-c * t)
152
153    elif model == 'nelder':
154        # === NELDER ===
155        a, b, c, t = symbols('a b c t', real=True, positive=True)
156        function = t / (a + b * t + c * t ** 2)
157
158    elif model == 'wood':
159        # === WOOD ===
160        a, b, c, t = symbols('a b c t', real=True, positive=True)
161        function = a * t ** b * exp(-c * t)
162
163    elif model == 'dhanoa':
164        # === DHANOA ===
165        a, b, c, t = symbols('a b c t', real=True, positive=True)
166        function = a * t ** (b * c) * exp(-c * t)
167
168    elif model == 'emmans':
169        # === EMMANS ===
170        a, b, c, d, t = symbols('a b c d t')
171        function = a * exp(-exp(d - b * t)) * exp(-c * t)
172
173    elif model == 'ali_schaeffer':
174        # ====ALI=====
175        a, b, c, d, k, t = symbols('a b c d k t', real=True, positive=True)
176        function = (
177            a
178            + b * (t / 340)
179            + c * (t / 340) ** 2
180            + d * log(t / 340)
181            + k * log((340 / t) ** 2)
182        )
183
184    elif model == 'wilmink':
185        # === WILMINK ===
186        a, b, c, k, t = symbols('a b c k t', real=True, positive=True)
187        function = a + b * t + c * exp(-k * t)
188
189    elif model == 'hayashi':
190        # === HAYASHI ===
191        a, b, c, d, t = symbols('a b c d t', real=True, positive=True)
192        function = b * (exp(-t / c) - exp(-t / (a * c)))
193
194    elif model == 'rook':
195        # === ROOK ===
196        a, b, c, d, t = symbols('a b c d t', real=True, positive=True)
197        function = a * (1 / (1 + b / (c + t))) * exp(-d * t)
198
199    elif model == 'dijkstra':
200        # === DIJKSTRA ===
201        a, b, c, d, t = symbols('a b c d t', real=True, positive=True)
202        function = a * exp((b * (1 - exp(-c * t)) / c) - d * t)
203
204    elif model == 'prasad':
205        # === PRASAD ===
206        a, b, c, d, t = symbols('a b c d t', real=True, positive=True)
207        function = a + b * t + c * t ** 2 + d / t
208
209    elif model == 'milkbot':
210        # === MILKBOT ===
211        a, b, c, d, t = symbols('a b c d t', real=True, positive=True)
212        function = a * (1 - exp((c - t) / b) / 2) * exp(-d * t)
213
214    else:
215        raise Exception("Unknown model")
216
217    # find derivative
218    fdiff = diff(function, t)
219    # solve derivative for when it is zero to find the function for time of peak
220    tpeak = solve(fdiff, t)
221
222    # define the end of lactation
223    T = lactation_length  # days in milk
224
225    # Persistency = average slope after peak, does not work for all models so therefore try except
226    persistency = None
227    try:
228        if tpeak:
229            tmp = (function.subs(t, T) - function.subs(t, tpeak[0])) / (T - tpeak[0])
230            tmp = tmp.cancel()  # light simplification
231            if is_valid_sympy_expr(tmp):
232                persistency = tmp
233    except Exception:
234        persistency = None
235
236    if characteristic != 'cumulative_milk_yield':
237        if tpeak:  # Check if the list is not empty
238            peak_expr = simplify(function.subs(t, tpeak[0]))
239        else:
240            raise Exception('No positive real solution for time to peak and peak yield found')
241
242    # find function for cumulative milk yield over the first 305 days of the lactation
243    cum_my_expr = integrate(function, (t, 0, 305))
244
245    # Sorted parameter list (exclude t)
246    params = tuple(sorted([s for s in function.free_symbols if s.name != "t"], key=lambda x: x.name))
247
248    # ----------------------------------------------------
249    # Select requested characteristic
250    # ----------------------------------------------------
251    if characteristic == "time_to_peak":
252        expr = tpeak[0]
253    elif characteristic == "peak_yield":
254        expr = peak_expr
255    elif characteristic == "cumulative_milk_yield":
256        expr = cum_my_expr
257    elif characteristic == 'persistency':
258        if persistency is None:
259            raise Exception("Persistency could not be computed symbolically")
260        expr = persistency
261    else:
262        # Return all four if None or 'all'
263        expr = {
264            "time_to_peak": tpeak[0],
265            "peak_yield": peak_expr,
266            "persistency": persistency,  # possibly None
267            "cumulative_milk_yield": cum_my_expr,
268        }
269
270    # ----------------------------------------------------
271    # Build fast numeric function with lambdify
272    # ----------------------------------------------------
273    if isinstance(expr, dict):
274        func = {name: lambdify(params, ex, modules=["numpy", 'scipy'])
275                for name, ex in expr.items()
276                if ex is not None}
277    else:
278        func = lambdify(params, expr, modules=["numpy", 'scipy'])
279
280    # ----------------------------------------------------
281    # Store in cache
282    # ----------------------------------------------------
283    _LCC_CACHE[storage] = {
284        "expr": expr,
285        "params": params,
286        "func": func
287    }
288
289    return expr, params, func

Build (or fetch from cache) a symbolic expression and fast numeric function for an LCC.

This function derives the requested lactation curve characteristic for a given model using SymPy (derivative / root finding / integration). It returns the symbolic expression, the tuple of parameter symbols (argument order), and a lambdified numeric function that can be evaluated with numerical parameters.

The symbolic derivation and integration are done only once per (model, characteristic) and then cached for reuse.

Args: model (str): Model name. Options: 'milkbot', 'wood', 'wilmink', 'ali_schaeffer', 'fischer', 'brody', 'sikka', 'nelder', 'dhanoa', 'emmans', 'hayashi', 'rook', 'dijkstra', 'prasad'. characteristic (str | None): Desired characteristic. Options: 'time_to_peak', 'peak_yield', 'cumulative_milk_yield', 'persistency'. If None or unrecognized, a dict of all available characteristics is returned (with persistency possibly None if derivation is not feasible). lactation_length (int): Length of lactation in days used in persistency computation (default 305).

Returns: tuple: expr: SymPy expression (or dict of expressions if characteristic is None). params: Tuple of SymPy symbols for model parameters (argument order). func: Lambdified numeric function f(*params) (or dict of functions).

Raises: Exception: If the model is unknown, or if no positive real solution for peak timing/yield exists where required.

def numeric_cumulative_yield( dim, milkrecordings, model, fitting='frequentist', lactation_length=305, **kwargs) -> float:
469def numeric_cumulative_yield(dim, milkrecordings, model, fitting='frequentist', lactation_length=305, **kwargs) -> float:
470    """Compute cumulative milk yield numerically over a given horizon.
471
472    Adds up the fitted milk yield for the first `lactation_length` days of the
473    predicted yield curve.
474
475    Args:
476        dim: DIM values.
477        milkrecordings: Milk recordings (kg).
478        model: Model name.
479        fitting: 'frequentist' or 'bayesian'.
480        lactation_length: Number of days to integrate (default 305).
481        **kwargs: Additional arguments forwarded to `fit_lactation_curve`.
482
483    Returns:
484        float: Cumulative milk yield over the specified horizon.
485    """
486    y = fit_lactation_curve(dim, milkrecordings, model, fitting=fitting, **kwargs)
487    return np.trapezoid(y[:lactation_length], dx=1)

Compute cumulative milk yield numerically over a given horizon.

Adds up the fitted milk yield for the first lactation_length days of the predicted yield curve.

Args: dim: DIM values. milkrecordings: Milk recordings (kg). model: Model name. fitting: 'frequentist' or 'bayesian'. lactation_length: Number of days to integrate (default 305). **kwargs: Additional arguments forwarded to fit_lactation_curve.

Returns: float: Cumulative milk yield over the specified horizon.

def numeric_peak_yield( dim, milkrecordings, model, fitting='frequentist', key=None, parity=3, breed='H', continent='USA') -> float:
490def numeric_peak_yield(dim, milkrecordings, model, fitting='frequentist', key=None, parity=3,
491                       breed='H', continent='USA') -> float:
492    """Compute peak yield numerically from the fitted curve.
493
494    Args:
495        dim: DIM values.
496        milkrecordings: Milk recordings (kg).
497        model: Model name.
498        fitting: 'frequentist' or 'bayesian'.
499        key: API key for MilkBot (Bayesian).
500        parity: Parity for Bayesian fitting.
501        breed: Breed for Bayesian fitting ('H' or 'J').
502        continent: Prior source for Bayesian ('USA', 'EU', 'CHEN').
503
504    Returns:
505        float: Maximum predicted milk yield.
506    """
507    # Fit the curve to get predicted milk yields
508    yields = fit_lactation_curve(dim, milkrecordings, model, fitting=fitting, key=key, parity=parity, breed=breed, continent=continent)
509    # Find the peak yield
510    peak_yield = np.max(yields)
511    return peak_yield

Compute peak yield numerically from the fitted curve.

Args: dim: DIM values. milkrecordings: Milk recordings (kg). model: Model name. fitting: 'frequentist' or 'bayesian'. key: API key for MilkBot (Bayesian). parity: Parity for Bayesian fitting. breed: Breed for Bayesian fitting ('H' or 'J'). continent: Prior source for Bayesian ('USA', 'EU', 'CHEN').

Returns: float: Maximum predicted milk yield.

def numeric_time_to_peak( dim, milkrecordings, model, fitting='frequentist', key=None, parity=3, breed='H', continent='USA') -> int:
441def numeric_time_to_peak(dim, milkrecordings, model, fitting='frequentist', key=None, parity=3,
442                         breed='H', continent='USA') -> int:
443    """Compute time to peak using a numeric approach.
444
445    Fits the curve (frequentist or Bayesian), evaluates the predicted yields,
446    and returns the DIM corresponding to the maximum predicted yield.
447
448    Args:
449        dim: DIM values.
450        milkrecordings: Milk recordings (kg).
451        model: Model name.
452        fitting: 'frequentist' or 'bayesian'.
453        key: API key for MilkBot (Bayesian).
454        parity: Parity for Bayesian fitting.
455        breed: Breed for Bayesian fitting ('H' or 'J').
456        continent: Prior source for Bayesian ('USA', 'EU', 'CHEN').
457
458    Returns:
459        int: DIM at which the curve attains its maximum (1-indexed).
460    """
461    # Fit the curve to get predicted milk yields
462    yields = fit_lactation_curve(dim, milkrecordings, model, fitting=fitting, key=key, parity=parity, breed=breed, continent=continent)
463    # Find the index of the peak yield
464    peak_idx = np.argmax(yields)
465    # Return the corresponding DIM
466    return int(peak_idx + 1)  # +1 because DIM starts at 1, not 0

Compute time to peak using a numeric approach.

Fits the curve (frequentist or Bayesian), evaluates the predicted yields, and returns the DIM corresponding to the maximum predicted yield.

Args: dim: DIM values. milkrecordings: Milk recordings (kg). model: Model name. fitting: 'frequentist' or 'bayesian'. key: API key for MilkBot (Bayesian). parity: Parity for Bayesian fitting. breed: Breed for Bayesian fitting ('H' or 'J'). continent: Prior source for Bayesian ('USA', 'EU', 'CHEN').

Returns: int: DIM at which the curve attains its maximum (1-indexed).

def persistency_fitted_curve( dim, milkrecordings, model, fitting='frequentist', key=None, parity=3, breed='H', continent='USA', lactation_length=305) -> float:
539def persistency_fitted_curve(dim, milkrecordings, model, fitting='frequentist', key=None, parity=3,
540                             breed='H', continent='USA', lactation_length=305) -> float:
541    """Persistency as the average slope after peak until end of lactation (numeric).
542
543    This is the default approach because symbolic derivation is not feasible for
544    all models. It computes:
545        `(yield_at_end - yield_at_peak) / (lactation_length - time_to_peak)`
546
547    Args:
548        dim: DIM values.
549        milkrecordings: Milk recordings (kg).
550        model: Model name. Options include 'milkbot' (Bayesian or frequentist),
551            'wood', 'wilmink', 'ali_schaeffer', 'fischer'.
552        fitting: 'frequentist' or 'bayesian'.
553        key: API key (only for Bayesian fitting).
554        parity: Parity of the cow; values above 3 treated as 3 (Bayesian).
555        breed: 'H' or 'J' (Bayesian).
556        continent: 'USA', 'EU', or 'CHEN' (Bayesian).
557        lactation_length (int | str): 305 (default), 'max' (use max DIM), or integer.
558
559    Returns:
560        float: Average slope after the peak until end of lactation.
561    """
562    # calculate time to peak
563    t_peak = numeric_time_to_peak(dim, milkrecordings, model, fitting=fitting, key=key, parity=parity, breed=breed, continent=continent)
564
565    # determine lactation length
566    if lactation_length == 'max':
567        lactation_length = max(dim)
568    elif isinstance(lactation_length, int):
569        lactation_length = lactation_length
570    else:
571        lactation_length = 305
572
573    # calculate milk yield at peak
574    yields = fit_lactation_curve(dim, milkrecordings, model, fitting=fitting, key=key, parity=parity, breed=breed, continent=continent)
575    peak_yield = yields[int(t_peak) - 1]  # -1 to prevent index error
576    # calculate milk yield at end of lactation
577    end_yield = yields[int(lactation_length) - 1]  # -1 to prevent index error
578
579    # calculate persistency
580    persistency = (end_yield - peak_yield) / (lactation_length - t_peak)
581    return persistency

Persistency as the average slope after peak until end of lactation (numeric).

This is the default approach because symbolic derivation is not feasible for all models. It computes: (yield_at_end - yield_at_peak) / (lactation_length - time_to_peak)

Args: dim: DIM values. milkrecordings: Milk recordings (kg). model: Model name. Options include 'milkbot' (Bayesian or frequentist), 'wood', 'wilmink', 'ali_schaeffer', 'fischer'. fitting: 'frequentist' or 'bayesian'. key: API key (only for Bayesian fitting). parity: Parity of the cow; values above 3 treated as 3 (Bayesian). breed: 'H' or 'J' (Bayesian). continent: 'USA', 'EU', or 'CHEN' (Bayesian). lactation_length (int | str): 305 (default), 'max' (use max DIM), or integer.

Returns: float: Average slope after the peak until end of lactation.

def persistency_milkbot(d) -> float:
527def persistency_milkbot(d) -> float:
528    """Persistency from the MilkBot model (Ehrlich, 2013): `Persistency = 0.693 / d`.
529
530    Args:
531        d (float): Parameter `d` of the MilkBot model.
532
533    Returns:
534        float: Persistency value from the MilkBot formula.
535    """
536    return 0.693 / d

Persistency from the MilkBot model (Ehrlich, 2013): Persistency = 0.693 / d.

Args: d (float): Parameter d of the MilkBot model.

Returns: float: Persistency value from the MilkBot formula.

def persistency_wood(b, c) -> float:
514def persistency_wood(b, c) -> float:
515    """Persistency from Wood et al. (1984): `Persistency = -(b + 1) * ln(c)`.
516
517    Args:
518        b (float): Parameter `b` of the Wood model.
519        c (float): Parameter `c` of the Wood model.
520
521    Returns:
522        float: Persistency value from the Wood formula.
523    """
524    return float(-(b + 1) * ln(c))

Persistency from Wood et al. (1984): Persistency = -(b + 1) * ln(c).

Args: b (float): Parameter b of the Wood model. c (float): Parameter c of the Wood model.

Returns: float: Persistency value from the Wood formula.

def test_interval_method( df, days_in_milk_col=None, milking_yield_col=None, test_id_col=None, default_test_id=1):
 39def test_interval_method(
 40    df, days_in_milk_col=None, milking_yield_col=None, test_id_col=None, default_test_id=1
 41):
 42    """Compute 305-day total milk yield using the ICAR Test Interval Method.
 43
 44    The method applies:
 45    - Linear projection from calving to the first test day,
 46    - Trapezoidal integration between consecutive test days,
 47    - Linear projection from the last test day to DIM=305.
 48
 49    Args:
 50        df (pd.DataFrame): Input DataFrame with at least DaysInMilk, MilkingYield,
 51            and (optionally) TestId columns (names can be provided via arguments
 52            or matched via known aliases, case-insensitive).
 53        days_in_milk_col (str | None): Optional column name override for DaysInMilk.
 54        milking_yield_col (str | None): Optional column name override for MilkingYield.
 55        test_id_col (str | None): Optional column name override for TestId.
 56        default_test_id (Any): If TestId is missing, a new `TestId` column is created
 57            with this value.
 58
 59    Returns:
 60        pd.DataFrame: Two-column DataFrame with
 61            - "TestId": identifier per lactation,
 62            - "Total305Yield": computed total milk yield over 305 days.
 63
 64    Raises:
 65        ValueError: If required columns (DaysInMilk or MilkingYield) cannot be found.
 66
 67    Notes:
 68        - Records with DIM > 305 are dropped before computation.
 69        - At least two data points per TestId are required for trapezoidal integration;
 70          otherwise the lactation is skipped.
 71    """
 72    result = []
 73
 74    # create a bit more flexibility in naming the columns and when only one lactation is put in without a testid
 75
 76    # Define accepted variations for each logical column
 77    # Accepted aliases (case-insensitive)
 78    aliases = {
 79        "DaysInMilk": ["daysinmilk", "dim", "testday"],
 80        "MilkingYield": ["milkingyield", "testdaymilkyield", "milkyield", "yield"],
 81        "TestId": ["animalid", "testid", "id"],
 82    }
 83
 84    # Create a mapping from lowercase to actual column names
 85    col_lookup = {col.lower(): col for col in df.columns}
 86
 87    def get_col_name(override, possible_names):
 88        """Return a matching actual column name from `df`, or `None` if not found.
 89
 90        Args:
 91            override (str | None): Explicit column name provided by the user.
 92            possible_names (list[str]): List of acceptable aliases (lowercase).
 93
 94        Returns:
 95            str | None: The actual column name present in `df`, or `None` if no match.
 96        """
 97        if override:
 98            return col_lookup.get(override.lower())
 99        for name in possible_names:
100            if name in col_lookup:
101                return col_lookup[name]
102        return None
103
104    # Resolve columns
105    dim_col = get_col_name(days_in_milk_col, aliases["DaysInMilk"])
106    if not dim_col:
107        raise ValueError("No DaysInMilk column found in DataFrame.")
108
109    my_col = get_col_name(milking_yield_col, aliases["MilkingYield"])
110    if not my_col:
111        raise ValueError("No MilkingYield column found in DataFrame.")
112
113    id_col = get_col_name(test_id_col, aliases["TestId"])
114    if not id_col:
115        id_col = "TestId"
116        df[id_col] = default_test_id
117
118    # Filter out records where Day > 305
119    df = df[df[dim_col] <= 305]
120
121    # Iterate over each lactation
122    for lactation in df[id_col].unique():
123        lactation_df = df[df[id_col] == lactation].copy()
124
125        # Sort by DaysInMilk ascending
126        lactation_df.sort_values(by=dim_col, ascending=True, inplace=True)
127
128        if len(lactation_df) < 2:
129            print(f"Skipping TestId {lactation}: not enough data points for interpolation.")
130            continue
131
132        # Start and end points
133        start = lactation_df.iloc[0]
134        end = lactation_df.iloc[-1]
135
136        # Start contribution
137        MY0 = start[dim_col] * start[my_col]
138
139        # End contribution
140        MYend = (306 - end[dim_col]) * end[my_col]
141
142        # Intermediate trapezoidal contributions
143        lactation_df["width"] = lactation_df[dim_col].diff().shift(-1)
144        lactation_df["avg_yield"] = (lactation_df[my_col] + lactation_df[my_col].shift(-1)) / 2
145        lactation_df["trapezoid_area"] = lactation_df["width"] * lactation_df["avg_yield"]
146
147        total_intermediate = lactation_df["trapezoid_area"].sum()
148
149        total_yield = MY0 + total_intermediate + MYend
150        result.append((lactation, total_yield))
151
152    return pd.DataFrame(result, columns=["TestId", "Total305Yield"])

Compute 305-day total milk yield using the ICAR Test Interval Method.

The method applies:

  • Linear projection from calving to the first test day,
  • Trapezoidal integration between consecutive test days,
  • Linear projection from the last test day to DIM=305.

Args: df (pd.DataFrame): Input DataFrame with at least DaysInMilk, MilkingYield, and (optionally) TestId columns (names can be provided via arguments or matched via known aliases, case-insensitive). days_in_milk_col (str | None): Optional column name override for DaysInMilk. milking_yield_col (str | None): Optional column name override for MilkingYield. test_id_col (str | None): Optional column name override for TestId. default_test_id (Any): If TestId is missing, a new TestId column is created with this value.

Returns: pd.DataFrame: Two-column DataFrame with - "TestId": identifier per lactation, - "Total305Yield": computed total milk yield over 305 days.

Raises: ValueError: If required columns (DaysInMilk or MilkingYield) cannot be found.

Notes: - Records with DIM > 305 are dropped before computation. - At least two data points per TestId are required for trapezoidal integration; otherwise the lactation is skipped.