lactationcurve.characteristics

 1from .lactation_curve_characteristics import (
 2    calculate_characteristic,
 3    lactation_curve_characteristic_function,
 4    numeric_cumulative_yield,
 5    numeric_peak_yield,
 6    numeric_time_to_peak,
 7    persistency_fitted_curve,
 8    persistency_milkbot,
 9    persistency_wood,
10)
11from .test_interval_method import test_interval_method
12
13__all__ = [
14    "calculate_characteristic",
15    "lactation_curve_characteristic_function",
16    "numeric_cumulative_yield",
17    "numeric_peak_yield",
18    "numeric_time_to_peak",
19    "persistency_fitted_curve",
20    "persistency_milkbot",
21    "persistency_wood",
22    "test_interval_method",
23]
def calculate_characteristic( dim, milkrecordings, model='wood', characteristic='cumulative_milk_yield', fitting='frequentist', key=None, parity=3, breed='H', continent='USA', persistency_method='derived', lactation_length=305):
292def calculate_characteristic(dim, milkrecordings, model='wood', characteristic='cumulative_milk_yield',
293                             fitting='frequentist', key=None, parity=3, breed='H', continent='USA',
294                             persistency_method='derived', lactation_length=305):
295    """Evaluate a lactation curve characteristic from observed test-day data.
296
297    This function fits the requested model (frequentist or Bayesian via MilkBot),
298    retrieves model parameters, and evaluates the requested characteristic using the
299    symbolic expression (if available), falling back to numeric methods when needed.
300
301    Args:
302        dim (Int): Days in milk (DIM).
303        milkrecordings (Float): Milk recordings (kg or lbs) for each DIM.
304        model (str): Model name. Supported for this function:
305            'milkbot', 'wood', 'wilmink', 'ali_schaeffer', 'fischer'.
306        characteristic (str): One of 'time_to_peak', 'peak_yield', 'cumulative_milk_yield', 'persistency'.
307        fitting (str): 'frequentist' (default) or 'bayesian'.
308        key (str | None): API key for MilkBot Bayesian fitting.
309        parity (Int): Parity of the cow; values above 3 are considered as 3 (Bayesian).
310        breed (str): 'H' (Holstein) or 'J' (Jersey) (Bayesian).
311        continent (str): 'USA', 'EU', or 'CHEN' (Bayesian).
312        persistency_method (str): 'derived' (average slope after peak; default) or 'literature'
313            (only for Wood and MilkBot).
314        lactation_length (Int | str): Horizon for persistency calculation:
315            305 (default), 'max' (use max DIM in data), or an integer.
316
317    Returns:
318        float: The requested characteristic value.
319
320    Raises:
321        Exception: If inputs are invalid, the model/characteristic is unsupported,
322            an API key is missing for Bayesian fitting, or the characteristic cannot be computed.
323    """
324    # check and prepare input
325    inputs = validate_and_prepare_inputs(
326        dim,
327        milkrecordings,
328        model=model,
329        fitting=fitting,
330        breed=breed,
331        parity=parity,
332        continent=continent,
333        persistency_method=persistency_method,
334        lactation_length=lactation_length
335    )
336
337    dim: ndarray = inputs.dim
338    milkrecordings: ndarray = inputs.milkrecordings
339    model: str | None = inputs.model
340    fitting: str | None = inputs.fitting
341    breed: str | None = inputs.breed
342    parity: int | None = inputs.parity
343    continent: str | None = inputs.continent
344    persistency_method: str | None = inputs.persistency_method
345    lactation_length: int | str | None = inputs.lactation_length
346
347    if model not in ['milkbot', 'wood', 'wilmink', 'ali_schaeffer', 'fischer']:
348        raise Exception('this function currently only works for the milkbot, wood, wilmink, ali_schaeffer and fischer models')
349
350    characteristic_options: list[str] = ["time_to_peak", "peak_yield", "cumulative_milk_yield", "persistency"]
351
352    if characteristic in characteristic_options:
353        if fitting == 'frequentist':
354
355            # Get fitted parameters from your fitting function
356            fitted_params = get_lc_parameters(dim, milkrecordings, model)
357
358            if characteristic != 'persistency':
359                # Try symbolic formula first
360                expr, params, fn = lactation_curve_characteristic_function(model, characteristic, lactation_length)
361                with np.errstate(divide='ignore', invalid='ignore'):  # get rid of warnings for invalid operations
362                    value = fn(*fitted_params)
363
364                # If symbolic formula fails or is invalid (use numeric approach)
365                if value is None or not np.isfinite(value) or (characteristic == 'time_to_peak' and value <= 0):
366                    if characteristic == 'time_to_peak':
367                        value = numeric_time_to_peak(dim, milkrecordings, model, fitting=fitting,
368                                                     key=key, parity=parity, breed=breed, continent=continent)
369                    elif characteristic == 'peak_yield':
370                        value = numeric_peak_yield(dim, milkrecordings, model, fitting=fitting,
371                                                   key=key, parity=parity, breed=breed, continent=continent)
372                    elif characteristic == 'cumulative_milk_yield':
373                        value = numeric_cumulative_yield(dim, milkrecordings, model, fitting=fitting,
374                                                         lactation_length=lactation_length,
375                                                         key=key, parity=parity, breed=breed, continent=continent)
376
377            else:
378                if persistency_method == 'derived':
379                    # find lactation length from data
380                    if lactation_length == 'max':
381                        lactation_length = max(dim)
382                    elif isinstance(lactation_length, int):
383                        lactation_length = lactation_length
384                    else:
385                        lactation_length = 305
386                    value = persistency_fitted_curve(dim, milkrecordings, model, fitting='frequentist', lactation_length=lactation_length)
387
388                else:
389                    if model == 'wood':
390                        value = persistency_wood(fitted_params[1], fitted_params[2])
391
392                    elif model == 'milkbot':
393                        value = persistency_milkbot(fitted_params[3])
394
395                    else:
396                        raise Exception(
397                            'Currently only the Wood model and MilkBot model have a separate model function from the literature integrated for persistency. if persistency="derived" is selected, persistency can be calculated for every model as the average slope of the lactation after the peak.')
398            try:
399                return float(value)
400            except ValueError:
401                raise Exception('Could not compute characteristic, possibly due to invalid fitted parameters')
402
403        else:
404            if model == 'milkbot':
405                if key is None:
406                    raise Exception('Key needed to use Bayesian fitting engine MilkBot')
407                else:
408                    fitted_params_bayes = bayesian_fit_milkbot_single_lactation(dim, milkrecordings, key, parity, breed, continent)
409                    fitted_params_bayes = fitted_params_bayes['scale'], fitted_params_bayes['ramp'], fitted_params_bayes['offset'], fitted_params_bayes['decay']
410
411                    if characteristic != 'persistency':
412                        # Get the symbolic expression and model parameters
413                        expr, param_symbols, fn = lactation_curve_characteristic_function(model, characteristic)
414                        value = fn(*fitted_params_bayes)
415
416                    else:
417                        if persistency_method == 'derived':
418                            # find lactation length from data
419                            if lactation_length == 'max':
420                                lactation_length = max(dim)
421                            elif isinstance(lactation_length, int):
422                                lactation_length = lactation_length
423                            else:
424                                lactation_length = 305
425                            value = persistency_fitted_curve(dim, milkrecordings, model, fitting='bayesian',
426                                                             key=key, parity=parity, breed=breed, continent=continent,
427                                                             lactation_length=lactation_length)
428
429                        else:
430                            value = persistency_milkbot(fitted_params_bayes[3])
431
432                    return float(value)
433            else:
434                raise Exception('Bayesian fitting is currently only implemented for MilkBot models')
435
436    else:
437        raise Exception('Unknown characteristic')

Evaluate a lactation curve characteristic from observed test-day data.

This function fits the requested model (frequentist or Bayesian via MilkBot), retrieves model parameters, and evaluates the requested characteristic using the symbolic expression (if available), falling back to numeric methods when needed.

Arguments:
  • dim (Int): Days in milk (DIM).
  • milkrecordings (Float): Milk recordings (kg or lbs) for each DIM.
  • model (str): Model name. Supported for this function: 'milkbot', 'wood', 'wilmink', 'ali_schaeffer', 'fischer'.
  • characteristic (str): One of 'time_to_peak', 'peak_yield', 'cumulative_milk_yield', 'persistency'.
  • fitting (str): 'frequentist' (default) or 'bayesian'.
  • key (str | None): API key for MilkBot Bayesian fitting.
  • parity (Int): Parity of the cow; values above 3 are considered as 3 (Bayesian).
  • breed (str): 'H' (Holstein) or 'J' (Jersey) (Bayesian).
  • continent (str): 'USA', 'EU', or 'CHEN' (Bayesian).
  • persistency_method (str): 'derived' (average slope after peak; default) or 'literature' (only for Wood and MilkBot).
  • lactation_length (Int | str): Horizon for persistency calculation: 305 (default), 'max' (use max DIM in data), or an integer.
Returns:

float: The requested characteristic value.

Raises:
  • Exception: If inputs are invalid, the model/characteristic is unsupported, an API key is missing for Bayesian fitting, or the characteristic cannot be computed.
def lactation_curve_characteristic_function(model='wood', characteristic=None, lactation_length=305):
 94def lactation_curve_characteristic_function(model='wood', characteristic=None, lactation_length=305):
 95    """Build (or fetch from cache) a symbolic expression and fast numeric function for an LCC.
 96
 97    This function derives the requested **lactation curve characteristic** for a given
 98    model using SymPy (derivative / root finding / integration). It returns the symbolic
 99    expression, the tuple of parameter symbols (argument order), and a lambdified
100    numeric function that can be evaluated with numerical parameters.
101
102    The symbolic derivation and integration are done only once per (model, characteristic)
103    and then **cached** for reuse.
104
105    Args:
106        model (str): Model name. Options:
107            'milkbot', 'wood', 'wilmink', 'ali_schaeffer', 'fischer',
108            'brody', 'sikka', 'nelder', 'dhanoa', 'emmans', 'hayashi',
109            'rook', 'dijkstra', 'prasad'.
110        characteristic (str | None): Desired characteristic. Options:
111            'time_to_peak', 'peak_yield', 'cumulative_milk_yield', 'persistency'.
112            If `None` or unrecognized, a dict of all available characteristics is returned
113            (with `persistency` possibly `None` if derivation is not feasible).
114        lactation_length (int): Length of lactation in days used in persistency
115            computation (default 305).
116
117    Returns:
118        tuple:
119            expr: SymPy expression (or dict of expressions if `characteristic` is None).
120            params: Tuple of SymPy symbols for model parameters (argument order).
121            func: Lambdified numeric function `f(*params)` (or dict of functions).
122
123    Raises:
124        Exception: If the model is unknown, or if no positive real solution for
125            peak timing/yield exists where required.
126    """
127    # check the cache
128    storage = (model, characteristic)
129    if storage in _LCC_CACHE:
130        return (_LCC_CACHE[storage]["expr"],
131                _LCC_CACHE[storage]["params"],
132                _LCC_CACHE[storage]["func"])
133
134    # make sure model is all lowercase
135    model = model.lower()
136
137    # define functions
138    if model == 'brody':
139        # === BRODY 1 ===
140        a, b, k1, k2, t = symbols('a b k1 k2 t', real=True, positive=True)
141        function = a * exp(-k1 * t) - b * exp(-k2 * t)
142
143    elif model == 'sikka':
144        # === SIKKA ===
145        a, b, c, t = symbols('a b c t', real=True, positive=True)
146        function = a * exp(b * t - c * t ** 2)
147
148    elif model == 'fischer':
149        # === FISCHER ===
150        a, b, c, t = symbols('a b c t', real=True, positive=True)
151        function = a - b * t - a * exp(-c * t)
152
153    elif model == 'nelder':
154        # === NELDER ===
155        a, b, c, t = symbols('a b c t', real=True, positive=True)
156        function = t / (a + b * t + c * t ** 2)
157
158    elif model == 'wood':
159        # === WOOD ===
160        a, b, c, t = symbols('a b c t', real=True, positive=True)
161        function = a * t ** b * exp(-c * t)
162
163    elif model == 'dhanoa':
164        # === DHANOA ===
165        a, b, c, t = symbols('a b c t', real=True, positive=True)
166        function = a * t ** (b * c) * exp(-c * t)
167
168    elif model == 'emmans':
169        # === EMMANS ===
170        a, b, c, d, t = symbols('a b c d t')
171        function = a * exp(-exp(d - b * t)) * exp(-c * t)
172
173    elif model == 'ali_schaeffer':
174        # ====ALI=====
175        a, b, c, d, k, t = symbols('a b c d k t', real=True, positive=True)
176        function = (
177            a
178            + b * (t / 340)
179            + c * (t / 340) ** 2
180            + d * log(t / 340)
181            + k * log((340 / t) ** 2)
182        )
183
184    elif model == 'wilmink':
185        # === WILMINK ===
186        a, b, c, k, t = symbols('a b c k t', real=True, positive=True)
187        function = a + b * t + c * exp(-k * t)
188
189    elif model == 'hayashi':
190        # === HAYASHI ===
191        a, b, c, d, t = symbols('a b c d t', real=True, positive=True)
192        function = b * (exp(-t / c) - exp(-t / (a * c)))
193
194    elif model == 'rook':
195        # === ROOK ===
196        a, b, c, d, t = symbols('a b c d t', real=True, positive=True)
197        function = a * (1 / (1 + b / (c + t))) * exp(-d * t)
198
199    elif model == 'dijkstra':
200        # === DIJKSTRA ===
201        a, b, c, d, t = symbols('a b c d t', real=True, positive=True)
202        function = a * exp((b * (1 - exp(-c * t)) / c) - d * t)
203
204    elif model == 'prasad':
205        # === PRASAD ===
206        a, b, c, d, t = symbols('a b c d t', real=True, positive=True)
207        function = a + b * t + c * t ** 2 + d / t
208
209    elif model == 'milkbot':
210        # === MILKBOT ===
211        a, b, c, d, t = symbols('a b c d t', real=True, positive=True)
212        function = a * (1 - exp((c - t) / b) / 2) * exp(-d * t)
213
214    else:
215        raise Exception("Unknown model")
216
217    # find derivative
218    fdiff = diff(function, t)
219    # solve derivative for when it is zero to find the function for time of peak
220    tpeak = solve(fdiff, t)
221
222    # define the end of lactation
223    T = lactation_length  # days in milk
224
225    # Persistency = average slope after peak, does not work for all models so therefore try except
226    persistency = None
227    try:
228        if tpeak:
229            tmp = (function.subs(t, T) - function.subs(t, tpeak[0])) / (T - tpeak[0])
230            tmp = tmp.cancel()  # light simplification
231            if is_valid_sympy_expr(tmp):
232                persistency = tmp
233    except Exception:
234        persistency = None
235
236    if characteristic != 'cumulative_milk_yield':
237        if tpeak:  # Check if the list is not empty
238            peak_expr = simplify(function.subs(t, tpeak[0]))
239        else:
240            raise Exception('No positive real solution for time to peak and peak yield found')
241
242    # find function for cumulative milk yield over the first 305 days of the lactation
243    cum_my_expr = integrate(function, (t, 0, 305))
244
245    # Sorted parameter list (exclude t)
246    params = tuple(sorted([s for s in function.free_symbols if s.name != "t"], key=lambda x: x.name))
247
248    # ----------------------------------------------------
249    # Select requested characteristic
250    # ----------------------------------------------------
251    if characteristic == "time_to_peak":
252        expr = tpeak[0]
253    elif characteristic == "peak_yield":
254        expr = peak_expr
255    elif characteristic == "cumulative_milk_yield":
256        expr = cum_my_expr
257    elif characteristic == 'persistency':
258        if persistency is None:
259            raise Exception("Persistency could not be computed symbolically")
260        expr = persistency
261    else:
262        # Return all four if None or 'all'
263        expr = {
264            "time_to_peak": tpeak[0],
265            "peak_yield": peak_expr,
266            "persistency": persistency,  # possibly None
267            "cumulative_milk_yield": cum_my_expr,
268        }
269
270    # ----------------------------------------------------
271    # Build fast numeric function with lambdify
272    # ----------------------------------------------------
273    if isinstance(expr, dict):
274        func = {name: lambdify(params, ex, modules=["numpy", 'scipy'])
275                for name, ex in expr.items()
276                if ex is not None}
277    else:
278        func = lambdify(params, expr, modules=["numpy", 'scipy'])
279
280    # ----------------------------------------------------
281    # Store in cache
282    # ----------------------------------------------------
283    _LCC_CACHE[storage] = {
284        "expr": expr,
285        "params": params,
286        "func": func
287    }
288
289    return expr, params, func

Build (or fetch from cache) a symbolic expression and fast numeric function for an LCC.

This function derives the requested lactation curve characteristic for a given model using SymPy (derivative / root finding / integration). It returns the symbolic expression, the tuple of parameter symbols (argument order), and a lambdified numeric function that can be evaluated with numerical parameters.

The symbolic derivation and integration are done only once per (model, characteristic) and then cached for reuse.

Arguments:
  • model (str): Model name. Options: 'milkbot', 'wood', 'wilmink', 'ali_schaeffer', 'fischer', 'brody', 'sikka', 'nelder', 'dhanoa', 'emmans', 'hayashi', 'rook', 'dijkstra', 'prasad'.
  • characteristic (str | None): Desired characteristic. Options: 'time_to_peak', 'peak_yield', 'cumulative_milk_yield', 'persistency'. If None or unrecognized, a dict of all available characteristics is returned (with persistency possibly None if derivation is not feasible).
  • lactation_length (int): Length of lactation in days used in persistency computation (default 305).
Returns:

tuple: expr: SymPy expression (or dict of expressions if characteristic is None). params: Tuple of SymPy symbols for model parameters (argument order). func: Lambdified numeric function f(*params) (or dict of functions).

Raises:
  • Exception: If the model is unknown, or if no positive real solution for peak timing/yield exists where required.
def numeric_cumulative_yield( dim, milkrecordings, model, fitting='frequentist', lactation_length=305, **kwargs) -> float:
469def numeric_cumulative_yield(dim, milkrecordings, model, fitting='frequentist', lactation_length=305, **kwargs) -> float:
470    """Compute cumulative milk yield numerically over a given horizon.
471
472    Adds up the fitted milk yield for the first `lactation_length` days of the
473    predicted yield curve.
474
475    Args:
476        dim: DIM values.
477        milkrecordings: Milk recordings (kg).
478        model: Model name.
479        fitting: 'frequentist' or 'bayesian'.
480        lactation_length: Number of days to integrate (default 305).
481        **kwargs: Additional arguments forwarded to `fit_lactation_curve`.
482
483    Returns:
484        float: Cumulative milk yield over the specified horizon.
485    """
486    y = fit_lactation_curve(dim, milkrecordings, model, fitting=fitting, **kwargs)
487    return np.trapezoid(y[:lactation_length], dx=1)

Compute cumulative milk yield numerically over a given horizon.

Adds up the fitted milk yield for the first lactation_length days of the predicted yield curve.

Arguments:
  • dim: DIM values.
  • milkrecordings: Milk recordings (kg).
  • model: Model name.
  • fitting: 'frequentist' or 'bayesian'.
  • lactation_length: Number of days to integrate (default 305).
  • **kwargs: Additional arguments forwarded to fit_lactation_curve.
Returns:

float: Cumulative milk yield over the specified horizon.

def numeric_peak_yield( dim, milkrecordings, model, fitting='frequentist', key=None, parity=3, breed='H', continent='USA') -> float:
490def numeric_peak_yield(dim, milkrecordings, model, fitting='frequentist', key=None, parity=3,
491                       breed='H', continent='USA') -> float:
492    """Compute peak yield numerically from the fitted curve.
493
494    Args:
495        dim: DIM values.
496        milkrecordings: Milk recordings (kg).
497        model: Model name.
498        fitting: 'frequentist' or 'bayesian'.
499        key: API key for MilkBot (Bayesian).
500        parity: Parity for Bayesian fitting.
501        breed: Breed for Bayesian fitting ('H' or 'J').
502        continent: Prior source for Bayesian ('USA', 'EU', 'CHEN').
503
504    Returns:
505        float: Maximum predicted milk yield.
506    """
507    # Fit the curve to get predicted milk yields
508    yields = fit_lactation_curve(dim, milkrecordings, model, fitting=fitting, key=key, parity=parity, breed=breed, continent=continent)
509    # Find the peak yield
510    peak_yield = np.max(yields)
511    return peak_yield

Compute peak yield numerically from the fitted curve.

Arguments:
  • dim: DIM values.
  • milkrecordings: Milk recordings (kg).
  • model: Model name.
  • fitting: 'frequentist' or 'bayesian'.
  • key: API key for MilkBot (Bayesian).
  • parity: Parity for Bayesian fitting.
  • breed: Breed for Bayesian fitting ('H' or 'J').
  • continent: Prior source for Bayesian ('USA', 'EU', 'CHEN').
Returns:

float: Maximum predicted milk yield.

def numeric_time_to_peak( dim, milkrecordings, model, fitting='frequentist', key=None, parity=3, breed='H', continent='USA') -> int:
441def numeric_time_to_peak(dim, milkrecordings, model, fitting='frequentist', key=None, parity=3,
442                         breed='H', continent='USA') -> int:
443    """Compute time to peak using a numeric approach.
444
445    Fits the curve (frequentist or Bayesian), evaluates the predicted yields,
446    and returns the DIM corresponding to the maximum predicted yield.
447
448    Args:
449        dim: DIM values.
450        milkrecordings: Milk recordings (kg).
451        model: Model name.
452        fitting: 'frequentist' or 'bayesian'.
453        key: API key for MilkBot (Bayesian).
454        parity: Parity for Bayesian fitting.
455        breed: Breed for Bayesian fitting ('H' or 'J').
456        continent: Prior source for Bayesian ('USA', 'EU', 'CHEN').
457
458    Returns:
459        int: DIM at which the curve attains its maximum (1-indexed).
460    """
461    # Fit the curve to get predicted milk yields
462    yields = fit_lactation_curve(dim, milkrecordings, model, fitting=fitting, key=key, parity=parity, breed=breed, continent=continent)
463    # Find the index of the peak yield
464    peak_idx = np.argmax(yields)
465    # Return the corresponding DIM
466    return int(peak_idx + 1)  # +1 because DIM starts at 1, not 0

Compute time to peak using a numeric approach.

Fits the curve (frequentist or Bayesian), evaluates the predicted yields, and returns the DIM corresponding to the maximum predicted yield.

Arguments:
  • dim: DIM values.
  • milkrecordings: Milk recordings (kg).
  • model: Model name.
  • fitting: 'frequentist' or 'bayesian'.
  • key: API key for MilkBot (Bayesian).
  • parity: Parity for Bayesian fitting.
  • breed: Breed for Bayesian fitting ('H' or 'J').
  • continent: Prior source for Bayesian ('USA', 'EU', 'CHEN').
Returns:

int: DIM at which the curve attains its maximum (1-indexed).

def persistency_fitted_curve( dim, milkrecordings, model, fitting='frequentist', key=None, parity=3, breed='H', continent='USA', lactation_length=305) -> float:
539def persistency_fitted_curve(dim, milkrecordings, model, fitting='frequentist', key=None, parity=3,
540                             breed='H', continent='USA', lactation_length=305) -> float:
541    """Persistency as the average slope after peak until end of lactation (numeric).
542
543    This is the default approach because symbolic derivation is not feasible for
544    all models. It computes:
545        `(yield_at_end - yield_at_peak) / (lactation_length - time_to_peak)`
546
547    Args:
548        dim: DIM values.
549        milkrecordings: Milk recordings (kg).
550        model: Model name. Options include 'milkbot' (Bayesian or frequentist),
551            'wood', 'wilmink', 'ali_schaeffer', 'fischer'.
552        fitting: 'frequentist' or 'bayesian'.
553        key: API key (only for Bayesian fitting).
554        parity: Parity of the cow; values above 3 treated as 3 (Bayesian).
555        breed: 'H' or 'J' (Bayesian).
556        continent: 'USA', 'EU', or 'CHEN' (Bayesian).
557        lactation_length (int | str): 305 (default), 'max' (use max DIM), or integer.
558
559    Returns:
560        float: Average slope after the peak until end of lactation.
561    """
562    # calculate time to peak
563    t_peak = numeric_time_to_peak(dim, milkrecordings, model, fitting=fitting, key=key, parity=parity, breed=breed, continent=continent)
564
565    # determine lactation length
566    if lactation_length == 'max':
567        lactation_length = max(dim)
568    elif isinstance(lactation_length, int):
569        lactation_length = lactation_length
570    else:
571        lactation_length = 305
572
573    # calculate milk yield at peak
574    yields = fit_lactation_curve(dim, milkrecordings, model, fitting=fitting, key=key, parity=parity, breed=breed, continent=continent)
575    peak_yield = yields[int(t_peak) - 1]  # -1 to prevent index error
576    # calculate milk yield at end of lactation
577    end_yield = yields[int(lactation_length) - 1]  # -1 to prevent index error
578
579    # calculate persistency
580    persistency = (end_yield - peak_yield) / (lactation_length - t_peak)
581    return persistency

Persistency as the average slope after peak until end of lactation (numeric).

This is the default approach because symbolic derivation is not feasible for all models. It computes: (yield_at_end - yield_at_peak) / (lactation_length - time_to_peak)

Arguments:
  • dim: DIM values.
  • milkrecordings: Milk recordings (kg).
  • model: Model name. Options include 'milkbot' (Bayesian or frequentist), 'wood', 'wilmink', 'ali_schaeffer', 'fischer'.
  • fitting: 'frequentist' or 'bayesian'.
  • key: API key (only for Bayesian fitting).
  • parity: Parity of the cow; values above 3 treated as 3 (Bayesian).
  • breed: 'H' or 'J' (Bayesian).
  • continent: 'USA', 'EU', or 'CHEN' (Bayesian).
  • lactation_length (int | str): 305 (default), 'max' (use max DIM), or integer.
Returns:

float: Average slope after the peak until end of lactation.

def persistency_milkbot(d) -> float:
527def persistency_milkbot(d) -> float:
528    """Persistency from the MilkBot model (Ehrlich, 2013): `Persistency = 0.693 / d`.
529
530    Args:
531        d (float): Parameter `d` of the MilkBot model.
532
533    Returns:
534        float: Persistency value from the MilkBot formula.
535    """
536    return 0.693 / d

Persistency from the MilkBot model (Ehrlich, 2013): Persistency = 0.693 / d.

Arguments:
  • d (float): Parameter d of the MilkBot model.
Returns:

float: Persistency value from the MilkBot formula.

def persistency_wood(b, c) -> float:
514def persistency_wood(b, c) -> float:
515    """Persistency from Wood et al. (1984): `Persistency = -(b + 1) * ln(c)`.
516
517    Args:
518        b (float): Parameter `b` of the Wood model.
519        c (float): Parameter `c` of the Wood model.
520
521    Returns:
522        float: Persistency value from the Wood formula.
523    """
524    return float(-(b + 1) * ln(c))

Persistency from Wood et al. (1984): Persistency = -(b + 1) * ln(c).

Arguments:
  • b (float): Parameter b of the Wood model.
  • c (float): Parameter c of the Wood model.
Returns:

float: Persistency value from the Wood formula.

def test_interval_method( df, days_in_milk_col=None, milking_yield_col=None, test_id_col=None, default_test_id=1):
 39def test_interval_method(
 40    df, days_in_milk_col=None, milking_yield_col=None, test_id_col=None, default_test_id=1
 41):
 42    """Compute 305-day total milk yield using the ICAR Test Interval Method.
 43
 44    The method applies:
 45    - Linear projection from calving to the first test day,
 46    - Trapezoidal integration between consecutive test days,
 47    - Linear projection from the last test day to DIM=305.
 48
 49    Args:
 50        df (pd.DataFrame): Input DataFrame with at least DaysInMilk, MilkingYield,
 51            and (optionally) TestId columns (names can be provided via arguments
 52            or matched via known aliases, case-insensitive).
 53        days_in_milk_col (str | None): Optional column name override for DaysInMilk.
 54        milking_yield_col (str | None): Optional column name override for MilkingYield.
 55        test_id_col (str | None): Optional column name override for TestId.
 56        default_test_id (Any): If TestId is missing, a new `TestId` column is created
 57            with this value.
 58
 59    Returns:
 60        pd.DataFrame: Two-column DataFrame with
 61            - "TestId": identifier per lactation,
 62            - "Total305Yield": computed total milk yield over 305 days.
 63
 64    Raises:
 65        ValueError: If required columns (DaysInMilk or MilkingYield) cannot be found.
 66
 67    Notes:
 68        - Records with DIM > 305 are dropped before computation.
 69        - At least two data points per TestId are required for trapezoidal integration;
 70          otherwise the lactation is skipped.
 71    """
 72    result = []
 73
 74    # create a bit more flexibility in naming the columns and when only one lactation is put in without a testid
 75
 76    # Define accepted variations for each logical column
 77    # Accepted aliases (case-insensitive)
 78    aliases = {
 79        "DaysInMilk": ["daysinmilk", "dim", "testday"],
 80        "MilkingYield": ["milkingyield", "testdaymilkyield", "milkyield", "yield"],
 81        "TestId": ["animalid", "testid", "id"],
 82    }
 83
 84    # Create a mapping from lowercase to actual column names
 85    col_lookup = {col.lower(): col for col in df.columns}
 86
 87    def get_col_name(override, possible_names):
 88        """Return a matching actual column name from `df`, or `None` if not found.
 89
 90        Args:
 91            override (str | None): Explicit column name provided by the user.
 92            possible_names (list[str]): List of acceptable aliases (lowercase).
 93
 94        Returns:
 95            str | None: The actual column name present in `df`, or `None` if no match.
 96        """
 97        if override:
 98            return col_lookup.get(override.lower())
 99        for name in possible_names:
100            if name in col_lookup:
101                return col_lookup[name]
102        return None
103
104    # Resolve columns
105    dim_col = get_col_name(days_in_milk_col, aliases["DaysInMilk"])
106    if not dim_col:
107        raise ValueError("No DaysInMilk column found in DataFrame.")
108
109    my_col = get_col_name(milking_yield_col, aliases["MilkingYield"])
110    if not my_col:
111        raise ValueError("No MilkingYield column found in DataFrame.")
112
113    id_col = get_col_name(test_id_col, aliases["TestId"])
114    if not id_col:
115        id_col = "TestId"
116        df[id_col] = default_test_id
117
118    # Filter out records where Day > 305
119    df = df[df[dim_col] <= 305]
120
121    # Iterate over each lactation
122    for lactation in df[id_col].unique():
123        lactation_df = df[df[id_col] == lactation].copy()
124
125        # Sort by DaysInMilk ascending
126        lactation_df.sort_values(by=dim_col, ascending=True, inplace=True)
127
128        if len(lactation_df) < 2:
129            print(f"Skipping TestId {lactation}: not enough data points for interpolation.")
130            continue
131
132        # Start and end points
133        start = lactation_df.iloc[0]
134        end = lactation_df.iloc[-1]
135
136        # Start contribution
137        MY0 = start[dim_col] * start[my_col]
138
139        # End contribution
140        MYend = (306 - end[dim_col]) * end[my_col]
141
142        # Intermediate trapezoidal contributions
143        lactation_df["width"] = lactation_df[dim_col].diff().shift(-1)
144        lactation_df["avg_yield"] = (lactation_df[my_col] + lactation_df[my_col].shift(-1)) / 2
145        lactation_df["trapezoid_area"] = lactation_df["width"] * lactation_df["avg_yield"]
146
147        total_intermediate = lactation_df["trapezoid_area"].sum()
148
149        total_yield = MY0 + total_intermediate + MYend
150        result.append((lactation, total_yield))
151
152    return pd.DataFrame(result, columns=["TestId", "Total305Yield"])

Compute 305-day total milk yield using the ICAR Test Interval Method.

The method applies:

  • Linear projection from calving to the first test day,
  • Trapezoidal integration between consecutive test days,
  • Linear projection from the last test day to DIM=305.
Arguments:
  • df (pd.DataFrame): Input DataFrame with at least DaysInMilk, MilkingYield, and (optionally) TestId columns (names can be provided via arguments or matched via known aliases, case-insensitive).
  • days_in_milk_col (str | None): Optional column name override for DaysInMilk.
  • milking_yield_col (str | None): Optional column name override for MilkingYield.
  • test_id_col (str | None): Optional column name override for TestId.
  • default_test_id (Any): If TestId is missing, a new TestId column is created with this value.
Returns:

pd.DataFrame: Two-column DataFrame with - "TestId": identifier per lactation, - "Total305Yield": computed total milk yield over 305 days.

Raises:
  • ValueError: If required columns (DaysInMilk or MilkingYield) cannot be found.
Notes:
  • Records with DIM > 305 are dropped before computation.
  • At least two data points per TestId are required for trapezoidal integration; otherwise the lactation is skipped.