lactationcurve.characteristics

View Source

 1from . import best_predict, lactation_curve_characteristics, method_test_interval
 2from .best_predict import (
 3    best_predict_method,
 4    best_predict_method_single_lac,
 5    build_covariance_matrix,
 6    center_lactation_data,
 7    fit_autocorrelation_matrix,
 8    fit_standard_lc,
 9    pivot_milk_recordings_to_matrix,
10    preprocess_measured_data,
11)
12from .ISLC import (
13    ISLC,
14    ISLC_method,
15    ISLC_original,
16    ISLC_original_method,
17    create_standard_lc_representation,
18    interpolation_standard_lc,
19    linear_interpd_all_to_grid,
20    linear_interpd_closest_to_grid,
21)
22from .lactation_curve_characteristics import (
23    calculate_characteristic,
24    lactation_curve_characteristic_function,
25    numeric_cumulative_yield,
26    numeric_peak_yield,
27    numeric_time_to_peak,
28    persistency_fitted_curve,
29    persistency_milkbot,
30    persistency_wood,
31)
32from .method_test_interval import test_interval_method
33
34__all__ = [
35    "ISLC",
36    "ISLC_method",
37    "ISLC_original",
38    "ISLC_original_method",
39    "best_predict",
40    "best_predict_method",
41    "best_predict_method_single_lac",
42    "build_covariance_matrix",
43    "calculate_characteristic",
44    "center_lactation_data",
45    "create_standard_lc_representation",
46    "fit_autocorrelation_matrix",
47    "fit_standard_lc",
48    "interpolation_standard_lc",
49    "lactation_curve_characteristics",
50    "lactation_curve_characteristic_function",
51    "linear_interpd_all_to_grid",
52    "linear_interpd_closest_to_grid",
53    "method_test_interval",
54    "numeric_cumulative_yield",
55    "numeric_peak_yield",
56    "numeric_time_to_peak",
57    "persistency_fitted_curve",
58    "persistency_milkbot",
59    "persistency_wood",
60    "pivot_milk_recordings_to_matrix",
61    "preprocess_measured_data",
62    "test_interval_method",
63]

def ISLC( df: pandas.core.frame.DataFrame, days_in_milk_col: str | None = None, milking_yield_col: str | None = None, test_id_col: str | None = None, default_test_id: int = 0, standard_lc_305: pandas.core.series.Series | NDArray[numpy.float64] = STANDARD_CURVE, correlation_matrix: pandas.core.frame.DataFrame = CORR_MATRIX, std_per_grid_day: NDArray[numpy.float64] = STDs, max_dim: int | str = 305, fit_standard_lc_from_data: bool = False, reference_df: pandas.core.frame.DataFrame | None = None) -> pandas.core.frame.DataFrame: View Source

220def ISLC(
221    df: pd.DataFrame,
222    days_in_milk_col: str | None = None,
223    milking_yield_col: str | None = None,
224    test_id_col: str | None = None,
225    default_test_id: int = 0,
226    standard_lc_305: pd.Series | npt.NDArray[np.float64] = STANDARD_CURVE,
227    correlation_matrix: pd.DataFrame = CORR_MATRIX,
228    std_per_grid_day: npt.NDArray[np.float64] = STDs,
229    max_dim: int | str = 305,
230    fit_standard_lc_from_data: bool = False,
231    reference_df: pd.DataFrame | None = None,
232) -> pd.DataFrame:
233    """Estimate cumulative lactation yield per TestId using ISLC.
234
235    This variant applies :func:`ISLC_method` to standardized lactation records.
236    By default it uses the package-provided standard curve, correlation matrix,
237    and per-grid day standard deviations. Optionally, these assets can be refit
238    from a reference dataset by providing a pandas dataframe at 'reference_df ='
239    when ``fit_standard_lc_from_data`` is True.
240    Alternative for customization is to set standard_lc_305,
241    correlation_matrix, and std_per_grid_day directly.
242
243    Args:
244        df: Input test-day records.
245        days_in_milk_col: Name of DIM column in ``df``.
246        milking_yield_col: Name of milk-yield column in ``df``.
247        test_id_col: Name of lactation identifier column in ``df``.
248        default_test_id: Fallback TestId value when ``test_id_col`` is absent.
249        standard_lc_305: Standard lactation curve values used by ISLC.
250        correlation_matrix: Correlation matrix aligned with ISLC grid.
251        std_per_grid_day: Standard deviations aligned with ISLC grid.
252        max_dim: Maximum DIM to include
253            (or ``"max"`` so highest test day DIM is considered the final lactation day).
254        fit_standard_lc_from_data: If True, fit representation from ``reference_df``.
255        reference_df: Reference records used when fitting curve and covariance.
256
257    Returns:
258        DataFrame with columns ``TestId`` and ``LactationMilkYield``.
259
260    Raises:
261        ValueError: If required columns are missing or fit mode has no reference data.
262    """
263    # Standardize columns and filter DIM <= 305
264    df = standardize_lactation_columns(
265        df,
266        days_in_milk_col=days_in_milk_col,
267        milking_yield_col=milking_yield_col,
268        test_id_col=test_id_col,
269        default_test_id=default_test_id,
270        max_dim=max_dim,
271    )
272
273    # Fit covariance/standard_lc if requested
274    if fit_standard_lc_from_data is True:
275        if reference_df is None:
276            raise ValueError("Provide reference_df to fit your own standard lactation curve.")
277        reference_df = standardize_lactation_columns(
278            reference_df,
279            days_in_milk_col=days_in_milk_col,
280            milking_yield_col=milking_yield_col,
281            test_id_col=test_id_col,
282            default_test_id=default_test_id,
283            max_dim=305,
284        )
285        dim = reference_df["DaysInMilk"]
286        milk_yield = reference_df["MilkingYield"]
287        standard_full_lc_305 = fit_lactation_curve(dim, milkrecordings=milk_yield, model="wilmink")
288        # convert to pandas series
289        standard_full_lc_305 = pd.Series(standard_full_lc_305)
290
291        # fit matrix
292        (
293            correlation_matrix,
294            std_per_grid_day,
295            standard_curve_fitted,
296        ) = create_standard_lc_representation(
297            df=reference_df,
298            standard_lactation_curve=pd.Series(standard_full_lc_305),
299            # reference_df is standardized above, so fixed names are required here
300            days_in_milk_col="DaysInMilk",
301            milking_yield_col="MilkingYield",
302            interpolation_method=interpolation_standard_lc,
303            small_grid=False,
304        )
305
306        # Align fitted curve with default internal convention (day 0 equals day 1).
307        standard_lc_305 = np.insert(
308            np.asarray(standard_curve_fitted, dtype=float),
309            0,
310            float(standard_curve_fitted.iloc[0]),
311        )
312
313    rows: list[dict[str, Any]] = []
314    for test_id, group in df.groupby("TestId", sort=False):
315        if group.empty:
316            rows.append({"TestId": test_id, "LactationMilkYield": np.nan})
317            continue
318
319        try:
320            cumulative_yield = ISLC_method(
321                df=group,
322                standard_lc=standard_lc_305,
323                correlation_matrix=correlation_matrix,
324                std_per_grid_day=std_per_grid_day,
325                days_in_milk_col="DaysInMilk",
326                milking_yield_col="MilkingYield",
327            )
328        except ValueError:
329            cumulative_yield = np.nan
330
331        rows.append({"TestId": test_id, "LactationMilkYield": cumulative_yield})
332
333    return pd.DataFrame(rows, columns=pd.Index(["TestId", "LactationMilkYield"]))

Estimate cumulative lactation yield per TestId using ISLC.

This variant applies ISLC_method() to standardized lactation records. By default it uses the package-provided standard curve, correlation matrix, and per-grid day standard deviations. Optionally, these assets can be refit from a reference dataset by providing a pandas dataframe at 'reference_df =' when fit_standard_lc_from_data is True. Alternative for customization is to set standard_lc_305, correlation_matrix, and std_per_grid_day directly.

Arguments:

df: Input test-day records.
days_in_milk_col: Name of DIM column in df.
milking_yield_col: Name of milk-yield column in df.
test_id_col: Name of lactation identifier column in df.
default_test_id: Fallback TestId value when test_id_col is absent.
standard_lc_305: Standard lactation curve values used by ISLC.
correlation_matrix: Correlation matrix aligned with ISLC grid.
std_per_grid_day: Standard deviations aligned with ISLC grid.
max_dim: Maximum DIM to include (or "max" so highest test day DIM is considered the final lactation day).
fit_standard_lc_from_data: If True, fit representation from reference_df.
reference_df: Reference records used when fitting curve and covariance.

Returns:

DataFrame with columns TestId and LactationMilkYield.

Raises:

ValueError: If required columns are missing or fit mode has no reference data.

def ISLC_method( df: pandas.core.frame.DataFrame, standard_lc: pandas.core.series.Series | NDArray[numpy.float64], correlation_matrix: pandas.core.frame.DataFrame, std_per_grid_day: NDArray[numpy.float64], days_in_milk_col: str = 'DaysInMilk', milking_yield_col: str = 'MilkingYield') -> float: View Source

336def ISLC_method(
337    df: pd.DataFrame,
338    standard_lc: pd.Series | npt.NDArray[np.float64],
339    correlation_matrix: pd.DataFrame,
340    std_per_grid_day: npt.NDArray[np.float64],
341    days_in_milk_col: str = "DaysInMilk",
342    milking_yield_col: str = "MilkingYield",
343) -> float:
344    """Estimate total lactation yield by predicting missing grid days, then integrating.
345
346    The method fills missing grid-day yields with the ICAR-style relation:
347
348        yp = gp + b* (yi - gi)
349
350        where for each missing day p, the nearest measured day i is used and
351        b* = corr(i,p) * std(p) / std(i). The resulting complete grid profile is
352        integrated with the test-interval style weighting used elsewhere in this
353        package.
354
355    Notes:
356        The integration is performed over the days present after merging
357        measured and predicted points. If the input contains DIM beyond 305,
358        total yield can reflect that extended horizon.
359    """
360    # add zero and 305 to the grid
361    grid = [0] + GRID_DAYS + [305]
362    dim_col = days_in_milk_col
363    yield_col = milking_yield_col
364
365    standard_lc_series = _curve_to_series(standard_lc)
366
367    # Extract the measured days
368    measured_days = df[dim_col].to_numpy()
369    if measured_days.size == 0:
370        raise ValueError("Input df must contain at least one measured day.")
371
372    # interpolate the measured days to the grid using the standard lactation curve
373    interpolated_df = interpolation_standard_lc(
374        group=df,
375        days_in_milk_col=dim_col,
376        milking_yield_col=yield_col,
377        standard_lc=standard_lc_series,
378    )
379
380    # drop unnecessary columns
381    interpolated_df = interpolated_df.drop(columns=[dim_col, yield_col])
382
383    # rename grid colum
384    interpolated_df = interpolated_df.rename(
385        columns={"GridDay": dim_col, "MilkYieldInterp": yield_col}
386    )[[dim_col, yield_col]]
387
388    measured_subset = cast(pd.DataFrame, df.loc[:, [dim_col, yield_col]])
389    interpolated_subset = cast(pd.DataFrame, interpolated_df.loc[:, [dim_col, yield_col]])
390
391    # join the interpolated values with the original measurements, keeping all measurements
392    merged_df = cast(
393        pd.DataFrame,
394        pd.merge(
395            measured_subset,
396            interpolated_subset,
397            on=dim_col,
398            how="outer",
399            suffixes=("_meas", "_interp"),
400        ),
401    )
402    meas_series = cast(pd.Series, merged_df[f"{yield_col}_meas"])
403    interp_series = cast(pd.Series, merged_df[f"{yield_col}_interp"])
404    merged_df[yield_col] = cast(pd.Series, meas_series.combine_first(interp_series))
405    df = cast(
406        pd.DataFrame,
407        merged_df.loc[:, [dim_col, yield_col]].sort_values(by=dim_col),
408    )
409
410    # days missing from measurement → need prediction
411    days_to_predict = [day for day in grid if day not in df[dim_col].values]
412
413    predicted_rows: list[dict[str, float | int]] = []
414
415    for day in days_to_predict:
416        # identify closest measured day
417        dim_series = cast(pd.Series, interpolated_df[dim_col])
418        grid_vals = np.asarray(dim_series.to_numpy(dtype=float), dtype=float)
419        closest_idx = int(np.argmin(np.abs(grid_vals - float(day))))
420        closest_day = int(dim_series.iloc[closest_idx])
421
422        # extract mean milk yield from the standard lactation curve for the day to predict
423        expected_yield_grid_day = _curve_value(standard_lc_series, day)
424        expected_yield_measured_day = _curve_value(standard_lc_series, closest_day)
425
426        # calculate difference between measured and expected yield at closest measured day
427        diff = (
428            interpolated_df.loc[interpolated_df[dim_col] == closest_day, yield_col].iloc[0]
429            - expected_yield_measured_day
430        )
431
432        # calculate the correlation-based correction factor b*
433        # b* = corr(i,p) * std(p) / std(i)
434        pi = list(grid).index(closest_day)
435        qi = list(grid).index(day)
436        bpj = correlation_matrix.iloc[pi, qi]
437        std_q = std_per_grid_day[qi]
438        std_p = std_per_grid_day[pi]
439        b_star = 0.0 if std_p == 0 else float(bpj * std_q / std_p)
440
441        # predict the yield for the missing day
442        predicted_yield = expected_yield_grid_day + b_star * diff
443        predicted_rows.append({dim_col: int(day), yield_col: float(predicted_yield)})
444
445    if predicted_rows:
446        df = pd.concat([df, pd.DataFrame(predicted_rows)], ignore_index=True)
447
448    # apply the test interval method to calculate total milk yield over 305 days
449    # sort the dataframe by days in milk and keep one value per day
450    df = df.drop_duplicates(subset=[dim_col], keep="last")
451    df = df.sort_values(by=dim_col)
452    # Trapezoidal contributions
453    df["width"] = df[dim_col].diff().shift(-1)
454    df["avg_yield"] = (df[yield_col] + df[yield_col].shift(-1)) / 2
455    df["trapezoid_area"] = df["width"] * df["avg_yield"]
456
457    trapezoid_values = np.asarray(pd.to_numeric(df["trapezoid_area"], errors="coerce"), dtype=float)
458    total_yield = float(np.nansum(trapezoid_values))
459
460    return total_yield

Estimate total lactation yield by predicting missing grid days, then integrating.

The method fills missing grid-day yields with the ICAR-style relation:

yp = gp + b* (yi - gi)

where for each missing day p, the nearest measured day i is used and
b* = corr(i,p) * std(p) / std(i). The resulting complete grid profile is
integrated with the test-interval style weighting used elsewhere in this
package.

Notes:

The integration is performed over the days present after merging measured and predicted points. If the input contains DIM beyond 305, total yield can reflect that extended horizon.

def ISLC_original( df: pandas.core.frame.DataFrame, days_in_milk_col: str | None = None, milking_yield_col: str | None = None, test_id_col: str | None = None, default_test_id: int = 0, standard_lc_305: pandas.core.series.Series | NDArray[numpy.float64] = STANDARD_CURVE, correlation_matrix: pandas.core.frame.DataFrame = CORR_MATRIX, std_per_grid_day: NDArray[numpy.float64] = STDs, fit_standard_lc_from_data: bool = False, reference_df: pandas.core.frame.DataFrame | None = None) -> pandas.core.frame.DataFrame: View Source

576def ISLC_original(
577    df: pd.DataFrame,
578    days_in_milk_col: str | None = None,
579    milking_yield_col: str | None = None,
580    test_id_col: str | None = None,
581    default_test_id: int = 0,
582    standard_lc_305: pd.Series | npt.NDArray[np.float64] = STANDARD_CURVE,
583    correlation_matrix: pd.DataFrame = CORR_MATRIX,
584    std_per_grid_day: npt.NDArray[np.float64] = STDs,
585    fit_standard_lc_from_data: bool = False,
586    reference_df: pd.DataFrame | None = None,
587) -> pd.DataFrame:
588    """Compute estimated 305-day yields using the original grid-based ISLC variant
589    from the original paper by Dr. Wilmink in 1987.
590
591    The function groups standardized input by ``TestId``, interpolates to the
592    fixed GRID_DAYS grid, and applies :func:`ISLC_original_method` per lactation.
593
594    Args:
595       df: Input records with DIM and milk yield columns.
596       days_in_milk_col: Name of DIM column in ``df``.
597       milking_yield_col: Name of milk-yield column in ``df``.
598       test_id_col: Name of lactation identifier column in ``df``.
599       default_test_id: Fallback TestId when ``test_id_col`` is absent.
600       standard_lc_305: Standard lactation curve used for interpolation.
601       correlation_matrix: Correlation matrix over GRID_DAYS.
602       std_per_grid_day: Standard deviations per GRID_DAYS element.
603       fit_standard_lc_from_data: If True, refit standard curve and covariance
604           representation from ``reference_df`` before prediction.
605       reference_df: Reference population used when
606           ``fit_standard_lc_from_data=True``. The reference data is
607           standardized to ``DaysInMilk``/``MilkingYield`` and fit using
608           ``small_grid=True`` so correlation and standard-deviation assets are
609           aligned to ``GRID_DAYS``.
610
611    Returns:
612        DataFrame with columns ``LactationMilkYield`` and ``TestId``.
613
614    Raises:
615        ValueError: If required columns (DaysInMilk or MilkingYield) cannot be
616            found, or if ``fit_standard_lc_from_data=True`` and
617            ``reference_df`` is not provided.
618
619    Notes:
620        Records with DIM > 305 are dropped before interpolation and scoring.
621    """
622    # Standardize columns and filter DIM <= 305
623    df = standardize_lactation_columns(
624        df,
625        days_in_milk_col=days_in_milk_col,
626        milking_yield_col=milking_yield_col,
627        test_id_col=test_id_col,
628        default_test_id=default_test_id,
629        max_dim=305,
630    )
631
632    # Fit covariance/standard_lc if requested
633    if fit_standard_lc_from_data is True:
634        if reference_df is None:
635            raise ValueError("Provide reference_df to fit your own standard lactation curve.")
636        reference_df = standardize_lactation_columns(
637            reference_df,
638            days_in_milk_col=days_in_milk_col,
639            milking_yield_col=milking_yield_col,
640            test_id_col=test_id_col,
641            default_test_id=default_test_id,
642            max_dim=305,
643        )
644        dim = reference_df["DaysInMilk"]
645        milk_yield = reference_df["MilkingYield"]
646        standard_full_lc_305 = fit_lactation_curve(dim, milkrecordings=milk_yield, model="wilmink")
647        standard_full_lc_305 = pd.Series(standard_full_lc_305)
648
649        # fit matrix
650        (
651            correlation_matrix,
652            std_per_grid_day,
653            standard_curve_fitted,
654        ) = create_standard_lc_representation(
655            df=reference_df,
656            standard_lactation_curve=pd.Series(standard_full_lc_305),
657            # reference_df is standardized above, so fixed names are required here
658            days_in_milk_col="DaysInMilk",
659            milking_yield_col="MilkingYield",
660            interpolation_method=interpolation_standard_lc,
661            small_grid=True,
662        )
663
664        # Align fitted curve with default internal convention (day 0 equals day 1).
665        standard_lc_305 = np.insert(
666            np.asarray(standard_curve_fitted, dtype=float),
667            0,
668            float(standard_curve_fitted.iloc[0]),
669        )
670    rows: list[dict[str, Any]] = []
671    for test_id, group in df.groupby("TestId", sort=False):
672        lactation = interpolation_standard_lc(
673            group,
674            days_in_milk_col="DaysInMilk",
675            milking_yield_col="MilkingYield",
676            standard_lc=standard_lc_305,
677        )
678        if lactation.empty:
679            rows.append({"TestId": test_id, "305_milk_yield": np.nan})
680            continue
681
682        cumulative_yield = ISLC_original_method(
683            df=lactation,
684            standard_lc=standard_lc_305,
685            correlation_matrix=correlation_matrix,
686            std_per_grid_day=std_per_grid_day,
687        )
688        rows.append({"TestId": test_id, "LactationMilkYield": cumulative_yield})
689
690    return pd.DataFrame(rows, columns=pd.Index(["TestId", "LactationMilkYield"]))

Compute estimated 305-day yields using the original grid-based ISLC variant from the original paper by Dr. Wilmink in 1987.

The function groups standardized input by TestId, interpolates to the fixed GRID_DAYS grid, and applies ISLC_original_method() per lactation.

Arguments:

df: Input records with DIM and milk yield columns.
days_in_milk_col: Name of DIM column in df.
milking_yield_col: Name of milk-yield column in df.
test_id_col: Name of lactation identifier column in df.
default_test_id: Fallback TestId when test_id_col is absent.
standard_lc_305: Standard lactation curve used for interpolation.
correlation_matrix: Correlation matrix over GRID_DAYS.
std_per_grid_day: Standard deviations per GRID_DAYS element.
fit_standard_lc_from_data: If True, refit standard curve and covariance representation from reference_df before prediction.
reference_df: Reference population used when fit_standard_lc_from_data=True. The reference data is standardized to DaysInMilk/MilkingYield and fit using small_grid=True so correlation and standard-deviation assets are aligned to GRID_DAYS.

Returns:

DataFrame with columns LactationMilkYield and TestId.

Raises:

ValueError: If required columns (DaysInMilk or MilkingYield) cannot be found, or if fit_standard_lc_from_data=True and reference_df is not provided.

Notes:

Records with DIM > 305 are dropped before interpolation and scoring.

def ISLC_original_method( df: pandas.core.frame.DataFrame, standard_lc: pandas.core.series.Series | NDArray[numpy.float64], correlation_matrix: pandas.core.frame.DataFrame, std_per_grid_day: NDArray[numpy.float64]) -> float: View Source

463def ISLC_original_method(
464    df: pd.DataFrame,
465    standard_lc: pd.Series | npt.NDArray[np.float64],
466    correlation_matrix: pd.DataFrame,
467    std_per_grid_day: npt.NDArray[np.float64],
468) -> float:
469    """Estimate 305-day milk yield for a single interpolated lactation
470    from the original paper by Dr. Wilmink in 1987.
471
472    This function computes an estimated 305-day yield by combining three
473    components: known production from interpolated measurements, the
474    population mean for missing grid days (from ``standard_lc``), and a
475    correlation-based deviation correction derived from ``correlation_matrix``.
476
477    Args:
478        df: DataFrame containing at least the columns ``GridDay`` and
479            ``MilkYieldInterp`` (values already interpolated onto the grid).
480        standard_lc: Standard lactation curve values indexed by grid day.
481        correlation_matrix: Correlation coefficients between grid-day yields
482            (matrix-like, rows/columns ordered as the grid).
483        std_per_grid_day: 1-D array of standard deviations for each grid day.
484
485    Returns:
486        A float with the estimated 305-day milk yield.
487
488    Raises:
489        ValueError: if any measured day in ``df`` is not present in the
490            expected global ``grid``.
491
492    Notes:
493        - The routine assumes fixed recording intervals of 20 days, except
494          that the final interval (if day 290 is present) is 25 days.
495        - The implementation expects a global ``grid`` variable to be
496          defined and aligned with ``standard_lc`` and
497          ``std_per_grid_day``.
498        - The amount of estimated milk yields depends on the
499            availability of measured milk yields.
500        - Lactation yield is:
501            sum(known measurements (p) + estimated measurements (q)).
502        - In the implementation according to the CRV E2 document,
503            there should also be a correction factor for the difference between
504             the expected and actual complete lactation yield of the previous lactation.
505            However, this correction factor is not included in the current implementation
506            as it requires additional data (previous lactation yield) that is not always available.
507    """
508    grid = GRID_DAYS
509    standard_lc_series = _curve_to_series(standard_lc)
510
511    # --- Validate that all measurements lie on the expected grid -------
512    measured_days = df["GridDay"].values
513    if any(day not in grid for day in measured_days):
514        raise ValueError("At least one DIM in the input df is not part of the grid.")
515
516    # --- Prepare basic components -------------------------------------
517    df = df.sort_values("GridDay")
518
519    # days missing from measurement → need prediction
520    days_to_predict = [day for day in grid if day not in measured_days]
521    pred_idx = [list(grid).index(d) for d in days_to_predict]
522
523    # mean expected milk yield from standard curve
524    mean_lc = pd.Series([_curve_value(standard_lc_series, g) for g in grid], index=grid)
525
526    # --- Compute known production (sum of actual measurements) ---------
527    if 290 in measured_days:
528        # special last interval: 25 days
529        known_prod = df["MilkYieldInterp"][:-1].sum() * 20 + df["MilkYieldInterp"].iloc[-1] * 25
530    else:
531        known_prod = df["MilkYieldInterp"].sum() * 20
532
533    # --- Compute population mean for missing days ----------------------
534    pop_mean_missing = mean_lc.loc[days_to_predict]
535
536    if 290 in pop_mean_missing.index:
537        population_mean = pop_mean_missing.iloc[:-1].sum() * 20 + pop_mean_missing.iloc[-1] * 25
538    else:
539        population_mean = pop_mean_missing.sum() * 20
540
541    # --- Compute correlation-based correction --------------------------
542    days_to_predict_np = np.array(days_to_predict)
543    measured_days_np = np.array(measured_days)
544
545    # for each missing day: find closest measured day
546    closest_measured = [
547        measured_days_np[np.argmin(np.abs(measured_days_np - g))] for g in days_to_predict_np
548    ]
549    closest_idx = [list(grid).index(d) for d in closest_measured]
550
551    b_star_list = []
552    for pi, qi in zip(closest_idx, pred_idx):
553        bpj = correlation_matrix.iloc[pi, qi]
554        stdj = std_per_grid_day[qi]
555        stdp = std_per_grid_day[pi]
556        b_star_list.append(bpj * stdj / stdp)
557
558    # scale correction by interval lengths
559    if 290 in measured_days:
560        # last interval belongs to prediction group
561        correction_weighted = sum(b_star_list[:-1]) * 20
562    else:
563        correction_weighted = sum(b_star_list[:-1]) * 20 + b_star_list[-1] * 25
564
565    last_day = df["GridDay"].iloc[-1]
566    last_yield = df["MilkYieldInterp"].iloc[-1]
567    mean_last = mean_lc.loc[last_day]
568
569    correction = correction_weighted * (last_yield - mean_last)
570
571    # --- Final sum -----------------------------------------------------
572    final_milk_yield = known_prod + population_mean + correction
573    return float(final_milk_yield)

Estimate 305-day milk yield for a single interpolated lactation from the original paper by Dr. Wilmink in 1987.

This function computes an estimated 305-day yield by combining three components: known production from interpolated measurements, the population mean for missing grid days (from standard_lc), and a correlation-based deviation correction derived from correlation_matrix.

Arguments:

df: DataFrame containing at least the columns GridDay and MilkYieldInterp (values already interpolated onto the grid).
standard_lc: Standard lactation curve values indexed by grid day.
correlation_matrix: Correlation coefficients between grid-day yields (matrix-like, rows/columns ordered as the grid).
std_per_grid_day: 1-D array of standard deviations for each grid day.

Returns:

A float with the estimated 305-day milk yield.

Raises:

ValueError: if any measured day in df is not present in the expected global grid.

Notes:

The routine assumes fixed recording intervals of 20 days, except that the final interval (if day 290 is present) is 25 days.

The implementation expects a global grid variable to be defined and aligned with standard_lc and std_per_grid_day.

The amount of estimated milk yields depends on the availability of measured milk yields.

Lactation yield is: sum(known measurements (p) + estimated measurements (q)).

In the implementation according to the CRV E2 document, there should also be a correction factor for the difference between the expected and actual complete lactation yield of the previous lactation. However, this correction factor is not included in the current implementation as it requires additional data (previous lactation yield) that is not always available.

def best_predict_method( df: pandas.core.frame.DataFrame, standard_lc: numpy.ndarray = STANDARD_CURVE, days_in_milk_col: str | None = None, milking_yield_col: str | None = None, test_id_col: str | None = None, default_test_id: int = 0, covariance_matrix: numpy.ndarray | None = COV_MATRIX, fit_standard_lc_from_data: bool = False, reference_df: pandas.core.frame.DataFrame | None = None) -> pandas.core.frame.DataFrame: View Source

499def best_predict_method(
500    df: pd.DataFrame,
501    standard_lc: np.ndarray = STANDARD_CURVE,
502    days_in_milk_col: str | None = None,
503    milking_yield_col: str | None = None,
504    test_id_col: str | None = None,
505    default_test_id: int = 0,
506    covariance_matrix: np.ndarray | None = COV_MATRIX,
507    fit_standard_lc_from_data: bool = False,
508    reference_df: pd.DataFrame | None = None,
509) -> pd.DataFrame:
510    """Apply best prediction to one or more lactations.
511
512    By default this function uses the package-provided standard curve and covariance matrix.
513    But it is also possible to provide your own standard curve and covariance matrix,
514    for example when you want to fit these ingredients from your own reference population.
515    This can be done in two ways: either by fitting the covariance matrix and standard curve
516    directly from a reference dataset by providing a pandas dataframe at 'reference_df ='
517    when ``fit_standard_lc_from_data`` is True.
518    Alternative for customization is to set standard_lc_305 and covariance_matrix
519    directly in the function call.
520
521    Args:
522        df: Input observations. If ``TestId`` is missing, all rows are treated
523            as one lactation.
524        standard_lc: Expected daily milk yield lactation curve on days 1..305.
525            If not provided, the package's default curve is used.
526            Or fit your own standard curve from a reference dataset by
527            providing a pandas dataframe at 'reference_df ='
528            when ``fit_standard_lc_from_data`` is True.
529        days_in_milk_col: Optional input column name for days in milk. If
530            provided, it is mapped to ``DaysInMilk``.
531        milking_yield_col: Optional input column name for milk yield. If
532            provided, it is mapped to ``MilkingYield``.
533        test_id_col: Optional input column name for lactation/test identifier.
534            If provided, it is mapped to ``TestId``.
535        default_test_id: Fallback test id used when no test-id column is
536            available.
537        covariance_matrix: Optional prefit covariance matrix. If omitted,
538            the default matrix is used or
539             ``reference_df`` can be used to fit one for your own data.
540        fit_standard_lc_from_data: Whether to fit covariance information from
541            ``reference_df`` instead of using a provided covariance matrix.
542        reference_df: Reference dataframe used when ``covariance_matrix`` and
543            ``standard_lc`` are not provided and ``fit_standard_lc_from_data``
544            is True.
545
546    Returns:
547        Dataframe with columns ``TestId`` and ``LactationMilkYield``.
548
549    Raises:
550        ValueError: If neither ``covariance_matrix`` nor ``reference_df`` is
551            provided.
552    """
553    # Standardize columns and filter DIM <= 305
554    df = standardize_lactation_columns(
555        df,
556        days_in_milk_col=days_in_milk_col,
557        milking_yield_col=milking_yield_col,
558        test_id_col=test_id_col,
559        default_test_id=default_test_id,
560        max_dim=305,
561    )
562
563    # Fit covariance if not provided
564    if fit_standard_lc_from_data:
565        if reference_df is None:
566            raise ValueError("Provide reference_df to fit your own standard lactation curve.")
567        reference_df = standardize_lactation_columns(
568            reference_df,
569            days_in_milk_col=days_in_milk_col,
570            milking_yield_col=milking_yield_col,
571            test_id_col=test_id_col,
572            default_test_id=default_test_id,
573            max_dim=305,
574        )
575        covariance_matrix = cast(
576            np.ndarray, fit_autocorrelation_matrix(reference_df, standard_lc)["B_hat"]
577        )
578
579    covariance_matrix_array = cast(np.ndarray, covariance_matrix)
580
581    df = df.copy()
582
583    results = []
584
585    for test_id, lactation in df.groupby("TestId"):
586        pred = best_predict_method_single_lac(
587            lactation,
588            standard_lc,
589            covariance_matrix_array,
590        )
591        results.append({"TestId": test_id, "LactationMilkYield": pred})
592
593    return pd.DataFrame(results)

Apply best prediction to one or more lactations.

By default this function uses the package-provided standard curve and covariance matrix. But it is also possible to provide your own standard curve and covariance matrix, for example when you want to fit these ingredients from your own reference population. This can be done in two ways: either by fitting the covariance matrix and standard curve directly from a reference dataset by providing a pandas dataframe at 'reference_df =' when fit_standard_lc_from_data is True. Alternative for customization is to set standard_lc_305 and covariance_matrix directly in the function call.

Arguments:

df: Input observations. If TestId is missing, all rows are treated as one lactation.
standard_lc: Expected daily milk yield lactation curve on days 1..305. If not provided, the package's default curve is used. Or fit your own standard curve from a reference dataset by providing a pandas dataframe at 'reference_df =' when fit_standard_lc_from_data is True.
days_in_milk_col: Optional input column name for days in milk. If provided, it is mapped to DaysInMilk.
milking_yield_col: Optional input column name for milk yield. If provided, it is mapped to MilkingYield.
test_id_col: Optional input column name for lactation/test identifier. If provided, it is mapped to TestId.
default_test_id: Fallback test id used when no test-id column is available.
covariance_matrix: Optional prefit covariance matrix. If omitted, the default matrix is used or reference_df can be used to fit one for your own data.
fit_standard_lc_from_data: Whether to fit covariance information from reference_df instead of using a provided covariance matrix.
reference_df: Reference dataframe used when covariance_matrix and standard_lc are not provided and fit_standard_lc_from_data is True.

Returns:

Dataframe with columns TestId and LactationMilkYield.

Raises:

ValueError: If neither covariance_matrix nor reference_df is provided.

def best_predict_method_single_lac( lactation: pandas.core.frame.DataFrame, standard_lc: numpy.ndarray = STANDARD_CURVE, covariance_matrix: numpy.ndarray = COV_MATRIX) -> float: View Source

429def best_predict_method_single_lac(
430    lactation: pd.DataFrame,
431    standard_lc: np.ndarray = STANDARD_CURVE,
432    covariance_matrix: np.ndarray = COV_MATRIX,
433) -> float:
434    """Predict 305-day cumulative yield for one lactation.
435
436    Observed test-day deviations are projected over all 305 days using the
437    covariance structure and then added to the baseline cumulative standard
438    curve.
439
440    By default this function uses the package-provided standard curve and covariance matrix.
441    But it is also possible to provide your own standard curve and covariance matrix,
442    for example when you want to fit these ingredients from your own reference population.
443
444
445    Args:
446        lactation: Observed records for one lactation.
447        standard_lc: Population mean daily yield profile.
448        covariance_matrix: Day-to-day covariance matrix on the 305-day grid.
449
450    Returns:
451        Predicted cumulative 305-day milk yield.
452
453    Notes:
454        Duplicate day records are resolved with ``keep="last"`` before
455        prediction. If no valid observations remain in days 1..305, the method
456        returns the cumulative standard curve.
457    """
458    filtered_lactation = lactation.loc[
459        (lactation["DaysInMilk"] >= 1) & (lactation["DaysInMilk"] <= 305)
460    ].copy()
461    filtered_lactation = filtered_lactation.drop_duplicates(subset=["DaysInMilk"], keep="last")
462    filtered_lactation = filtered_lactation.sort_values("DaysInMilk")
463
464    corrected_series = preprocess_measured_data(
465        filtered_lactation,
466        standard_lc=standard_lc,
467    )
468
469    if filtered_lactation.empty:
470        return float(np.sum(standard_lc))
471
472    obs_idx_1based = filtered_lactation["DaysInMilk"].to_numpy(dtype=int)  # DaysInMilk: 1-305
473    obs_idx_0based = obs_idx_1based - 1  # Convert to 0-based matrix indices: 0-304
474    y_obs = corrected_series.loc[obs_idx_1based].to_numpy(
475        dtype=float
476    )  # corrected_series is indexed by DaysInMilk (1-305)
477
478    # Extract covariance blocks
479    B_oo = covariance_matrix[
480        np.ix_(obs_idx_0based, obs_idx_0based)
481    ]  # Use 0-based indices for matrix
482    B_mo = covariance_matrix[:, obs_idx_0based]  # Use 0-based indices for matrix
483
484    # solve
485    c, lower = cho_factor(B_oo)
486    alpha = cho_solve((c, lower), y_obs)
487
488    # Predict full deviation curve
489    y_estimate = B_mo @ alpha
490
491    # Total milk = baseline + deviation
492    deviation = np.sum(y_estimate)
493
494    total = np.sum(standard_lc) + deviation
495
496    return total

Predict 305-day cumulative yield for one lactation.

Observed test-day deviations are projected over all 305 days using the covariance structure and then added to the baseline cumulative standard curve.

Arguments:

lactation: Observed records for one lactation.
standard_lc: Population mean daily yield profile.
covariance_matrix: Day-to-day covariance matrix on the 305-day grid.

Returns:

Predicted cumulative 305-day milk yield.

Notes:

Duplicate day records are resolved with keep="last" before prediction. If no valid observations remain in days 1..305, the method returns the cumulative standard curve.

def build_covariance_matrix(rho: float, size: int) -> numpy.ndarray: View Source

276def build_covariance_matrix(rho: float, size: int) -> np.ndarray:
277    """Construct a covariance matrix.
278
279    Cole et al. (2007) estimated correlations among test-day yields using a
280    simplified model with an identity matrix (I) for daily measurement error
281    and an autoregressive matrix (E) for biological change. E is defined as
282    ``Eij = r ** |i-j|`` where ``i`` and ``j`` are test-day DIM and
283    ``0 < r < 1``.
284
285    Element ``(i, j)`` is ``rho ** abs(i - j)``.
286
287    Args:
288        rho: AR(1) correlation parameter.
289        size: Matrix dimension.
290
291    Returns:
292        A ``(size, size)`` AR(1) correlation matrix.
293    """
294    idx = np.arange(size)
295    M = np.abs(idx[:, None] - idx[None, :])
296    return rho**M

Construct a covariance matrix.

Cole et al. (2007) estimated correlations among test-day yields using a simplified model with an identity matrix (I) for daily measurement error and an autoregressive matrix (E) for biological change. E is defined as Eij = r ** |i-j| where i and j are test-day DIM and 0 < r < 1.

Element (i, j) is rho ** abs(i - j).

Arguments:

rho: AR(1) correlation parameter.
size: Matrix dimension.

Returns:

A (size, size) AR(1) correlation matrix.

def calculate_characteristic( dim, milkrecordings, model='wood', characteristic='cumulative_milk_yield', fitting='frequentist', key=None, parity=3, breed='H', continent='USA', custom_priors=None, milk_unit='kg', persistency_method='derived', lactation_length=305) -> float | None: View Source

323def calculate_characteristic(
324    dim,
325    milkrecordings,
326    model="wood",
327    characteristic="cumulative_milk_yield",
328    fitting="frequentist",
329    key=None,
330    parity=3,
331    breed="H",
332    continent="USA",
333    custom_priors=None,
334    milk_unit="kg",
335    persistency_method="derived",
336    lactation_length=305,
337) -> float | None:
338    """Evaluate a lactation curve characteristic from observed test-day data.
339
340    This function fits the requested model (frequentist or Bayesian via MilkBot),
341    retrieves model parameters, and evaluates the requested characteristic using the
342    symbolic expression (if available), falling back to numeric methods when needed.
343
344    Args:
345        dim (Int): Days in milk (DIM).
346        milkrecordings (Float): Milk recordings (kg or lbs) for each DIM.
347        model (str): Model name. Supported for this function:
348            'milkbot', 'wood', 'wilmink', 'ali_schaeffer', 'fischer'.
349        characteristic (str): One of 'time_to_peak', 'peak_yield',
350            'cumulative_milk_yield', 'persistency'.
351        fitting (str): 'frequentist' (default) or 'bayesian'.
352        key (str | None): API key for MilkBot Bayesian fitting.
353        parity (Int): Parity of the cow; values above 3 are considered as 3 (Bayesian).
354        breed (str): 'H' (Holstein) or 'J' (Jersey) (Bayesian).
355        custom_priors: provide your own priors for Bayesian fitting.
356            provide as dictionary using build_prior() function
357            to create the priors in the right format.
358            Alternative use priors from the literature provided by the string command 'CHEN'
359        milk_unit: Unit of milk recordings ('kg' or 'lb') for Bayesian fitting.
360        continent (str): 'USA' or 'EU' (Bayesian).
361        persistency_method (str): 'derived' (average slope after peak; default) or 'literature'
362            (only for Wood and MilkBot).
363        lactation_length (Int | str): Horizon for persistency calculation:
364            305 (default), 'max' (use max DIM in data), or an integer.
365
366    Returns:
367        float | None: The requested characteristic value.
368        Or None if value is negative (except persistency)
369        or cannot be computed symbolically.
370
371    Raises:
372        Exception: If inputs are invalid, the model/characteristic is unsupported,
373            an API key is missing for Bayesian fitting, or the characteristic cannot be computed.
374    """
375    # check and prepare input
376    inputs = validate_and_prepare_inputs(
377        dim,
378        milkrecordings,
379        model=model,
380        fitting=fitting,
381        breed=breed,
382        parity=parity,
383        continent=continent,
384        custom_priors=custom_priors,
385        milk_unit=milk_unit,
386        persistency_method=persistency_method,
387        lactation_length=lactation_length,
388    )
389
390    dim = inputs.dim
391    milkrecordings = inputs.milkrecordings
392    model = inputs.model
393    fitting = inputs.fitting
394    breed = inputs.breed
395    parity = inputs.parity
396    continent = inputs.continent
397    custom_priors = inputs.custom_priors
398    milk_unit = inputs.milk_unit
399    persistency_method = inputs.persistency_method
400    lactation_length = inputs.lactation_length
401
402    # After validation, these fields are guaranteed non-None for the paths below
403    assert model is not None
404    assert fitting is not None
405
406    if model not in ["milkbot", "wood", "wilmink", "ali_schaeffer", "fischer"]:
407        msg = (
408            "this function currently only works for"
409            " the milkbot, wood, wilmink,"
410            " ali_schaeffer and fischer models"
411        )
412        raise Exception(msg)
413
414    characteristic_options: list[str] = [
415        "time_to_peak",
416        "peak_yield",
417        "cumulative_milk_yield",
418        "persistency",
419    ]
420
421    if characteristic in characteristic_options:
422        if fitting == "frequentist":
423            # Get fitted parameters from your fitting function
424            fitted_params = get_lc_parameters(dim, milkrecordings, model)
425            assert fitted_params is not None
426
427            if characteristic != "persistency":
428                # Try symbolic formula first
429                assert isinstance(lactation_length, int)
430                try:
431                    expr, params, fn = lactation_curve_characteristic_function(
432                        model, characteristic, lactation_length
433                    )
434                    with np.errstate(
435                        divide="ignore", invalid="ignore"
436                    ):  # get rid of warnings for invalid operations
437                        value = fn(*fitted_params)
438                except Exception:
439                    value = None
440
441                # If symbolic formula fails or is invalid (use numeric approach)
442                if (
443                    value is None
444                    or not np.isfinite(value)
445                    or (characteristic == "time_to_peak" and value <= 0)
446                ):
447                    assert parity is not None
448                    assert breed is not None
449                    assert continent is not None
450                    if characteristic == "time_to_peak":
451                        value = numeric_time_to_peak(
452                            dim,
453                            milkrecordings,
454                            model,
455                            fitting=fitting,
456                            key=key,
457                            parity=parity,
458                            breed=breed,
459                            continent=continent,
460                        )
461                    elif characteristic == "peak_yield":
462                        value = numeric_peak_yield(
463                            dim,
464                            milkrecordings,
465                            model,
466                            fitting=fitting,
467                            key=key,
468                            parity=parity,
469                            breed=breed,
470                            continent=continent,
471                        )
472                    elif characteristic == "cumulative_milk_yield":
473                        value = numeric_cumulative_yield(
474                            dim,
475                            milkrecordings,
476                            model,
477                            fitting=fitting,
478                            lactation_length=lactation_length,
479                            key=key,
480                            parity=parity,
481                            breed=breed,
482                            continent=continent,
483                        )
484
485            else:
486                if persistency_method == "derived":
487                    # find lactation length from data
488                    if lactation_length == "max":
489                        lactation_length = max(dim)
490                    elif isinstance(lactation_length, int):
491                        lactation_length = lactation_length
492                    else:
493                        lactation_length = 305
494                    value = persistency_fitted_curve(
495                        dim,
496                        milkrecordings,
497                        model,
498                        fitting="frequentist",
499                        lactation_length=lactation_length,
500                    )
501
502                else:
503                    if model == "wood":
504                        value = persistency_wood(fitted_params[1], fitted_params[2])
505
506                    elif model == "milkbot":
507                        assert len(fitted_params) >= 4
508                        value = persistency_milkbot(fitted_params[3])
509
510                    else:
511                        msg = (
512                            "Currently only the Wood model"
513                            " and MilkBot model have a"
514                            " separate model function from"
515                            " the literature integrated for"
516                            " persistency. if"
517                            ' persistency="derived" is'
518                            " selected, persistency can be"
519                            " calculated for every model as"
520                            " the average slope of the"
521                            " lactation after the peak."
522                        )
523                        raise Exception(msg)
524            try:
525                if value is None:
526                    return None
527                if characteristic != "persistency" and value < 0:
528                    return None
529                return float(value)
530            except ValueError:
531                raise Exception("Could not compute characteristic")
532
533        else:
534            if model == "milkbot":
535                if key is None:
536                    raise Exception("Key needed to use Bayesian fitting engine MilkBot")
537                else:
538                    assert parity is not None
539                    assert breed is not None
540                    assert continent is not None
541                    assert isinstance(milk_unit, str)
542                    fitted_params_bayes = bayesian_fit_milkbot_single_lactation(
543                        dim,
544                        milkrecordings,
545                        key=key,
546                        parity=parity,
547                        breed=breed,
548                        custom_priors=custom_priors,
549                        continent=continent,
550                        milk_unit=milk_unit,
551                    )
552                    fitted_params_bayes = (
553                        fitted_params_bayes["scale"],
554                        fitted_params_bayes["ramp"],
555                        fitted_params_bayes["offset"],
556                        fitted_params_bayes["decay"],
557                    )
558
559                    if characteristic != "persistency":
560                        # Get the symbolic expression and model parameters
561                        try:
562                            expr, param_symbols, fn = lactation_curve_characteristic_function(
563                                model, characteristic
564                            )
565                            with np.errstate(divide="ignore", invalid="ignore"):
566                                value = fn(*fitted_params_bayes)
567                        except Exception:
568                            value = None
569
570                        if (
571                            value is None
572                            or not np.isfinite(value)
573                            or (characteristic == "time_to_peak" and value <= 0)
574                        ):
575                            if characteristic == "time_to_peak":
576                                value = numeric_time_to_peak(
577                                    dim,
578                                    milkrecordings,
579                                    model,
580                                    fitting=fitting,
581                                    key=key,
582                                    parity=parity,
583                                    breed=breed,
584                                    custom_priors=custom_priors,
585                                    milk_unit=milk_unit,
586                                    continent=continent,
587                                )
588                            elif characteristic == "peak_yield":
589                                value = numeric_peak_yield(
590                                    dim,
591                                    milkrecordings,
592                                    model,
593                                    fitting=fitting,
594                                    key=key,
595                                    parity=parity,
596                                    breed=breed,
597                                    custom_priors=custom_priors,
598                                    milk_unit=milk_unit,
599                                    continent=continent,
600                                )
601                            elif characteristic == "cumulative_milk_yield":
602                                if lactation_length == "max":
603                                    numeric_lactation_length = max(dim)
604                                elif isinstance(lactation_length, int):
605                                    numeric_lactation_length = lactation_length
606                                else:
607                                    numeric_lactation_length = 305
608                                value = numeric_cumulative_yield(
609                                    dim,
610                                    milkrecordings,
611                                    model,
612                                    fitting=fitting,
613                                    lactation_length=numeric_lactation_length,
614                                    key=key,
615                                    parity=parity,
616                                    breed=breed,
617                                    custom_priors=custom_priors,
618                                    milk_unit=milk_unit,
619                                    continent=continent,
620                                )
621
622                    else:
623                        if persistency_method == "derived":
624                            # find lactation length from data
625                            if lactation_length == "max":
626                                lactation_length = max(dim)
627                            elif isinstance(lactation_length, int):
628                                lactation_length = lactation_length
629                            else:
630                                lactation_length = 305
631                            assert isinstance(milk_unit, str)
632                            value = persistency_fitted_curve(
633                                dim,
634                                milkrecordings,
635                                model,
636                                fitting="bayesian",
637                                key=key,
638                                parity=parity,
639                                breed=breed,
640                                custom_priors=custom_priors,
641                                milk_unit=milk_unit,
642                                continent=continent,
643                                lactation_length=lactation_length,
644                            )
645
646                        else:
647                            value = persistency_milkbot(fitted_params_bayes[3])
648
649                try:
650                    if value is None:
651                        return None
652                    if characteristic != "persistency" and value < 0:
653                        return None
654                    return float(value)
655                except ValueError:
656                    raise Exception("Could not compute characteristic")
657            else:
658                raise Exception("Bayesian fitting is currently only implemented for MilkBot models")
659
660    else:
661        raise Exception("Unknown characteristic")

Evaluate a lactation curve characteristic from observed test-day data.

This function fits the requested model (frequentist or Bayesian via MilkBot), retrieves model parameters, and evaluates the requested characteristic using the symbolic expression (if available), falling back to numeric methods when needed.

Arguments:

dim (Int): Days in milk (DIM).
milkrecordings (Float): Milk recordings (kg or lbs) for each DIM.
model (str): Model name. Supported for this function: 'milkbot', 'wood', 'wilmink', 'ali_schaeffer', 'fischer'.
characteristic (str): One of 'time_to_peak', 'peak_yield', 'cumulative_milk_yield', 'persistency'.
fitting (str): 'frequentist' (default) or 'bayesian'.
key (str | None): API key for MilkBot Bayesian fitting.
parity (Int): Parity of the cow; values above 3 are considered as 3 (Bayesian).
breed (str): 'H' (Holstein) or 'J' (Jersey) (Bayesian).
custom_priors: provide your own priors for Bayesian fitting. provide as dictionary using build_prior() function to create the priors in the right format. Alternative use priors from the literature provided by the string command 'CHEN'
milk_unit: Unit of milk recordings ('kg' or 'lb') for Bayesian fitting.
continent (str): 'USA' or 'EU' (Bayesian).
persistency_method (str): 'derived' (average slope after peak; default) or 'literature' (only for Wood and MilkBot).
lactation_length (Int | str): Horizon for persistency calculation: 305 (default), 'max' (use max DIM in data), or an integer.

Returns:

float | None: The requested characteristic value. Or None if value is negative (except persistency) or cannot be computed symbolically.

Raises:

Exception: If inputs are invalid, the model/characteristic is unsupported, an API key is missing for Bayesian fitting, or the characteristic cannot be computed.

def center_lactation_data( milk_matrix: numpy.ndarray, standard_lc: numpy.ndarray, day_mean_method: str = 'standard_lc') -> numpy.ndarray: View Source

247def center_lactation_data(
248    milk_matrix: np.ndarray,
249    standard_lc: np.ndarray,
250    day_mean_method: str = "standard_lc",
251) -> np.ndarray:
252    """Center lactation yields before covariance estimation.
253
254    Args:
255        milk_matrix: Yield matrix with lactations in rows and days in columns.
256        standard_lc: Expected day-wise milk yield profile.
257        day_mean_method: Mean-centering strategy. Supported values are
258            ``"standard_lc"`` (default) and ``"data"``.
259
260    Returns:
261        A centered matrix with the same shape as ``milk_matrix``.
262
263    Raises:
264        ValueError: If ``day_mean_method`` is not supported.
265    """
266    if day_mean_method == "standard_lc":
267        day_mean = standard_lc
268    elif day_mean_method == "data":
269        day_mean = np.nanmean(milk_matrix, axis=0)
270    else:
271        raise ValueError("day_mean_method must be 'standard_lc' or 'data'.")
272
273    return milk_matrix - day_mean

Center lactation yields before covariance estimation.

Arguments:

milk_matrix: Yield matrix with lactations in rows and days in columns.
standard_lc: Expected day-wise milk yield profile.
day_mean_method: Mean-centering strategy. Supported values are "standard_lc" (default) and "data".

Returns:

A centered matrix with the same shape as milk_matrix.

Raises:

ValueError: If day_mean_method is not supported.

def create_standard_lc_representation( df: pandas.core.frame.DataFrame, standard_lactation_curve: pandas.core.series.Series, days_in_milk_col: str, milking_yield_col: str, interpolation_method: lactationcurve.characteristics.ISLC.InterpolationMethod = <function interpolation_standard_lc>, lc_model: str = 'Wilmink', small_grid: bool = False) -> tuple[pandas.core.frame.DataFrame, NDArray[numpy.float64], pandas.core.series.Series]: View Source

 936def create_standard_lc_representation(
 937    df: pd.DataFrame,
 938    standard_lactation_curve: pd.Series,
 939    days_in_milk_col: str,
 940    milking_yield_col: str,
 941    interpolation_method: InterpolationMethod = interpolation_standard_lc,
 942    lc_model: str = "Wilmink",
 943    small_grid: bool = False,
 944) -> tuple[pd.DataFrame, npt.NDArray[np.float64], pd.Series]:
 945    """Create a standard lactation curve with correlation matrix from data.
 946
 947    This function estimates the correlation structure and standard deviations
 948    of milk yields across grid days, and refits a Wilmink lactation curve model to
 949    the interpolated data. The Wilmink model is hereby the default,
 950    but any model from the LC package can be used.
 951    The outputs can be used as inputs to the
 952    ``ISLC_method`` or ``ISLC_original`` functions for predicting 305-d yields.
 953
 954    The process standardizes yields per animal and computes row-wise
 955    (per-animal) statistics, then derives relationships across grid days.
 956
 957    Args:
 958        df: Input DataFrame containing test-day records. Must include a
 959            ``TestId`` column to identify unique lactations. Also requires
 960            columns specified by ``days_in_milk_col`` and ``milking_yield_col``.
 961        standard_lactation_curve: A Series providing a reference lactation
 962            curve (e.g., the fitted Wilmink curve on which to base
 963            interpolation).
 964        days_in_milk_col: Name of the column in ``df`` containing days in milk (DIM).
 965        milking_yield_col: Name of the column in ``df`` containing measured
 966            milk yields.
 967        interpolation_method: An interpolation method conforming to
 968            ``InterpolationMethod`` protocol (default: ``interpolation_standard_lc``).
 969            Must be callable with signature
 970            (group, days_in_milk_col, milking_yield_col, standard_lc).
 971        lc_model: The lactation curve model to fit to the interpolated data. Default is "Wilmink".
 972        small_grid: If True, build representation on GRID_DAYS only.
 973
 974    Returns:
 975        A tuple of three elements:
 976        - corr (pd.DataFrame): Correlation matrix between grid-day yields.
 977        - std_per_grid_day (np.ndarray): Standard deviations of standardized
 978          yields per grid day.
 979        - standard_lactation_curve_grid (pd.Series): Refitted Wilmink model
 980          indexed by DIM (1–305).
 981
 982    Notes:
 983        - Expects ``df`` to have a ``TestId`` column to group lactations.
 984        - Performs standardization per animal (row-wise), so output
 985          correlations and standard deviations reflect between-day variation
 986          within standardized animal profiles.
 987        - Uses ``fit_lactation_curve`` from the ``lactationcurve`` package
 988          with model='Wilmink' and fitting='frequentist'.
 989          However other models can be used by specifying the ``lc_model`` argument.
 990    """
 991
 992    if "TestId" not in df.columns:
 993        raise ValueError("DataFrame must contain a 'TestId' column.")
 994    if days_in_milk_col not in df.columns or milking_yield_col not in df.columns:
 995        raise ValueError(f"Columns '{days_in_milk_col}' and '{milking_yield_col}' must be in df.")
 996
 997    interpolated_groups: list[pd.DataFrame] = []
 998    for test_id, group in df.groupby("TestId", sort=False):
 999        try:
1000            interp_group = interpolation_method(
1001                group,
1002                days_in_milk_col=days_in_milk_col,
1003                milking_yield_col=milking_yield_col,
1004                standard_lc=standard_lactation_curve,
1005                small_grid=small_grid,
1006            )
1007        except TypeError:
1008            # Backward compatibility for custom interpolation callables
1009            # that do not yet accept small_grid.
1010            interp_group = interpolation_method(
1011                group,
1012                days_in_milk_col=days_in_milk_col,
1013                milking_yield_col=milking_yield_col,
1014                standard_lc=standard_lactation_curve,
1015            )
1016        if interp_group.empty:
1017            continue
1018        if "TestId" not in interp_group.columns:
1019            interp_group = interp_group.copy()
1020            interp_group["TestId"] = test_id
1021        interpolated_groups.append(interp_group)
1022
1023    if not interpolated_groups:
1024        raise ValueError("No interpolated rows were produced; cannot build representation.")
1025
1026    df_grid = pd.concat(interpolated_groups, ignore_index=True)
1027
1028    # define Znp
1029    # pivot df to make GridDay the columns
1030    # Z should be the unscaled pivot table (TestId × GridDay)
1031    Z = df_grid.pivot(index="TestId", columns="GridDay", values="MilkYieldInterp")
1032
1033    # Standardize records per cow (so each row has mean=0, std=1)
1034    Z_std = (Z.sub(Z.mean(axis=1), axis=0)).div(Z.std(axis=1), axis=0)
1035
1036    # Convert to NumPy array
1037    Znp = Z_std.to_numpy()
1038
1039    # calculate correlation matrix
1040    corr = pd.DataFrame(Znp, columns=Z.columns).corr()
1041
1042    # calculate standard deviations for each grid day from the Znp matrix
1043    std_per_grid_day = np.nanstd(Znp, axis=0)  # ignores NaN
1044
1045    # fit Wilmink model to get mean values and std for each cow
1046    standard_lactation_curve_grid = pd.Series(
1047        fit_lactation_curve(
1048            df_grid["GridDay"].values,
1049            df_grid["MilkYieldInterp"].values,
1050            model=lc_model,
1051            fitting="frequentist",
1052        ),
1053        index=range(1, 306),
1054    )
1055    return corr, std_per_grid_day, standard_lactation_curve_grid

Create a standard lactation curve with correlation matrix from data.

This function estimates the correlation structure and standard deviations of milk yields across grid days, and refits a Wilmink lactation curve model to the interpolated data. The Wilmink model is hereby the default, but any model from the LC package can be used. The outputs can be used as inputs to the ISLC_method or ISLC_original functions for predicting 305-d yields.

The process standardizes yields per animal and computes row-wise (per-animal) statistics, then derives relationships across grid days.

Arguments:

df: Input DataFrame containing test-day records. Must include a TestId column to identify unique lactations. Also requires columns specified by days_in_milk_col and milking_yield_col.
standard_lactation_curve: A Series providing a reference lactation curve (e.g., the fitted Wilmink curve on which to base interpolation).
days_in_milk_col: Name of the column in df containing days in milk (DIM).
milking_yield_col: Name of the column in df containing measured milk yields.
interpolation_method: An interpolation method conforming to InterpolationMethod protocol (default: interpolation_standard_lc). Must be callable with signature (group, days_in_milk_col, milking_yield_col, standard_lc).
lc_model: The lactation curve model to fit to the interpolated data. Default is "Wilmink".
small_grid: If True, build representation on GRID_DAYS only.

Returns:

A tuple of three elements:

corr (pd.DataFrame): Correlation matrix between grid-day yields.

std_per_grid_day (np.ndarray): Standard deviations of standardized yields per grid day.

standard_lactation_curve_grid (pd.Series): Refitted Wilmink model indexed by DIM (1–305).

Notes:

Expects df to have a TestId column to group lactations.

Performs standardization per animal (row-wise), so output correlations and standard deviations reflect between-day variation within standardized animal profiles.

Uses fit_lactation_curve from the lactationcurve package with model='Wilmink' and fitting='frequentist'. However other models can be used by specifying the lc_model argument.

def fit_autocorrelation_matrix( df: pandas.core.frame.DataFrame, standard_lc: numpy.ndarray) -> dict[str, numpy.ndarray | float]: View Source

299def fit_autocorrelation_matrix(
300    df: pd.DataFrame, standard_lc: np.ndarray
301) -> dict[str, np.ndarray | float]:
302    """Estimate covariance parameters for best prediction.
303
304    The model is ``B = b1 * I + b2 * E`` where ``E`` is an AR(1) correlation
305    matrix. Parameters are optimized in transformed space and mapped back to
306    enforce ``b1 > 0``, ``b2 > 0``, and ``0 < rho < 1``.
307
308    Args:
309        df: Reference milk-recording dataframe.
310        standard_lc: Population mean curve used for centering.
311
312    Returns:
313        Dictionary with:
314        - ``"B_hat"``: fitted covariance matrix.
315        - ``"R_hat"``: correlation matrix derived from ``B_hat``.
316        - ``"b1"``, ``"b2"``, ``"rho"``: fitted scalar parameters.
317    """
318    milk_matrix = pivot_milk_recordings_to_matrix(df)
319    centered_matrix = center_lactation_data(milk_matrix, standard_lc)
320    n_lactations, n_days = centered_matrix.shape
321    observed_indices = [np.where(~np.isnan(centered_matrix[i]))[0] for i in range(n_lactations)]
322
323    def negative_log_likelihood(params: np.ndarray) -> float:
324        p_b1, p_b2, p_rho = params
325        b1 = float(np.exp(p_b1))
326        b2 = float(np.exp(p_b2))
327        rho = float(1 / (1 + np.exp(-p_rho)))  # now rho in (0,1)
328        correlation_matrix = build_covariance_matrix(rho, n_days)
329
330        total = 0.0
331        for lactation_idx, day_indices in enumerate(observed_indices):
332            observation_count = len(day_indices)
333            if observation_count == 0:
334                continue
335
336            observations = centered_matrix[lactation_idx, day_indices]
337            correlation_subset = correlation_matrix[np.ix_(day_indices, day_indices)]
338            sigma = b1 * np.eye(observation_count) + b2 * correlation_subset
339
340            # Numerical safeguards: try Cholesky and penalize non-PD parameters.
341            try:
342                cholesky_factor, lower = cho_factor(sigma, check_finite=False)
343                solution = cho_solve((cholesky_factor, lower), observations, check_finite=False)
344            except LinAlgError:
345                # penalty for non-PD
346                return float(1e12 + np.sum(np.abs(params)))
347
348            quadratic_form = float(observations @ solution)
349            log_determinant = 2.0 * np.sum(np.log(np.diag(cholesky_factor)))
350            total += 0.5 * (
351                log_determinant + quadratic_form + observation_count * np.log(2 * np.pi)
352            )
353
354        # return total negative log-likelihood
355        return float(total)
356
357    # initial guesses and optimization. A 50/50 split in variance is assumed as starting point
358    initial_variance = max(float(np.nanvar(centered_matrix)), 1e-6)
359    initial_params = [
360        np.log(0.5 * initial_variance),
361        np.log(0.5 * initial_variance),
362        0.5,
363    ]
364
365    result = minimize(
366        negative_log_likelihood,
367        x0=initial_params,
368        method="L-BFGS-B",
369        options={"maxiter": 2000, "ftol": 1e-8},
370    )
371
372    if not result.success:
373        print(f"Optimization warning: {result.message}")
374
375    log_b1_hat, log_b2_hat, logit_rho_hat = result.x
376    b1_hat = float(np.exp(log_b1_hat))
377    b2_hat = float(np.exp(log_b2_hat))
378    rho_hat = float(1 / (1 + np.exp(-logit_rho_hat)))
379    correlation_matrix = build_covariance_matrix(rho_hat, n_days)
380    covariance_matrix = b1_hat * np.eye(n_days) + b2_hat * correlation_matrix
381
382    # convert to correlation matrix
383    std = np.sqrt(np.diag(covariance_matrix))
384    correlation_matrix = covariance_matrix / np.outer(std, std)
385
386    return {
387        "B_hat": covariance_matrix,
388        "R_hat": correlation_matrix,
389        "b1": b1_hat,
390        "b2": b2_hat,
391        "rho": rho_hat,
392    }

Estimate covariance parameters for best prediction.

The model is B = b1 * I + b2 * E where E is an AR(1) correlation matrix. Parameters are optimized in transformed space and mapped back to enforce b1 > 0, b2 > 0, and 0 < rho < 1.

Arguments:

df: Reference milk-recording dataframe.
standard_lc: Population mean curve used for centering.

Returns:

Dictionary with:

"B_hat": fitted covariance matrix.

"R_hat": correlation matrix derived from B_hat.

"b1", "b2", "rho": fitted scalar parameters.

def fit_standard_lc(df: pandas.core.frame.DataFrame, lc_model: str = 'Wood') -> numpy.ndarray: View Source

216def fit_standard_lc(df: pd.DataFrame, lc_model: str = "Wood") -> np.ndarray:
217    """Fit a population-level standard lactation curve.
218
219    The curve is fit with the package's frequentist Wood model and returned on
220    the fixed day-1..305 grid.
221
222    Args:
223        df: Reference dataframe containing ``DaysInMilk`` and ``MilkingYield``.
224        lc_model: The lactation curve model to fit.
225        Default is the "Wood" lactation curve model.
226
227    Returns:
228        A NumPy array of expected daily milk yield for days 1..305.
229
230    Notes:
231        This mean curve acts as the baseline in best prediction. Individual
232        lactations are represented as deviations around this population profile.
233    """
234    standard_lc = pd.Series(
235        fit_lactation_curve(
236            df["DaysInMilk"].values,
237            df["MilkingYield"].values,
238            model=lc_model,
239            fitting="frequentist",
240        ),
241        index=range(1, 306),
242    )
243
244    return standard_lc.to_numpy(dtype=float)

Fit a population-level standard lactation curve.

The curve is fit with the package's frequentist Wood model and returned on the fixed day-1..305 grid.

Arguments:

df: Reference dataframe containing DaysInMilk and MilkingYield.
lc_model: The lactation curve model to fit.
Default is the "Wood" lactation curve model.

Returns:

A NumPy array of expected daily milk yield for days 1..305.

Notes:

This mean curve acts as the baseline in best prediction. Individual lactations are represented as deviations around this population profile.

def interpolation_standard_lc( group: pandas.core.frame.DataFrame, days_in_milk_col: str, milking_yield_col: str, standard_lc: pandas.core.series.Series | NDArray[numpy.float64] | None = None, small_grid: bool = False) -> pandas.core.frame.DataFrame: View Source

703def interpolation_standard_lc(
704    group: pd.DataFrame,
705    days_in_milk_col: str,
706    milking_yield_col: str,
707    standard_lc: pd.Series | npt.NDArray[np.float64] | None = None,
708    small_grid: bool = False,
709) -> pd.DataFrame:
710    """Interpolate a single lactation onto the DIM grid guided by a standard curve.
711
712    The function interpolates measured yields for one animal onto the
713    predefined grid of DIM values. When a grid day coincides with a measured
714    DIM the observed yield is used; otherwise the interpolation adjusts the
715    linear slope between neighboring measurements by the difference in the
716    standard curve, inspired by the CRV E2 approach.
717
718    The interpolation formula applied for a grid day ``gday`` between two
719    measured points (x1,y1) and (x2,y2) is::
720
721        slope = ((y2 - y1) - (g2 - g1)) / (x2 - x1)
722        yi = gi + (slope * (gday - x1) + (y1 - g1))
723
724    Args:
725        group: DataFrame with test-day records for a single animal. Must
726            contain columns named by ``days_in_milk_col`` and
727            ``milking_yield_col``.
728        days_in_milk_col: Name of the DIM column in ``group``.
729        milking_yield_col: Name of the milk-yield column in ``group``.
730        standard_lc: A Series indexed by DIM providing the expected yield on
731            each grid day; used to guide the interpolation shape.
732        small_grid: If True, interpolate on GRID_DAYS only; otherwise include
733            day 0 and day 305 in the interpolation target grid.
734
735    Returns:
736        A DataFrame with one row per interpolated grid day containing the
737        original identifying columns (from the first row of ``group``), plus
738        the columns ``GridDay`` and ``MilkYieldInterp``.
739
740    Notes:
741        - No interpolation is performed outside the range bounded by the
742          first and last measured DIM for the lactation (those grid days are
743          skipped).
744                - No interpolation is performed for grid days where the right
745                    neighboring measured DIM is >= 305, because this lies outside the
746                    intended standard-curve support.
747        - The implementation defines a local default ``grid`` of
748          [10, 30, ..., 290].
749
750
751    """
752    if small_grid:
753        grid = GRID_DAYS
754    else:
755        # add zero and 305 to the grid
756        grid = [0] + GRID_DAYS + [305]
757
758    if standard_lc is None:
759        raise ValueError("A standard lactation curve is required for interpolation_standard_lc.")
760
761    standard_lc_series = _curve_to_series(standard_lc)
762
763    # Sort and ensure unique DIMs
764    group = group.sort_values(days_in_milk_col).drop_duplicates(days_in_milk_col)
765    dims = group[days_in_milk_col].tolist()
766
767    rows = []
768
769    # loop over the grid days
770    for gday in grid:
771        # --- CASE 1: Exact measured day ---------------------------
772        if gday in dims:
773            y_val = group.loc[group[days_in_milk_col] == gday, milking_yield_col].iloc[0]
774
775            row = group.iloc[0].to_dict()
776            row["GridDay"] = gday
777            row["MilkYieldInterp"] = float(y_val)
778            rows.append(row)
779            continue
780
781        # --- CASE 2: interpolate between measured days -------------
782        before_df = cast(pd.DataFrame, group.loc[group[days_in_milk_col] < gday])
783        after_df = cast(pd.DataFrame, group.loc[group[days_in_milk_col] > gday])
784        before = before_df.tail(1)
785        after = after_df.head(1)
786
787        # CASE 3 No interpolation if grid day is outside the range of measured DIM
788        if before.empty or after.empty:
789            continue
790
791        # CASE 4: No interpolation if the day after is day 305 or higher,
792        # as this is outside the range of the standard curve
793        if float(after.iloc[0][days_in_milk_col]) >= 305:
794            continue
795
796        x1 = float(before.iloc[0][days_in_milk_col])
797        y1 = float(before.iloc[0][milking_yield_col])
798        x2 = float(after.iloc[0][days_in_milk_col])
799        y2 = float(after.iloc[0][milking_yield_col])
800
801        # expected yields from standard lactation curve
802        g1 = _curve_value(standard_lc_series, int(x1))
803        g2 = _curve_value(standard_lc_series, int(x2))
804        gi = _curve_value(standard_lc_series, int(gday))
805
806        # Wilmink-based interpolation formula
807        slope = ((y2 - y1) - (g2 - g1)) / (x2 - x1)
808        yi = gi + (slope * (gday - x1) + (y1 - g1))
809
810        row = group.iloc[0].to_dict()
811        row["GridDay"] = gday
812        row["MilkYieldInterp"] = float(yi)
813        rows.append(row)
814
815    if not rows:
816        return pd.DataFrame(
817            columns=pd.Index([*group.columns.to_list(), "GridDay", "MilkYieldInterp"])
818        )
819
820    return pd.DataFrame(rows)

Interpolate a single lactation onto the DIM grid guided by a standard curve.

The function interpolates measured yields for one animal onto the predefined grid of DIM values. When a grid day coincides with a measured DIM the observed yield is used; otherwise the interpolation adjusts the linear slope between neighboring measurements by the difference in the standard curve, inspired by the CRV E2 approach.

The interpolation formula applied for a grid day gday between two measured points (x1,y1) and (x2,y2) is::

slope = ((y2 - y1) - (g2 - g1)) / (x2 - x1)
yi = gi + (slope * (gday - x1) + (y1 - g1))

Arguments:

group: DataFrame with test-day records for a single animal. Must contain columns named by days_in_milk_col and milking_yield_col.
days_in_milk_col: Name of the DIM column in group.
milking_yield_col: Name of the milk-yield column in group.
standard_lc: A Series indexed by DIM providing the expected yield on each grid day; used to guide the interpolation shape.
small_grid: If True, interpolate on GRID_DAYS only; otherwise include day 0 and day 305 in the interpolation target grid.

Returns:

A DataFrame with one row per interpolated grid day containing the original identifying columns (from the first row of group), plus the columns GridDay and MilkYieldInterp.

Notes:

No interpolation is performed outside the range bounded by the first and last measured DIM for the lactation (those grid days are skipped). - No interpolation is performed for grid days where the right neighboring measured DIM is >= 305, because this lies outside the intended standard-curve support.

The implementation defines a local default grid of [10, 30, ..., 290].

def lactation_curve_characteristic_function(model='wood', characteristic=None, lactation_length=305) -> tuple: View Source

 95def lactation_curve_characteristic_function(
 96    model="wood", characteristic=None, lactation_length=305
 97) -> tuple:
 98    """Build (or fetch from cache) a symbolic expression and fast numeric function for an LCC.
 99
100    This function derives the requested **lactation curve characteristic** for a given
101    model using SymPy (derivative / root finding / integration). It returns the symbolic
102    expression, the tuple of parameter symbols (argument order), and a lambdified
103    numeric function that can be evaluated with numerical parameters.
104
105    The symbolic derivation and integration are done only once per (model, characteristic)
106    and then **cached** for reuse.
107
108    Args:
109        model (str): Model name. Options:
110            'milkbot', 'wood', 'wilmink', 'ali_schaeffer', 'fischer',
111            'brody', 'sikka', 'nelder', 'dhanoa', 'emmans', 'hayashi',
112            'rook', 'dijkstra', 'prasad'.
113        characteristic (str | None): Desired characteristic. Options:
114            'time_to_peak', 'peak_yield', 'cumulative_milk_yield', 'persistency'.
115            If `None` or unrecognized, a dict of all available characteristics is returned
116            (with `persistency` possibly `None` if derivation is not feasible).
117        lactation_length (int): Length of lactation in days used in persistency
118            and cumulative milk yield computation (default 305).
119
120    Returns:
121        tuple:
122            expr: SymPy expression (or dict of expressions if `characteristic` is None).
123            params: Tuple of SymPy symbols for model parameters (argument order).
124            func: Lambdified numeric function `f(*params)` (or dict of functions).
125
126    Raises:
127        Exception: If the model is unknown, or if no positive real solution for
128            peak timing/yield exists where required. In this case None is returned
129             for the characteristic,
130             and the function can be evaluated with numeric methods as a fallback.
131    """
132    # check the cache
133    storage = (model, characteristic)
134    if storage in _LCC_CACHE:
135        return (
136            _LCC_CACHE[storage]["expr"],
137            _LCC_CACHE[storage]["params"],
138            _LCC_CACHE[storage]["func"],
139        )
140
141    # make sure model is all lowercase
142    model = model.lower()
143
144    # define functions
145    if model == "brody":
146        # === BRODY 1 ===
147        a, b, k1, k2, t = symbols("a b k1 k2 t", real=True, positive=True)
148        function = a * exp(-k1 * t) - b * exp(-k2 * t)
149
150    elif model == "sikka":
151        # === SIKKA ===
152        a, b, c, t = symbols("a b c t", real=True, positive=True)
153        function = a * exp(b * t - c * t**2)
154
155    elif model == "fischer":
156        # === FISCHER ===
157        a, b, c, t = symbols("a b c t", real=True, positive=True)
158        function = a - b * t - a * exp(-c * t)
159
160    elif model == "nelder":
161        # === NELDER ===
162        a, b, c, t = symbols("a b c t", real=True, positive=True)
163        function = t / (a + b * t + c * t**2)
164
165    elif model == "wood":
166        # === WOOD ===
167        a, b, c, t = symbols("a b c t", real=True, positive=True)
168        function = a * t**b * exp(-c * t)
169
170    elif model == "dhanoa":
171        # === DHANOA ===
172        a, b, c, t = symbols("a b c t", real=True, positive=True)
173        function = a * t ** (b * c) * exp(-c * t)
174
175    elif model == "emmans":
176        # === EMMANS ===
177        a, b, c, d, t = symbols("a b c d t", real=True, positive=True)
178        function = a * exp(-exp(d - b * t)) * exp(-c * t)
179
180    elif model == "ali_schaeffer":
181        # ====ALI=====
182        a, b, c, d, k, t = symbols("a b c d k t", real=True)
183
184        t1 = t / 305
185        w1 = log(305 / t)
186
187        function = a + b * t1 + c * t1**2 + d * w1 + k * w1**2
188
189    elif model == "wilmink":
190        # === WILMINK ===
191        a, c, k, t = symbols("a c k t", real=True, positive=True)
192        b = symbols("b", real=True)  # b can be negative in Wilmink
193        function = a + b * exp(-k * t) - c * t
194
195    elif model == "hayashi":
196        # === HAYASHI ===
197        a, b, c, t = symbols("a b c t", real=True, positive=True)
198        function = b * (exp(-t / c) - exp(-t / (a * c)))
199
200    elif model == "rook":
201        # === ROOK ===
202        a, b, c, d, t = symbols("a b c d t", real=True, positive=True)
203        function = a * (1 / (1 + b / (c + t))) * exp(-d * t)
204
205    elif model == "dijkstra":
206        # === DIJKSTRA ===
207        a, b, c, d, t = symbols("a b c d t", real=True, positive=True)
208        function = a * exp((b * (1 - exp(-c * t)) / c) - d * t)
209
210    elif model == "prasad":
211        # === PRASAD ===
212        a, b, c, d, t = symbols("a b c d t", real=True, positive=True)
213        function = a + b * t + c * t**2 + d / t
214
215    elif model == "milkbot":
216        # === MILKBOT ===
217        a, b, c, d, t = symbols("a b c d t", real=True, positive=True)
218        function = a * (1 - exp((c - t) / b) / 2) * exp(-d * t)
219
220    else:
221        raise Exception("Unknown model")
222
223    # solve derivative for when it is zero to find the function for time of peak
224    fdiff = diff(function, t)
225
226    tpeak = None  # default
227
228    valid_roots = []
229
230    try:
231        candidate_roots = solve(fdiff, t)
232
233        for r in candidate_roots:
234            r_simplified = simplify(r)
235
236            if r_simplified.is_real is not False:
237                valid_roots.append(r_simplified)
238
239    except Exception:
240        valid_roots = []
241
242    tpeak = valid_roots[0] if valid_roots else None
243
244    # override to prevent sympy error
245    if model == "dijkstra":
246        tpeak = simplify(log(b / d) / c)
247
248    # define the end of lactation
249    T = lactation_length  # days in milk
250
251    # Persistency = average slope after peak, does not work for all models so therefore try except
252    persistency = None
253    try:
254        if tpeak:
255            tmp = (function.subs(t, T) - function.subs(t, tpeak)) / (T - tpeak)
256            tmp = tmp.cancel()  # light simplification
257            if is_valid_sympy_expr(tmp):
258                persistency = tmp
259    except Exception:
260        persistency = None
261
262    if characteristic != "cumulative_milk_yield":
263        if tpeak:  # Check if the list is not empty
264            peak_expr = simplify(function.subs(t, tpeak))
265        else:
266            raise Exception("No real solution for time to peak and peak yield found")
267
268    # find function for cumulative milk yield of the lactation.
269    # A lactation length of 305 is the default, but this value can
270    # also be provided as a parameter in the characteristic function.
271    cum_my_expr = integrate(function, (t, 0, lactation_length), meijerg=False)
272
273    if cum_my_expr == 0:
274        cum_my_expr = Integral(function, (t, 0, lactation_length))
275
276    # Sorted parameter list (exclude t)
277    params = tuple(
278        sorted([s for s in function.free_symbols if s.name != "t"], key=lambda x: x.name)
279    )
280
281    # ----------------------------------------------------
282    # Select requested characteristic
283    # ----------------------------------------------------
284    if characteristic == "time_to_peak":
285        expr = tpeak
286    elif characteristic == "peak_yield":
287        expr = peak_expr
288    elif characteristic == "cumulative_milk_yield":
289        expr = cum_my_expr
290    elif characteristic == "persistency":
291        if persistency is None:
292            raise Exception("Persistency could not be computed symbolically")
293        expr = persistency
294    else:
295        # Return all four if None or 'all'
296        expr = {
297            "time_to_peak": tpeak,
298            "peak_yield": peak_expr,
299            "persistency": persistency,  # possibly None
300            "cumulative_milk_yield": cum_my_expr,
301        }
302
303    # ----------------------------------------------------
304    # Build fast numeric function with lambdify
305    # ----------------------------------------------------
306    if isinstance(expr, dict):
307        func = {
308            name: lambdify(params, ex, modules=["numpy", "scipy"])
309            for name, ex in expr.items()
310            if ex is not None
311        }
312    else:
313        func = lambdify(params, expr, modules=["numpy", "scipy"])
314
315    # ----------------------------------------------------
316    # Store in cache
317    # ----------------------------------------------------
318    _LCC_CACHE[storage] = {"expr": expr, "params": params, "func": func}
319
320    return expr, params, func

Build (or fetch from cache) a symbolic expression and fast numeric function for an LCC.

This function derives the requested lactation curve characteristic for a given model using SymPy (derivative / root finding / integration). It returns the symbolic expression, the tuple of parameter symbols (argument order), and a lambdified numeric function that can be evaluated with numerical parameters.

The symbolic derivation and integration are done only once per (model, characteristic) and then cached for reuse.

Arguments:

model (str): Model name. Options: 'milkbot', 'wood', 'wilmink', 'ali_schaeffer', 'fischer', 'brody', 'sikka', 'nelder', 'dhanoa', 'emmans', 'hayashi', 'rook', 'dijkstra', 'prasad'.
characteristic (str | None): Desired characteristic. Options: 'time_to_peak', 'peak_yield', 'cumulative_milk_yield', 'persistency'. If None or unrecognized, a dict of all available characteristics is returned (with persistency possibly None if derivation is not feasible).
lactation_length (int): Length of lactation in days used in persistency and cumulative milk yield computation (default 305).

Returns:

tuple: expr: SymPy expression (or dict of expressions if characteristic is None). params: Tuple of SymPy symbols for model parameters (argument order). func: Lambdified numeric function f(*params) (or dict of functions).

Raises:

Exception: If the model is unknown, or if no positive real solution for peak timing/yield exists where required. In this case None is returned for the characteristic, and the function can be evaluated with numeric methods as a fallback.

def linear_interpd_all_to_grid( group: pandas.core.frame.DataFrame, days_in_milk_col: str, milking_yield_col: str, standard_lc: pandas.core.series.Series | None = None, small_grid: bool = False) -> pandas.core.frame.DataFrame: View Source

823def linear_interpd_all_to_grid(
824    group: pd.DataFrame,
825    days_in_milk_col: str,
826    milking_yield_col: str,
827    standard_lc: pd.Series | None = None,
828    small_grid: bool = False,
829) -> pd.DataFrame:
830    """Linearly interpolate all grid days for a lactation.
831
832    This helper uses linear interpolation (with extrapolation) to produce
833    milk-yield values for every grid day regardless of whether the grid day
834    lies between measured observations.
835
836    Args:
837        group: DataFrame containing measured DIM and yield values for one
838            lactation.
839        days_in_milk_col: Name of the DIM column.
840        milking_yield_col: Name of the milk-yield column.
841        small_grid: If True, interpolate on GRID_DAYS only; otherwise include
842            day 0 and day 305.
843
844    Returns:
845        A DataFrame with identifying columns copied from ``group``'s first
846        row and columns ``GridDay`` and ``MilkYieldInterp`` containing the
847        interpolated (or extrapolated) yields for every grid day.
848    """
849    if small_grid:
850        grid = GRID_DAYS
851    else:
852        # add zero and 305 to the grid
853        grid = [0] + GRID_DAYS + [305]
854
855    group = group.sort_values(days_in_milk_col).drop_duplicates(subset=days_in_milk_col)
856    f = interp1d(
857        group[days_in_milk_col],
858        group[milking_yield_col],
859        kind="linear",
860        fill_value=cast(Any, "extrapolate"),
861    )
862
863    base = {
864        col: group[col].iloc[0]
865        for col in group.columns
866        if col not in [days_in_milk_col, milking_yield_col]
867    }
868    base["GridDay"] = grid
869    base["MilkYieldInterp"] = f(grid)
870
871    return pd.DataFrame(base)

Linearly interpolate all grid days for a lactation.

This helper uses linear interpolation (with extrapolation) to produce milk-yield values for every grid day regardless of whether the grid day lies between measured observations.

Arguments:

group: DataFrame containing measured DIM and yield values for one lactation.
days_in_milk_col: Name of the DIM column.
milking_yield_col: Name of the milk-yield column.
small_grid: If True, interpolate on GRID_DAYS only; otherwise include day 0 and day 305.

Returns:

A DataFrame with identifying columns copied from group's first row and columns GridDay and MilkYieldInterp containing the interpolated (or extrapolated) yields for every grid day.

def linear_interpd_closest_to_grid( group: pandas.core.frame.DataFrame, days_in_milk_col: str, milking_yield_col: str, standard_lc: pandas.core.series.Series | None = None, small_grid: bool = False) -> pandas.core.frame.DataFrame | None: View Source

876def linear_interpd_closest_to_grid(
877    group: pd.DataFrame,
878    days_in_milk_col: str,
879    milking_yield_col: str,
880    standard_lc: pd.Series | None = None,
881    small_grid: bool = False,
882) -> pd.DataFrame | None:
883    """Linearly interpolate grid days between measured observations.
884
885    This helper returns interpolated yields only for grid days that lie
886    between the first and last measured DIM for the lactation. If no grid
887    days fall within the measured range the function returns ``None``.
888
889    Args:
890        group: DataFrame for a single lactation.
891        days_in_milk_col: Name of the DIM column.
892        milking_yield_col: Name of the milk-yield column.
893        small_grid: If True, use GRID_DAYS only; otherwise include
894            day 0 and day 305.
895
896    Returns:
897        A DataFrame with identifying columns plus ``GridDay`` and
898        ``MilkYieldInterp``, or ``None`` if there are no grid days within
899        the measured range.
900    """
901    if small_grid:
902        grid = GRID_DAYS
903    else:
904        # add zero and 305 to the grid
905        grid = [0] + GRID_DAYS + [305]
906
907    group = group.sort_values(days_in_milk_col).drop_duplicates(subset=days_in_milk_col)
908
909    # Find the range of interpolatable DIM
910    min_dim = group[days_in_milk_col].min()
911    max_dim = group[days_in_milk_col].max()
912    grid_in_range = [g for g in grid if min_dim <= g <= max_dim]
913
914    # apply linear interpolation
915    if not grid_in_range:
916        return None
917    f = interp1d(
918        group[days_in_milk_col],
919        group[milking_yield_col],
920        kind="linear",
921        fill_value=cast(Any, np.nan),
922    )
923
924    # create a new dataframe with the newly created columns
925    base = {
926        col: group[col].iloc[0]
927        for col in group.columns
928        if col not in [days_in_milk_col, milking_yield_col]
929    }
930    base["GridDay"] = grid_in_range
931    base["MilkYieldInterp"] = f(grid_in_range)
932
933    return pd.DataFrame(base)

Linearly interpolate grid days between measured observations.

This helper returns interpolated yields only for grid days that lie between the first and last measured DIM for the lactation. If no grid days fall within the measured range the function returns None.

Arguments:

group: DataFrame for a single lactation.
days_in_milk_col: Name of the DIM column.
milking_yield_col: Name of the milk-yield column.
small_grid: If True, use GRID_DAYS only; otherwise include day 0 and day 305.

Returns:

A DataFrame with identifying columns plus GridDay and MilkYieldInterp, or None if there are no grid days within the measured range.

def numeric_cumulative_yield( dim, milkrecordings, model, fitting='frequentist', lactation_length=305, **kwargs) -> float: View Source

720def numeric_cumulative_yield(
721    dim, milkrecordings, model, fitting="frequentist", lactation_length=305, **kwargs
722) -> float:
723    """Compute cumulative milk yield numerically over a given horizon.
724
725    Adds up the fitted milk yield for the first `lactation_length` days of the
726    predicted yield curve.
727
728    Args:
729        dim: DIM values.
730        milkrecordings: Milk recordings (kg).
731        model: Model name.
732        fitting: 'frequentist' or 'bayesian'.
733        lactation_length: Number of days to integrate (default 305).
734        **kwargs: Additional arguments forwarded to `fit_lactation_curve`.
735
736    Returns:
737        float: Cumulative milk yield over the specified horizon.
738    """
739    y = fit_lactation_curve(dim, milkrecordings, model, fitting=fitting, **kwargs)
740    return float(np.trapezoid(y[:lactation_length], dx=1))

Compute cumulative milk yield numerically over a given horizon.

Adds up the fitted milk yield for the first lactation_length days of the predicted yield curve.

Arguments:

dim: DIM values.
milkrecordings: Milk recordings (kg).
model: Model name.
fitting: 'frequentist' or 'bayesian'.
lactation_length: Number of days to integrate (default 305).
**kwargs: Additional arguments forwarded to fit_lactation_curve.

Returns:

float: Cumulative milk yield over the specified horizon.

def numeric_peak_yield( dim, milkrecordings, model, fitting='frequentist', key=None, parity=3, breed='H', custom_priors=None, milk_unit='kg', continent='USA') -> float: View Source

743def numeric_peak_yield(
744    dim,
745    milkrecordings,
746    model,
747    fitting="frequentist",
748    key=None,
749    parity=3,
750    breed="H",
751    custom_priors=None,
752    milk_unit="kg",
753    continent="USA",
754) -> float:
755    """Compute peak yield numerically from the fitted curve.
756
757    Args:
758        dim: DIM values.
759        milkrecordings: Milk recordings (kg).
760        model: Model name.
761        fitting: 'frequentist' or 'bayesian'.
762        key: API key for MilkBot (Bayesian).
763        parity: Parity for Bayesian fitting.
764        breed: Breed for Bayesian fitting ('H' or 'J').
765        continent: Prior source for Bayesian ('USA', 'EU', 'CHEN').
766
767    Returns:
768        float: Maximum predicted milk yield.
769    """
770    # Fit the curve to get predicted milk yields
771    yields = fit_lactation_curve(
772        dim,
773        milkrecordings,
774        model,
775        fitting=fitting,
776        key=key,
777        parity=parity,
778        breed=breed,
779        custom_priors=custom_priors,
780        milk_unit=milk_unit,
781        continent=continent,
782    )
783    # Find the peak yield
784    peak_yield = np.max(yields)
785    return peak_yield

Compute peak yield numerically from the fitted curve.

Arguments:

dim: DIM values.
milkrecordings: Milk recordings (kg).
model: Model name.
fitting: 'frequentist' or 'bayesian'.
key: API key for MilkBot (Bayesian).
parity: Parity for Bayesian fitting.
breed: Breed for Bayesian fitting ('H' or 'J').
continent: Prior source for Bayesian ('USA', 'EU', 'CHEN').

Returns:

float: Maximum predicted milk yield.

def numeric_time_to_peak( dim, milkrecordings, model, fitting='frequentist', key=None, parity=3, breed='H', custom_priors=None, milk_unit='kg', continent='USA') -> int: View Source

665def numeric_time_to_peak(
666    dim,
667    milkrecordings,
668    model,
669    fitting="frequentist",
670    key=None,
671    parity=3,
672    breed="H",
673    custom_priors=None,
674    milk_unit="kg",
675    continent="USA",
676) -> int:
677    """Compute time to peak using a numeric approach.
678
679    Fits the curve (frequentist or Bayesian), evaluates the predicted yields,
680    and returns the DIM corresponding to the maximum predicted yield.
681
682    Args:
683        dim: DIM values.
684        milkrecordings: Milk recordings (kg).
685        model: Model name.
686        fitting: 'frequentist' or 'bayesian'.
687        key: API key for MilkBot (Bayesian).
688        parity: Parity for Bayesian fitting.
689        breed: Breed for Bayesian fitting ('H' or 'J').
690        custom_priors: provide your own priors for Bayesian fitting.
691            provide as dictionary using build_prior() function to set
692            the priors in the right format. Alternative use priors from
693            the literature provided by the string command 'CHEN'
694        milk_unit: Unit of milk recordings ('kg' or 'lb') for Bayesian fitting.
695        continent: Prior source for Bayesian ('USA', 'EU').
696
697
698    Returns:
699        int: DIM at which the curve attains its maximum (1-indexed).
700    """
701    # Fit the curve to get predicted milk yields
702    yields = fit_lactation_curve(
703        dim,
704        milkrecordings,
705        model,
706        fitting=fitting,
707        key=key,
708        parity=parity,
709        breed=breed,
710        custom_priors=custom_priors,
711        milk_unit=milk_unit,
712        continent=continent,
713    )
714    # Find the index of the peak yield
715    peak_idx = np.argmax(yields)
716    # Return the corresponding DIM
717    return int(peak_idx + 1)  # +1 because DIM starts at 1, not 0

Compute time to peak using a numeric approach.

Fits the curve (frequentist or Bayesian), evaluates the predicted yields, and returns the DIM corresponding to the maximum predicted yield.

Arguments:

dim: DIM values.
milkrecordings: Milk recordings (kg).
model: Model name.
fitting: 'frequentist' or 'bayesian'.
key: API key for MilkBot (Bayesian).
parity: Parity for Bayesian fitting.
breed: Breed for Bayesian fitting ('H' or 'J').
custom_priors: provide your own priors for Bayesian fitting. provide as dictionary using build_prior() function to set the priors in the right format. Alternative use priors from the literature provided by the string command 'CHEN'
milk_unit: Unit of milk recordings ('kg' or 'lb') for Bayesian fitting.
continent: Prior source for Bayesian ('USA', 'EU').

Returns:

int: DIM at which the curve attains its maximum (1-indexed).

def persistency_fitted_curve( dim, milkrecordings, model, fitting='frequentist', key=None, parity=3, breed='H', custom_priors=None, milk_unit='kg', continent='USA', lactation_length=305) -> float: View Source

813def persistency_fitted_curve(
814    dim,
815    milkrecordings,
816    model,
817    fitting="frequentist",
818    key=None,
819    parity=3,
820    breed="H",
821    custom_priors=None,
822    milk_unit="kg",
823    continent="USA",
824    lactation_length=305,
825) -> float:
826    """Persistency as the average slope after peak until end of lactation (numeric).
827
828    This is the default approach because symbolic derivation is not feasible for
829    all models. It computes:
830        `(yield_at_end - yield_at_peak) / (lactation_length - time_to_peak)`
831
832    Args:
833        dim: DIM values.
834        milkrecordings: Milk recordings (kg).
835        model: Model name. Options include 'milkbot' (Bayesian or frequentist),
836            'wood', 'wilmink', 'ali_schaeffer', 'fischer'.
837        fitting: 'frequentist' or 'bayesian'.
838        key: API key (only for Bayesian fitting).
839        parity: Parity of the cow; values above 3 treated as 3 (Bayesian).
840        breed: 'H' or 'J' (Bayesian).
841        custom_priors: provide your own priors for Bayesian fitting.
842            provide as dictionary using build_prior() function to
843            create the priors in the right format. Alternative use
844            priors from the literature provided by the string
845            command 'CHEN'
846        milk_unit: Unit of milk recordings ('kg' or 'lb') for Bayesian fitting.
847        continent: 'USA', 'EU', or 'CHEN' (Bayesian).
848        lactation_length (int | str): 305 (default), 'max' (use max DIM), or integer.
849
850    Returns:
851        float: Average slope after the peak until end of lactation.
852    """
853    # calculate time to peak
854    t_peak = numeric_time_to_peak(
855        dim,
856        milkrecordings,
857        model,
858        fitting=fitting,
859        key=key,
860        parity=parity,
861        breed=breed,
862        custom_priors=custom_priors,
863        milk_unit=milk_unit,
864        continent=continent,
865    )
866
867    # determine lactation length
868    if lactation_length == "max":
869        lactation_length = max(dim)
870    elif isinstance(lactation_length, int):
871        lactation_length = lactation_length
872    else:
873        lactation_length = 305
874
875    # calculate milk yield at peak
876    yields = fit_lactation_curve(
877        dim,
878        milkrecordings,
879        model,
880        fitting=fitting,
881        key=key,
882        parity=parity,
883        breed=breed,
884        custom_priors=custom_priors,
885        milk_unit=milk_unit,
886        continent=continent,
887    )
888    peak_yield = yields[int(t_peak) - 1]  # -1 to prevent index error
889    # calculate milk yield at end of lactation
890    end_yield = yields[int(lactation_length) - 1]  # -1 to prevent index error
891
892    # calculate persistency
893    persistency = (end_yield - peak_yield) / (lactation_length - t_peak)
894    return persistency

Persistency as the average slope after peak until end of lactation (numeric).

This is the default approach because symbolic derivation is not feasible for all models. It computes: (yield_at_end - yield_at_peak) / (lactation_length - time_to_peak)

Arguments:

dim: DIM values.
milkrecordings: Milk recordings (kg).
model: Model name. Options include 'milkbot' (Bayesian or frequentist), 'wood', 'wilmink', 'ali_schaeffer', 'fischer'.
fitting: 'frequentist' or 'bayesian'.
key: API key (only for Bayesian fitting).
parity: Parity of the cow; values above 3 treated as 3 (Bayesian).
breed: 'H' or 'J' (Bayesian).
custom_priors: provide your own priors for Bayesian fitting. provide as dictionary using build_prior() function to create the priors in the right format. Alternative use priors from the literature provided by the string command 'CHEN'
milk_unit: Unit of milk recordings ('kg' or 'lb') for Bayesian fitting.
continent: 'USA', 'EU', or 'CHEN' (Bayesian).
lactation_length (int | str): 305 (default), 'max' (use max DIM), or integer.

Returns:

float: Average slope after the peak until end of lactation.

def persistency_milkbot(d) -> float: View Source

801def persistency_milkbot(d) -> float:
802    """Persistency from the MilkBot model (Ehrlich, 2013): `Persistency = 0.693 / d`.
803
804    Args:
805        d (float): Parameter `d` of the MilkBot model.
806
807    Returns:
808        float: Persistency value from the MilkBot formula.
809    """
810    return 0.693 / d

Persistency from the MilkBot model (Ehrlich, 2013): Persistency = 0.693 / d.

Arguments:

d (float): Parameter d of the MilkBot model.

Returns:

float: Persistency value from the MilkBot formula.

def persistency_wood(b, c) -> float: View Source

788def persistency_wood(b, c) -> float:
789    """Persistency from Wood et al. (1984): `Persistency = -(b + 1) * ln(c)`.
790
791    Args:
792        b (float): Parameter `b` of the Wood model.
793        c (float): Parameter `c` of the Wood model.
794
795    Returns:
796        float: Persistency value from the Wood formula.
797    """
798    return float(-(b + 1) * ln(c))

Persistency from Wood et al. (1984): Persistency = -(b + 1) * ln(c).

Arguments:

b (float): Parameter b of the Wood model.
c (float): Parameter c of the Wood model.

Returns:

float: Persistency value from the Wood formula.

def pivot_milk_recordings_to_matrix(df: pandas.core.frame.DataFrame) -> numpy.ndarray: View Source

188def pivot_milk_recordings_to_matrix(df: pd.DataFrame) -> np.ndarray:
189    """Convert long-format recordings to a fixed 305-day matrix.
190
191    Rows represent lactations (``TestId``) and columns represent days in milk
192    from 1 through 305. Missing observations are kept as ``NaN``.
193
194    Args:
195        df: Dataframe with ``TestId``, ``DaysInMilk``, and ``MilkingYield``.
196
197    Returns:
198        A NumPy array of shape ``(n_lactations, 305)``.
199    """
200    # ensure sorting
201    df = df.sort_values(["TestId", "DaysInMilk"])
202
203    # pivot to wide format
204    milk_recordings_pivot = df.pivot_table(
205        index="TestId", columns="DaysInMilk", values="MilkingYield"
206    )
207
208    # enforce fixed 305-day grid alignment used by best-prediction
209    milk_recordings_pivot = milk_recordings_pivot.reindex(columns=range(1, 306))
210
211    # convert to numpy matrix
212    Y = milk_recordings_pivot.to_numpy()
213    return Y

Convert long-format recordings to a fixed 305-day matrix.

Rows represent lactations (TestId) and columns represent days in milk from 1 through 305. Missing observations are kept as NaN.

Arguments:

df: Dataframe with TestId, DaysInMilk, and MilkingYield.

Returns:

A NumPy array of shape (n_lactations, 305).

def preprocess_measured_data( lactation: pandas.core.frame.DataFrame, standard_lc: numpy.ndarray) -> pandas.core.series.Series: View Source

398def preprocess_measured_data(lactation: pd.DataFrame, standard_lc: np.ndarray) -> pd.Series:
399    """Build a 305-day deviation vector for a single lactation.
400
401    For observed days, this computes ``MilkingYield - standard_lc[day]``.
402    The result is reindexed to days 1..305 with unobserved days filled as zero.
403
404    Args:
405        lactation: Single-lactation dataframe with ``DaysInMilk`` and
406            ``MilkingYield``.
407        standard_lc: Expected daily milk yield profile.
408
409    Returns:
410        A Series indexed by day 1..305 containing milk-yield deviations.
411    """
412
413    # calculate the difference between the expected (population mean) and measured milk yield
414
415    # extract the expected milk yields for the measured DaysInMilk in the df
416    day_idx = lactation["DaysInMilk"].to_numpy(dtype=int) - 1
417    expected = np.asarray(standard_lc, dtype=float)[day_idx]
418
419    # Subtract
420    lactation["MilkDifference"] = lactation["MilkingYield"].to_numpy(dtype=float) - expected
421
422    # Create a Series of length 305 with missing values = 0
423    milk_difference = cast(pd.Series, lactation.set_index("DaysInMilk")["MilkDifference"])
424    corrected_series = milk_difference.reindex(range(1, 306), fill_value=0)
425
426    return corrected_series

Build a 305-day deviation vector for a single lactation.

For observed days, this computes MilkingYield - standard_lc[day]. The result is reindexed to days 1..305 with unobserved days filled as zero.

Arguments:

lactation: Single-lactation dataframe with DaysInMilk and MilkingYield.
standard_lc: Expected daily milk yield profile.

Returns:

A Series indexed by day 1..305 containing milk-yield deviations.

def test_interval_method( df: pandas.core.frame.DataFrame, days_in_milk_col: str | None = None, milking_yield_col: str | None = None, test_id_col: str | None = None, default_test_id: int = 0, max_dim: int = 305) -> pandas.core.frame.DataFrame: View Source

 88def test_interval_method(
 89    df: pd.DataFrame,
 90    days_in_milk_col: str | None = None,
 91    milking_yield_col: str | None = None,
 92    test_id_col: str | None = None,
 93    default_test_id: int = 0,
 94    max_dim: int = 305,
 95) -> pd.DataFrame:
 96    """Compute total lactation milk yield using the ICAR Test Interval Method.
 97
 98    The method applies:
 99    - First test day milk yield from calving to the first test day,
100    - Trapezoidal integration between consecutive test days,
101    - Last test day milk yield from the last test day to DIM = max_dim (default = 305).
102
103    Args:
104        df (pd.DataFrame): Input DataFrame with at least DaysInMilk and
105            MilkingYield columns, plus an optional TestId column. Column names
106            can be provided explicitly or matched via known aliases.
107        days_in_milk_col (str | None): Optional column name override for
108            DaysInMilk.
109        milking_yield_col (str | None): Optional column name override for
110            MilkingYield.
111        test_id_col (str | None): Optional column name override for TestId.
112        default_test_id (int): Value used to create a default TestId column if
113            one is missing.
114        max_dim (int): Lactation length used to calculate cumulative
115            production. The default is 305 days.
116            Records with DIM > max_dim are excluded.
117
118    Returns:
119        pd.DataFrame: Two-column DataFrame with
120            - "TestId": identifier per lactation,
121            - "LactationMilkYield": computed total milk yield over the
122              specified window.
123
124    Raises:
125        ValueError: If required columns (DaysInMilk or MilkingYield) cannot be found.
126
127    Notes:
128        - Records with DIM > max_dim are dropped before computation.
129        - At least two data points per TestId are required for trapezoidal integration;
130          otherwise the lactation is skipped.
131    """
132
133    # Standardize columns and filter DIM <= max_dim
134    df = standardize_lactation_columns(
135        df,
136        days_in_milk_col=days_in_milk_col,
137        milking_yield_col=milking_yield_col,
138        test_id_col=test_id_col,
139        default_test_id=default_test_id,
140        max_dim=max_dim,
141    )
142
143    result = []
144
145    # Iterate over each lactation
146    for lactation in df["TestId"].unique():
147        lactation_df = pd.DataFrame(df[df["TestId"] == lactation])
148
149        # Sort by DaysInMilk ascending
150        lactation_df.sort_values(by="DaysInMilk", ascending=True, inplace=True)
151
152        if len(lactation_df) < 2:
153            print(f"Skipping TestId {lactation}: not enough data points for interpolation.")
154            continue
155
156        # Start and end points
157        start = lactation_df.iloc[0]
158        end = lactation_df.iloc[-1]
159
160        # Start contribution
161        MY0 = start["DaysInMilk"] * start["MilkingYield"]
162
163        # End contribution
164        MYend = (max_dim + 1 - end["DaysInMilk"]) * end["MilkingYield"]
165
166        # Intermediate trapezoidal contributions
167        lactation_df["width"] = lactation_df["DaysInMilk"].diff().shift(-1)
168        lactation_df["avg_yield"] = (
169            lactation_df["MilkingYield"] + lactation_df["MilkingYield"].shift(-1)
170        ) / 2
171        lactation_df["trapezoid_area"] = lactation_df["width"] * lactation_df["avg_yield"]
172
173        total_intermediate = lactation_df["trapezoid_area"].sum()
174
175        total_yield = MY0 + total_intermediate + MYend
176        result.append((lactation, total_yield))
177
178    return pd.DataFrame(result, columns=pd.Index(["TestId", "LactationMilkYield"]))

Compute total lactation milk yield using the ICAR Test Interval Method.

The method applies:

First test day milk yield from calving to the first test day,
Trapezoidal integration between consecutive test days,
Last test day milk yield from the last test day to DIM = max_dim (default = 305).

Arguments:

df (pd.DataFrame): Input DataFrame with at least DaysInMilk and MilkingYield columns, plus an optional TestId column. Column names can be provided explicitly or matched via known aliases.
days_in_milk_col (str | None): Optional column name override for DaysInMilk.
milking_yield_col (str | None): Optional column name override for MilkingYield.
test_id_col (str | None): Optional column name override for TestId.
default_test_id (int): Value used to create a default TestId column if one is missing.
max_dim (int): Lactation length used to calculate cumulative production. The default is 305 days. Records with DIM > max_dim are excluded.

Returns:

pd.DataFrame: Two-column DataFrame with - "TestId": identifier per lactation, - "LactationMilkYield": computed total milk yield over the specified window.

Raises:

ValueError: If required columns (DaysInMilk or MilkingYield) cannot be found.

Notes:

Records with DIM > max_dim are dropped before computation.

At least two data points per TestId are required for trapezoidal integration; otherwise the lactation is skipped.