lactationcurve.characteristics
1from . import best_predict, lactation_curve_characteristics, method_test_interval 2from .best_predict import ( 3 best_predict_method, 4 best_predict_method_single_lac, 5 build_covariance_matrix, 6 center_lactation_data, 7 fit_autocorrelation_matrix, 8 fit_standard_lc, 9 pivot_milk_recordings_to_matrix, 10 preprocess_measured_data, 11) 12from .ISLC import ( 13 ISLC, 14 ISLC_method, 15 ISLC_original, 16 ISLC_original_method, 17 create_standard_lc_representation, 18 interpolation_standard_lc, 19 linear_interpd_all_to_grid, 20 linear_interpd_closest_to_grid, 21) 22from .lactation_curve_characteristics import ( 23 calculate_characteristic, 24 lactation_curve_characteristic_function, 25 numeric_cumulative_yield, 26 numeric_peak_yield, 27 numeric_time_to_peak, 28 persistency_fitted_curve, 29 persistency_milkbot, 30 persistency_wood, 31) 32from .method_test_interval import test_interval_method 33 34__all__ = [ 35 "ISLC", 36 "ISLC_method", 37 "ISLC_original", 38 "ISLC_original_method", 39 "best_predict", 40 "best_predict_method", 41 "best_predict_method_single_lac", 42 "build_covariance_matrix", 43 "calculate_characteristic", 44 "center_lactation_data", 45 "create_standard_lc_representation", 46 "fit_autocorrelation_matrix", 47 "fit_standard_lc", 48 "interpolation_standard_lc", 49 "lactation_curve_characteristics", 50 "lactation_curve_characteristic_function", 51 "linear_interpd_all_to_grid", 52 "linear_interpd_closest_to_grid", 53 "method_test_interval", 54 "numeric_cumulative_yield", 55 "numeric_peak_yield", 56 "numeric_time_to_peak", 57 "persistency_fitted_curve", 58 "persistency_milkbot", 59 "persistency_wood", 60 "pivot_milk_recordings_to_matrix", 61 "preprocess_measured_data", 62 "test_interval_method", 63]
220def ISLC( 221 df: pd.DataFrame, 222 days_in_milk_col: str | None = None, 223 milking_yield_col: str | None = None, 224 test_id_col: str | None = None, 225 default_test_id: int = 0, 226 standard_lc_305: pd.Series | npt.NDArray[np.float64] = STANDARD_CURVE, 227 correlation_matrix: pd.DataFrame = CORR_MATRIX, 228 std_per_grid_day: npt.NDArray[np.float64] = STDs, 229 max_dim: int | str = 305, 230 fit_standard_lc_from_data: bool = False, 231 reference_df: pd.DataFrame | None = None, 232) -> pd.DataFrame: 233 """Estimate cumulative lactation yield per TestId using ISLC. 234 235 This variant applies :func:`ISLC_method` to standardized lactation records. 236 By default it uses the package-provided standard curve, correlation matrix, 237 and per-grid day standard deviations. Optionally, these assets can be refit 238 from a reference dataset by providing a pandas dataframe at 'reference_df =' 239 when ``fit_standard_lc_from_data`` is True. 240 Alternative for customization is to set standard_lc_305, 241 correlation_matrix, and std_per_grid_day directly. 242 243 Args: 244 df: Input test-day records. 245 days_in_milk_col: Name of DIM column in ``df``. 246 milking_yield_col: Name of milk-yield column in ``df``. 247 test_id_col: Name of lactation identifier column in ``df``. 248 default_test_id: Fallback TestId value when ``test_id_col`` is absent. 249 standard_lc_305: Standard lactation curve values used by ISLC. 250 correlation_matrix: Correlation matrix aligned with ISLC grid. 251 std_per_grid_day: Standard deviations aligned with ISLC grid. 252 max_dim: Maximum DIM to include 253 (or ``"max"`` so highest test day DIM is considered the final lactation day). 254 fit_standard_lc_from_data: If True, fit representation from ``reference_df``. 255 reference_df: Reference records used when fitting curve and covariance. 256 257 Returns: 258 DataFrame with columns ``TestId`` and ``LactationMilkYield``. 259 260 Raises: 261 ValueError: If required columns are missing or fit mode has no reference data. 262 """ 263 # Standardize columns and filter DIM <= 305 264 df = standardize_lactation_columns( 265 df, 266 days_in_milk_col=days_in_milk_col, 267 milking_yield_col=milking_yield_col, 268 test_id_col=test_id_col, 269 default_test_id=default_test_id, 270 max_dim=max_dim, 271 ) 272 273 # Fit covariance/standard_lc if requested 274 if fit_standard_lc_from_data is True: 275 if reference_df is None: 276 raise ValueError("Provide reference_df to fit your own standard lactation curve.") 277 reference_df = standardize_lactation_columns( 278 reference_df, 279 days_in_milk_col=days_in_milk_col, 280 milking_yield_col=milking_yield_col, 281 test_id_col=test_id_col, 282 default_test_id=default_test_id, 283 max_dim=305, 284 ) 285 dim = reference_df["DaysInMilk"] 286 milk_yield = reference_df["MilkingYield"] 287 standard_full_lc_305 = fit_lactation_curve(dim, milkrecordings=milk_yield, model="wilmink") 288 # convert to pandas series 289 standard_full_lc_305 = pd.Series(standard_full_lc_305) 290 291 # fit matrix 292 ( 293 correlation_matrix, 294 std_per_grid_day, 295 standard_curve_fitted, 296 ) = create_standard_lc_representation( 297 df=reference_df, 298 standard_lactation_curve=pd.Series(standard_full_lc_305), 299 # reference_df is standardized above, so fixed names are required here 300 days_in_milk_col="DaysInMilk", 301 milking_yield_col="MilkingYield", 302 interpolation_method=interpolation_standard_lc, 303 small_grid=False, 304 ) 305 306 # Align fitted curve with default internal convention (day 0 equals day 1). 307 standard_lc_305 = np.insert( 308 np.asarray(standard_curve_fitted, dtype=float), 309 0, 310 float(standard_curve_fitted.iloc[0]), 311 ) 312 313 rows: list[dict[str, Any]] = [] 314 for test_id, group in df.groupby("TestId", sort=False): 315 if group.empty: 316 rows.append({"TestId": test_id, "LactationMilkYield": np.nan}) 317 continue 318 319 try: 320 cumulative_yield = ISLC_method( 321 df=group, 322 standard_lc=standard_lc_305, 323 correlation_matrix=correlation_matrix, 324 std_per_grid_day=std_per_grid_day, 325 days_in_milk_col="DaysInMilk", 326 milking_yield_col="MilkingYield", 327 ) 328 except ValueError: 329 cumulative_yield = np.nan 330 331 rows.append({"TestId": test_id, "LactationMilkYield": cumulative_yield}) 332 333 return pd.DataFrame(rows, columns=pd.Index(["TestId", "LactationMilkYield"]))
Estimate cumulative lactation yield per TestId using ISLC.
This variant applies ISLC_method() to standardized lactation records.
By default it uses the package-provided standard curve, correlation matrix,
and per-grid day standard deviations. Optionally, these assets can be refit
from a reference dataset by providing a pandas dataframe at 'reference_df ='
when fit_standard_lc_from_data is True.
Alternative for customization is to set standard_lc_305,
correlation_matrix, and std_per_grid_day directly.
Arguments:
- df: Input test-day records.
- days_in_milk_col: Name of DIM column in
df. - milking_yield_col: Name of milk-yield column in
df. - test_id_col: Name of lactation identifier column in
df. - default_test_id: Fallback TestId value when
test_id_colis absent. - standard_lc_305: Standard lactation curve values used by ISLC.
- correlation_matrix: Correlation matrix aligned with ISLC grid.
- std_per_grid_day: Standard deviations aligned with ISLC grid.
- max_dim: Maximum DIM to include
(or
"max"so highest test day DIM is considered the final lactation day). - fit_standard_lc_from_data: If True, fit representation from
reference_df. - reference_df: Reference records used when fitting curve and covariance.
Returns:
DataFrame with columns
TestIdandLactationMilkYield.
Raises:
- ValueError: If required columns are missing or fit mode has no reference data.
336def ISLC_method( 337 df: pd.DataFrame, 338 standard_lc: pd.Series | npt.NDArray[np.float64], 339 correlation_matrix: pd.DataFrame, 340 std_per_grid_day: npt.NDArray[np.float64], 341 days_in_milk_col: str = "DaysInMilk", 342 milking_yield_col: str = "MilkingYield", 343) -> float: 344 """Estimate total lactation yield by predicting missing grid days, then integrating. 345 346 The method fills missing grid-day yields with the ICAR-style relation: 347 348 yp = gp + b* (yi - gi) 349 350 where for each missing day p, the nearest measured day i is used and 351 b* = corr(i,p) * std(p) / std(i). The resulting complete grid profile is 352 integrated with the test-interval style weighting used elsewhere in this 353 package. 354 355 Notes: 356 The integration is performed over the days present after merging 357 measured and predicted points. If the input contains DIM beyond 305, 358 total yield can reflect that extended horizon. 359 """ 360 # add zero and 305 to the grid 361 grid = [0] + GRID_DAYS + [305] 362 dim_col = days_in_milk_col 363 yield_col = milking_yield_col 364 365 standard_lc_series = _curve_to_series(standard_lc) 366 367 # Extract the measured days 368 measured_days = df[dim_col].to_numpy() 369 if measured_days.size == 0: 370 raise ValueError("Input df must contain at least one measured day.") 371 372 # interpolate the measured days to the grid using the standard lactation curve 373 interpolated_df = interpolation_standard_lc( 374 group=df, 375 days_in_milk_col=dim_col, 376 milking_yield_col=yield_col, 377 standard_lc=standard_lc_series, 378 ) 379 380 # drop unnecessary columns 381 interpolated_df = interpolated_df.drop(columns=[dim_col, yield_col]) 382 383 # rename grid colum 384 interpolated_df = interpolated_df.rename( 385 columns={"GridDay": dim_col, "MilkYieldInterp": yield_col} 386 )[[dim_col, yield_col]] 387 388 measured_subset = cast(pd.DataFrame, df.loc[:, [dim_col, yield_col]]) 389 interpolated_subset = cast(pd.DataFrame, interpolated_df.loc[:, [dim_col, yield_col]]) 390 391 # join the interpolated values with the original measurements, keeping all measurements 392 merged_df = cast( 393 pd.DataFrame, 394 pd.merge( 395 measured_subset, 396 interpolated_subset, 397 on=dim_col, 398 how="outer", 399 suffixes=("_meas", "_interp"), 400 ), 401 ) 402 meas_series = cast(pd.Series, merged_df[f"{yield_col}_meas"]) 403 interp_series = cast(pd.Series, merged_df[f"{yield_col}_interp"]) 404 merged_df[yield_col] = cast(pd.Series, meas_series.combine_first(interp_series)) 405 df = cast( 406 pd.DataFrame, 407 merged_df.loc[:, [dim_col, yield_col]].sort_values(by=dim_col), 408 ) 409 410 # days missing from measurement → need prediction 411 days_to_predict = [day for day in grid if day not in df[dim_col].values] 412 413 predicted_rows: list[dict[str, float | int]] = [] 414 415 for day in days_to_predict: 416 # identify closest measured day 417 dim_series = cast(pd.Series, interpolated_df[dim_col]) 418 grid_vals = np.asarray(dim_series.to_numpy(dtype=float), dtype=float) 419 closest_idx = int(np.argmin(np.abs(grid_vals - float(day)))) 420 closest_day = int(dim_series.iloc[closest_idx]) 421 422 # extract mean milk yield from the standard lactation curve for the day to predict 423 expected_yield_grid_day = _curve_value(standard_lc_series, day) 424 expected_yield_measured_day = _curve_value(standard_lc_series, closest_day) 425 426 # calculate difference between measured and expected yield at closest measured day 427 diff = ( 428 interpolated_df.loc[interpolated_df[dim_col] == closest_day, yield_col].iloc[0] 429 - expected_yield_measured_day 430 ) 431 432 # calculate the correlation-based correction factor b* 433 # b* = corr(i,p) * std(p) / std(i) 434 pi = list(grid).index(closest_day) 435 qi = list(grid).index(day) 436 bpj = correlation_matrix.iloc[pi, qi] 437 std_q = std_per_grid_day[qi] 438 std_p = std_per_grid_day[pi] 439 b_star = 0.0 if std_p == 0 else float(bpj * std_q / std_p) 440 441 # predict the yield for the missing day 442 predicted_yield = expected_yield_grid_day + b_star * diff 443 predicted_rows.append({dim_col: int(day), yield_col: float(predicted_yield)}) 444 445 if predicted_rows: 446 df = pd.concat([df, pd.DataFrame(predicted_rows)], ignore_index=True) 447 448 # apply the test interval method to calculate total milk yield over 305 days 449 # sort the dataframe by days in milk and keep one value per day 450 df = df.drop_duplicates(subset=[dim_col], keep="last") 451 df = df.sort_values(by=dim_col) 452 # Trapezoidal contributions 453 df["width"] = df[dim_col].diff().shift(-1) 454 df["avg_yield"] = (df[yield_col] + df[yield_col].shift(-1)) / 2 455 df["trapezoid_area"] = df["width"] * df["avg_yield"] 456 457 trapezoid_values = np.asarray(pd.to_numeric(df["trapezoid_area"], errors="coerce"), dtype=float) 458 total_yield = float(np.nansum(trapezoid_values)) 459 460 return total_yield
Estimate total lactation yield by predicting missing grid days, then integrating.
The method fills missing grid-day yields with the ICAR-style relation:
yp = gp + b* (yi - gi)
where for each missing day p, the nearest measured day i is used and
b* = corr(i,p) * std(p) / std(i). The resulting complete grid profile is
integrated with the test-interval style weighting used elsewhere in this
package.
Notes:
The integration is performed over the days present after merging measured and predicted points. If the input contains DIM beyond 305, total yield can reflect that extended horizon.
576def ISLC_original( 577 df: pd.DataFrame, 578 days_in_milk_col: str | None = None, 579 milking_yield_col: str | None = None, 580 test_id_col: str | None = None, 581 default_test_id: int = 0, 582 standard_lc_305: pd.Series | npt.NDArray[np.float64] = STANDARD_CURVE, 583 correlation_matrix: pd.DataFrame = CORR_MATRIX, 584 std_per_grid_day: npt.NDArray[np.float64] = STDs, 585 fit_standard_lc_from_data: bool = False, 586 reference_df: pd.DataFrame | None = None, 587) -> pd.DataFrame: 588 """Compute estimated 305-day yields using the original grid-based ISLC variant 589 from the original paper by Dr. Wilmink in 1987. 590 591 The function groups standardized input by ``TestId``, interpolates to the 592 fixed GRID_DAYS grid, and applies :func:`ISLC_original_method` per lactation. 593 594 Args: 595 df: Input records with DIM and milk yield columns. 596 days_in_milk_col: Name of DIM column in ``df``. 597 milking_yield_col: Name of milk-yield column in ``df``. 598 test_id_col: Name of lactation identifier column in ``df``. 599 default_test_id: Fallback TestId when ``test_id_col`` is absent. 600 standard_lc_305: Standard lactation curve used for interpolation. 601 correlation_matrix: Correlation matrix over GRID_DAYS. 602 std_per_grid_day: Standard deviations per GRID_DAYS element. 603 fit_standard_lc_from_data: If True, refit standard curve and covariance 604 representation from ``reference_df`` before prediction. 605 reference_df: Reference population used when 606 ``fit_standard_lc_from_data=True``. The reference data is 607 standardized to ``DaysInMilk``/``MilkingYield`` and fit using 608 ``small_grid=True`` so correlation and standard-deviation assets are 609 aligned to ``GRID_DAYS``. 610 611 Returns: 612 DataFrame with columns ``LactationMilkYield`` and ``TestId``. 613 614 Raises: 615 ValueError: If required columns (DaysInMilk or MilkingYield) cannot be 616 found, or if ``fit_standard_lc_from_data=True`` and 617 ``reference_df`` is not provided. 618 619 Notes: 620 Records with DIM > 305 are dropped before interpolation and scoring. 621 """ 622 # Standardize columns and filter DIM <= 305 623 df = standardize_lactation_columns( 624 df, 625 days_in_milk_col=days_in_milk_col, 626 milking_yield_col=milking_yield_col, 627 test_id_col=test_id_col, 628 default_test_id=default_test_id, 629 max_dim=305, 630 ) 631 632 # Fit covariance/standard_lc if requested 633 if fit_standard_lc_from_data is True: 634 if reference_df is None: 635 raise ValueError("Provide reference_df to fit your own standard lactation curve.") 636 reference_df = standardize_lactation_columns( 637 reference_df, 638 days_in_milk_col=days_in_milk_col, 639 milking_yield_col=milking_yield_col, 640 test_id_col=test_id_col, 641 default_test_id=default_test_id, 642 max_dim=305, 643 ) 644 dim = reference_df["DaysInMilk"] 645 milk_yield = reference_df["MilkingYield"] 646 standard_full_lc_305 = fit_lactation_curve(dim, milkrecordings=milk_yield, model="wilmink") 647 standard_full_lc_305 = pd.Series(standard_full_lc_305) 648 649 # fit matrix 650 ( 651 correlation_matrix, 652 std_per_grid_day, 653 standard_curve_fitted, 654 ) = create_standard_lc_representation( 655 df=reference_df, 656 standard_lactation_curve=pd.Series(standard_full_lc_305), 657 # reference_df is standardized above, so fixed names are required here 658 days_in_milk_col="DaysInMilk", 659 milking_yield_col="MilkingYield", 660 interpolation_method=interpolation_standard_lc, 661 small_grid=True, 662 ) 663 664 # Align fitted curve with default internal convention (day 0 equals day 1). 665 standard_lc_305 = np.insert( 666 np.asarray(standard_curve_fitted, dtype=float), 667 0, 668 float(standard_curve_fitted.iloc[0]), 669 ) 670 rows: list[dict[str, Any]] = [] 671 for test_id, group in df.groupby("TestId", sort=False): 672 lactation = interpolation_standard_lc( 673 group, 674 days_in_milk_col="DaysInMilk", 675 milking_yield_col="MilkingYield", 676 standard_lc=standard_lc_305, 677 ) 678 if lactation.empty: 679 rows.append({"TestId": test_id, "305_milk_yield": np.nan}) 680 continue 681 682 cumulative_yield = ISLC_original_method( 683 df=lactation, 684 standard_lc=standard_lc_305, 685 correlation_matrix=correlation_matrix, 686 std_per_grid_day=std_per_grid_day, 687 ) 688 rows.append({"TestId": test_id, "LactationMilkYield": cumulative_yield}) 689 690 return pd.DataFrame(rows, columns=pd.Index(["TestId", "LactationMilkYield"]))
Compute estimated 305-day yields using the original grid-based ISLC variant from the original paper by Dr. Wilmink in 1987.
The function groups standardized input by TestId, interpolates to the
fixed GRID_DAYS grid, and applies ISLC_original_method() per lactation.
Arguments:
- df: Input records with DIM and milk yield columns.
- days_in_milk_col: Name of DIM column in
df. - milking_yield_col: Name of milk-yield column in
df. - test_id_col: Name of lactation identifier column in
df. - default_test_id: Fallback TestId when
test_id_colis absent. - standard_lc_305: Standard lactation curve used for interpolation.
- correlation_matrix: Correlation matrix over GRID_DAYS.
- std_per_grid_day: Standard deviations per GRID_DAYS element.
- fit_standard_lc_from_data: If True, refit standard curve and covariance
representation from
reference_dfbefore prediction. - reference_df: Reference population used when
fit_standard_lc_from_data=True. The reference data is standardized toDaysInMilk/MilkingYieldand fit usingsmall_grid=Trueso correlation and standard-deviation assets are aligned toGRID_DAYS.
Returns:
DataFrame with columns
LactationMilkYieldandTestId.
Raises:
- ValueError: If required columns (DaysInMilk or MilkingYield) cannot be
found, or if
fit_standard_lc_from_data=Trueandreference_dfis not provided.
Notes:
Records with DIM > 305 are dropped before interpolation and scoring.
463def ISLC_original_method( 464 df: pd.DataFrame, 465 standard_lc: pd.Series | npt.NDArray[np.float64], 466 correlation_matrix: pd.DataFrame, 467 std_per_grid_day: npt.NDArray[np.float64], 468) -> float: 469 """Estimate 305-day milk yield for a single interpolated lactation 470 from the original paper by Dr. Wilmink in 1987. 471 472 This function computes an estimated 305-day yield by combining three 473 components: known production from interpolated measurements, the 474 population mean for missing grid days (from ``standard_lc``), and a 475 correlation-based deviation correction derived from ``correlation_matrix``. 476 477 Args: 478 df: DataFrame containing at least the columns ``GridDay`` and 479 ``MilkYieldInterp`` (values already interpolated onto the grid). 480 standard_lc: Standard lactation curve values indexed by grid day. 481 correlation_matrix: Correlation coefficients between grid-day yields 482 (matrix-like, rows/columns ordered as the grid). 483 std_per_grid_day: 1-D array of standard deviations for each grid day. 484 485 Returns: 486 A float with the estimated 305-day milk yield. 487 488 Raises: 489 ValueError: if any measured day in ``df`` is not present in the 490 expected global ``grid``. 491 492 Notes: 493 - The routine assumes fixed recording intervals of 20 days, except 494 that the final interval (if day 290 is present) is 25 days. 495 - The implementation expects a global ``grid`` variable to be 496 defined and aligned with ``standard_lc`` and 497 ``std_per_grid_day``. 498 - The amount of estimated milk yields depends on the 499 availability of measured milk yields. 500 - Lactation yield is: 501 sum(known measurements (p) + estimated measurements (q)). 502 - In the implementation according to the CRV E2 document, 503 there should also be a correction factor for the difference between 504 the expected and actual complete lactation yield of the previous lactation. 505 However, this correction factor is not included in the current implementation 506 as it requires additional data (previous lactation yield) that is not always available. 507 """ 508 grid = GRID_DAYS 509 standard_lc_series = _curve_to_series(standard_lc) 510 511 # --- Validate that all measurements lie on the expected grid ------- 512 measured_days = df["GridDay"].values 513 if any(day not in grid for day in measured_days): 514 raise ValueError("At least one DIM in the input df is not part of the grid.") 515 516 # --- Prepare basic components ------------------------------------- 517 df = df.sort_values("GridDay") 518 519 # days missing from measurement → need prediction 520 days_to_predict = [day for day in grid if day not in measured_days] 521 pred_idx = [list(grid).index(d) for d in days_to_predict] 522 523 # mean expected milk yield from standard curve 524 mean_lc = pd.Series([_curve_value(standard_lc_series, g) for g in grid], index=grid) 525 526 # --- Compute known production (sum of actual measurements) --------- 527 if 290 in measured_days: 528 # special last interval: 25 days 529 known_prod = df["MilkYieldInterp"][:-1].sum() * 20 + df["MilkYieldInterp"].iloc[-1] * 25 530 else: 531 known_prod = df["MilkYieldInterp"].sum() * 20 532 533 # --- Compute population mean for missing days ---------------------- 534 pop_mean_missing = mean_lc.loc[days_to_predict] 535 536 if 290 in pop_mean_missing.index: 537 population_mean = pop_mean_missing.iloc[:-1].sum() * 20 + pop_mean_missing.iloc[-1] * 25 538 else: 539 population_mean = pop_mean_missing.sum() * 20 540 541 # --- Compute correlation-based correction -------------------------- 542 days_to_predict_np = np.array(days_to_predict) 543 measured_days_np = np.array(measured_days) 544 545 # for each missing day: find closest measured day 546 closest_measured = [ 547 measured_days_np[np.argmin(np.abs(measured_days_np - g))] for g in days_to_predict_np 548 ] 549 closest_idx = [list(grid).index(d) for d in closest_measured] 550 551 b_star_list = [] 552 for pi, qi in zip(closest_idx, pred_idx): 553 bpj = correlation_matrix.iloc[pi, qi] 554 stdj = std_per_grid_day[qi] 555 stdp = std_per_grid_day[pi] 556 b_star_list.append(bpj * stdj / stdp) 557 558 # scale correction by interval lengths 559 if 290 in measured_days: 560 # last interval belongs to prediction group 561 correction_weighted = sum(b_star_list[:-1]) * 20 562 else: 563 correction_weighted = sum(b_star_list[:-1]) * 20 + b_star_list[-1] * 25 564 565 last_day = df["GridDay"].iloc[-1] 566 last_yield = df["MilkYieldInterp"].iloc[-1] 567 mean_last = mean_lc.loc[last_day] 568 569 correction = correction_weighted * (last_yield - mean_last) 570 571 # --- Final sum ----------------------------------------------------- 572 final_milk_yield = known_prod + population_mean + correction 573 return float(final_milk_yield)
Estimate 305-day milk yield for a single interpolated lactation from the original paper by Dr. Wilmink in 1987.
This function computes an estimated 305-day yield by combining three
components: known production from interpolated measurements, the
population mean for missing grid days (from standard_lc), and a
correlation-based deviation correction derived from correlation_matrix.
Arguments:
- df: DataFrame containing at least the columns
GridDayandMilkYieldInterp(values already interpolated onto the grid). - standard_lc: Standard lactation curve values indexed by grid day.
- correlation_matrix: Correlation coefficients between grid-day yields (matrix-like, rows/columns ordered as the grid).
- std_per_grid_day: 1-D array of standard deviations for each grid day.
Returns:
A float with the estimated 305-day milk yield.
Raises:
- ValueError: if any measured day in
dfis not present in the expected globalgrid.
Notes:
- The routine assumes fixed recording intervals of 20 days, except that the final interval (if day 290 is present) is 25 days.
- The implementation expects a global
gridvariable to be defined and aligned withstandard_lcandstd_per_grid_day.- The amount of estimated milk yields depends on the availability of measured milk yields.
- Lactation yield is: sum(known measurements (p) + estimated measurements (q)).
- In the implementation according to the CRV E2 document, there should also be a correction factor for the difference between the expected and actual complete lactation yield of the previous lactation. However, this correction factor is not included in the current implementation as it requires additional data (previous lactation yield) that is not always available.
494def best_predict_method( 495 df: pd.DataFrame, 496 standard_lc: np.ndarray = STANDARD_CURVE, 497 days_in_milk_col: str | None = None, 498 milking_yield_col: str | None = None, 499 test_id_col: str | None = None, 500 default_test_id: int = 0, 501 covariance_matrix: np.ndarray | None = COV_MATRIX, 502 fit_standard_lc_from_data: bool = False, 503 reference_df: pd.DataFrame | None = None, 504) -> pd.DataFrame: 505 """Apply best prediction to one or more lactations. 506 507 By default this function uses the package-provided standard curve and covariance matrix. 508 But it is also possible to provide your own standard curve and covariance matrix, 509 for example when you want to fit these ingredients from your own reference population. 510 This can be done in two ways: either by fitting the covariance matrix and standard curve 511 directly from a reference dataset by providing a pandas dataframe at 'reference_df =' 512 when ``fit_standard_lc_from_data`` is True. 513 Alternative for customization is to set standard_lc_305 and covariance_matrix 514 directly in the function call. 515 516 Args: 517 df: Input observations. If ``TestId`` is missing, all rows are treated 518 as one lactation. 519 standard_lc: Expected daily milk yield lactation curve on days 1..305. 520 If not provided, the package's default curve is used. 521 Or fit your own standard curve from a reference dataset by 522 providing a pandas dataframe at 'reference_df =' 523 when ``fit_standard_lc_from_data`` is True. 524 days_in_milk_col: Optional input column name for days in milk. If 525 provided, it is mapped to ``DaysInMilk``. 526 milking_yield_col: Optional input column name for milk yield. If 527 provided, it is mapped to ``MilkingYield``. 528 test_id_col: Optional input column name for lactation/test identifier. 529 If provided, it is mapped to ``TestId``. 530 default_test_id: Fallback test id used when no test-id column is 531 available. 532 covariance_matrix: Optional prefit covariance matrix. If omitted, 533 the default matrix is used or 534 ``reference_df`` can be used to fit one for your own data. 535 fit_standard_lc_from_data: Whether to fit covariance information from 536 ``reference_df`` instead of using a provided covariance matrix. 537 reference_df: Reference dataframe used when ``covariance_matrix`` and 538 ``standard_lc`` are not provided and ``fit_standard_lc_from_data`` 539 is True. 540 541 Returns: 542 Dataframe with columns ``TestId`` and ``LactationMilkYield``. 543 544 Raises: 545 ValueError: If neither ``covariance_matrix`` nor ``reference_df`` is 546 provided. 547 """ 548 # Standardize columns and filter DIM <= 305 549 df = standardize_lactation_columns( 550 df, 551 days_in_milk_col=days_in_milk_col, 552 milking_yield_col=milking_yield_col, 553 test_id_col=test_id_col, 554 default_test_id=default_test_id, 555 max_dim=305, 556 ) 557 558 # Fit covariance if not provided 559 if fit_standard_lc_from_data: 560 if reference_df is None: 561 raise ValueError("Provide reference_df to fit your own standard lactation curve.") 562 reference_df = standardize_lactation_columns( 563 reference_df, 564 days_in_milk_col=days_in_milk_col, 565 milking_yield_col=milking_yield_col, 566 test_id_col=test_id_col, 567 default_test_id=default_test_id, 568 max_dim=305, 569 ) 570 covariance_matrix = cast( 571 np.ndarray, fit_autocorrelation_matrix(reference_df, standard_lc)["B_hat"] 572 ) 573 574 covariance_matrix_array = cast(np.ndarray, covariance_matrix) 575 576 df = df.copy() 577 578 results = [] 579 580 for test_id, lactation in df.groupby("TestId"): 581 pred = best_predict_method_single_lac( 582 lactation, 583 standard_lc, 584 covariance_matrix_array, 585 ) 586 results.append({"TestId": test_id, "LactationMilkYield": pred}) 587 588 return pd.DataFrame(results)
Apply best prediction to one or more lactations.
By default this function uses the package-provided standard curve and covariance matrix.
But it is also possible to provide your own standard curve and covariance matrix,
for example when you want to fit these ingredients from your own reference population.
This can be done in two ways: either by fitting the covariance matrix and standard curve
directly from a reference dataset by providing a pandas dataframe at 'reference_df ='
when fit_standard_lc_from_data is True.
Alternative for customization is to set standard_lc_305 and covariance_matrix
directly in the function call.
Arguments:
- df: Input observations. If
TestIdis missing, all rows are treated as one lactation. - standard_lc: Expected daily milk yield lactation curve on days 1..305.
If not provided, the package's default curve is used.
Or fit your own standard curve from a reference dataset by
providing a pandas dataframe at 'reference_df ='
when
fit_standard_lc_from_datais True. - days_in_milk_col: Optional input column name for days in milk. If
provided, it is mapped to
DaysInMilk. - milking_yield_col: Optional input column name for milk yield. If
provided, it is mapped to
MilkingYield. - test_id_col: Optional input column name for lactation/test identifier.
If provided, it is mapped to
TestId. - default_test_id: Fallback test id used when no test-id column is available.
- covariance_matrix: Optional prefit covariance matrix. If omitted,
the default matrix is used or
reference_dfcan be used to fit one for your own data. - fit_standard_lc_from_data: Whether to fit covariance information from
reference_dfinstead of using a provided covariance matrix. - reference_df: Reference dataframe used when
covariance_matrixandstandard_lcare not provided andfit_standard_lc_from_datais True.
Returns:
Dataframe with columns
TestIdandLactationMilkYield.
Raises:
- ValueError: If neither
covariance_matrixnorreference_dfis provided.
424def best_predict_method_single_lac( 425 lactation: pd.DataFrame, 426 standard_lc: np.ndarray = STANDARD_CURVE, 427 covariance_matrix: np.ndarray = COV_MATRIX, 428) -> float: 429 """Predict 305-day cumulative yield for one lactation. 430 431 Observed test-day deviations are projected over all 305 days using the 432 covariance structure and then added to the baseline cumulative standard 433 curve. 434 435 By default this function uses the package-provided standard curve and covariance matrix. 436 But it is also possible to provide your own standard curve and covariance matrix, 437 for example when you want to fit these ingredients from your own reference population. 438 439 440 Args: 441 lactation: Observed records for one lactation. 442 standard_lc: Population mean daily yield profile. 443 covariance_matrix: Day-to-day covariance matrix on the 305-day grid. 444 445 Returns: 446 Predicted cumulative 305-day milk yield. 447 448 Notes: 449 Duplicate day records are resolved with ``keep="last"`` before 450 prediction. If no valid observations remain in days 1..305, the method 451 returns the cumulative standard curve. 452 """ 453 filtered_lactation = lactation.loc[ 454 (lactation["DaysInMilk"] >= 1) & (lactation["DaysInMilk"] <= 305) 455 ].copy() 456 filtered_lactation = filtered_lactation.drop_duplicates(subset=["DaysInMilk"], keep="last") 457 filtered_lactation = filtered_lactation.sort_values("DaysInMilk") 458 459 corrected_series = preprocess_measured_data( 460 filtered_lactation, 461 standard_lc=standard_lc, 462 ) 463 464 if filtered_lactation.empty: 465 return float(np.sum(standard_lc)) 466 467 obs_idx_1based = filtered_lactation["DaysInMilk"].to_numpy(dtype=int) # DaysInMilk: 1-305 468 obs_idx_0based = obs_idx_1based - 1 # Convert to 0-based matrix indices: 0-304 469 y_obs = corrected_series.loc[obs_idx_1based].to_numpy( 470 dtype=float 471 ) # corrected_series is indexed by DaysInMilk (1-305) 472 473 # Extract covariance blocks 474 B_oo = covariance_matrix[ 475 np.ix_(obs_idx_0based, obs_idx_0based) 476 ] # Use 0-based indices for matrix 477 B_mo = covariance_matrix[:, obs_idx_0based] # Use 0-based indices for matrix 478 479 # solve 480 c, lower = cho_factor(B_oo) 481 alpha = cho_solve((c, lower), y_obs) 482 483 # Predict full deviation curve 484 y_estimate = B_mo @ alpha 485 486 # Total milk = baseline + deviation 487 deviation = np.sum(y_estimate) 488 489 total = np.sum(standard_lc) + deviation 490 491 return total
Predict 305-day cumulative yield for one lactation.
Observed test-day deviations are projected over all 305 days using the covariance structure and then added to the baseline cumulative standard curve.
By default this function uses the package-provided standard curve and covariance matrix. But it is also possible to provide your own standard curve and covariance matrix, for example when you want to fit these ingredients from your own reference population.
Arguments:
- lactation: Observed records for one lactation.
- standard_lc: Population mean daily yield profile.
- covariance_matrix: Day-to-day covariance matrix on the 305-day grid.
Returns:
Predicted cumulative 305-day milk yield.
Notes:
Duplicate day records are resolved with
keep="last"before prediction. If no valid observations remain in days 1..305, the method returns the cumulative standard curve.
271def build_covariance_matrix(rho: float, size: int) -> np.ndarray: 272 """Construct a covariance matrix. 273 274 Cole et al. (2007) estimated correlations among test-day yields using a 275 simplified model with an identity matrix (I) for daily measurement error 276 and an autoregressive matrix (E) for biological change. E is defined as 277 ``Eij = r ** |i-j|`` where ``i`` and ``j`` are test-day DIM and 278 ``0 < r < 1``. 279 280 Element ``(i, j)`` is ``rho ** abs(i - j)``. 281 282 Args: 283 rho: AR(1) correlation parameter. 284 size: Matrix dimension. 285 286 Returns: 287 A ``(size, size)`` AR(1) correlation matrix. 288 """ 289 idx = np.arange(size) 290 M = np.abs(idx[:, None] - idx[None, :]) 291 return rho**M
Construct a covariance matrix.
Cole et al. (2007) estimated correlations among test-day yields using a
simplified model with an identity matrix (I) for daily measurement error
and an autoregressive matrix (E) for biological change. E is defined as
Eij = r ** |i-j| where i and j are test-day DIM and
0 < r < 1.
Element (i, j) is rho ** abs(i - j).
Arguments:
- rho: AR(1) correlation parameter.
- size: Matrix dimension.
Returns:
A
(size, size)AR(1) correlation matrix.
323def calculate_characteristic( 324 dim, 325 milkrecordings, 326 model="wood", 327 characteristic="cumulative_milk_yield", 328 fitting="frequentist", 329 key=None, 330 parity=3, 331 breed="H", 332 continent="USA", 333 custom_priors=None, 334 milk_unit="kg", 335 persistency_method="derived", 336 lactation_length=305, 337) -> float | None: 338 """Evaluate a lactation curve characteristic from observed test-day data. 339 340 This function fits the requested model (frequentist or Bayesian via MilkBot), 341 retrieves model parameters, and evaluates the requested characteristic using the 342 symbolic expression (if available), falling back to numeric methods when needed. 343 344 Args: 345 dim (Int): Days in milk (DIM). 346 milkrecordings (Float): Milk recordings (kg or lbs) for each DIM. 347 model (str): Model name. Supported for this function: 348 'milkbot', 'wood', 'wilmink', 'ali_schaeffer', 'fischer'. 349 characteristic (str): One of 'time_to_peak', 'peak_yield', 350 'cumulative_milk_yield', 'persistency'. 351 fitting (str): 'frequentist' (default) or 'bayesian'. 352 key (str | None): API key for MilkBot Bayesian fitting. 353 parity (Int): Parity of the cow; values above 3 are considered as 3 (Bayesian). 354 breed (str): 'H' (Holstein) or 'J' (Jersey) (Bayesian). 355 custom_priors: provide your own priors for Bayesian fitting. 356 provide as dictionary using build_prior() function 357 to create the priors in the right format. 358 Alternative use priors from the literature provided by the string command 'CHEN' 359 milk_unit: Unit of milk recordings ('kg' or 'lb') for Bayesian fitting. 360 continent (str): 'USA' or 'EU' (Bayesian). 361 persistency_method (str): 'derived' (average slope after peak; default) or 'literature' 362 (only for Wood and MilkBot). 363 lactation_length (Int | str): Horizon for persistency calculation: 364 305 (default), 'max' (use max DIM in data), or an integer. 365 366 Returns: 367 float | None: The requested characteristic value. 368 Or None if value is negative (except persistency) 369 or cannot be computed symbolically. 370 371 Raises: 372 Exception: If inputs are invalid, the model/characteristic is unsupported, 373 an API key is missing for Bayesian fitting, or the characteristic cannot be computed. 374 """ 375 # check and prepare input 376 inputs = validate_and_prepare_inputs( 377 dim, 378 milkrecordings, 379 model=model, 380 fitting=fitting, 381 breed=breed, 382 parity=parity, 383 continent=continent, 384 custom_priors=custom_priors, 385 milk_unit=milk_unit, 386 persistency_method=persistency_method, 387 lactation_length=lactation_length, 388 ) 389 390 dim = inputs.dim 391 milkrecordings = inputs.milkrecordings 392 model = inputs.model 393 fitting = inputs.fitting 394 breed = inputs.breed 395 parity = inputs.parity 396 continent = inputs.continent 397 custom_priors = inputs.custom_priors 398 milk_unit = inputs.milk_unit 399 persistency_method = inputs.persistency_method 400 lactation_length = inputs.lactation_length 401 402 # After validation, these fields are guaranteed non-None for the paths below 403 assert model is not None 404 assert fitting is not None 405 406 if model not in ["milkbot", "wood", "wilmink", "ali_schaeffer", "fischer"]: 407 msg = ( 408 "this function currently only works for" 409 " the milkbot, wood, wilmink," 410 " ali_schaeffer and fischer models" 411 ) 412 raise Exception(msg) 413 414 characteristic_options: list[str] = [ 415 "time_to_peak", 416 "peak_yield", 417 "cumulative_milk_yield", 418 "persistency", 419 ] 420 421 if characteristic in characteristic_options: 422 if fitting == "frequentist": 423 # Get fitted parameters from your fitting function 424 fitted_params = get_lc_parameters(dim, milkrecordings, model) 425 assert fitted_params is not None 426 427 if characteristic != "persistency": 428 # Try symbolic formula first 429 assert isinstance(lactation_length, int) 430 try: 431 expr, params, fn = lactation_curve_characteristic_function( 432 model, characteristic, lactation_length 433 ) 434 with np.errstate( 435 divide="ignore", invalid="ignore" 436 ): # get rid of warnings for invalid operations 437 value = fn(*fitted_params) 438 except Exception: 439 value = None 440 441 # If symbolic formula fails or is invalid (use numeric approach) 442 if ( 443 value is None 444 or not np.isfinite(value) 445 or (characteristic == "time_to_peak" and value <= 0) 446 ): 447 assert parity is not None 448 assert breed is not None 449 assert continent is not None 450 if characteristic == "time_to_peak": 451 value = numeric_time_to_peak( 452 dim, 453 milkrecordings, 454 model, 455 fitting=fitting, 456 key=key, 457 parity=parity, 458 breed=breed, 459 continent=continent, 460 ) 461 elif characteristic == "peak_yield": 462 value = numeric_peak_yield( 463 dim, 464 milkrecordings, 465 model, 466 fitting=fitting, 467 key=key, 468 parity=parity, 469 breed=breed, 470 continent=continent, 471 ) 472 elif characteristic == "cumulative_milk_yield": 473 value = numeric_cumulative_yield( 474 dim, 475 milkrecordings, 476 model, 477 fitting=fitting, 478 lactation_length=lactation_length, 479 key=key, 480 parity=parity, 481 breed=breed, 482 continent=continent, 483 ) 484 485 else: 486 if persistency_method == "derived": 487 # find lactation length from data 488 if lactation_length == "max": 489 lactation_length = max(dim) 490 elif isinstance(lactation_length, int): 491 lactation_length = lactation_length 492 else: 493 lactation_length = 305 494 value = persistency_fitted_curve( 495 dim, 496 milkrecordings, 497 model, 498 fitting="frequentist", 499 lactation_length=lactation_length, 500 ) 501 502 else: 503 if model == "wood": 504 value = persistency_wood(fitted_params[1], fitted_params[2]) 505 506 elif model == "milkbot": 507 assert len(fitted_params) >= 4 508 value = persistency_milkbot(fitted_params[3]) 509 510 else: 511 msg = ( 512 "Currently only the Wood model" 513 " and MilkBot model have a" 514 " separate model function from" 515 " the literature integrated for" 516 " persistency. if" 517 ' persistency="derived" is' 518 " selected, persistency can be" 519 " calculated for every model as" 520 " the average slope of the" 521 " lactation after the peak." 522 ) 523 raise Exception(msg) 524 try: 525 if value is None: 526 return None 527 if characteristic != "persistency" and value < 0: 528 return None 529 return float(value) 530 except ValueError: 531 raise Exception("Could not compute characteristic") 532 533 else: 534 if model == "milkbot": 535 if key is None: 536 raise Exception("Key needed to use Bayesian fitting engine MilkBot") 537 else: 538 assert parity is not None 539 assert breed is not None 540 assert continent is not None 541 assert isinstance(milk_unit, str) 542 fitted_params_bayes = bayesian_fit_milkbot_single_lactation( 543 dim, 544 milkrecordings, 545 key=key, 546 parity=parity, 547 breed=breed, 548 custom_priors=custom_priors, 549 continent=continent, 550 milk_unit=milk_unit, 551 ) 552 fitted_params_bayes = ( 553 fitted_params_bayes["scale"], 554 fitted_params_bayes["ramp"], 555 fitted_params_bayes["offset"], 556 fitted_params_bayes["decay"], 557 ) 558 559 if characteristic != "persistency": 560 # Get the symbolic expression and model parameters 561 try: 562 expr, param_symbols, fn = lactation_curve_characteristic_function( 563 model, characteristic 564 ) 565 with np.errstate(divide="ignore", invalid="ignore"): 566 value = fn(*fitted_params_bayes) 567 except Exception: 568 value = None 569 570 if ( 571 value is None 572 or not np.isfinite(value) 573 or (characteristic == "time_to_peak" and value <= 0) 574 ): 575 if characteristic == "time_to_peak": 576 value = numeric_time_to_peak( 577 dim, 578 milkrecordings, 579 model, 580 fitting=fitting, 581 key=key, 582 parity=parity, 583 breed=breed, 584 custom_priors=custom_priors, 585 milk_unit=milk_unit, 586 continent=continent, 587 ) 588 elif characteristic == "peak_yield": 589 value = numeric_peak_yield( 590 dim, 591 milkrecordings, 592 model, 593 fitting=fitting, 594 key=key, 595 parity=parity, 596 breed=breed, 597 custom_priors=custom_priors, 598 milk_unit=milk_unit, 599 continent=continent, 600 ) 601 elif characteristic == "cumulative_milk_yield": 602 if lactation_length == "max": 603 numeric_lactation_length = max(dim) 604 elif isinstance(lactation_length, int): 605 numeric_lactation_length = lactation_length 606 else: 607 numeric_lactation_length = 305 608 value = numeric_cumulative_yield( 609 dim, 610 milkrecordings, 611 model, 612 fitting=fitting, 613 lactation_length=numeric_lactation_length, 614 key=key, 615 parity=parity, 616 breed=breed, 617 custom_priors=custom_priors, 618 milk_unit=milk_unit, 619 continent=continent, 620 ) 621 622 else: 623 if persistency_method == "derived": 624 # find lactation length from data 625 if lactation_length == "max": 626 lactation_length = max(dim) 627 elif isinstance(lactation_length, int): 628 lactation_length = lactation_length 629 else: 630 lactation_length = 305 631 assert isinstance(milk_unit, str) 632 value = persistency_fitted_curve( 633 dim, 634 milkrecordings, 635 model, 636 fitting="bayesian", 637 key=key, 638 parity=parity, 639 breed=breed, 640 custom_priors=custom_priors, 641 milk_unit=milk_unit, 642 continent=continent, 643 lactation_length=lactation_length, 644 ) 645 646 else: 647 value = persistency_milkbot(fitted_params_bayes[3]) 648 649 try: 650 if value is None: 651 return None 652 if characteristic != "persistency" and value < 0: 653 return None 654 return float(value) 655 except ValueError: 656 raise Exception("Could not compute characteristic") 657 else: 658 raise Exception("Bayesian fitting is currently only implemented for MilkBot models") 659 660 else: 661 raise Exception("Unknown characteristic")
Evaluate a lactation curve characteristic from observed test-day data.
This function fits the requested model (frequentist or Bayesian via MilkBot), retrieves model parameters, and evaluates the requested characteristic using the symbolic expression (if available), falling back to numeric methods when needed.
Arguments:
- dim (Int): Days in milk (DIM).
- milkrecordings (Float): Milk recordings (kg or lbs) for each DIM.
- model (str): Model name. Supported for this function: 'milkbot', 'wood', 'wilmink', 'ali_schaeffer', 'fischer'.
- characteristic (str): One of 'time_to_peak', 'peak_yield', 'cumulative_milk_yield', 'persistency'.
- fitting (str): 'frequentist' (default) or 'bayesian'.
- key (str | None): API key for MilkBot Bayesian fitting.
- parity (Int): Parity of the cow; values above 3 are considered as 3 (Bayesian).
- breed (str): 'H' (Holstein) or 'J' (Jersey) (Bayesian).
- custom_priors: provide your own priors for Bayesian fitting. provide as dictionary using build_prior() function to create the priors in the right format. Alternative use priors from the literature provided by the string command 'CHEN'
- milk_unit: Unit of milk recordings ('kg' or 'lb') for Bayesian fitting.
- continent (str): 'USA' or 'EU' (Bayesian).
- persistency_method (str): 'derived' (average slope after peak; default) or 'literature' (only for Wood and MilkBot).
- lactation_length (Int | str): Horizon for persistency calculation: 305 (default), 'max' (use max DIM in data), or an integer.
Returns:
float | None: The requested characteristic value. Or None if value is negative (except persistency) or cannot be computed symbolically.
Raises:
- Exception: If inputs are invalid, the model/characteristic is unsupported, an API key is missing for Bayesian fitting, or the characteristic cannot be computed.
242def center_lactation_data( 243 milk_matrix: np.ndarray, 244 standard_lc: np.ndarray, 245 day_mean_method: str = "standard_lc", 246) -> np.ndarray: 247 """Center lactation yields before covariance estimation. 248 249 Args: 250 milk_matrix: Yield matrix with lactations in rows and days in columns. 251 standard_lc: Expected day-wise milk yield profile. 252 day_mean_method: Mean-centering strategy. Supported values are 253 ``"standard_lc"`` (default) and ``"data"``. 254 255 Returns: 256 A centered matrix with the same shape as ``milk_matrix``. 257 258 Raises: 259 ValueError: If ``day_mean_method`` is not supported. 260 """ 261 if day_mean_method == "standard_lc": 262 day_mean = standard_lc 263 elif day_mean_method == "data": 264 day_mean = np.nanmean(milk_matrix, axis=0) 265 else: 266 raise ValueError("day_mean_method must be 'standard_lc' or 'data'.") 267 268 return milk_matrix - day_mean
Center lactation yields before covariance estimation.
Arguments:
- milk_matrix: Yield matrix with lactations in rows and days in columns.
- standard_lc: Expected day-wise milk yield profile.
- day_mean_method: Mean-centering strategy. Supported values are
"standard_lc"(default) and"data".
Returns:
A centered matrix with the same shape as
milk_matrix.
Raises:
- ValueError: If
day_mean_methodis not supported.
936def create_standard_lc_representation( 937 df: pd.DataFrame, 938 standard_lactation_curve: pd.Series, 939 days_in_milk_col: str, 940 milking_yield_col: str, 941 interpolation_method: InterpolationMethod = interpolation_standard_lc, 942 lc_model: str = "Wilmink", 943 small_grid: bool = False, 944) -> tuple[pd.DataFrame, npt.NDArray[np.float64], pd.Series]: 945 """Create a standard lactation curve with correlation matrix from data. 946 947 This function estimates the correlation structure and standard deviations 948 of milk yields across grid days, and refits a Wilmink lactation curve model to 949 the interpolated data. The Wilmink model is hereby the default, 950 but any model from the LC package can be used. 951 The outputs can be used as inputs to the 952 ``ISLC_method`` or ``ISLC_original`` functions for predicting 305-d yields. 953 954 The process standardizes yields per animal and computes row-wise 955 (per-animal) statistics, then derives relationships across grid days. 956 957 Args: 958 df: Input DataFrame containing test-day records. Must include a 959 ``TestId`` column to identify unique lactations. Also requires 960 columns specified by ``days_in_milk_col`` and ``milking_yield_col``. 961 standard_lactation_curve: A Series providing a reference lactation 962 curve (e.g., the fitted Wilmink curve on which to base 963 interpolation). 964 days_in_milk_col: Name of the column in ``df`` containing days in milk (DIM). 965 milking_yield_col: Name of the column in ``df`` containing measured 966 milk yields. 967 interpolation_method: An interpolation method conforming to 968 ``InterpolationMethod`` protocol (default: ``interpolation_standard_lc``). 969 Must be callable with signature 970 (group, days_in_milk_col, milking_yield_col, standard_lc). 971 lc_model: The lactation curve model to fit to the interpolated data. Default is "Wilmink". 972 small_grid: If True, build representation on GRID_DAYS only. 973 974 Returns: 975 A tuple of three elements: 976 - corr (pd.DataFrame): Correlation matrix between grid-day yields. 977 - std_per_grid_day (np.ndarray): Standard deviations of standardized 978 yields per grid day. 979 - standard_lactation_curve_grid (pd.Series): Refitted Wilmink model 980 indexed by DIM (1–305). 981 982 Notes: 983 - Expects ``df`` to have a ``TestId`` column to group lactations. 984 - Performs standardization per animal (row-wise), so output 985 correlations and standard deviations reflect between-day variation 986 within standardized animal profiles. 987 - Uses ``fit_lactation_curve`` from the ``lactationcurve`` package 988 with model='Wilmink' and fitting='frequentist'. 989 However other models can be used by specifying the ``lc_model`` argument. 990 """ 991 992 if "TestId" not in df.columns: 993 raise ValueError("DataFrame must contain a 'TestId' column.") 994 if days_in_milk_col not in df.columns or milking_yield_col not in df.columns: 995 raise ValueError(f"Columns '{days_in_milk_col}' and '{milking_yield_col}' must be in df.") 996 997 interpolated_groups: list[pd.DataFrame] = [] 998 for test_id, group in df.groupby("TestId", sort=False): 999 try: 1000 interp_group = interpolation_method( 1001 group, 1002 days_in_milk_col=days_in_milk_col, 1003 milking_yield_col=milking_yield_col, 1004 standard_lc=standard_lactation_curve, 1005 small_grid=small_grid, 1006 ) 1007 except TypeError: 1008 # Backward compatibility for custom interpolation callables 1009 # that do not yet accept small_grid. 1010 interp_group = interpolation_method( 1011 group, 1012 days_in_milk_col=days_in_milk_col, 1013 milking_yield_col=milking_yield_col, 1014 standard_lc=standard_lactation_curve, 1015 ) 1016 if interp_group.empty: 1017 continue 1018 if "TestId" not in interp_group.columns: 1019 interp_group = interp_group.copy() 1020 interp_group["TestId"] = test_id 1021 interpolated_groups.append(interp_group) 1022 1023 if not interpolated_groups: 1024 raise ValueError("No interpolated rows were produced; cannot build representation.") 1025 1026 df_grid = pd.concat(interpolated_groups, ignore_index=True) 1027 1028 # define Znp 1029 # pivot df to make GridDay the columns 1030 # Z should be the unscaled pivot table (TestId × GridDay) 1031 Z = df_grid.pivot(index="TestId", columns="GridDay", values="MilkYieldInterp") 1032 1033 # Standardize records per cow (so each row has mean=0, std=1) 1034 Z_std = (Z.sub(Z.mean(axis=1), axis=0)).div(Z.std(axis=1), axis=0) 1035 1036 # Convert to NumPy array 1037 Znp = Z_std.to_numpy() 1038 1039 # calculate correlation matrix 1040 corr = pd.DataFrame(Znp, columns=Z.columns).corr() 1041 1042 # calculate standard deviations for each grid day from the Znp matrix 1043 std_per_grid_day = np.nanstd(Znp, axis=0) # ignores NaN 1044 1045 # fit Wilmink model to get mean values and std for each cow 1046 standard_lactation_curve_grid = pd.Series( 1047 fit_lactation_curve( 1048 df_grid["GridDay"].values, 1049 df_grid["MilkYieldInterp"].values, 1050 model=lc_model, 1051 fitting="frequentist", 1052 ), 1053 index=range(1, 306), 1054 ) 1055 return corr, std_per_grid_day, standard_lactation_curve_grid
Create a standard lactation curve with correlation matrix from data.
This function estimates the correlation structure and standard deviations
of milk yields across grid days, and refits a Wilmink lactation curve model to
the interpolated data. The Wilmink model is hereby the default,
but any model from the LC package can be used.
The outputs can be used as inputs to the
ISLC_method or ISLC_original functions for predicting 305-d yields.
The process standardizes yields per animal and computes row-wise (per-animal) statistics, then derives relationships across grid days.
Arguments:
- df: Input DataFrame containing test-day records. Must include a
TestIdcolumn to identify unique lactations. Also requires columns specified bydays_in_milk_colandmilking_yield_col. - standard_lactation_curve: A Series providing a reference lactation curve (e.g., the fitted Wilmink curve on which to base interpolation).
- days_in_milk_col: Name of the column in
dfcontaining days in milk (DIM). - milking_yield_col: Name of the column in
dfcontaining measured milk yields. - interpolation_method: An interpolation method conforming to
InterpolationMethodprotocol (default:interpolation_standard_lc). Must be callable with signature (group, days_in_milk_col, milking_yield_col, standard_lc). - lc_model: The lactation curve model to fit to the interpolated data. Default is "Wilmink".
- small_grid: If True, build representation on GRID_DAYS only.
Returns:
A tuple of three elements:
- corr (pd.DataFrame): Correlation matrix between grid-day yields.
- std_per_grid_day (np.ndarray): Standard deviations of standardized yields per grid day.
- standard_lactation_curve_grid (pd.Series): Refitted Wilmink model indexed by DIM (1–305).
Notes:
- Expects
dfto have aTestIdcolumn to group lactations.- Performs standardization per animal (row-wise), so output correlations and standard deviations reflect between-day variation within standardized animal profiles.
- Uses
fit_lactation_curvefrom thelactationcurvepackage with model='Wilmink' and fitting='frequentist'. However other models can be used by specifying thelc_modelargument.
294def fit_autocorrelation_matrix( 295 df: pd.DataFrame, standard_lc: np.ndarray 296) -> dict[str, np.ndarray | float]: 297 """Estimate covariance parameters for best prediction. 298 299 The model is ``B = b1 * I + b2 * E`` where ``E`` is an AR(1) correlation 300 matrix. Parameters are optimized in transformed space and mapped back to 301 enforce ``b1 > 0``, ``b2 > 0``, and ``0 < rho < 1``. 302 303 Args: 304 df: Reference milk-recording dataframe. 305 standard_lc: Population mean curve used for centering. 306 307 Returns: 308 Dictionary with: 309 - ``"B_hat"``: fitted covariance matrix. 310 - ``"R_hat"``: correlation matrix derived from ``B_hat``. 311 - ``"b1"``, ``"b2"``, ``"rho"``: fitted scalar parameters. 312 """ 313 milk_matrix = pivot_milk_recordings_to_matrix(df) 314 centered_matrix = center_lactation_data(milk_matrix, standard_lc) 315 n_lactations, n_days = centered_matrix.shape 316 observed_indices = [np.where(~np.isnan(centered_matrix[i]))[0] for i in range(n_lactations)] 317 318 def negative_log_likelihood(params: np.ndarray) -> float: 319 p_b1, p_b2, p_rho = params 320 b1 = float(np.exp(p_b1)) 321 b2 = float(np.exp(p_b2)) 322 rho = float(1 / (1 + np.exp(-p_rho))) # now rho in (0,1) 323 correlation_matrix = build_covariance_matrix(rho, n_days) 324 325 total = 0.0 326 for lactation_idx, day_indices in enumerate(observed_indices): 327 observation_count = len(day_indices) 328 if observation_count == 0: 329 continue 330 331 observations = centered_matrix[lactation_idx, day_indices] 332 correlation_subset = correlation_matrix[np.ix_(day_indices, day_indices)] 333 sigma = b1 * np.eye(observation_count) + b2 * correlation_subset 334 335 # Numerical safeguards: try Cholesky and penalize non-PD parameters. 336 try: 337 cholesky_factor, lower = cho_factor(sigma, check_finite=False) 338 solution = cho_solve((cholesky_factor, lower), observations, check_finite=False) 339 except LinAlgError: 340 # penalty for non-PD 341 return float(1e12 + np.sum(np.abs(params))) 342 343 quadratic_form = float(observations @ solution) 344 log_determinant = 2.0 * np.sum(np.log(np.diag(cholesky_factor))) 345 total += 0.5 * ( 346 log_determinant + quadratic_form + observation_count * np.log(2 * np.pi) 347 ) 348 349 # return total negative log-likelihood 350 return float(total) 351 352 # initial guesses and optimization. A 50/50 split in variance is assumed as starting point 353 initial_variance = max(float(np.nanvar(centered_matrix)), 1e-6) 354 initial_params = [ 355 np.log(0.5 * initial_variance), 356 np.log(0.5 * initial_variance), 357 0.5, 358 ] 359 360 result = minimize( 361 negative_log_likelihood, 362 x0=initial_params, 363 method="L-BFGS-B", 364 options={"maxiter": 2000, "ftol": 1e-8}, 365 ) 366 367 if not result.success: 368 print(f"Optimization warning: {result.message}") 369 370 log_b1_hat, log_b2_hat, logit_rho_hat = result.x 371 b1_hat = float(np.exp(log_b1_hat)) 372 b2_hat = float(np.exp(log_b2_hat)) 373 rho_hat = float(1 / (1 + np.exp(-logit_rho_hat))) 374 correlation_matrix = build_covariance_matrix(rho_hat, n_days) 375 covariance_matrix = b1_hat * np.eye(n_days) + b2_hat * correlation_matrix 376 377 # convert to correlation matrix 378 std = np.sqrt(np.diag(covariance_matrix)) 379 correlation_matrix = covariance_matrix / np.outer(std, std) 380 381 return { 382 "B_hat": covariance_matrix, 383 "R_hat": correlation_matrix, 384 "b1": b1_hat, 385 "b2": b2_hat, 386 "rho": rho_hat, 387 }
Estimate covariance parameters for best prediction.
The model is B = b1 * I + b2 * E where E is an AR(1) correlation
matrix. Parameters are optimized in transformed space and mapped back to
enforce b1 > 0, b2 > 0, and 0 < rho < 1.
Arguments:
- df: Reference milk-recording dataframe.
- standard_lc: Population mean curve used for centering.
Returns:
Dictionary with:
"B_hat": fitted covariance matrix."R_hat": correlation matrix derived fromB_hat."b1","b2","rho": fitted scalar parameters.
211def fit_standard_lc(df: pd.DataFrame, lc_model: str = "Wood") -> np.ndarray: 212 """Fit a population-level standard lactation curve. 213 214 The curve is fit with the package's frequentist Wood model and returned on 215 the fixed day-1..305 grid. 216 217 Args: 218 df: Reference dataframe containing ``DaysInMilk`` and ``MilkingYield``. 219 lc_model: The lactation curve model to fit. 220 Default is the "Wood" lactation curve model. 221 222 Returns: 223 A NumPy array of expected daily milk yield for days 1..305. 224 225 Notes: 226 This mean curve acts as the baseline in best prediction. Individual 227 lactations are represented as deviations around this population profile. 228 """ 229 standard_lc = pd.Series( 230 fit_lactation_curve( 231 df["DaysInMilk"].values, 232 df["MilkingYield"].values, 233 model=lc_model, 234 fitting="frequentist", 235 ), 236 index=range(1, 306), 237 ) 238 239 return standard_lc.to_numpy(dtype=float)
Fit a population-level standard lactation curve.
The curve is fit with the package's frequentist Wood model and returned on the fixed day-1..305 grid.
Arguments:
- df: Reference dataframe containing
DaysInMilkandMilkingYield. - lc_model: The lactation curve model to fit.
- Default is the "Wood" lactation curve model.
Returns:
A NumPy array of expected daily milk yield for days 1..305.
Notes:
This mean curve acts as the baseline in best prediction. Individual lactations are represented as deviations around this population profile.
703def interpolation_standard_lc( 704 group: pd.DataFrame, 705 days_in_milk_col: str, 706 milking_yield_col: str, 707 standard_lc: pd.Series | npt.NDArray[np.float64] | None = None, 708 small_grid: bool = False, 709) -> pd.DataFrame: 710 """Interpolate a single lactation onto the DIM grid guided by a standard curve. 711 712 The function interpolates measured yields for one animal onto the 713 predefined grid of DIM values. When a grid day coincides with a measured 714 DIM the observed yield is used; otherwise the interpolation adjusts the 715 linear slope between neighboring measurements by the difference in the 716 standard curve, inspired by the CRV E2 approach. 717 718 The interpolation formula applied for a grid day ``gday`` between two 719 measured points (x1,y1) and (x2,y2) is:: 720 721 slope = ((y2 - y1) - (g2 - g1)) / (x2 - x1) 722 yi = gi + (slope * (gday - x1) + (y1 - g1)) 723 724 Args: 725 group: DataFrame with test-day records for a single animal. Must 726 contain columns named by ``days_in_milk_col`` and 727 ``milking_yield_col``. 728 days_in_milk_col: Name of the DIM column in ``group``. 729 milking_yield_col: Name of the milk-yield column in ``group``. 730 standard_lc: A Series indexed by DIM providing the expected yield on 731 each grid day; used to guide the interpolation shape. 732 small_grid: If True, interpolate on GRID_DAYS only; otherwise include 733 day 0 and day 305 in the interpolation target grid. 734 735 Returns: 736 A DataFrame with one row per interpolated grid day containing the 737 original identifying columns (from the first row of ``group``), plus 738 the columns ``GridDay`` and ``MilkYieldInterp``. 739 740 Notes: 741 - No interpolation is performed outside the range bounded by the 742 first and last measured DIM for the lactation (those grid days are 743 skipped). 744 - No interpolation is performed for grid days where the right 745 neighboring measured DIM is >= 305, because this lies outside the 746 intended standard-curve support. 747 - The implementation defines a local default ``grid`` of 748 [10, 30, ..., 290]. 749 750 751 """ 752 if small_grid: 753 grid = GRID_DAYS 754 else: 755 # add zero and 305 to the grid 756 grid = [0] + GRID_DAYS + [305] 757 758 if standard_lc is None: 759 raise ValueError("A standard lactation curve is required for interpolation_standard_lc.") 760 761 standard_lc_series = _curve_to_series(standard_lc) 762 763 # Sort and ensure unique DIMs 764 group = group.sort_values(days_in_milk_col).drop_duplicates(days_in_milk_col) 765 dims = group[days_in_milk_col].tolist() 766 767 rows = [] 768 769 # loop over the grid days 770 for gday in grid: 771 # --- CASE 1: Exact measured day --------------------------- 772 if gday in dims: 773 y_val = group.loc[group[days_in_milk_col] == gday, milking_yield_col].iloc[0] 774 775 row = group.iloc[0].to_dict() 776 row["GridDay"] = gday 777 row["MilkYieldInterp"] = float(y_val) 778 rows.append(row) 779 continue 780 781 # --- CASE 2: interpolate between measured days ------------- 782 before_df = cast(pd.DataFrame, group.loc[group[days_in_milk_col] < gday]) 783 after_df = cast(pd.DataFrame, group.loc[group[days_in_milk_col] > gday]) 784 before = before_df.tail(1) 785 after = after_df.head(1) 786 787 # CASE 3 No interpolation if grid day is outside the range of measured DIM 788 if before.empty or after.empty: 789 continue 790 791 # CASE 4: No interpolation if the day after is day 305 or higher, 792 # as this is outside the range of the standard curve 793 if float(after.iloc[0][days_in_milk_col]) >= 305: 794 continue 795 796 x1 = float(before.iloc[0][days_in_milk_col]) 797 y1 = float(before.iloc[0][milking_yield_col]) 798 x2 = float(after.iloc[0][days_in_milk_col]) 799 y2 = float(after.iloc[0][milking_yield_col]) 800 801 # expected yields from standard lactation curve 802 g1 = _curve_value(standard_lc_series, int(x1)) 803 g2 = _curve_value(standard_lc_series, int(x2)) 804 gi = _curve_value(standard_lc_series, int(gday)) 805 806 # Wilmink-based interpolation formula 807 slope = ((y2 - y1) - (g2 - g1)) / (x2 - x1) 808 yi = gi + (slope * (gday - x1) + (y1 - g1)) 809 810 row = group.iloc[0].to_dict() 811 row["GridDay"] = gday 812 row["MilkYieldInterp"] = float(yi) 813 rows.append(row) 814 815 if not rows: 816 return pd.DataFrame( 817 columns=pd.Index([*group.columns.to_list(), "GridDay", "MilkYieldInterp"]) 818 ) 819 820 return pd.DataFrame(rows)
Interpolate a single lactation onto the DIM grid guided by a standard curve.
The function interpolates measured yields for one animal onto the predefined grid of DIM values. When a grid day coincides with a measured DIM the observed yield is used; otherwise the interpolation adjusts the linear slope between neighboring measurements by the difference in the standard curve, inspired by the CRV E2 approach.
The interpolation formula applied for a grid day gday between two
measured points (x1,y1) and (x2,y2) is::
slope = ((y2 - y1) - (g2 - g1)) / (x2 - x1)
yi = gi + (slope * (gday - x1) + (y1 - g1))
Arguments:
- group: DataFrame with test-day records for a single animal. Must
contain columns named by
days_in_milk_colandmilking_yield_col. - days_in_milk_col: Name of the DIM column in
group. - milking_yield_col: Name of the milk-yield column in
group. - standard_lc: A Series indexed by DIM providing the expected yield on each grid day; used to guide the interpolation shape.
- small_grid: If True, interpolate on GRID_DAYS only; otherwise include day 0 and day 305 in the interpolation target grid.
Returns:
A DataFrame with one row per interpolated grid day containing the original identifying columns (from the first row of
group), plus the columnsGridDayandMilkYieldInterp.
Notes:
- No interpolation is performed outside the range bounded by the first and last measured DIM for the lactation (those grid days are skipped). - No interpolation is performed for grid days where the right neighboring measured DIM is >= 305, because this lies outside the intended standard-curve support.
- The implementation defines a local default
gridof [10, 30, ..., 290].
95def lactation_curve_characteristic_function( 96 model="wood", characteristic=None, lactation_length=305 97) -> tuple: 98 """Build (or fetch from cache) a symbolic expression and fast numeric function for an LCC. 99 100 This function derives the requested **lactation curve characteristic** for a given 101 model using SymPy (derivative / root finding / integration). It returns the symbolic 102 expression, the tuple of parameter symbols (argument order), and a lambdified 103 numeric function that can be evaluated with numerical parameters. 104 105 The symbolic derivation and integration are done only once per (model, characteristic) 106 and then **cached** for reuse. 107 108 Args: 109 model (str): Model name. Options: 110 'milkbot', 'wood', 'wilmink', 'ali_schaeffer', 'fischer', 111 'brody', 'sikka', 'nelder', 'dhanoa', 'emmans', 'hayashi', 112 'rook', 'dijkstra', 'prasad'. 113 characteristic (str | None): Desired characteristic. Options: 114 'time_to_peak', 'peak_yield', 'cumulative_milk_yield', 'persistency'. 115 If `None` or unrecognized, a dict of all available characteristics is returned 116 (with `persistency` possibly `None` if derivation is not feasible). 117 lactation_length (int): Length of lactation in days used in persistency 118 and cumulative milk yield computation (default 305). 119 120 Returns: 121 tuple: 122 expr: SymPy expression (or dict of expressions if `characteristic` is None). 123 params: Tuple of SymPy symbols for model parameters (argument order). 124 func: Lambdified numeric function `f(*params)` (or dict of functions). 125 126 Raises: 127 Exception: If the model is unknown, or if no positive real solution for 128 peak timing/yield exists where required. In this case None is returned 129 for the characteristic, 130 and the function can be evaluated with numeric methods as a fallback. 131 """ 132 # check the cache 133 storage = (model, characteristic) 134 if storage in _LCC_CACHE: 135 return ( 136 _LCC_CACHE[storage]["expr"], 137 _LCC_CACHE[storage]["params"], 138 _LCC_CACHE[storage]["func"], 139 ) 140 141 # make sure model is all lowercase 142 model = model.lower() 143 144 # define functions 145 if model == "brody": 146 # === BRODY 1 === 147 a, b, k1, k2, t = symbols("a b k1 k2 t", real=True, positive=True) 148 function = a * exp(-k1 * t) - b * exp(-k2 * t) 149 150 elif model == "sikka": 151 # === SIKKA === 152 a, b, c, t = symbols("a b c t", real=True, positive=True) 153 function = a * exp(b * t - c * t**2) 154 155 elif model == "fischer": 156 # === FISCHER === 157 a, b, c, t = symbols("a b c t", real=True, positive=True) 158 function = a - b * t - a * exp(-c * t) 159 160 elif model == "nelder": 161 # === NELDER === 162 a, b, c, t = symbols("a b c t", real=True, positive=True) 163 function = t / (a + b * t + c * t**2) 164 165 elif model == "wood": 166 # === WOOD === 167 a, b, c, t = symbols("a b c t", real=True, positive=True) 168 function = a * t**b * exp(-c * t) 169 170 elif model == "dhanoa": 171 # === DHANOA === 172 a, b, c, t = symbols("a b c t", real=True, positive=True) 173 function = a * t ** (b * c) * exp(-c * t) 174 175 elif model == "emmans": 176 # === EMMANS === 177 a, b, c, d, t = symbols("a b c d t", real=True, positive=True) 178 function = a * exp(-exp(d - b * t)) * exp(-c * t) 179 180 elif model == "ali_schaeffer": 181 # ====ALI===== 182 a, b, c, d, k, t = symbols("a b c d k t", real=True) 183 184 t1 = t / 305 185 w1 = log(305 / t) 186 187 function = a + b * t1 + c * t1**2 + d * w1 + k * w1**2 188 189 elif model == "wilmink": 190 # === WILMINK === 191 a, c, k, t = symbols("a c k t", real=True, positive=True) 192 b = symbols("b", real=True) # b can be negative in Wilmink 193 function = a + b * exp(-k * t) - c * t 194 195 elif model == "hayashi": 196 # === HAYASHI === 197 a, b, c, t = symbols("a b c t", real=True, positive=True) 198 function = b * (exp(-t / c) - exp(-t / (a * c))) 199 200 elif model == "rook": 201 # === ROOK === 202 a, b, c, d, t = symbols("a b c d t", real=True, positive=True) 203 function = a * (1 / (1 + b / (c + t))) * exp(-d * t) 204 205 elif model == "dijkstra": 206 # === DIJKSTRA === 207 a, b, c, d, t = symbols("a b c d t", real=True, positive=True) 208 function = a * exp((b * (1 - exp(-c * t)) / c) - d * t) 209 210 elif model == "prasad": 211 # === PRASAD === 212 a, b, c, d, t = symbols("a b c d t", real=True, positive=True) 213 function = a + b * t + c * t**2 + d / t 214 215 elif model == "milkbot": 216 # === MILKBOT === 217 a, b, c, d, t = symbols("a b c d t", real=True, positive=True) 218 function = a * (1 - exp((c - t) / b) / 2) * exp(-d * t) 219 220 else: 221 raise Exception("Unknown model") 222 223 # solve derivative for when it is zero to find the function for time of peak 224 fdiff = diff(function, t) 225 226 tpeak = None # default 227 228 valid_roots = [] 229 230 try: 231 candidate_roots = solve(fdiff, t) 232 233 for r in candidate_roots: 234 r_simplified = simplify(r) 235 236 if r_simplified.is_real is not False: 237 valid_roots.append(r_simplified) 238 239 except Exception: 240 valid_roots = [] 241 242 tpeak = valid_roots[0] if valid_roots else None 243 244 # override to prevent sympy error 245 if model == "dijkstra": 246 tpeak = simplify(log(b / d) / c) 247 248 # define the end of lactation 249 T = lactation_length # days in milk 250 251 # Persistency = average slope after peak, does not work for all models so therefore try except 252 persistency = None 253 try: 254 if tpeak: 255 tmp = (function.subs(t, T) - function.subs(t, tpeak)) / (T - tpeak) 256 tmp = tmp.cancel() # light simplification 257 if is_valid_sympy_expr(tmp): 258 persistency = tmp 259 except Exception: 260 persistency = None 261 262 if characteristic != "cumulative_milk_yield": 263 if tpeak: # Check if the list is not empty 264 peak_expr = simplify(function.subs(t, tpeak)) 265 else: 266 raise Exception("No real solution for time to peak and peak yield found") 267 268 # find function for cumulative milk yield of the lactation. 269 # A lactation length of 305 is the default, but this value can 270 # also be provided as a parameter in the characteristic function. 271 cum_my_expr = integrate(function, (t, 0, lactation_length), meijerg=False) 272 273 if cum_my_expr == 0: 274 cum_my_expr = Integral(function, (t, 0, lactation_length)) 275 276 # Sorted parameter list (exclude t) 277 params = tuple( 278 sorted([s for s in function.free_symbols if s.name != "t"], key=lambda x: x.name) 279 ) 280 281 # ---------------------------------------------------- 282 # Select requested characteristic 283 # ---------------------------------------------------- 284 if characteristic == "time_to_peak": 285 expr = tpeak 286 elif characteristic == "peak_yield": 287 expr = peak_expr 288 elif characteristic == "cumulative_milk_yield": 289 expr = cum_my_expr 290 elif characteristic == "persistency": 291 if persistency is None: 292 raise Exception("Persistency could not be computed symbolically") 293 expr = persistency 294 else: 295 # Return all four if None or 'all' 296 expr = { 297 "time_to_peak": tpeak, 298 "peak_yield": peak_expr, 299 "persistency": persistency, # possibly None 300 "cumulative_milk_yield": cum_my_expr, 301 } 302 303 # ---------------------------------------------------- 304 # Build fast numeric function with lambdify 305 # ---------------------------------------------------- 306 if isinstance(expr, dict): 307 func = { 308 name: lambdify(params, ex, modules=["numpy", "scipy"]) 309 for name, ex in expr.items() 310 if ex is not None 311 } 312 else: 313 func = lambdify(params, expr, modules=["numpy", "scipy"]) 314 315 # ---------------------------------------------------- 316 # Store in cache 317 # ---------------------------------------------------- 318 _LCC_CACHE[storage] = {"expr": expr, "params": params, "func": func} 319 320 return expr, params, func
Build (or fetch from cache) a symbolic expression and fast numeric function for an LCC.
This function derives the requested lactation curve characteristic for a given model using SymPy (derivative / root finding / integration). It returns the symbolic expression, the tuple of parameter symbols (argument order), and a lambdified numeric function that can be evaluated with numerical parameters.
The symbolic derivation and integration are done only once per (model, characteristic) and then cached for reuse.
Arguments:
- model (str): Model name. Options: 'milkbot', 'wood', 'wilmink', 'ali_schaeffer', 'fischer', 'brody', 'sikka', 'nelder', 'dhanoa', 'emmans', 'hayashi', 'rook', 'dijkstra', 'prasad'.
- characteristic (str | None): Desired characteristic. Options:
'time_to_peak', 'peak_yield', 'cumulative_milk_yield', 'persistency'.
If
Noneor unrecognized, a dict of all available characteristics is returned (withpersistencypossiblyNoneif derivation is not feasible). - lactation_length (int): Length of lactation in days used in persistency and cumulative milk yield computation (default 305).
Returns:
tuple: expr: SymPy expression (or dict of expressions if
characteristicis None). params: Tuple of SymPy symbols for model parameters (argument order). func: Lambdified numeric functionf(*params)(or dict of functions).
Raises:
- Exception: If the model is unknown, or if no positive real solution for peak timing/yield exists where required. In this case None is returned for the characteristic, and the function can be evaluated with numeric methods as a fallback.
823def linear_interpd_all_to_grid( 824 group: pd.DataFrame, 825 days_in_milk_col: str, 826 milking_yield_col: str, 827 standard_lc: pd.Series | None = None, 828 small_grid: bool = False, 829) -> pd.DataFrame: 830 """Linearly interpolate all grid days for a lactation. 831 832 This helper uses linear interpolation (with extrapolation) to produce 833 milk-yield values for every grid day regardless of whether the grid day 834 lies between measured observations. 835 836 Args: 837 group: DataFrame containing measured DIM and yield values for one 838 lactation. 839 days_in_milk_col: Name of the DIM column. 840 milking_yield_col: Name of the milk-yield column. 841 small_grid: If True, interpolate on GRID_DAYS only; otherwise include 842 day 0 and day 305. 843 844 Returns: 845 A DataFrame with identifying columns copied from ``group``'s first 846 row and columns ``GridDay`` and ``MilkYieldInterp`` containing the 847 interpolated (or extrapolated) yields for every grid day. 848 """ 849 if small_grid: 850 grid = GRID_DAYS 851 else: 852 # add zero and 305 to the grid 853 grid = [0] + GRID_DAYS + [305] 854 855 group = group.sort_values(days_in_milk_col).drop_duplicates(subset=days_in_milk_col) 856 f = interp1d( 857 group[days_in_milk_col], 858 group[milking_yield_col], 859 kind="linear", 860 fill_value=cast(Any, "extrapolate"), 861 ) 862 863 base = { 864 col: group[col].iloc[0] 865 for col in group.columns 866 if col not in [days_in_milk_col, milking_yield_col] 867 } 868 base["GridDay"] = grid 869 base["MilkYieldInterp"] = f(grid) 870 871 return pd.DataFrame(base)
Linearly interpolate all grid days for a lactation.
This helper uses linear interpolation (with extrapolation) to produce milk-yield values for every grid day regardless of whether the grid day lies between measured observations.
Arguments:
- group: DataFrame containing measured DIM and yield values for one lactation.
- days_in_milk_col: Name of the DIM column.
- milking_yield_col: Name of the milk-yield column.
- small_grid: If True, interpolate on GRID_DAYS only; otherwise include day 0 and day 305.
Returns:
A DataFrame with identifying columns copied from
group's first row and columnsGridDayandMilkYieldInterpcontaining the interpolated (or extrapolated) yields for every grid day.
876def linear_interpd_closest_to_grid( 877 group: pd.DataFrame, 878 days_in_milk_col: str, 879 milking_yield_col: str, 880 standard_lc: pd.Series | None = None, 881 small_grid: bool = False, 882) -> pd.DataFrame | None: 883 """Linearly interpolate grid days between measured observations. 884 885 This helper returns interpolated yields only for grid days that lie 886 between the first and last measured DIM for the lactation. If no grid 887 days fall within the measured range the function returns ``None``. 888 889 Args: 890 group: DataFrame for a single lactation. 891 days_in_milk_col: Name of the DIM column. 892 milking_yield_col: Name of the milk-yield column. 893 small_grid: If True, use GRID_DAYS only; otherwise include 894 day 0 and day 305. 895 896 Returns: 897 A DataFrame with identifying columns plus ``GridDay`` and 898 ``MilkYieldInterp``, or ``None`` if there are no grid days within 899 the measured range. 900 """ 901 if small_grid: 902 grid = GRID_DAYS 903 else: 904 # add zero and 305 to the grid 905 grid = [0] + GRID_DAYS + [305] 906 907 group = group.sort_values(days_in_milk_col).drop_duplicates(subset=days_in_milk_col) 908 909 # Find the range of interpolatable DIM 910 min_dim = group[days_in_milk_col].min() 911 max_dim = group[days_in_milk_col].max() 912 grid_in_range = [g for g in grid if min_dim <= g <= max_dim] 913 914 # apply linear interpolation 915 if not grid_in_range: 916 return None 917 f = interp1d( 918 group[days_in_milk_col], 919 group[milking_yield_col], 920 kind="linear", 921 fill_value=cast(Any, np.nan), 922 ) 923 924 # create a new dataframe with the newly created columns 925 base = { 926 col: group[col].iloc[0] 927 for col in group.columns 928 if col not in [days_in_milk_col, milking_yield_col] 929 } 930 base["GridDay"] = grid_in_range 931 base["MilkYieldInterp"] = f(grid_in_range) 932 933 return pd.DataFrame(base)
Linearly interpolate grid days between measured observations.
This helper returns interpolated yields only for grid days that lie
between the first and last measured DIM for the lactation. If no grid
days fall within the measured range the function returns None.
Arguments:
- group: DataFrame for a single lactation.
- days_in_milk_col: Name of the DIM column.
- milking_yield_col: Name of the milk-yield column.
- small_grid: If True, use GRID_DAYS only; otherwise include day 0 and day 305.
Returns:
A DataFrame with identifying columns plus
GridDayandMilkYieldInterp, orNoneif there are no grid days within the measured range.
720def numeric_cumulative_yield( 721 dim, milkrecordings, model, fitting="frequentist", lactation_length=305, **kwargs 722) -> float: 723 """Compute cumulative milk yield numerically over a given horizon. 724 725 Adds up the fitted milk yield for the first `lactation_length` days of the 726 predicted yield curve. 727 728 Args: 729 dim: DIM values. 730 milkrecordings: Milk recordings (kg). 731 model: Model name. 732 fitting: 'frequentist' or 'bayesian'. 733 lactation_length: Number of days to integrate (default 305). 734 **kwargs: Additional arguments forwarded to `fit_lactation_curve`. 735 736 Returns: 737 float: Cumulative milk yield over the specified horizon. 738 """ 739 y = fit_lactation_curve(dim, milkrecordings, model, fitting=fitting, **kwargs) 740 return float(np.trapezoid(y[:lactation_length], dx=1))
Compute cumulative milk yield numerically over a given horizon.
Adds up the fitted milk yield for the first lactation_length days of the
predicted yield curve.
Arguments:
- dim: DIM values.
- milkrecordings: Milk recordings (kg).
- model: Model name.
- fitting: 'frequentist' or 'bayesian'.
- lactation_length: Number of days to integrate (default 305).
- **kwargs: Additional arguments forwarded to
fit_lactation_curve.
Returns:
float: Cumulative milk yield over the specified horizon.
743def numeric_peak_yield( 744 dim, 745 milkrecordings, 746 model, 747 fitting="frequentist", 748 key=None, 749 parity=3, 750 breed="H", 751 custom_priors=None, 752 milk_unit="kg", 753 continent="USA", 754) -> float: 755 """Compute peak yield numerically from the fitted curve. 756 757 Args: 758 dim: DIM values. 759 milkrecordings: Milk recordings (kg). 760 model: Model name. 761 fitting: 'frequentist' or 'bayesian'. 762 key: API key for MilkBot (Bayesian). 763 parity: Parity for Bayesian fitting. 764 breed: Breed for Bayesian fitting ('H' or 'J'). 765 continent: Prior source for Bayesian ('USA', 'EU', 'CHEN'). 766 767 Returns: 768 float: Maximum predicted milk yield. 769 """ 770 # Fit the curve to get predicted milk yields 771 yields = fit_lactation_curve( 772 dim, 773 milkrecordings, 774 model, 775 fitting=fitting, 776 key=key, 777 parity=parity, 778 breed=breed, 779 custom_priors=custom_priors, 780 milk_unit=milk_unit, 781 continent=continent, 782 ) 783 # Find the peak yield 784 peak_yield = np.max(yields) 785 return peak_yield
Compute peak yield numerically from the fitted curve.
Arguments:
- dim: DIM values.
- milkrecordings: Milk recordings (kg).
- model: Model name.
- fitting: 'frequentist' or 'bayesian'.
- key: API key for MilkBot (Bayesian).
- parity: Parity for Bayesian fitting.
- breed: Breed for Bayesian fitting ('H' or 'J').
- continent: Prior source for Bayesian ('USA', 'EU', 'CHEN').
Returns:
float: Maximum predicted milk yield.
665def numeric_time_to_peak( 666 dim, 667 milkrecordings, 668 model, 669 fitting="frequentist", 670 key=None, 671 parity=3, 672 breed="H", 673 custom_priors=None, 674 milk_unit="kg", 675 continent="USA", 676) -> int: 677 """Compute time to peak using a numeric approach. 678 679 Fits the curve (frequentist or Bayesian), evaluates the predicted yields, 680 and returns the DIM corresponding to the maximum predicted yield. 681 682 Args: 683 dim: DIM values. 684 milkrecordings: Milk recordings (kg). 685 model: Model name. 686 fitting: 'frequentist' or 'bayesian'. 687 key: API key for MilkBot (Bayesian). 688 parity: Parity for Bayesian fitting. 689 breed: Breed for Bayesian fitting ('H' or 'J'). 690 custom_priors: provide your own priors for Bayesian fitting. 691 provide as dictionary using build_prior() function to set 692 the priors in the right format. Alternative use priors from 693 the literature provided by the string command 'CHEN' 694 milk_unit: Unit of milk recordings ('kg' or 'lb') for Bayesian fitting. 695 continent: Prior source for Bayesian ('USA', 'EU'). 696 697 698 Returns: 699 int: DIM at which the curve attains its maximum (1-indexed). 700 """ 701 # Fit the curve to get predicted milk yields 702 yields = fit_lactation_curve( 703 dim, 704 milkrecordings, 705 model, 706 fitting=fitting, 707 key=key, 708 parity=parity, 709 breed=breed, 710 custom_priors=custom_priors, 711 milk_unit=milk_unit, 712 continent=continent, 713 ) 714 # Find the index of the peak yield 715 peak_idx = np.argmax(yields) 716 # Return the corresponding DIM 717 return int(peak_idx + 1) # +1 because DIM starts at 1, not 0
Compute time to peak using a numeric approach.
Fits the curve (frequentist or Bayesian), evaluates the predicted yields, and returns the DIM corresponding to the maximum predicted yield.
Arguments:
- dim: DIM values.
- milkrecordings: Milk recordings (kg).
- model: Model name.
- fitting: 'frequentist' or 'bayesian'.
- key: API key for MilkBot (Bayesian).
- parity: Parity for Bayesian fitting.
- breed: Breed for Bayesian fitting ('H' or 'J').
- custom_priors: provide your own priors for Bayesian fitting. provide as dictionary using build_prior() function to set the priors in the right format. Alternative use priors from the literature provided by the string command 'CHEN'
- milk_unit: Unit of milk recordings ('kg' or 'lb') for Bayesian fitting.
- continent: Prior source for Bayesian ('USA', 'EU').
Returns:
int: DIM at which the curve attains its maximum (1-indexed).
813def persistency_fitted_curve( 814 dim, 815 milkrecordings, 816 model, 817 fitting="frequentist", 818 key=None, 819 parity=3, 820 breed="H", 821 custom_priors=None, 822 milk_unit="kg", 823 continent="USA", 824 lactation_length=305, 825) -> float: 826 """Persistency as the average slope after peak until end of lactation (numeric). 827 828 This is the default approach because symbolic derivation is not feasible for 829 all models. It computes: 830 `(yield_at_end - yield_at_peak) / (lactation_length - time_to_peak)` 831 832 Args: 833 dim: DIM values. 834 milkrecordings: Milk recordings (kg). 835 model: Model name. Options include 'milkbot' (Bayesian or frequentist), 836 'wood', 'wilmink', 'ali_schaeffer', 'fischer'. 837 fitting: 'frequentist' or 'bayesian'. 838 key: API key (only for Bayesian fitting). 839 parity: Parity of the cow; values above 3 treated as 3 (Bayesian). 840 breed: 'H' or 'J' (Bayesian). 841 custom_priors: provide your own priors for Bayesian fitting. 842 provide as dictionary using build_prior() function to 843 create the priors in the right format. Alternative use 844 priors from the literature provided by the string 845 command 'CHEN' 846 milk_unit: Unit of milk recordings ('kg' or 'lb') for Bayesian fitting. 847 continent: 'USA', 'EU', or 'CHEN' (Bayesian). 848 lactation_length (int | str): 305 (default), 'max' (use max DIM), or integer. 849 850 Returns: 851 float: Average slope after the peak until end of lactation. 852 """ 853 # calculate time to peak 854 t_peak = numeric_time_to_peak( 855 dim, 856 milkrecordings, 857 model, 858 fitting=fitting, 859 key=key, 860 parity=parity, 861 breed=breed, 862 custom_priors=custom_priors, 863 milk_unit=milk_unit, 864 continent=continent, 865 ) 866 867 # determine lactation length 868 if lactation_length == "max": 869 lactation_length = max(dim) 870 elif isinstance(lactation_length, int): 871 lactation_length = lactation_length 872 else: 873 lactation_length = 305 874 875 # calculate milk yield at peak 876 yields = fit_lactation_curve( 877 dim, 878 milkrecordings, 879 model, 880 fitting=fitting, 881 key=key, 882 parity=parity, 883 breed=breed, 884 custom_priors=custom_priors, 885 milk_unit=milk_unit, 886 continent=continent, 887 ) 888 peak_yield = yields[int(t_peak) - 1] # -1 to prevent index error 889 # calculate milk yield at end of lactation 890 end_yield = yields[int(lactation_length) - 1] # -1 to prevent index error 891 892 # calculate persistency 893 persistency = (end_yield - peak_yield) / (lactation_length - t_peak) 894 return persistency
Persistency as the average slope after peak until end of lactation (numeric).
This is the default approach because symbolic derivation is not feasible for
all models. It computes:
(yield_at_end - yield_at_peak) / (lactation_length - time_to_peak)
Arguments:
- dim: DIM values.
- milkrecordings: Milk recordings (kg).
- model: Model name. Options include 'milkbot' (Bayesian or frequentist), 'wood', 'wilmink', 'ali_schaeffer', 'fischer'.
- fitting: 'frequentist' or 'bayesian'.
- key: API key (only for Bayesian fitting).
- parity: Parity of the cow; values above 3 treated as 3 (Bayesian).
- breed: 'H' or 'J' (Bayesian).
- custom_priors: provide your own priors for Bayesian fitting. provide as dictionary using build_prior() function to create the priors in the right format. Alternative use priors from the literature provided by the string command 'CHEN'
- milk_unit: Unit of milk recordings ('kg' or 'lb') for Bayesian fitting.
- continent: 'USA', 'EU', or 'CHEN' (Bayesian).
- lactation_length (int | str): 305 (default), 'max' (use max DIM), or integer.
Returns:
float: Average slope after the peak until end of lactation.
801def persistency_milkbot(d) -> float: 802 """Persistency from the MilkBot model (Ehrlich, 2013): `Persistency = 0.693 / d`. 803 804 Args: 805 d (float): Parameter `d` of the MilkBot model. 806 807 Returns: 808 float: Persistency value from the MilkBot formula. 809 """ 810 return 0.693 / d
Persistency from the MilkBot model (Ehrlich, 2013): Persistency = 0.693 / d.
Arguments:
- d (float): Parameter
dof the MilkBot model.
Returns:
float: Persistency value from the MilkBot formula.
788def persistency_wood(b, c) -> float: 789 """Persistency from Wood et al. (1984): `Persistency = -(b + 1) * ln(c)`. 790 791 Args: 792 b (float): Parameter `b` of the Wood model. 793 c (float): Parameter `c` of the Wood model. 794 795 Returns: 796 float: Persistency value from the Wood formula. 797 """ 798 return float(-(b + 1) * ln(c))
Persistency from Wood et al. (1984): Persistency = -(b + 1) * ln(c).
Arguments:
- b (float): Parameter
bof the Wood model. - c (float): Parameter
cof the Wood model.
Returns:
float: Persistency value from the Wood formula.
183def pivot_milk_recordings_to_matrix(df: pd.DataFrame) -> np.ndarray: 184 """Convert long-format recordings to a fixed 305-day matrix. 185 186 Rows represent lactations (``TestId``) and columns represent days in milk 187 from 1 through 305. Missing observations are kept as ``NaN``. 188 189 Args: 190 df: Dataframe with ``TestId``, ``DaysInMilk``, and ``MilkingYield``. 191 192 Returns: 193 A NumPy array of shape ``(n_lactations, 305)``. 194 """ 195 # ensure sorting 196 df = df.sort_values(["TestId", "DaysInMilk"]) 197 198 # pivot to wide format 199 milk_recordings_pivot = df.pivot_table( 200 index="TestId", columns="DaysInMilk", values="MilkingYield" 201 ) 202 203 # enforce fixed 305-day grid alignment used by best-prediction 204 milk_recordings_pivot = milk_recordings_pivot.reindex(columns=range(1, 306)) 205 206 # convert to numpy matrix 207 Y = milk_recordings_pivot.to_numpy() 208 return Y
Convert long-format recordings to a fixed 305-day matrix.
Rows represent lactations (TestId) and columns represent days in milk
from 1 through 305. Missing observations are kept as NaN.
Arguments:
- df: Dataframe with
TestId,DaysInMilk, andMilkingYield.
Returns:
A NumPy array of shape
(n_lactations, 305).
393def preprocess_measured_data(lactation: pd.DataFrame, standard_lc: np.ndarray) -> pd.Series: 394 """Build a 305-day deviation vector for a single lactation. 395 396 For observed days, this computes ``MilkingYield - standard_lc[day]``. 397 The result is reindexed to days 1..305 with unobserved days filled as zero. 398 399 Args: 400 lactation: Single-lactation dataframe with ``DaysInMilk`` and 401 ``MilkingYield``. 402 standard_lc: Expected daily milk yield profile. 403 404 Returns: 405 A Series indexed by day 1..305 containing milk-yield deviations. 406 """ 407 408 # calculate the difference between the expected (population mean) and measured milk yield 409 410 # extract the expected milk yields for the measured DaysInMilk in the df 411 day_idx = lactation["DaysInMilk"].to_numpy(dtype=int) - 1 412 expected = np.asarray(standard_lc, dtype=float)[day_idx] 413 414 # Subtract 415 lactation["MilkDifference"] = lactation["MilkingYield"].to_numpy(dtype=float) - expected 416 417 # Create a Series of length 305 with missing values = 0 418 milk_difference = cast(pd.Series, lactation.set_index("DaysInMilk")["MilkDifference"]) 419 corrected_series = milk_difference.reindex(range(1, 306), fill_value=0) 420 421 return corrected_series
Build a 305-day deviation vector for a single lactation.
For observed days, this computes MilkingYield - standard_lc[day].
The result is reindexed to days 1..305 with unobserved days filled as zero.
Arguments:
- lactation: Single-lactation dataframe with
DaysInMilkandMilkingYield. - standard_lc: Expected daily milk yield profile.
Returns:
A Series indexed by day 1..305 containing milk-yield deviations.
88def test_interval_method( 89 df: pd.DataFrame, 90 days_in_milk_col: str | None = None, 91 milking_yield_col: str | None = None, 92 test_id_col: str | None = None, 93 default_test_id: int = 0, 94 max_dim: int = 305, 95) -> pd.DataFrame: 96 """Compute total lactation milk yield using the ICAR Test Interval Method. 97 98 The method applies: 99 - First test day milk yield from calving to the first test day, 100 - Trapezoidal integration between consecutive test days, 101 - Last test day milk yield from the last test day to DIM = max_dim (default = 305). 102 103 Args: 104 df (pd.DataFrame): Input DataFrame with at least DaysInMilk and 105 MilkingYield columns, plus an optional TestId column. Column names 106 can be provided explicitly or matched via known aliases. 107 days_in_milk_col (str | None): Optional column name override for 108 DaysInMilk. 109 milking_yield_col (str | None): Optional column name override for 110 MilkingYield. 111 test_id_col (str | None): Optional column name override for TestId. 112 default_test_id (int): Value used to create a default TestId column if 113 one is missing. 114 max_dim (int): Lactation length used to calculate cumulative 115 production. The default is 305 days. 116 Records with DIM > max_dim are excluded. 117 118 Returns: 119 pd.DataFrame: Two-column DataFrame with 120 - "TestId": identifier per lactation, 121 - "LactationMilkYield": computed total milk yield over the 122 specified window. 123 124 Raises: 125 ValueError: If required columns (DaysInMilk or MilkingYield) cannot be found. 126 127 Notes: 128 - Records with DIM > max_dim are dropped before computation. 129 - At least two data points per TestId are required for trapezoidal integration; 130 otherwise the lactation is skipped. 131 """ 132 133 # Standardize columns and filter DIM <= max_dim 134 df = standardize_lactation_columns( 135 df, 136 days_in_milk_col=days_in_milk_col, 137 milking_yield_col=milking_yield_col, 138 test_id_col=test_id_col, 139 default_test_id=default_test_id, 140 max_dim=max_dim, 141 ) 142 143 result = [] 144 145 # Iterate over each lactation 146 for lactation in df["TestId"].unique(): 147 lactation_df = pd.DataFrame(df[df["TestId"] == lactation]) 148 149 # Sort by DaysInMilk ascending 150 lactation_df.sort_values(by="DaysInMilk", ascending=True, inplace=True) 151 152 if len(lactation_df) < 2: 153 print(f"Skipping TestId {lactation}: not enough data points for interpolation.") 154 continue 155 156 # Start and end points 157 start = lactation_df.iloc[0] 158 end = lactation_df.iloc[-1] 159 160 # Start contribution 161 MY0 = start["DaysInMilk"] * start["MilkingYield"] 162 163 # End contribution 164 MYend = (max_dim + 1 - end["DaysInMilk"]) * end["MilkingYield"] 165 166 # Intermediate trapezoidal contributions 167 lactation_df["width"] = lactation_df["DaysInMilk"].diff().shift(-1) 168 lactation_df["avg_yield"] = ( 169 lactation_df["MilkingYield"] + lactation_df["MilkingYield"].shift(-1) 170 ) / 2 171 lactation_df["trapezoid_area"] = lactation_df["width"] * lactation_df["avg_yield"] 172 173 total_intermediate = lactation_df["trapezoid_area"].sum() 174 175 total_yield = MY0 + total_intermediate + MYend 176 result.append((lactation, total_yield)) 177 178 return pd.DataFrame(result, columns=pd.Index(["TestId", "LactationMilkYield"]))
Compute total lactation milk yield using the ICAR Test Interval Method.
The method applies:
- First test day milk yield from calving to the first test day,
- Trapezoidal integration between consecutive test days,
- Last test day milk yield from the last test day to DIM = max_dim (default = 305).
Arguments:
- df (pd.DataFrame): Input DataFrame with at least DaysInMilk and MilkingYield columns, plus an optional TestId column. Column names can be provided explicitly or matched via known aliases.
- days_in_milk_col (str | None): Optional column name override for DaysInMilk.
- milking_yield_col (str | None): Optional column name override for MilkingYield.
- test_id_col (str | None): Optional column name override for TestId.
- default_test_id (int): Value used to create a default TestId column if one is missing.
- max_dim (int): Lactation length used to calculate cumulative production. The default is 305 days. Records with DIM > max_dim are excluded.
Returns:
pd.DataFrame: Two-column DataFrame with - "TestId": identifier per lactation, - "LactationMilkYield": computed total milk yield over the specified window.
Raises:
- ValueError: If required columns (DaysInMilk or MilkingYield) cannot be found.
Notes:
- Records with DIM > max_dim are dropped before computation.
- At least two data points per TestId are required for trapezoidal integration; otherwise the lactation is skipped.