From df55091db40e8b9bf893a5f79c32cd644a9d370e Mon Sep 17 00:00:00 2001 From: ss77995ss Date: Thu, 19 Dec 2024 00:50:54 +0800 Subject: [PATCH 1/2] feat(Statcast): Implement Runner Basestealing function --- example.py | 4 - .../enums/statcast_leaderboard.py | 10 ++- .../statcast/runner_basestealing.py | 84 +++++++++++++++++++ .../utils/statcast_leaderboard.py | 21 +++++ 4 files changed, 113 insertions(+), 6 deletions(-) create mode 100644 src/baseball_stats_python/statcast/runner_basestealing.py create mode 100644 src/baseball_stats_python/utils/statcast_leaderboard.py diff --git a/example.py b/example.py index a68abf9..f128071 100644 --- a/example.py +++ b/example.py @@ -7,7 +7,6 @@ ) from src.baseball_stats_python.enums.minor import MinorGameType from src.baseball_stats_python.enums.statcast import GameType, MlbTeam, Month -from src.baseball_stats_python.statcast.catcher_throwing import catcher_throwing def example(): @@ -36,6 +35,3 @@ def mlbam_id_example(): # example() # minor_example() # mlbam_id_example() - -df = catcher_throwing('669257', game_type=123) -print(df) diff --git a/src/baseball_stats_python/enums/statcast_leaderboard.py b/src/baseball_stats_python/enums/statcast_leaderboard.py index 23765d7..a612593 100644 --- a/src/baseball_stats_python/enums/statcast_leaderboard.py +++ b/src/baseball_stats_python/enums/statcast_leaderboard.py @@ -2,6 +2,12 @@ class GameType(EnumBase): - REGULAR_SEASON = 'R' - PLAYOFFS = 'PO' + REGULAR_SEASON = 'Regular' + PLAYOFFS = 'Playoff' ALL = 'All' + + +class Hand(EnumBase): + RIGHT = 'R' + LEFT = 'L' + ALL = 'all' diff --git a/src/baseball_stats_python/statcast/runner_basestealing.py b/src/baseball_stats_python/statcast/runner_basestealing.py new file mode 100644 index 0000000..645603d --- /dev/null +++ b/src/baseball_stats_python/statcast/runner_basestealing.py @@ -0,0 +1,84 @@ +import pandas as pd +import requests + +from ..constants import DEFAULT_SEASON +from ..enums.statcast_leaderboard import GameType, Hand +from 
..utils.statcast_leaderboard import get_hand_param_str, get_prior_pk_param_str + +session = requests.Session() + +API_URL = ( + 'https://baseballsavant.mlb.com/leaderboard/services/basestealing-running-game' +) + + +def get_run_value(df: pd.DataFrame) -> float: + if df['is_runner_cs']: + return -0.45 + if df['is_runner_sb']: + return 0.2 + if df['is_runner_pk']: + return -0.45 + if df['is_runner_bk']: + return 0.2 + if df['is_runner_fb']: + return 0.2 + + raise ValueError(f'Invalid DataFrame: {df}') + + +def runner_basestealing( + runner_id: str, + game_type: str | GameType = GameType.REGULAR_SEASON, + season: str = str(DEFAULT_SEASON), + pitch_hand: str | Hand = Hand.ALL, + prior_pk: str = 'all', +) -> pd.DataFrame: + """ + Get basestealing data from each stolen base opportunity for a specific runner. + ref: https://baseballsavant.mlb.com/leaderboard/basestealing-run-value + + Args: + runner_id (str): The MLBAM ID of the runner. (Required) + game_type (str | GameType): The game type to filter by. Default is "Regular". + season (str): The season to filter by. The earliest season available is 2016. + pitch_hand (str | Hand): The pitch hand to filter by. Default is "all". + prior_pk (str): The number of prior pick-off attempts from pitcher before the stolen base opportunity. Default is "all". + Can be "all", "1", "2", or "3". "3" is include all prior pick-off attempts over 3. + Returns: + pd.DataFrame: A DataFrame containing the basestealing data. 
+ """ + + if not runner_id: + raise ValueError('runner_id is required') + + if not isinstance(game_type, str) and not isinstance(game_type, GameType): + raise ValueError(f'Invalid type for game_type: {type(game_type)}') + + if not GameType.has_value(game_type): + raise ValueError(f'Invalid game type: {game_type}') + + if int(season) < 2016: + raise ValueError( + f'Invalid season: {season}, The earliest season available is 2016' + ) + + params = { + 'game_type': game_type, + 'season': season, + 'n': 0, + 'pitch_hand': get_hand_param_str(pitch_hand), + 'prior_pk': get_prior_pk_param_str(prior_pk), + } + + response = session.get(f'{API_URL}/{runner_id}', params=params) + + if response.status_code == 200: + result = response.json() + df = pd.DataFrame(result['data']) + df['run_value'] = df.apply(get_run_value, axis=1) + return df + else: + raise Exception( + f'Failed to fetch data: {response.status_code} - {response.text}' + ) diff --git a/src/baseball_stats_python/utils/statcast_leaderboard.py b/src/baseball_stats_python/utils/statcast_leaderboard.py new file mode 100644 index 0000000..437c60e --- /dev/null +++ b/src/baseball_stats_python/utils/statcast_leaderboard.py @@ -0,0 +1,21 @@ +from ..enums.statcast_leaderboard import Hand + + +def get_hand_param_str(hand: str | Hand) -> str: + if not isinstance(hand, str) and not isinstance(hand, Hand): + raise ValueError(f'Invalid type for hand: {type(hand)}') + + if not Hand.has_value(hand): + raise ValueError(f'Invalid hand: {hand}') + + return f'{hand}' + + +def get_prior_pk_param_str(prior_pk: str) -> str: + if not isinstance(prior_pk, str): + raise ValueError(f'Invalid type for prior_pk: {type(prior_pk)}') + + if prior_pk not in ['all', '1', '2', '3']: + raise ValueError(f'Invalid prior_pk: {prior_pk}') + + return prior_pk From d2dfdfacff664db700fe9f415e3f06dfa1347d04 Mon Sep 17 00:00:00 2001 From: ss77995ss Date: Fri, 27 Dec 2024 07:03:24 +0800 Subject: [PATCH 2/2] doc(Statcast): Add documentation for 
runner_basestealing function --- docs/runner_basestealing.md | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 docs/runner_basestealing.md diff --git a/docs/runner_basestealing.md b/docs/runner_basestealing.md new file mode 100644 index 0000000..18f15de --- /dev/null +++ b/docs/runner_basestealing.md @@ -0,0 +1,34 @@ +# Statcast Runner Basestealing + +## `runner_basestealing` + +Function to get a runner's basestealing data from each stolen base attempt. Attempts include successful stolen bases (`SB`), advances via balk (`BK`), caught stealing (`CS`), and pickoffs (`PK`). Cases with more than three unsuccessful pickoff attempts are also included (`FB`). Based on Baseball Savant's [Runner Basestealing](https://baseballsavant.mlb.com/leaderboard/basestealing-run-value). + +**Examples** + +```python +from baseball_stats_python import runner_basestealing + +# Get Shohei Ohtani's runner basestealing data +runner_basestealing('660271') + +# Get Shohei Ohtani's runner basestealing data in 2023 +runner_basestealing('660271', season='2023') + +# Get Shohei Ohtani's runner basestealing data in playoffs +runner_basestealing('660271', game_type=GameType.PLAYOFFS) +``` + +**Arguments** + +| Argument | Data Type | Description | +| -------------------- | ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| runner_id (Required) | `str` | The MLBAM ID of the runner. | +| game_type | `str` or `GameType` | The game type to filter by. Can be `Regular` for regular season, `Playoff` for playoffs, or `All` for all games. Check enum [GameType](../enums/statcast_leaderboard.py) | +| season | `str` | The season to filter by. The earliest season available is 2016. | +| pitch_hand | `str` or `Hand` | The pitch hand to filter by. Default is "all". 
Check enum [Hand](../enums/statcast_leaderboard.py) | +| prior_pk | `str` | The number of prior pick-off attempts by the pitcher before the stolen base opportunity. Default is "all". Can be "all", "1", "2", or "3". "3" also covers any number of prior pick-off attempts over 3. | + +**Return** + +A DataFrame with columns related to the [Runner Basestealing](https://baseballsavant.mlb.com/leaderboard/basestealing-run-value) leaderboard. Each row represents one stolen base attempt for the specified runner and contains data such as `r_primary_lead`, `r_secondary_lead`, `run_value`, `r_sec_minus_prim_lead`, `runner_moved_cd`, etc.