| import urllib.request |
| from urllib.error import HTTPError |
| import requests |
| from bs4 import BeautifulSoup |
| import os |
| import json |
| import streamlit as st |
| import pandas as pd |
| |
| from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode |
|
|
|
|
# Lookup from the numeric fielding-position code (1-10) to its abbreviation.
# Used further down to translate the integer ``pos`` column into ``Position``.
pos_dict = {
    1: 'P',
    2: 'C',
    3: '1B',
    4: '2B',
    5: '3B',
    6: 'SS',
    7: 'LF',
    8: 'CF',
    9: 'RF',
    10: 'DH',
}
|
|
| |
# Use the full browser width for the page layout.
st.set_page_config(layout="wide")

# Inject CSS that caps the ".main-container" wrapper at 1250px and centres it.
# The stylesheet is written flush-left so Markdown cannot mistake it for an
# indented code block.
st.markdown(
    """
<style>
.main-container {
    max-width: 1250px;
    margin: 0 auto;
}
</style>
""",
    unsafe_allow_html=True,
)

# Open the wrapper <div>; the matching </div> is emitted by the last statement
# of the script.
# NOTE(review): Streamlit renders each st.markdown call as its own element, so
# this tag may not actually enclose the widgets that follow - confirm in the
# browser before relying on the max-width rule.
st.markdown('<div class="main-container">', unsafe_allow_html=True)
|
|
| |
# Introductory Markdown shown at the top of the page (title, credits, and an
# explanation of Catch Probability). Lines are flush-left so headings render
# as headings rather than as table rows or code blocks.
markdown_text = """
## Catch Probability Lookup Tool

##### By: Thomas Nestico ([@TJStats](https://x.com/TJStats))
##### Data: [MLB](https://baseballsavant.mlb.com/)

#### About
This Streamlit app retrieves catch probability data for a selected fielder from [Baseball Savant](https://baseballsavant.mlb.com/leaderboard/catch_probability).
The app displays the fielder's data in a table and allows the user to select a
row to view the corresponding video.

Catch probability data is only available for outfielders.

#### What is Catch Probability?
*From MLB:*

**Catch Probability** expresses the likelihood for a ball to be caught by an outfielder based on opportunity time,
distance needed, and direction. “Opportunity time” starts when the ball is released by the pitcher,
and “distance needed” is the shortest distance needed to make the catch.
[Learn more about how direction is accounted for here](https://www.mlb.com/news/catch-probability-updated-to-include-direction-c232532408).
[Read more about the details of how Catch Probability works here](https://www.mlb.com/news/statcast-introduces-catch-probability-for-2017-c217802340).

"""
|
|
# Glossary rendered below the grid: one bullet per displayed column, in the
# same order as the grid's column headers.
markdown_text_end = '''
*Columns:*
- **Game Date**: Date of the game
- **Batter Name**: Name of the batter
- **Pitcher Name**: Name of the pitcher
- **Fielder Name**: Name of the fielder
- **Position**: Position of the fielder
- **Event**: Type of play
- **Out**: Was the ball caught?
- **Wall**: [Did the fielder catch the ball at the wall?](https://www.mlb.com/news/catch-probability-updated-to-account-for-walls-c269814542)
- **Back**: [Did the fielder catch the ball while moving back?](https://www.mlb.com/news/catch-probability-updated-to-include-direction-c232532408)
- **Stars**: [Number of stars assigned to the play](https://baseballsavant.mlb.com/leaderboard/catch_probability)
- **Distance**: Distance required to make the catch in feet
- **Hang Time**: Hang time of the ball in seconds
- **Catch Rate**: Probability of the catch being made
'''
|
|
| |
# Render the introductory text at the top of the page.
st.markdown(markdown_text)
|
|
|
|
| |
import datasets
from datasets import load_dataset

# Season and competition level that select which parquet file is loaded.
season = 2025
level = 'mlb'


# Streamlit re-executes this script on every widget interaction. Caching keeps
# the dataset load and pandas conversion from being repeated on each rerun; the
# TTL lets a long-running app pick up refreshed data about once an hour.
@st.cache_data(ttl=3600)
def _load_pitch_data(level: str, season: int) -> pd.DataFrame:
    """Load the pitch-level parquet file for ``level``/``season`` as a DataFrame.

    Reads ``data/{level}_pitch_data_{season}.parquet`` from the
    ``TJStatsApps/mlb_data`` dataset and returns its ``train`` split converted
    to pandas.
    """
    ds = load_dataset(
        "TJStatsApps/mlb_data",
        data_files=f"data/{level}_pitch_data_{season}.parquet",
    )
    return ds["train"].to_pandas()


dataset = _load_pitch_data(level, season)

# Keep one row per play (the last one seen). The explicit copy makes ``df`` an
# independent frame so the column assignment below cannot raise a
# SettingWithCopyWarning.
df = dataset.drop_duplicates(subset=['play_id'], keep='last').copy()

# Label used in the player picker, e.g. "<batter_name> - <batter_team>".
df['batter_name_team'] = df['batter_name'] + ' - ' + df['batter_team']
|
|
| |
# {batter_id: "Name - Team"} with one entry per batter_id, ordered by batter
# name. The picker is populated from the batter columns of the pitch data.
fielders = (
    df.drop_duplicates(['batter_id'])
    .sort_values(['batter_name'])
    .set_index('batter_id')['batter_name_team']
    .to_dict()
)

# Reverse lookup: display label -> player id.
fielders_reversed = {label: player_id for player_id, label in fielders.items()}

st.write("#### Select Fielder")
# The heading above acts as the visible label. The widget still gets a real
# (collapsed) label because Streamlit discourages empty labels.
selected_fielder = st.selectbox(
    'Select Fielder',
    list(fielders_reversed),
    label_visibility='collapsed',
)

# Player id of the chosen entry; used to build the Baseball Savant request.
fielder_select = fielders_reversed[selected_fielder]
|
|
| |
# Baseball Savant endpoint queried for the selected player and season.
url = f"https://baseballsavant.mlb.com/player-services/range?playerId={fielder_select}&season={season}&playerType=fielder"

try:
    # A timeout keeps a stalled connection from hanging the app indefinitely.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    data = response.json()
except (requests.RequestException, ValueError) as err:
    # RequestException covers network/HTTP failures; ValueError covers a body
    # that is not valid JSON.
    st.error(f"Could not retrieve catch probability data: {err}")
    st.stop()

df_catch = pd.DataFrame(data)

# Nothing to show for this player: tell the user and end this script run.
if df_catch.empty:
    st.write("No data available for the selected fielder.")
    st.stop()
|
|
# Round the two measurements to one decimal place for display.
df_catch['hang_time'] = df_catch['hang_time'].astype(float).round(1)
df_catch['distance'] = df_catch['distance'].astype(float).round(1)

# Right join: every catch-probability row is kept and enriched with the
# pitch-level columns for the same play_id. Column names present on both sides
# get a "_fielder" suffix on the catch-probability side.
df_merge = df.merge(
    df_catch,
    on='play_id',
    how='right',
    suffixes=('', '_fielder'),
).reset_index(drop=True)

# Translate the position code into its abbreviation.
# NOTE(review): astype(int) raises if ``pos`` contains missing values - confirm
# the endpoint always supplies it.
df_merge['pos'] = df_merge['pos'].astype(int)
df_merge['Position'] = df_merge['pos'].map(pos_dict)

# Drop rows where the batter id equals ``player_id``, then order by date.
# The sort is chained rather than done in place, because an in-place sort on a
# filtered frame can raise a SettingWithCopyWarning.
df_merge = df_merge[df_merge['batter_id'] != df_merge['player_id']].sort_values(by='game_date')

# Columns shown in the grid and, in matching order, their display headers.
column_names = [
    'game_date', 'batter_name', 'pitcher_name', 'name_display_first_last',
    'Position', 'event', 'out', 'wall', 'back', 'stars', 'distance',
    'hang_time', 'catch_rate',
]
column_names_display = [
    'Game Date', 'Batter Name', 'Pitcher Name', 'Fielder Name',
    'Position', 'Event', 'Out', 'Wall', 'Back', 'Stars', 'Distance',
    'Hang Time', 'Catch Rate',
]
|
|
|
|
|
|
|
|
|
|
| |
with st.container():
    st.write("#### Fielder Data")

    # Slice the display columns once and reuse it for both the grid options
    # and the grid data.
    grid_df = df_merge[column_names]

    gb = GridOptionsBuilder.from_dataframe(grid_df)

    # Replace the raw column names with their display headers.
    for col, display_name in zip(column_names, column_names_display):
        gb.configure_column(col, headerName=display_name)

    # One selectable row at a time, chosen through a checkbox.
    gb.configure_selection('single', use_checkbox=True)
    grid_options = gb.build()

    # NOTE(review): allow_unsafe_jscode is enabled although no JavaScript is
    # passed to the grid here - consider turning it off.
    grid_response = AgGrid(
        grid_df,
        gridOptions=grid_options,
        update_mode=GridUpdateMode.SELECTION_CHANGED,
        height=300,
        allow_unsafe_jscode=True,
        width="100%",
    )
|
|
| |
|
|
|
|
# Show a video link for the selected grid row, or a prompt when none is selected.
selected_rows = grid_response['selected_rows']

try:
    # Only the index lookup is guarded, so unrelated errors are not swallowed.
    selected_row_index = int(selected_rows.index.values[0])
except (AttributeError, IndexError):
    # AttributeError: the selection has no DataFrame-style index (presumably
    # None or a plain list when nothing is selected - depends on the st_aggrid
    # version). IndexError: the selection is an empty frame.
    st.write("#### Select Row to Get Video Link")
else:
    # The index is used positionally against df_merge, which is the same
    # filtered and sorted frame that was passed to the grid.
    play_id = df_merge["play_id"].values[selected_row_index]
    play_link = f'https://baseballsavant.mlb.com/sporty-videos?playId={play_id}'
    st.markdown(f'#### [Link to Video]({play_link})')
|
|
# Render the column glossary, then close the ".main-container" wrapper <div>
# that was opened near the top of the script.
st.markdown(markdown_text_end)
st.markdown('</div>', unsafe_allow_html=True)