import pandas as pd
from scipy.stats import shapiro

from datetime import datetime, date

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import plotly.io as pio

from IPython.display import HTML, Javascript, display

from pypalettes import load_cmap
import seaborn as sns
import cufflinks as cf
import chart_studio.plotly as py
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
import plotly.io as pio
from plotly.offline import init_notebook_mode

# Read from CSV
df = pd.read_csv('nba-stats-data.csv', low_memory=False)
print(df)

        assists  blocks comment  defReb   fga  fgm   fgp  fta  ftm    ftp  \
0           3.0     1.0     NaN     7.0   6.0  1.0  16.7  4.0  3.0   75.0   
1           3.0     1.0     NaN     7.0   6.0  1.0  16.7  4.0  3.0   75.0   
2           3.0     1.0     NaN     7.0   6.0  1.0  16.7  4.0  3.0   75.0   
3           0.0     0.0     NaN     5.0   8.0  5.0  62.5  0.0  0.0    0.0   
4           1.0     0.0     NaN     6.0  10.0  5.0  50.0  4.0  4.0  100.0   
...         ...     ...     ...     ...   ...  ...   ...  ...  ...    ...   
394809      1.0     0.0     NaN     2.0   7.0  0.0   0.0  1.0  1.0  100.0   
394810      0.0     1.0     NaN     1.0  10.0  1.0  10.0  3.0  3.0  100.0   
394811      2.0     1.0     NaN     6.0   5.0  3.0  60.0  0.0  0.0    0.0   
394812      5.0     0.0     NaN     3.0  12.0  6.0  50.0  5.0  4.0   80.0   
394813      0.0     0.0     NaN     0.0   1.0  0.0   0.0  0.0  0.0    0.0   

        ...         DOB   age  feet  meters  position active  jersey_number  \
0       ...  1985-02-10  40.0   6"7    2.01         F   True            8.0   
1       ...  1985-02-10  40.0   6"7    2.01         F   True            8.0   
2       ...  1985-02-10  40.0   6"7    2.01         F   True            8.0   
3       ...  1989-07-01  35.0   6"4    1.93       G-F   True           24.0   
4       ...  1986-06-03  38.0   6"9    2.06       C-F   True           42.0   
...     ...         ...   ...   ...     ...       ...    ...            ...   
394809  ...         NaN   NaN   NaN     NaN       NaN    NaN            NaN   
394810  ...         NaN   NaN   NaN     NaN       NaN    NaN            NaN   
394811  ...         NaN   NaN   NaN     NaN       NaN    NaN            NaN   
394812  ...         NaN   NaN   NaN     NaN       NaN    NaN            NaN   
394813  ...         NaN   NaN   NaN     NaN       NaN    NaN            NaN   

        years_pro  rookie_year  pounds  
0            15.0       2006.0   257.0  
1            15.0       2006.0   257.0  
2            15.0       2006.0   257.0  
3             9.0       2012.0   195.0  
4            14.0       2007.0   240.0  
...           ...          ...     ...  
394809        NaN          NaN     NaN  
394810        NaN          NaN     NaN  
394811        NaN          NaN     NaN  
394812        NaN          NaN     NaN  
394813        NaN          NaN     NaN  

[394814 rows x 45 columns]

# set season (year) as main DF index to allow for time series analysis of player data
df = df.reset_index().set_index('season')
df.index = pd.to_datetime(df.index, format='%Y').to_period('Y')
df = df.sort_index(axis=0)

fontsize=22

# convenience function for time series plots
def create_ts_plot(data, title, ylabel, color="navy", rhs_vals=None):
    """Draw a single-line time series chart and return its axes.

    Args:
        data: pandas object whose ``.plot`` method draws the line.
        title: chart title, rendered one point smaller than the global ``fontsize``.
        ylabel: text for the y-axis label.
        color: line colour handed through to the plot call.
        rhs_vals: accepted for interface compatibility; not used by this function.

    Returns:
        The axes object returned by ``data.plot``.
    """
    # make sure pandas draws with matplotlib: everything below is matplotlib-specific
    pd.options.plotting.backend = "matplotlib"

    line_kwargs = dict(figsize=(9, 4.3), legend=False, lw=2, zorder=3, color=color)
    axes = data.plot(**line_kwargs)

    # titles and labels
    axes.set_title(title, fontsize=fontsize-1)
    axes.set_ylabel(ylabel)
    axes.set_xlabel(None)

    # horizontal guide lines only
    axes.yaxis.grid(True)
    axes.xaxis.grid(False)
    return axes

# convenience function to aggregate top players by a given statistic or feature
def summary_top_players(grp, stat, sort_by='mean', results=10, data=None):
    """Aggregate one statistic per group and return the top rows.

    Args:
        grp: column to group by (e.g. ``'player_name'``).
        stat: column holding the statistic to aggregate.
        sort_by: aggregate to rank on, ``'mean'`` or ``'sum'``.
        results: number of rows to return.
        data: frame to aggregate; defaults to the notebook-level ``df`` so
            existing calls keep working unchanged.

    Returns:
        DataFrame indexed by ``grp`` with ``mean`` and ``sum`` columns rounded
        to two decimals, sorted descending by ``sort_by`` and cut to ``results`` rows.

    Note:
        ``stat`` is coerced to numeric *in place* on the frame being used
        (unparseable values become NaN). Later cells read the coerced columns
        from ``df``, so this side effect is kept on purpose.
    """
    frame = df if data is None else data
    # handle nans and plusMinus values which are char in format '+/-[0-9]*'
    frame[stat] = pd.to_numeric(frame[stat], errors='coerce')
    df_aggs = frame.groupby([grp])[stat].agg(['mean', 'sum']).round(2)
    return df_aggs.sort_values(by=sort_by, ascending=False).head(results)

# convenience method to highlight data frame cell lime green
def highlighter(cell_value, thresh_lower = 0.7, thresh_upper = 0.99):
    """Return the CSS background for a single table cell.

    A value in ``[thresh_lower, thresh_upper)`` is painted lime green, a value
    equal to 1 is painted yellow, and any other value gets no style (``None``).
    """
    in_band = thresh_lower <= cell_value < thresh_upper
    if in_band:
        return "background-color: #32CD32"
    return "background-color: yellow" if cell_value == 1 else None

df['pounds'] = pd.to_numeric(df['pounds'])
print("Average player weight %s lbs." % round(df['pounds'].mean(skipna = True), 2))

Average player weight 219.89 lbs.

df['meters'] = pd.to_numeric(df['meters'])
# the mean is computed once, inside the message (a bare expression statement
# that repeated it with no effect has been removed)
print("Average player height %s meters" % round(df['meters'].mean(skipna = True), 2))

Average player height 1.99 meters

df[['pounds', 'meters']].corr()

df.plot('pounds', 'meters', 'scatter')

<Axes: xlabel='pounds', ylabel='meters'>

numeric_cols = ['assists', 'blocks', 'defReb', 'fga', 'fgm', 'fgp', 'fta', 'ftm', 'ftp', 'offReb', 'pFouls', 
                'points', 'steals', 'totReb', 'tpa', 'tpm', 'tpp', 'turnovers', 'age', 'meters', 'min', 'plusMinus',
                'pounds', 'years_pro', 'rookie_year', 'jersey_number']

df_corr_c = df[numeric_cols].corr()

df_corr_c.style.map(highlighter)

# flatten the absolute correlation matrix into (column, column) pairs, strongest first
df_corr_c = df[numeric_cols].corr().abs().unstack().sort_values(ascending=False)
df_corr_c[(df_corr_c < 1) & (df_corr_c >= 0.7)]# keep pairs with 0.7 <= |r| < 1: drops weaker pairs and the 1.0 entries for matching cols in corr matrix

points       fgm            0.968215
fgm          points         0.968215
fta          ftm            0.953071
ftm          fta            0.953071
defReb       totReb         0.940426
totReb       defReb         0.940426
rookie_year  age            0.900022
age          rookie_year    0.900022
points       fga            0.897463
fga          points         0.897463
fgm          fga            0.897029
fga          fgm            0.897029
tpm          tpa            0.824836
tpa          tpm            0.824836
meters       pounds         0.801738
pounds       meters         0.801738
min          fga            0.794232
fga          min            0.794232
min          points         0.744084
points       min            0.744084
min          fgm            0.728871
fgm          min            0.728871
tpa          fga            0.708842
fga          tpa            0.708842
tpm          tpp            0.705648
tpp          tpm            0.705648
ftp          ftm            0.705585
ftm          ftp            0.705585
offReb       totReb         0.704013
totReb       offReb         0.704013
dtype: float64

# Yearly mean weight. Only the 'pounds' column is needed, so convert just that
# Series to a timestamp index instead of copying the whole frame first.
df_weight = df['pounds'].to_timestamp().resample("1YE").mean()
create_ts_plot(df_weight, "Average Weight by Year", "Weight in Pounds", "darkgreen");

avg_weight_2015 = round(df[df.index == '2015']['pounds'].mean(), 2)
print("Average weight for 2015: %s pounds" % avg_weight_2015)

Average weight for 2015: 222.31 pounds

avg_weight_2024 = round(df[df.index == '2024']['pounds'].mean(), 2)
print("Average weight for 2024: %s pounds" % avg_weight_2024)

Average weight for 2024: 217.21 pounds

print("Difference in weight from 2015 to 2024: %s lbs" % (round(avg_weight_2015 - avg_weight_2024, 2)))

Difference in weight from 2015 to 2024: 5.1 lbs

df[['player_name', 'pounds', 'meters', 'feet']].reset_index(drop=True).sort_values(by=['pounds'], ascending=False).drop_duplicates()[0:5]

df[['player_name', 'pounds', 'meters', 'feet']].reset_index(drop=True).sort_values(by=['pounds'], ascending=True).drop_duplicates()[0:5]

print("Average height for players in 2017: %s meters" % round(df[df.index == '2017']['meters'].mean(), 3))

Average height for players in 2017: 1.998 meters

print("Average height for players in 2021: %s meters" % round(df[df.index == '2021']['meters'].mean(), 3))

Average height for players in 2021: 1.99 meters

# Yearly mean height. Only the 'meters' column is needed, so convert just that
# Series to a timestamp index instead of copying the whole frame first.
df_height = df['meters'].to_timestamp().resample("1YE").mean()
create_ts_plot(df_height, "Average Height by Year", 
               "Height in Meters", "violet");

# create histogram visual
mins_played_summary = summary_top_players('player_name', 'min', results = 800)['sum']
h = mins_played_summary.hist(backend='plotly', labels=dict(index='Player Count', value='Minutes Played', 
                                                           variable='Total Minutes Played'))
h.layout.yaxis.title.text = 'Player Count'
h

test_normality = shapiro(mins_played_summary)

print(test_normality)

ShapiroResult(statistic=0.8014081288597795, pvalue=3.564594976648819e-30)

# Shapiro-Wilk: the null hypothesis is that the sample comes from a normal
# distribution. It is rejected on the p-value alone; the W statistic is not
# compared against a cutoff of its own.
is_not_normal_dist = test_normality.pvalue < 0.05
print("Can we reject the null hypothesis and say this is not a normal distribution (bell shaped): %s" % is_not_normal_dist)

Can we reject the null hypothesis and say this is not a normal distribution (bell shaped): True

df_mins = summary_top_players('player_name', 'min', sort_by = 'sum', results = 20)
# keep the filtered frame: the filter result used to be computed and thrown away
df_mins = df_mins[df_mins['sum'] > 7500]
df_mins

summary_top_players('player_name', 'points', sort_by = 'sum', results=5)

summary_top_players('player_name', 'points', results=5)

summary_top_players('player_name', 'assists', sort_by = 'sum', results=5)

summary_top_players('player_name', 'assists', results=5)

# NOTE(review): fgp / ftp / tpp are per-game percentages. Sorting by 'sum' adds
# percentages across games, so the ranking grows with the number of games
# played rather than with shooting accuracy. Consider sort_by='mean', ideally
# with a minimum-attempts filter - confirm the intended metric.
summary_top_players('player_name', 'fgp', sort_by = 'sum', results=5)# field goal percentage

summary_top_players('player_name', 'ftp', sort_by = 'sum', results=5)# free throw percentage

summary_top_players('player_name', 'tpp', sort_by = 'sum', results=6)# three-point percentage

summary_top_players('player_name', 'offReb', sort_by = 'sum', results=5)# offensive rebounds

summary_top_players('player_name', 'defReb', sort_by = 'sum', results=5)# defensive rebounds

summary_top_players('player_name', 'totReb', results=5)# Total rebounds by average

summary_top_players('player_name', 'totReb', sort_by = 'sum', results=5)# Total rebounds by total/sum

summary_top_players('player_name', 'blocks', sort_by = 'sum', results=5)# blocked shots

summary_top_players('player_name', 'blocks', results=6)# blocked shots by average

summary_top_players('player_name', 'steals', sort_by = 'sum', results=5)

summary_top_players('player_name', 'fgm', sort_by = 'sum', results=5)

summary_top_players('player_name', 'fgm', results=6)

summary_top_players('player_name', 'fga', results=5)

summary_top_players('player_name', 'fga', sort_by='sum', results=5)

df_field_goals_made = summary_top_players('player_name', 'fgm')
df_field_goals_made = df_field_goals_made[df_field_goals_made['sum'] > 5000]
df_field_goals_made

summary_top_players('player_name', 'plusMinus', sort_by = 'sum')

# keep only rows with recorded, non-zero minutes
df_country_grp = df.loc[~((df['min'] == 0) | (df['min'].isna()))]

# total minutes per country, largest first; agg takes a list here (a set is
# unordered and not a documented argument type)
country_grp = df_country_grp.groupby('country')['min'].agg(['sum']).sort_values('sum', ascending=False)
country_grp

print("The number of countries with representation in the NBA over the last 10 years is: %s" % len(country_grp))# Count of countries represented

The number of countries with representation in the NBA over the last 10 years is: 36

# remove USA (heavily weighted towards players from USA); dropped by label so
# the result does not depend on USA happening to sort first
# 'barh' puts the countries on the y-axis, which is what the axis title below describes
hbar = country_grp.drop(index='USA', errors='ignore').plot(backend='plotly', kind='barh', labels = dict(variable='Minutes Played', 
                                                                        value='Total Minutes Played', index='Country'))
hbar.layout.yaxis.title.text = 'Country'
hbar

df[(df['country'] == 'Mali') & (df['min'] > 0)][['player_name', 'min', 'game_id']]# data verification of minutes played for Mali

print("The number of unique NBA players listed from 2015 - 2024 seasons is: %s" % len(df['player_name'].unique()))# number of unique NBA players listed from 2015 - 2024

The number of unique NBA players listed from 2015 - 2024 seasons is: 2044

# One plusMinus line per selected team, drawn on a shared axes.
# NOTE(review): with use_index=False the x-axis is the row position within each
# group, not the season, and `x=` looks to be ignored for Series plots -
# confirm, and consider aggregating per season before plotting.
df[df['team_name'].isin({'Boston Celtics', 'Miami Heat', 'Milwaukee Bucks'})].groupby('team_name')['plusMinus'].plot(
    x=df.index.year, legend=True, figsize=(20, 10), use_index=False, title="Plus Minus By Team", 
    fontsize=fontsize, zorder=3)

team_name
Boston Celtics     Axes(0.125,0.11;0.775x0.77)
Miami Heat         Axes(0.125,0.11;0.775x0.77)
Milwaukee Bucks    Axes(0.125,0.11;0.775x0.77)
Name: plusMinus, dtype: object

# One plusMinus line per selected player, drawn on a shared axes.
# NOTE(review): with use_index=False the x-axis is the row position within each
# group, not the season named in the title, and `x=` looks to be ignored for
# Series plots - confirm, and consider aggregating per season before plotting.
df[df['player_name'].isin({'LeBron James', 'Stephen Curry'})].groupby('player_name')['plusMinus'].plot(
    x=df.index.year, legend=True, figsize=(20, 10), use_index=False, title="Plus Minus By Season", 
    fontsize=fontsize, zorder=3)

player_name
LeBron James     Axes(0.125,0.11;0.775x0.77)
Stephen Curry    Axes(0.125,0.11;0.775x0.77)
Name: plusMinus, dtype: object

# Highest single-row points value per year. Only the 'points' column is needed,
# so convert just that Series instead of copying the whole frame first.
df_pts_per_yr = df['points'].to_timestamp().resample("1YE").max()
ax = create_ts_plot(df_pts_per_yr, "Points Scored by Season High", "Points Scored", "darkred")

# Highest single-row assists value per year. Only the 'assists' column is
# needed, so convert just that Series instead of copying the whole frame first.
df_assts_per_yr = df['assists'].to_timestamp().resample("1YE").max()
ax = create_ts_plot(df_assts_per_yr, "Assists Made by Season High", "Assists Maximum")

df[['player_name', 'assists']].sort_values(by='assists', ascending=False)[0:9]

df_pts_high = df.loc[~((df['points'] == 0) | (df['points'].isna()))]
  
sns.set_theme(style="darkgrid")
palette = load_cmap("pastel").hex

plt.figure(figsize=(10, 5))
sns.boxplot(data=df_pts_high, x=df_pts_high.index, y='points', palette=palette, hue='season')
plt.title("Points Scored Distribution by Season", fontsize=fontsize-1)
plt.xlabel("Season")
plt.ylabel("Points Scored");
plt.ylim(-5, 75);

df[['player_name', 'points']].sort_values(by='points', ascending=False)[0:6]# Highest single game scorers in the past 10 years

round(df[['points', 'assists']].describe(), 2)

# Highest single-row three-pointers-made value per year. Only the 'tpm' column
# is needed, so convert just that Series instead of copying the whole frame first.
df_tpm_per_yr = df['tpm'].to_timestamp().resample("1YE").max()
ax = create_ts_plot(df_tpm_per_yr, "Three Pointers Made by Season High", "Count", "royalblue")

df[['player_name', 'tpm']].sort_values(by='tpm', ascending=False)[0:6]

cols_pos = ['assists', 'blocks', 'steals', 'fgm', 'fgp', 'ftm', 'ftp', 'totReb', 'tpm', 'tpp', 'points', 'min', 'plusMinus']
cols_neg = ['pFouls', 'turnovers', 'min']


def calc_productivity_matrix(df_temp, cols, min_thresh = 0.75, top_n = 24):
    """Rank players by the sum of their min-max normalised per-row averages.

    Args:
        df_temp: frame with a ``player_name`` column plus every column in ``cols``.
        cols: numeric stat columns to average per player and scale to [0, 1].
        min_thresh: when ``'min'`` is one of ``cols``, only players whose
            normalised average minutes exceed this value are kept.
        top_n: number of highest-scoring players to return. Defaults to 24,
            the value that used to be hard-coded.

    Returns:
        DataFrame indexed by ``player_name`` with the normalised values of
        ``cols`` (rounded to two decimals) for the ``top_n`` players, ordered
        by descending row sum. The helper sum column is not returned.
    """
    # per-player average of each stat over every row supplied
    df_prod = df_temp.groupby("player_name")[cols].mean()

    # min-max normalisation of data frame; a column with a single distinct
    # value yields 0/0 = NaN for every player, and dropna() below then
    # removes every row
    col_min = df_prod.min()
    df_norm_prod = (df_prod - col_min) / (df_prod.max() - col_min)

    # drop records with NA values
    df_prod_heatmap = df_norm_prod.dropna().round(2)

    # filter out players with less playing experience; copy so the score
    # column below is not written into a slice of another frame
    if 'min' in df_prod_heatmap.columns:
        df_prod_heatmap = df_prod_heatmap[df_prod_heatmap['min'] > min_thresh].copy()

    # calculate row sum as a sort of productivity indicator (can be based on positive or negative contributions)
    df_prod_heatmap['player_sum'] = df_prod_heatmap.sum(axis=1)

    # sort and keep the top_n players
    df_prod_heatmap = df_prod_heatmap.sort_values('player_sum', ascending=False).head(top_n)
    return df_prod_heatmap[cols]

positive_prod = calc_productivity_matrix(df, cols_pos)
positive_prod

negative_prod = calc_productivity_matrix(df, cols_neg)
negative_prod

sns.heatmap(positive_prod, annot=True)

<Axes: ylabel='player_name'>

sns.heatmap(negative_prod, annot=True)

<Axes: ylabel='player_name'>

players_to_exclude = positive_prod.index.intersection(negative_prod.index)
players_to_exclude

Index(['James Harden', 'Luka Doncic', 'Kevin Durant', 'Stephen Curry',
       'Joel Embiid', 'LeBron James', 'Giannis Antetokounmpo',
       'Damian Lillard', 'Victor Wembanyama', 'Trae Young', 'Nikola Jokic',
       'Paul George', 'Karl-Anthony Towns', 'Donovan Mitchell',
       'Anthony Edwards', 'LaMelo Ball', 'Devin Booker'],
      dtype='object', name='player_name')

df_team_pos_players = positive_prod[~positive_prod.index.isin(players_to_exclude)]
df_team_pos_players

df_elite_players = df[df['player_name'].isin(df_team_pos_players.index)][['player_name', 'team_name', 'active', 'years_pro', 'affiliation', 'college', 'country', 'age', 'rookie_year']].reset_index(drop=True).drop_duplicates().sort_values('player_name')
df_elite_players.reset_index().sort_values(by=['player_name', 'index'], ascending=[True, False])

df_elite_players.sort_index(axis=0, ascending=False).groupby('player_name').first()

df_team_pos_players.style.background_gradient(cmap='Blues')

a4_dims= [11.7, 8.27]
fig, ax = plt.subplots(figsize=a4_dims)
df_team_pos_players = df_team_pos_players.rename(columns={'assists': 'Assists',
                                                          'blocks': 'Blocks',
                                                          'steals': 'Steals',
                                                          'fgm' : 'Field Goals Made',
                                                          'fgp' : 'Field Goal Percentage',
                                                          'ftm' : 'Free Throws Made',
                                                          'ftp' : 'Free Throw Percentage',
                                                          'totReb' : 'Total Rebounds',
                                                          'tpm' : 'Three Pointers Made',
                                                          'tpp' : 'Three Pointer Percentage',
                                                          'points': 'Points',
                                                          'min': 'Minutes Played',
                                                          'plusMinus': 'Plus Minus Score'})
ax = sns.lineplot(ax=ax, data=df_team_pos_players.T, markers=True, dashes=False)
sns.move_legend(ax, "upper left", bbox_to_anchor=(1,1), fontsize=19)
plt.xticks(rotation = "vertical", fontsize=19)
plt.yticks(fontsize=19)

(array([0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2]),
 [Text(0, 0.0, '0.0'),
  Text(0, 0.2, '0.2'),
  Text(0, 0.4, '0.4'),
  Text(0, 0.6000000000000001, '0.6'),
  Text(0, 0.8, '0.8'),
  Text(0, 1.0, '1.0'),
  Text(0, 1.2000000000000002, '1.2')])

df_matrix_70 = calc_productivity_matrix(df, cols_pos, min_thresh=0.7)# lower the minutes-played threshold to pull in less experienced pros and rookies that are performing well
df_victor_ad = df_matrix_70[df_matrix_70.index.isin(['Victor Wembanyama', 'Anthony Davis'])]
a4_dims= [11.7, 8.27]
fig, ax = plt.subplots(figsize=a4_dims)
df_victor_ad = df_victor_ad.rename(columns={'assists': 'Assists',
                                                          'blocks': 'Blocks',
                                                          'steals': 'Steals',
                                                          'fgm' : 'Field Goals Made',
                                                          'fgp' : 'Field Goal Percentage',
                                                          'ftm' : 'Free Throws Made',
                                                          'ftp' : 'Free Throw Percentage',
                                                          'totReb' : 'Total Rebounds',
                                                          'tpm' : 'Three Pointers Made',
                                                          'tpp' : 'Three Pointer Percentage',
                                                          'points': 'Points',
                                                          'min': 'Minutes Played',
                                                          'plusMinus': 'Plus Minus Score'})
ax = sns.lineplot(ax=ax, data=df_victor_ad.T, markers=True, dashes=False)
sns.move_legend(ax, "upper left", bbox_to_anchor=(1,1), fontsize=19)
plt.xticks(rotation = "vertical", fontsize=19)
plt.yticks(fontsize=19)

(array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ]),
 [Text(0, 0.1, '0.1'),
  Text(0, 0.2, '0.2'),
  Text(0, 0.30000000000000004, '0.3'),
  Text(0, 0.4, '0.4'),
  Text(0, 0.5, '0.5'),
  Text(0, 0.6, '0.6'),
  Text(0, 0.7000000000000001, '0.7'),
  Text(0, 0.8, '0.8'),
  Text(0, 0.9, '0.9'),
  Text(0, 1.0, '1.0')])

plt.pcolor(df_team_pos_players)

<matplotlib.collections.PolyQuadMesh at 0x14a33b2f0>

sns.heatmap(df_team_pos_players, annot=True)

<Axes: ylabel='player_name'>

stat_cols = ['Assists', 'Blocks', 'Steals', 'Field Goals Made', 'Field Goal Percentage', 'Free Throws Made', 
             'Free Throw Percentage', 'Total Rebounds', 'Three Pointers Made', 'Three Pointer Percentage', 'Points',
             'Minutes Played', 'Plus Minus Score']
colours = ["limegreen", "red", "navy", "lightblue", "darkorange", "pink", "lightgreen", "orange", "darkgreen",
           "purple", "blue", "darkred", "yellow"]

stats_colours = dict(zip(stat_cols, colours))
stats_colours

{'Assists': 'limegreen',
 'Blocks': 'red',
 'Steals': 'navy',
 'Field Goals Made': 'lightblue',
 'Field Goal Percentage': 'darkorange',
 'Free Throws Made': 'pink',
 'Free Throw Percentage': 'lightgreen',
 'Total Rebounds': 'orange',
 'Three Pointers Made': 'darkgreen',
 'Three Pointer Percentage': 'purple',
 'Points': 'blue',
 'Minutes Played': 'darkred',
 'Plus Minus Score': 'yellow'}

# One bar chart per player, two per row. Keep the original five rows as a
# minimum and add rows (ceiling division) when there are more than ten
# players, so add_subplot never runs out of grid slots.
n_rows = max(5, -(-len(df_team_pos_players.index) // 2))
fig = plt.figure(figsize=(22, 22))
for i, player in enumerate(df_team_pos_players.index):
    # create the sub plot
    axc = fig.add_subplot(n_rows, 2, i+1)
    ax = df_team_pos_players[df_team_pos_players.index == player].plot.bar(ax=axc, 
                                                                           legend=False,
                                                                           y=stat_cols, 
                                                                           color=stats_colours, 
                                                                           fontsize=fontsize, 
                                                                           zorder=3)
    # configure axis ticks / labels
    # blank tick labels: the player name is already shown in the x-axis label
    xticklabels = ""
    ax.xaxis.set_major_formatter(ticker.FixedFormatter(xticklabels))
    ax.set_title("Player Stats Split by Category", fontsize=fontsize)
    ax.set_xlabel("Player: %s" % player, fontsize=fontsize)
    ax.set_ylabel("Percentile", fontsize=fontsize)
    ax.set_ylim([.0, 1.05])
    # call grid() to switch the horizontal grid lines on; assigning to it
    # replaced the method with True and drew nothing
    ax.yaxis.grid(True)
fig.tight_layout()

result_bc = df_team_pos_players.plot(backend='plotly', kind='bar', labels=dict(variable='Stat Category', 
                                                                               value='Normalised Proportion of Productivity'))
result_bc.layout.xaxis.title.text = 'Player Name'
result_bc

	assists	blocks	defReb	fga	fgm	fgp	fta	ftm	ftp	offReb	pFouls	points	steals	totReb	tpa	tpm	tpp	turnovers	age	meters	min	plusMinus	pounds	years_pro	rookie_year	jersey_number
assists	1.000000	0.079840	0.341542	0.548966	0.484388	0.193481	0.369765	0.374962	0.311134	0.099026	0.245023	0.506260	0.316305	0.304469	0.390774	0.302927	0.203165	0.452770	0.128832	-0.252423	0.567290	0.159905	-0.166412	0.219776	0.106663	-0.147452
blocks	0.079840	1.000000	0.336355	0.195425	0.220408	0.196177	0.191473	0.159980	0.140813	0.278480	0.211656	0.208109	0.111982	0.367662	0.024140	0.017648	0.004017	0.141856	0.077320	0.251926	0.240309	0.085996	0.243935	0.110978	0.068239	0.012307
defReb	0.341542	0.336355	1.000000	0.510386	0.510206	0.327576	0.409690	0.370083	0.322418	0.420608	0.365465	0.504478	0.236909	0.940426	0.228649	0.181592	0.117611	0.376868	0.137425	0.261249	0.569548	0.162405	0.312349	0.224570	0.120884	-0.014604
fga	0.548966	0.195425	0.510386	1.000000	0.897029	0.322502	0.538305	0.538127	0.465845	0.287690	0.381801	0.897463	0.359770	0.507344	0.708842	0.565154	0.315441	0.494552	0.157100	-0.128543	0.794232	0.081852	-0.062387	0.262914	0.151250	-0.148484
fgm	0.484388	0.220408	0.510206	0.897029	1.000000	0.554352	0.513336	0.503563	0.435286	0.302518	0.362593	0.968215	0.326853	0.512760	0.566037	0.585903	0.396349	0.452816	0.148848	-0.029965	0.728871	0.181727	0.029610	0.245894	0.144353	-0.114200
fgp	0.193481	0.196177	0.327576	0.322502	0.554352	1.000000	0.226268	0.202018	0.253894	0.233494	0.297128	0.512236	0.178603	0.343931	0.145002	0.297741	0.422328	0.215789	0.087811	0.130382	0.263894	0.169360	0.146847	0.127607	0.077829	-0.015409
fta	0.369765	0.191473	0.409690	0.538305	0.513336	0.226268	1.000000	0.953071	0.629833	0.258113	0.273107	0.662380	0.238647	0.417436	0.268027	0.203644	0.110822	0.381546	0.118008	0.044996	0.486043	0.085496	0.108121	0.213502	0.106079	-0.071535
ftm	0.374962	0.159980	0.370083	0.538127	0.503563	0.202018	0.953071	1.000000	0.705585	0.203123	0.251555	0.671765	0.229993	0.365823	0.300367	0.230797	0.129817	0.371399	0.113967	0.000897	0.477959	0.095969	0.055910	0.206695	0.100236	-0.077106
ftp	0.311134	0.140813	0.322418	0.465845	0.435286	0.253894	0.629833	0.705585	1.000000	0.184509	0.265014	0.544077	0.215306	0.321535	0.292309	0.229177	0.157550	0.301711	0.095384	-0.023133	0.435448	0.083457	0.009371	0.161579	0.086646	-0.067285
offReb	0.099026	0.278480	0.420608	0.287690	0.302518	0.233494	0.258113	0.203123	0.184509	1.000000	0.266119	0.269383	0.131953	0.704013	-0.045042	-0.049622	-0.051849	0.180521	0.062571	0.330219	0.283937	0.040610	0.374211	0.109273	0.061528	0.040387
pFouls	0.245023	0.211656	0.365465	0.381801	0.362593	0.297128	0.273107	0.251555	0.265014	0.266119	1.000000	0.364967	0.218776	0.385816	0.234468	0.187246	0.148717	0.332724	0.089405	0.092152	0.432809	0.008543	0.116605	0.130562	0.075931	-0.029604
points	0.506260	0.208109	0.504478	0.897463	0.968215	0.512236	0.662380	0.671765	0.544077	0.269383	0.364967	1.000000	0.334354	0.495859	0.631962	0.656948	0.442276	0.472023	0.154545	-0.062424	0.744084	0.186617	-0.002027	0.258023	0.145995	-0.122387
steals	0.316305	0.111982	0.236909	0.359770	0.326853	0.178603	0.238647	0.229993	0.215306	0.131953	0.218776	0.334354	1.000000	0.234903	0.253356	0.196634	0.138973	0.262363	0.084613	-0.096721	0.359245	0.113339	-0.054177	0.124488	0.075410	-0.084824
totReb	0.304469	0.367662	0.940426	0.507344	0.512760	0.343931	0.417436	0.365823	0.321535	0.704013	0.385816	0.495859	0.234903	1.000000	0.162106	0.123553	0.072635	0.362664	0.131156	0.328686	0.557822	0.142350	0.385286	0.216941	0.117785	0.003703
tpa	0.390774	0.024140	0.228649	0.708842	0.566037	0.145002	0.268027	0.300367	0.292309	-0.045042	0.234468	0.631962	0.253356	0.162106	1.000000	0.824836	0.435104	0.309413	0.104815	-0.318287	0.585642	0.071578	-0.303864	0.156840	0.098521	-0.137527
tpm	0.302927	0.017648	0.181592	0.565154	0.585903	0.297741	0.203644	0.230797	0.229177	-0.049622	0.187246	0.656948	0.196634	0.123553	0.824836	1.000000	0.705648	0.241962	0.088194	-0.248232	0.474144	0.165116	-0.238167	0.132337	0.078425	-0.106122
tpp	0.203165	0.004017	0.117611	0.315441	0.396349	0.422328	0.110822	0.129817	0.157550	-0.051849	0.148717	0.442276	0.138973	0.072635	0.435104	0.705648	1.000000	0.155573	0.043654	-0.201655	0.272089	0.145718	-0.199015	0.066599	0.035838	-0.080371
turnovers	0.452770	0.141856	0.376868	0.494552	0.452816	0.215789	0.381546	0.371399	0.301711	0.180521	0.332724	0.472023	0.262363	0.362664	0.309413	0.241962	0.155573	1.000000	0.111894	-0.059444	0.468933	-0.019882	0.014308	0.201057	0.098182	-0.098600
age	0.128832	0.077320	0.137425	0.157100	0.148848	0.087811	0.118008	0.113967	0.095384	0.062571	0.089405	0.154545	0.084613	0.131156	0.104815	0.088194	0.043654	0.111894	1.000000	0.016241	0.107033	0.041469	0.176578	0.658807	0.900022	-0.111559
meters	-0.252423	0.251926	0.261249	-0.128543	-0.029965	0.130382	0.044996	0.000897	-0.023133	0.330219	0.092152	-0.062424	-0.096721	0.328686	-0.318287	-0.248232	-0.201655	-0.059444	0.016241	1.000000	-0.085559	0.003383	0.801738	0.022226	0.006351	0.337993
min	0.567290	0.240309	0.569548	0.794232	0.728871	0.263894	0.486043	0.477959	0.435448	0.283937	0.432809	0.744084	0.359245	0.557822	0.585642	0.474144	0.272089	0.468933	0.107033	-0.085559	1.000000	0.089483	-0.049958	0.146762	0.125697	-0.075726
plusMinus	0.159905	0.085996	0.162405	0.081852	0.181727	0.169360	0.085496	0.095969	0.083457	0.040610	0.008543	0.186617	0.113339	0.142350	0.071578	0.165116	0.145718	-0.019882	0.041469	0.003383	0.089483	1.000000	0.017134	0.067028	0.021486	-0.004002
pounds	-0.166412	0.243935	0.312349	-0.062387	0.029610	0.146847	0.108121	0.055910	0.009371	0.374211	0.116605	-0.002027	-0.054177	0.385286	-0.303864	-0.238167	-0.199015	0.014308	0.176578	0.801738	-0.049958	0.017134	1.000000	0.201739	0.018154	0.275634
years_pro	0.219776	0.110978	0.224570	0.262914	0.245894	0.127607	0.213502	0.206695	0.161579	0.109273	0.130562	0.258023	0.124488	0.216941	0.156840	0.132337	0.066599	0.201057	0.658807	0.022226	0.146762	0.067028	0.201739	1.000000	0.402829	-0.098223
rookie_year	0.106663	0.068239	0.120884	0.151250	0.144353	0.077829	0.106079	0.100236	0.086646	0.061528	0.075931	0.145995	0.075410	0.117785	0.098521	0.078425	0.035838	0.098182	0.900022	0.006351	0.125697	0.021486	0.018154	0.402829	1.000000	-0.168386
jersey_number	-0.147452	0.012307	-0.014604	-0.148484	-0.114200	-0.015409	-0.071535	-0.077106	-0.067285	0.040387	-0.029604	-0.122387	-0.084824	0.003703	-0.137527	-0.106122	-0.080371	-0.098600	-0.111559	0.337993	-0.075726	-0.004002	0.275634	-0.098223	-0.168386	1.000000

	player_name	pounds	meters	feet
79593	Boban Marjanovic	290.0	2.21	7"3
230879	Jusuf Nurkic	290.0	2.11	6"11
96058	Nikola Jokic	284.0	2.11	6"11
216397	Zion Williamson	284.0	1.98	6"6
247237	Joel Embiid	280.0	2.13	7"0

	player_name	pounds	meters	feet
250481	Tyrell Terry	160.0	1.88	6"2
335401	Isaiah Joe	165.0	1.93	6"4
358675	Xavier Moon	165.0	1.88	6"2
184584	Kyle Guy	167.0	1.85	6"1
358408	Bones Hyland	169.0	1.88	6"2

	mean	sum
player_name
Taurean Prince	24.39	19028.0
Josh Hart	33.61	15258.0
Norman Powell	27.07	15105.0
Domantas Sabonis	34.75	15012.0
Pascal Siakam	34.28	14810.0
Nikola Vucevic	33.15	13858.0
Alex Caruso	24.87	12981.0
Kyrie Irving	36.13	12718.0
Kevin Durant	35.98	12450.0
Jarrett Allen	31.46	12334.0
Malik Beasley	26.21	11796.0
Tim Hardaway Jr.	26.84	11060.0
Mason Plumlee	20.11	10920.0
Terry Rozier	32.66	10844.0
Jerami Grant	33.70	10448.0
Kelly Oubre Jr.	31.12	10268.0
Russell Westbrook	25.76	10048.0
Kyle Lowry	26.95	9862.0
Eric Gordon	26.01	9260.0
Georges Niang	19.93	9090.0

	mean	sum
player_name
Kevin Durant	27.01	35170.0
Russell Westbrook	21.16	33398.0
Kyrie Irving	24.24	29574.0
Andre Drummond	12.56	27846.0
Nikola Vucevic	18.16	27172.0

Modules for Import

Read df from disk

Data Analysis

Data Verification Task

3. Discussion

	mean	sum
player_name
Luka Doncic	28.23	13974.0
Kevin Durant	27.01	35170.0
Stephen Curry	26.92	19465.0
Giannis Antetokounmpo	26.45	20475.0
Joel Embiid	26.41	14181.0

	mean	sum
player_name
Russell Westbrook	8.58	13532.0
Kyle Lowry	6.23	9276.0
Eric Bledsoe	4.98	7404.0
James Harden	8.71	6952.0
Kyrie Irving	5.46	6658.0

	mean	sum
player_name
Trae Young	9.38	4757.0
James Harden	8.71	6952.0
John Wall	8.64	6204.0
Russell Westbrook	8.58	13532.0
Tyrese Haliburton	8.57	2802.0

	mean	sum
player_name
JaVale McGee	52.96	146170.4
Mason Plumlee	57.75	134266.2
Andre Drummond	53.20	117955.2
Taurean Prince	40.98	100804.0
Doug McDermott	43.51	93841.5

	mean	sum
player_name
Kevin Durant	85.17	110896.6
Norman Powell	50.71	110602.5
Gordon Hayward	66.28	106380.6
Russell Westbrook	66.24	104525.6
Mason Plumlee	44.33	103078.5

	mean	sum
player_name
Taurean Prince	34.07	83812.8
Doug McDermott	34.46	74325.0
Norman Powell	32.36	70580.7
Evan Fournier	35.05	62773.5
Alec Burks	32.77	61352.7
Gordon Hayward	34.49	55351.2

	mean	sum
player_name
Andre Drummond	3.94	8742.0
Steven Adams	3.87	5160.0
Mason Plumlee	2.10	4878.0
JaVale McGee	1.39	3848.0
Domantas Sabonis	2.62	3528.0

	mean	sum
player_name
Andre Drummond	8.16	18099.0
Nikola Vucevic	8.20	12262.0
Mason Plumlee	4.74	11016.0
Russell Westbrook	6.59	10396.0
Domantas Sabonis	7.63	10276.0

	mean	sum
player_name
Andre Drummond	12.11	26841.0
Rudy Gobert	12.00	9447.0
Giannis Antetokounmpo	10.79	8349.0
Karl-Anthony Towns	10.74	7354.0
Anthony Davis	10.74	7369.0

	mean	sum
player_name
JaVale McGee	0.96	2648.0
Andre Drummond	1.12	2478.0
Mason Plumlee	0.80	1866.0
Kevin Durant	1.26	1640.0
Rudy Gobert	2.02	1586.0

	mean	sum
player_name
Selom Mawugbe	4.50	9.0
Victor Wembanyama	3.64	393.0
Walker Kessler	2.34	422.0
Chet Holmgren	2.32	255.0
Myles Turner	2.17	1499.0
Anthony Davis	2.15	1472.0

	mean	sum
player_name
Luka Doncic	20.58	10185.0
Donovan Mitchell	19.28	11493.0
Stephen Curry	18.98	13723.0
Damian Lillard	18.90	13588.0
LeBron James	18.84	14378.0

	mean	sum
player_name
Kevin Durant	6.47	8420.0
Stephen Curry	7.54	5453.0
Kyrie Irving	4.29	5234.0
Danny Green	3.85	5004.0
Kyle Lowry	3.26	4852.0
Steven Adams	3.48	4632.0
Draymond Green	5.81	4431.0
Pascal Siakam	2.92	4078.0
Nikola Jokic	4.84	4036.0
Jayson Tatum	5.75	3995.0

	sum
country
USA	949700.0
Canada	28047.0
Australia	22018.0
France	21078.0
Germany	20187.0
Cameroon	19487.0
Lithuania	15012.0
Montenegro	13888.0
Bahamas	11729.0
Croatia	11405.0
Turkey	10133.0
Serbia	8326.0
Slovenia	7689.0
Dominican Republic	7636.0
Japan	6688.0
United Kingdom	6671.0
Saint Lucia	5888.0
Greece	5868.0
Latvia	5702.0
Austria	4481.0
Bosnia and Herzegovina	4312.0
Nigeria	4090.0
Jamaica	3611.0
Spain	3479.0
Ukraine	3317.0
New Zealand	3040.0
Sudan	2261.0
DRC	1766.0
Angola	1463.0
South Sudan	1248.0
Italy	715.0
Brazil	577.0
Republic of the Congo	234.0
Argentina	105.0
Gabon	28.0
Mali	20.0

	mean	sum
player_name
Andre Drummond	1.29	2871.0
Russell Westbrook	1.50	2364.0
Eric Bledsoe	1.34	2001.0
Taurean Prince	0.80	1960.0
Kyle Lowry	1.28	1900.0

	mean	sum
player_name
Kevin Durant	9.52	12392.0
Russell Westbrook	7.70	12152.0
Andre Drummond	5.22	11562.0
Nikola Vucevic	7.66	11452.0
Kyrie Irving	9.02	11004.0

	mean	sum
player_name
DMitrik Trice	10.00	10.0
LeBron James	9.86	7521.0
Giannis Antetokounmpo	9.70	7505.0
Luka Doncic	9.64	4772.0
Kevin Durant	9.52	12392.0
Anthony Davis	9.11	6251.0

	mean	sum
player_name
Russell Westbrook	17.56	27712.0
Kevin Durant	18.35	23886.0
Nikola Vucevic	15.57	23294.0
Kyrie Irving	18.73	22846.0
Andre Drummond	9.81	21741.0

	player_name	min	game_id
season
2023	Cheick Diallo	10.0	12508
2023	Cheick Diallo	5.0	12514
2023	Cheick Diallo	5.0	12546

	player_name	assists
season
2017	Rajon Rondo	25.0
2018	Russell Westbrook	24.0
2020	Russell Westbrook	24.0
2018	Russell Westbrook	24.0
2020	Russell Westbrook	24.0
2023	Tyrese Haliburton	23.0
2016	Russell Westbrook	22.0
2016	Russell Westbrook	22.0
2024	Trae Young	22.0

	points	assists
count	374176.00	374176.00
mean	9.24	2.01
std	8.44	2.45
min	0.00	0.00
25%	2.00	0.00
50%	8.00	1.00
75%	14.00	3.00
max	73.00	25.00

	player_name	tpm
season
2018	Klay Thompson	14.0
2019	Zach LaVine	13.0
2016	Stephen Curry	13.0
2022	Damian Lillard	13.0
2020	Damian Lillard	12.0
2015	Stephen Curry	12.0

	assists	blocks	steals	fgm	fgp	ftm	ftp	totReb	tpm	tpp	points	min	plusMinus
player_name
James Harden	0.93	0.14	0.38	0.78	0.43	0.93	0.84	0.52	0.70	0.35	0.93	0.94	0.58
Luka Doncic	0.86	0.10	0.31	0.96	0.46	0.74	0.72	0.71	0.66	0.33	1.00	1.00	0.55
Kevin Durant	0.54	0.28	0.21	0.95	0.53	0.73	0.85	0.58	0.48	0.40	0.96	0.97	0.63
Stephen Curry	0.62	0.07	0.35	0.89	0.47	0.56	0.83	0.42	1.00	0.41	0.95	0.88	0.66
Joel Embiid	0.36	0.35	0.22	0.87	0.48	1.00	0.80	0.88	0.24	0.31	0.94	0.92	0.61
LeBron James	0.84	0.15	0.31	0.99	0.52	0.56	0.70	0.66	0.42	0.33	0.92	0.95	0.58
Giannis Antetokounmpo	0.57	0.29	0.29	0.97	0.55	0.79	0.67	0.89	0.16	0.23	0.94	0.91	0.59
Damian Lillard	0.72	0.07	0.25	0.83	0.43	0.80	0.87	0.36	0.73	0.35	0.93	0.96	0.55
Victor Wembanyama	0.40	0.81	0.29	0.81	0.47	0.49	0.77	0.86	0.49	0.32	0.79	0.82	0.50
Trae Young	1.00	0.03	0.26	0.78	0.42	0.81	0.82	0.29	0.57	0.33	0.87	0.94	0.49
Anthony Davis	0.29	0.48	0.32	0.91	0.51	0.74	0.75	0.89	0.14	0.22	0.88	0.93	0.54
Nikola Jokic	0.73	0.16	0.30	0.82	0.55	0.48	0.72	0.89	0.24	0.32	0.76	0.94	0.60
Kawhi Leonard	0.39	0.14	0.42	0.84	0.49	0.63	0.80	0.53	0.42	0.38	0.84	0.90	0.63
Jayson Tatum	0.40	0.15	0.28	0.80	0.45	0.58	0.75	0.61	0.55	0.36	0.82	0.99	0.62
Kyrie Irving	0.58	0.11	0.31	0.90	0.48	0.45	0.78	0.34	0.57	0.38	0.86	0.98	0.59
Paul George	0.43	0.09	0.42	0.77	0.43	0.57	0.76	0.53	0.65	0.38	0.81	0.90	0.58
Karl-Anthony Towns	0.33	0.27	0.19	0.81	0.52	0.55	0.78	0.89	0.37	0.37	0.79	0.89	0.53
Donovan Mitchell	0.49	0.07	0.34	0.86	0.44	0.51	0.75	0.35	0.65	0.36	0.86	0.94	0.58
Anthony Edwards	0.44	0.14	0.33	0.83	0.44	0.48	0.72	0.43	0.61	0.35	0.82	0.97	0.53
Shai Gilgeous-Alexander	0.52	0.18	0.35	0.80	0.49	0.69	0.77	0.39	0.28	0.34	0.80	0.93	0.54
LaMelo Ball	0.76	0.07	0.37	0.72	0.41	0.38	0.70	0.49	0.64	0.34	0.72	0.88	0.47
Jimmy Butler	0.54	0.10	0.42	0.67	0.47	0.79	0.79	0.47	0.18	0.26	0.73	0.91	0.56
Ja Morant	0.79	0.07	0.26	0.81	0.46	0.61	0.71	0.40	0.29	0.29	0.79	0.84	0.56
Devin Booker	0.52	0.06	0.21	0.82	0.44	0.63	0.78	0.32	0.45	0.33	0.83	0.96	0.51

	index	player_name	team_name	active	years_pro	affiliation	college	country	age	rookie_year
1	1919	Anthony Davis	Los Angeles Lakers	True	9.0	Kentucky/USA	Kentucky	USA	32.0	2012.0
0	0	Anthony Davis	New Orleans Pelicans	True	9.0	Kentucky/USA	Kentucky	USA	32.0	2012.0
2	2016	Ja Morant	Memphis Grizzlies	NaN	NaN	NaN	NaN	NaN	NaN	NaN
3	909	Jayson Tatum	Boston Celtics	True	4.0	Duke/USA	Duke	USA	27.0	2017.0
4	2229	Jimmy Butler	Miami Heat	True	10.0	Marquette/USA	Marquette	USA	35.0	2011.0
7	1840	Jimmy Butler	Philadelphia 76ers	True	10.0	Marquette/USA	Marquette	USA	35.0	2011.0
6	1014	Jimmy Butler	Minnesota Timberwolves	True	10.0	Marquette/USA	Marquette	USA	35.0	2011.0
5	230	Jimmy Butler	Chicago Bulls	True	10.0	Marquette/USA	Marquette	USA	35.0	2011.0
9	2146	Kawhi Leonard	LA Clippers	True	10.0	San Diego State/USA	San Diego State	USA	33.0	2011.0
10	1320	Kawhi Leonard	Toronto Raptors	True	10.0	San Diego State/USA	San Diego State	USA	33.0	2011.0
8	319	Kawhi Leonard	San Antonio Spurs	True	10.0	San Diego State/USA	San Diego State	USA	33.0	2011.0
12	3680	Kyrie Irving	Dallas Mavericks	True	10.0	Duke/Australia	Duke	Australia	33.0	2011.0
11	2094	Kyrie Irving	Brooklyn Nets	True	10.0	Duke/Australia	Duke	Australia	33.0	2011.0
13	940	Kyrie Irving	Boston Celtics	True	10.0	Duke/Australia	Duke	Australia	33.0	2011.0
14	80	Kyrie Irving	Cleveland Cavaliers	True	10.0	Duke/Australia	Duke	Australia	33.0	2011.0
15	2324	Shai Gilgeous-Alexander	Oklahoma City Thunder	NaN	NaN	NaN	NaN	NaN	NaN	NaN
16	1741	Shai Gilgeous-Alexander	LA Clippers	NaN	NaN	NaN	NaN	NaN	NaN	NaN

	assists	blocks	steals	fgm	fgp	ftm	ftp	totReb	tpm	tpp	points	min	plusMinus
player_name
Anthony Davis	0.290000	0.480000	0.320000	0.910000	0.510000	0.740000	0.750000	0.890000	0.140000	0.220000	0.880000	0.930000	0.540000
Kawhi Leonard	0.390000	0.140000	0.420000	0.840000	0.490000	0.630000	0.800000	0.530000	0.420000	0.380000	0.840000	0.900000	0.630000
Jayson Tatum	0.400000	0.150000	0.280000	0.800000	0.450000	0.580000	0.750000	0.610000	0.550000	0.360000	0.820000	0.990000	0.620000
Kyrie Irving	0.580000	0.110000	0.310000	0.900000	0.480000	0.450000	0.780000	0.340000	0.570000	0.380000	0.860000	0.980000	0.590000
Shai Gilgeous-Alexander	0.520000	0.180000	0.350000	0.800000	0.490000	0.690000	0.770000	0.390000	0.280000	0.340000	0.800000	0.930000	0.540000
Jimmy Butler	0.540000	0.100000	0.420000	0.670000	0.470000	0.790000	0.790000	0.470000	0.180000	0.260000	0.730000	0.910000	0.560000
Ja Morant	0.790000	0.070000	0.260000	0.810000	0.460000	0.610000	0.710000	0.400000	0.290000	0.290000	0.790000	0.840000	0.560000