import http.client
import json
from pathlib import Path
import pandas as pd
from scipy.stats import shapiro

from datetime import datetime, date

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import plotly.io as pio

from IPython.display import HTML, Javascript, display

# !pip install plotly 
# !pip install cufflinks 
# !pip install chart_studio
# !pip install seaborn --upgrade
# !pip install print-versions # works for python >= 3.8
# !pip install pypalettes

from pypalettes import load_cmap
import seaborn as sns
import cufflinks as cf
import chart_studio.plotly as py
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
import plotly.io as pio
from plotly.offline import init_notebook_mode

# magic function, renders matplotlib figures in notebook / inline
# %matplotlib inline 
%matplotlib 
pd.options.plotting.backend = "plotly"
# pio.renderers.default = 'notebook'
pio.renderers.default = "notebook_connected"
init_notebook_mode(connected=False)
cf.go_offline()

Using matplotlib backend: module://matplotlib_inline.backend_inline

import types
def imports():
    """Yield the ``__name__`` of every module object bound in this module's globals.

    Names are produced lazily, in the iteration order of ``globals()``; a
    module bound under several names is reported once per binding.
    """
    yield from (
        obj.__name__
        for obj in globals().values()
        if isinstance(obj, types.ModuleType)
    )
list(imports())

['builtins',
 'builtins',
 'json',
 'sys',
 'http',
 'pandas',
 'matplotlib',
 'matplotlib.pyplot',
 'matplotlib.ticker',
 'plotly.io',
 'seaborn',
 'cufflinks',
 'chart_studio.plotly',
 'plotly.express',
 'plotly.graph_objects',
 'plotly.figure_factory',
 'types']

from print_versions import print_versions

print_versions(globals())

json==2.0.9
ipykernel==6.28.0
pandas==2.2.2
scipy==1.13.1
matplotlib==3.9.2
pypalettes==0.1.4
seaborn==0.13.2
cufflinks==0.17.3
plotly==5.24.1

import sys
print(sys.executable)
print(sys.version)
print(sys.version_info)

/opt/anaconda3/bin/python
3.12.7 | packaged by Anaconda, Inc. | (main, Oct  4 2024, 08:28:27) [Clang 14.0.6 ]
sys.version_info(major=3, minor=12, micro=7, releaselevel='final', serial=0)

# raw data is read from "raw"; clean pre-processed data is stored in "data"
dir_raw, dir_data = Path("raw"), Path("data")
# create the output directory (and any missing parents) unless it already exists
dir_data.mkdir(parents=True, exist_ok=True)

# init config, runtime variables and counters
teams = [
    "atlanta hawks", "boston celtics", "brooklyn nets", "charlotte hornets", "chicago bulls",
    "cleveland cavaliers", "dallas mavericks", "denver nuggets", "detroit pistons", "golden state warriors",
    "houston rockets", "indiana pacers", "la clippers", "los angeles lakers", "memphis grizzlies", "miami heat",
    "milwaukee bucks", "minnesota timberwolves", "new orleans pelicans", "new york knicks", "oklahoma city thunder",
    "orlando magic", "philadelphia 76ers", "phoenix suns", "portland trail blazers", "sacramento kings",
    "san antonio spurs", "toronto raptors", "utah jazz", "washington wizards",
]
seasons = list(range(2015, 2025))

# one empty placeholder per team, keyed by team name
player_files = {team_name: [] for team_name in teams}

# one empty placeholder per "<team> <season>" key, teams outermost
stats_files = {
    team_name + ' ' + str(season): []
    for team_name in teams
    for season in seasons
}

# running totals of the records read from disk
stats_rec_cnt = 0
player_indiv_cnt = 0

debug_mode = False

# iterate through raw data on disk
for fpath in dir_raw.iterdir():
    if fpath.suffix != ".json":
        continue

    # JSON is UTF-8 by specification, so do not depend on the platform's default encoding
    with open(fpath, "r", encoding="utf-8") as fin:
        data = json.load(fin)

    # parse the filename; the indexing below expects "<team words>_<season>_<data type>"
    parts = fpath.stem.split("_")
    if len(parts) < 3:
        # too few components for a two-word team plus a season field:
        # skip the file instead of failing with an IndexError
        print('Skipping file with unexpected name: ', fpath.name)
        continue

    # set season, team and file type
    data_type = parts[-1]
    if len(parts) >= 5 and len(parts[3]) == 4:
        # three-word team name followed by a four-character season field
        team = ' '.join(parts[:3])
        season = parts[3]
    else:
        team = ' '.join(parts[:2])
        season = parts[2]

    # files for teams that are not configured are ignored
    if team not in teams:
        continue

    # add data to dict before merging
    number_records = len(data['response'])
    if debug_mode:
        print("Reading data for the %s for season %s with data type %s with %s records" % (team, season, data_type, number_records))
    if data_type == 'players':
        # NOTE(review): this entry is replaced on every 'players' file of a team while
        # player_indiv_cnt keeps accumulating, so only the last file read per team is kept.
        # Confirm that this is intended.
        player_files[team] = pd.DataFrame.from_dict(data["response"])
        player_indiv_cnt += number_records
    elif data_type == 'stats':
        stats_files[team + ' ' + str(season)] = pd.DataFrame.from_dict(data["response"])
        stats_rec_cnt += number_records
    else:
        print('Invalid data type: ', data_type)

print("Finished reading %s player statistics records" % (stats_rec_cnt))
print("Finished reading %s individual player records" % (player_indiv_cnt))

Finished reading 329856 player statistics records
Finished reading 8032 individual player records

# calculate player age from date of birth (DOB)
def get_player_age(birth_date, today=None):
    """Return the age in whole years for a ``YYYY-MM-DD`` date of birth.

    Parameters
    ----------
    birth_date : str or None
        Date of birth formatted as ``%Y-%m-%d``. ``None``, NaN/NA and the
        empty string are treated as "unknown".
    today : datetime.date, optional
        Reference date the age is measured against. Defaults to the current
        date, which is what callers passing a single argument get.

    Returns
    -------
    int or None
        Completed years between ``birth_date`` and ``today``, or ``None``
        when the date of birth is unknown.

    Raises
    ------
    ValueError
        If ``birth_date`` is a non-empty string that is not ``%Y-%m-%d``.
    """
    # pd.isna goes first: it covers None as well as float NaN, which would
    # otherwise reach strptime and raise a TypeError
    if pd.isna(birth_date) or birth_date == '':
        return None

    dob = datetime.strptime(birth_date, '%Y-%m-%d').date()
    if today is None:
        today = date.today()
    # the tuple comparison is True (counts as 1) while this year's birthday is still ahead
    return today.year - dob.year - ((today.month, today.day) < (dob.month, dob.day))

df_player_stats = pd.concat(stats_files)

df_player_details = pd.concat(player_files)

print(df_player_stats.shape)
# 329856 is total number of player stats records
df_player_stats.shape[0] == 329856

(329856, 25)

True

print(df_player_details.shape)
df_player_details.shape[0] == 866

(866, 10)

True

# Normalize nested JSON objects for player game statistics
df_player = pd.json_normalize(df_player_stats['player'])  # source of player_id and of first/last name for player_name
df_team = pd.json_normalize(df_player_stats['team'])  # source of team_id, team_code and team_name
df_game = pd.json_normalize(df_player_stats['game'])  # source of game_id

# Normalize nested JSON objects for player details
df_birth = pd.json_normalize(df_player_details['birth'])  # source of country and DOB
df_height = pd.json_normalize(df_player_details['height'])  # source of height in meters and of the feet/inches 'feet' column
df_leagues = pd.json_normalize(df_player_details['leagues'])  # source of standard.active, standard.jersey and standard.pos
df_nba = pd.json_normalize(df_player_details['nba'])  # source of 'pro' (years_pro) and 'start' (rookie_year)
df_weight = pd.json_normalize(df_player_details['weight'])  # source of weight in pounds

# Extract normalized data to player stats DF
# reset_index exposes the concat index levels as the columns 'level_0' and 'level_1'
df_player_stats = df_player_stats.reset_index(drop=False)

# the season is the last space-separated token of the 'level_0' key
df_player_stats['season'] = df_player_stats['level_0'].apply(lambda key: key.split(' ')[-1])

# flatten the nested player / team / game objects into plain columns
df_player_stats['player_id'] = df_player['id']
df_player_stats['player_name'] = df_player['firstname'] + ' ' + df_player['lastname']
df_player_stats['team_code'] = df_team['code']
df_player_stats['team_name'] = df_team['name']
df_player_stats['team_id'] = df_team['id']
df_player_stats['game_id'] = df_game['id']

# the index levels and the nested source columns are no longer needed
df_player_stats = df_player_stats.drop(columns=['level_0', 'level_1', 'player', 'team', 'game'])

# values that cannot be parsed as numbers become NaN
df_player_stats['min'] = pd.to_numeric(df_player_stats['min'], errors='coerce')
df_player_stats['plusMinus'] = pd.to_numeric(df_player_stats['plusMinus'], errors='coerce')

# Extract normalized data to player details DF
df_player_details = df_player_details.reset_index(drop=True)

df_player_details['country'] = df_birth['country']
df_player_details['DOB'] = df_birth['date']

# NOTE(review): a missing DOB is stored as age 0, which feeds into any later
# mean/correlation over 'age' - confirm this is acceptable
df_player_details['age'] = df_player_details['DOB'].apply(get_player_age)
df_player_details['age'] = df_player_details['age'].fillna(0).astype('int')
del df_player_details['birth']

# imperial height formatted as <feet>'<inches>" (the apostrophe marks feet, the double quote inches)
df_player_details['feet'] = df_height['feets'] + "'" + df_height['inches'] + '"'
# unparseable heights become NaN; rounding leaves NaN untouched
df_player_details['meters'] = pd.to_numeric(df_height['meters'], errors='coerce').round(2)
del df_player_details['height']

df_player_details['position'] = df_leagues['standard.pos']
df_player_details['active'] = df_leagues['standard.active']
df_player_details['jersey_number'] = df_leagues['standard.jersey']
del df_player_details['leagues']

df_player_details['years_pro'] = df_nba['pro']
df_player_details['rookie_year'] = df_nba['start']
del df_player_details['nba']

# nullable integer dtype so that missing weights stay missing
df_player_details['pounds'] = df_weight['pounds']
df_player_details['pounds'] = df_player_details['pounds'].astype('Int16')
del df_player_details['weight']

df_player_stats.shape # verify record count is the same and new column count

(329856, 29)

# merge data frames (player game statistics and player details)
df = pd.merge(df_player_stats, df_player_details, how='left', left_on='player_id', right_on='id')

df.columns

Index(['assists', 'blocks', 'comment', 'defReb', 'fga', 'fgm', 'fgp', 'fta',
       'ftm', 'ftp', 'min', 'offReb', 'pFouls', 'plusMinus', 'points', 'pos',
       'steals', 'totReb', 'tpa', 'tpm', 'tpp', 'turnovers', 'season',
       'player_id', 'player_name', 'team_code', 'team_name', 'team_id',
       'game_id', 'affiliation', 'college', 'firstname', 'id', 'lastname',
       'country', 'DOB', 'age', 'feet', 'meters', 'position', 'active',
       'jersey_number', 'years_pro', 'rookie_year', 'pounds'],
      dtype='object')

# set season (year) as main DF index to allow for time series analysis of player data
df = df.reset_index().set_index('season')
df.index = pd.to_datetime(df.index, format='%Y').to_period('Y')
df = df.sort_index(axis=0)

fontsize=22

# convenience function for time series plots
def create_ts_plot(data, title, ylabel, color="navy", rhs_vals=None):
    """Plot ``data`` as a line chart and return the resulting axes.

    ``title`` is drawn one point smaller than the module-level ``fontsize``,
    ``ylabel`` labels the y axis, the x-axis label is cleared, and only the
    horizontal grid lines are shown. ``rhs_vals`` is accepted but not used.

    Side effect: the pandas plotting backend is switched to matplotlib and
    is not switched back afterwards.
    """
    pd.options.plotting.backend = "matplotlib"
    axes = data.plot(figsize=(9, 4.3), legend=False, lw=2, zorder=3, color=color)
    axes.set_title(title, fontsize=fontsize - 1)
    axes.set_ylabel(ylabel)
    axes.set_xlabel(None)
    axes.xaxis.grid(False)
    axes.yaxis.grid(True)
    return axes

# convenience function to aggregate top players by a given statistic or feature
def summary_top_players(grp, stat,  sort_by = 'mean', results = 10):
    """Return the ``results`` highest-ranked groups of the module-level ``df`` for one statistic.

    ``df[stat]`` is first coerced to numeric in place (values that cannot be
    parsed become NaN). The frame is then grouped by ``grp``; the mean and
    sum of ``stat`` are rounded to 2 decimals and sorted in descending order
    of ``sort_by`` ('mean' or 'sum').
    """
    df[stat] = pd.to_numeric(df[stat], errors='coerce')
    per_group = df.groupby([grp])[stat].agg(['mean', 'sum']).round(2)
    ranked = per_group.sort_values(by=sort_by, ascending=False)
    return ranked.head(results)

# convenience method to highlight data frame cell lime green
def highlighter(cell_value, thresh_lower = 0.7, thresh_upper = 0.99):
    """Return the CSS background for one cell, or ``None`` for no styling.

    Values in ``[thresh_lower, thresh_upper)`` are painted lime green and a
    value equal to 1 is painted yellow; anything else is left unstyled.
    """
    in_band = cell_value >= thresh_lower and cell_value < thresh_upper
    if in_band:
        return "background-color: #32CD32"
    if cell_value == 1:
        return "background-color: yellow"
    return None

df['pounds'] = pd.to_numeric(df['pounds'])
print("Average player weight %s lbs." % round(df['pounds'].mean(skipna = True), 2))

Average player weight 219.89 lbs.

df['meters'] = pd.to_numeric(df['meters'])
round(df['meters'].mean(skipna = True), 2)
print("Average player height %s meters" % round(df['meters'].mean(skipna = True), 2))

Average player height 1.99 meters

df[['pounds', 'meters']].corr()

df.plot('pounds', 'meters', 'scatter')

numeric_cols = ['assists', 'blocks', 'defReb', 'fga', 'fgm', 'fgp', 'fta', 'ftm', 'ftp', 'offReb', 'pFouls', 
                'points', 'steals', 'totReb', 'tpa', 'tpm', 'tpp', 'turnovers', 'age', 'meters', 'min', 'plusMinus',
                'pounds', 'years_pro', 'rookie_year', 'jersey_number']

df_corr_c = df[numeric_cols].corr()

df_corr_c.style.map(highlighter)

# rank every column pair by absolute correlation, strongest first; the matrix is
# symmetric, so each pair is listed twice, as (a, b) and as (b, a)
df_corr_c = df[numeric_cols].corr().abs().unstack().sort_values(ascending=False)
df_corr_c[(df_corr_c < 1) & (df_corr_c >= 0.7)]  # keep 0.7 <= |r| < 1: drops weaker pairs and the value-1 entries of a column with itself

points       fgm            0.968215
fgm          points         0.968215
fta          ftm            0.953071
ftm          fta            0.953071
defReb       totReb         0.940426
totReb       defReb         0.940426
rookie_year  age            0.898160
age          rookie_year    0.898160
points       fga            0.897463
fga          points         0.897463
fgm          fga            0.897029
fga          fgm            0.897029
tpm          tpa            0.824836
tpa          tpm            0.824836
meters       pounds         0.801738
pounds       meters         0.801738
min          fga            0.794232
fga          min            0.794232
min          points         0.744084
points       min            0.744084
min          fgm            0.728871
fgm          min            0.728871
tpa          fga            0.708842
fga          tpa            0.708842
tpm          tpp            0.705648
tpp          tpm            0.705648
ftp          ftm            0.705585
ftm          ftp            0.705585
offReb       totReb         0.704013
totReb       offReb         0.704013
dtype: float64

# yearly mean weight: only the 'pounds' column is converted from periods to
# timestamps, so the full frame does not need to be copied
df_weight = df['pounds'].to_timestamp().resample("1YE").mean()
create_ts_plot(df_weight, "Average Weight by Year", "Weight in Pounds", "darkgreen");

avg_weight_2015 = round(df[df.index == '2015']['pounds'].mean(), 2)
print("Average weight for 2015: %s pounds" % avg_weight_2015)

Average weight for 2015: 222.31 pounds

avg_weight_2024 = round(df[df.index == '2024']['pounds'].mean(), 2)
print("Average weight for 2024: %s pounds" % avg_weight_2024)

Average weight for 2024: 217.21 pounds

print("Difference in weight from 2015 to 2024: %s lbs" % (round(avg_weight_2015 - avg_weight_2024, 2)))

Difference in weight from 2015 to 2024: 5.1 lbs

df[['player_name', 'pounds', 'meters', 'feet']].reset_index(drop=True).sort_values(by=['pounds'], ascending=False).drop_duplicates()[0:5]

df[['player_name', 'pounds', 'meters', 'feet']].reset_index(drop=True).sort_values(by=['pounds'], ascending=True).drop_duplicates()[0:5]

print("Average height for players in 2017: %s meters" % round(df[df.index == '2017']['meters'].mean(), 3))

Average height for players in 2017: 1.998 meters

print("Average height for players in 2021: %s meters" % round(df[df.index == '2021']['meters'].mean(), 3))

Average height for players in 2021: 1.99 meters

# yearly mean height: only the 'meters' column is converted from periods to
# timestamps, so the full frame does not need to be copied
df_height = df['meters'].to_timestamp().resample("1YE").mean()
create_ts_plot(df_height, "Average Height by Year", "Height in Meters", "violet");

# create histogram visual
mins_played_summary = summary_top_players('player_name', 'min', results = 800)['sum']
h = mins_played_summary.hist(backend='plotly', labels=dict(index='Player Count', value='Minutes Played', 
                                                           variable='Total Minutes Played'))
h.layout.yaxis.title.text = 'Player Count'
h

test_normality = shapiro(mins_played_summary)

print(test_normality)

ShapiroResult(statistic=0.8014081288597795, pvalue=3.564594976648819e-30)

# Shapiro-Wilk tests the null hypothesis that the sample comes from a normal
# distribution; that hypothesis is rejected on the p-value alone. The W statistic
# is not a second criterion: a low W indicates a stronger departure from
# normality, so also requiring a large W would hide the clearest cases.
is_not_normal_dist = bool(test_normality.pvalue < 0.05)
print("Can we reject the null hypothesis and say this is not a normal distribution (bell shaped): %s" % is_not_normal_dist)

Can we reject the null hypothesis and say this is not a normal distribution (bell shaped): True

df_mins = summary_top_players('player_name', 'min', sort_by = 'sum', results = 20)
df_mins[df_mins['sum'] > 7500]
df_mins

summary_top_players('player_name', 'points', sort_by = 'sum', results=5)

summary_top_players('player_name', 'points', results=5)

summary_top_players('player_name', 'assists', sort_by = 'sum', results=5)

summary_top_players('player_name', 'assists', results=5)

summary_top_players('player_name', 'fgp', sort_by = 'sum', results=5)

summary_top_players('player_name', 'ftp', sort_by = 'sum', results=5)# free throw percentage

summary_top_players('player_name', 'tpp', sort_by = 'sum', results=6)# three-point percentage

summary_top_players('player_name', 'offReb', sort_by = 'sum', results=5)# offensive rebounds

summary_top_players('player_name', 'defReb', sort_by = 'sum', results=5)# defensive rebounds

summary_top_players('player_name', 'totReb', results=5)# Total rebounds by average

summary_top_players('player_name', 'totReb', sort_by = 'sum', results=5)# Total rebounds by total/sum

summary_top_players('player_name', 'blocks', sort_by = 'sum', results=5)# blocked shots

summary_top_players('player_name', 'blocks', results=6)# blocked shots by average

summary_top_players('player_name', 'steals', sort_by = 'sum', results=5)

summary_top_players('player_name', 'fgm', sort_by = 'sum', results=5)

summary_top_players('player_name', 'fgm', results=6)

summary_top_players('player_name', 'fga', results=5)

summary_top_players('player_name', 'fga', sort_by='sum', results=5)

df_field_goals_made = summary_top_players('player_name', 'fgm')
df_field_goals_made = df_field_goals_made[df_field_goals_made['sum'] > 5000]
df_field_goals_made

summary_top_players('player_name', 'plusMinus', sort_by = 'sum')

df_country_grp = df.loc[~((df['min'] == 0) | (df['min'].isna()))]

country_grp = df_country_grp.groupby('country')['min'].agg({'sum'}).sort_values('sum', ascending=False)
country_grp

print("The number of countries with representation in the NBA over the last 10 years is: %s" % len(country_grp))# Count of countries represented

The number of countries with representation in the NBA over the last 10 years is: 36

# remove USA (heavily weighted towards players from USA): country_grp is sorted by
# total minutes in descending order, so the slice drops its first (largest) row
# horizontal bars ('barh') put the countries on the y axis, matching the y-axis title
# set below; with kind='bar' that title would have labelled the minutes axis
hbar = country_grp[1:].plot(backend='plotly', kind='barh', labels = dict(variable='Minutes Played', 
                                                                         value='Total Minutes Played', index='Country'))
hbar.layout.yaxis.title.text = 'Country'
hbar

df[(df['country'] == 'Mali') & (df['min'] > 0)][['player_name', 'min', 'game_id']]# data verification of minutes played for Mali

print("The number of unique NBA players listed from 2015 - 2024 seasons is: %s" % len(df['player_name'].unique()))# number of unique NBA players listed from 2015 - 2024

The number of unique NBA players listed from 2015 - 2024 seasons is: 2044

df[df['team_name'].isin({'Boston Celtics', 'Miami Heat', 'Milwaukee Bucks'})].groupby('team_name')['plusMinus'].plot(
    x=df.index.year, legend=True, figsize=(20, 10), use_index=False, title="Plus Minus By Team", 
    fontsize=fontsize, zorder=3)

team_name
Boston Celtics     Axes(0.125,0.11;0.775x0.77)
Miami Heat         Axes(0.125,0.11;0.775x0.77)
Milwaukee Bucks    Axes(0.125,0.11;0.775x0.77)
Name: plusMinus, dtype: object

df[df['player_name'].isin({'LeBron James', 'Stephen Curry'})].groupby('player_name')['plusMinus'].plot(
    x=df.index.year, legend=True, figsize=(20, 10), use_index=False, title="Plus Minus By Season", 
    fontsize=fontsize, zorder=3)

player_name
LeBron James     Axes(0.125,0.11;0.775x0.77)
Stephen Curry    Axes(0.125,0.11;0.775x0.77)
Name: plusMinus, dtype: object

# highest 'points' value per year, taken from the single column after its
# period index has been converted to timestamps
df_pts_per_yr = df['points'].to_timestamp().resample("1YE").max()
ax = create_ts_plot(df_pts_per_yr, "Points Scored by Season High", "Points Scored", "darkred")

# highest 'assists' value per year, taken from the single column after its
# period index has been converted to timestamps
df_assts_per_yr = df['assists'].to_timestamp().resample("1YE").max()
ax = create_ts_plot(df_assts_per_yr, "Assists Made by Season High", "Assists Maximum")

df[['player_name', 'assists']].sort_values(by='assists', ascending=False)[0:9]

df_pts_high = df.loc[~((df['points'] == 0) | (df['points'].isna()))]
  
sns.set_theme(style="darkgrid")
palette = load_cmap("pastel").hex

plt.figure(figsize=(10, 5))
sns.boxplot(data=df_pts_high, x=df_pts_high.index, y='points', palette=palette, hue='season')
plt.title("Points Scored Distribution by Season", fontsize=fontsize-1)
plt.xlabel("Season")
plt.ylabel("Points Scored");
plt.ylim(-5, 75);

df[['player_name', 'points']].sort_values(by='points', ascending=False)[0:6]  # six highest 'points' rows across the loaded seasons (the `seasons` list has 10 entries, 2015-2024)

round(df[['points', 'assists']].describe(), 2)

# highest 'tpm' value per year, taken from the single column after its
# period index has been converted to timestamps
df_tpm_per_yr = df['tpm'].to_timestamp().resample("1YE").max()
ax = create_ts_plot(df_tpm_per_yr, "Three Pointers Made by Season High", "Count", "royalblue")

df[['player_name', 'tpm']].sort_values(by='tpm', ascending=False)[0:6]

cols_pos = ['assists', 'blocks', 'steals', 'fgm', 'fgp', 'ftm', 'ftp', 'totReb', 'tpm', 'tpp', 'points', 'min', 'plusMinus']
cols_neg = ['pFouls', 'turnovers', 'min']


def calc_productivity_matrix(df_temp, cols, min_thresh = 0.75, top_n = 19):
    """Rank players by the sum of their min-max normalised per-player averages.

    Parameters
    ----------
    df_temp : pandas.DataFrame
        Must contain a ``player_name`` column and every column in ``cols``.
        It is not modified.
    cols : list of str
        Columns to average per player, normalise and sum.
    min_thresh : float, default 0.75
        When ``'min'`` is one of ``cols``, only players whose normalised
        ``'min'`` value is strictly greater than this are kept.
    top_n : int or None, default 19
        Number of highest-ranked players to return; ``None`` returns all.

    Returns
    -------
    pandas.DataFrame
        Normalised values rounded to 2 decimals, indexed by ``player_name``,
        restricted to ``cols`` and ordered by descending row sum.
    """
    # group by player and average each column over all of the player's rows
    df_prod = df_temp.groupby("player_name")[cols].mean()

    # min-max normalisation per column; a column whose values are all equal
    # divides 0 by 0 and becomes NaN for every player
    df_norm_prod = (df_prod - df_prod.min()) / (df_prod.max() - df_prod.min())

    # drop players with any NA value
    df_prod_heatmap = df_norm_prod.dropna().round(2)

    # filter out players with less playing time
    if 'min' in df_prod_heatmap.columns:
        df_prod_heatmap = df_prod_heatmap[df_prod_heatmap['min'] > min_thresh]

    # row sum as a productivity indicator (positive or negative contributions);
    # assign() builds a new frame rather than writing into the filtered slice,
    # which avoids pandas' SettingWithCopyWarning
    ranked = df_prod_heatmap.assign(player_sum=df_prod_heatmap.sum(axis=1))
    ranked = ranked.sort_values('player_sum', ascending=False)

    return ranked.iloc[:top_n][cols]

positive_prod = calc_productivity_matrix(df, cols_pos)
positive_prod

negative_prod = calc_productivity_matrix(df, cols_neg)
negative_prod

sns.heatmap(positive_prod, annot=True, xticklabels=True, yticklabels=True)

<Axes: ylabel='player_name'>

sns.heatmap(negative_prod, annot=True, xticklabels=True, yticklabels=True)

<Axes: ylabel='player_name'>

players_to_exclude = positive_prod.index.intersection(negative_prod.index)
players_to_exclude

Index(['James Harden', 'Luka Doncic', 'Kevin Durant', 'Joel Embiid',
       'LeBron James', 'Giannis Antetokounmpo', 'Victor Wembanyama',
       'Trae Young', 'Nikola Jokic', 'Paul George', 'Karl-Anthony Towns',
       'Donovan Mitchell', 'Anthony Edwards'],
      dtype='object', name='player_name')

df_team_pos_players = positive_prod[~positive_prod.index.isin(players_to_exclude)]
df_team_pos_players

df_elite_players = df[df['player_name'].isin(df_team_pos_players.index)][['player_name', 'team_name', 'active', 'years_pro', 'affiliation', 'college', 'country', 'age', 'rookie_year']].reset_index(drop=True).drop_duplicates().sort_values('player_name')

df_elite_players = df_elite_players.loc[~((df_elite_players['years_pro'] == 0) | (df_elite_players['years_pro'].isna()))]
df_elite_players['years_pro'] = df_elite_players['years_pro'].astype('int')
df_elite_players['age'] = df_elite_players['age'].astype('int')
df_elite_players['rookie_year'] = df_elite_players['rookie_year'].astype('int')
df_elite_players

df_elite_players.sort_index(axis=0, ascending=False).groupby('player_name').first()

df_team_pos_players.style.background_gradient(cmap='Blues')

a4_dims= [11.7, 8.27]
fig, ax = plt.subplots(figsize=a4_dims)
df_team_pos_players = df_team_pos_players.rename(columns={'assists': 'Assists',
                                                          'blocks': 'Blocks',
                                                          'steals': 'Steals',
                                                          'fgm' : 'Field Goals Made',
                                                          'fgp' : 'Field Goal Percentage',
                                                          'ftm' : 'Free Throws Made',
                                                          'ftp' : 'Free Throw Percentage',
                                                          'totReb' : 'Total Rebounds',
                                                          'tpm' : 'Three Pointers Made',
                                                          'tpp' : 'Three Pointer Percentage',
                                                          'points': 'Points',
                                                          'min': 'Minutes Played',
                                                          'plusMinus': 'Plus Minus Score'})
ax = sns.lineplot(ax=ax, data=df_team_pos_players.T, markers=True, dashes=False)
sns.move_legend(ax, "upper left", bbox_to_anchor=(1,1), fontsize=19)
plt.xticks(rotation = "vertical", fontsize=19)
plt.yticks(fontsize=19)

(array([0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2]),
 [Text(0, 0.0, '0.0'),
  Text(0, 0.2, '0.2'),
  Text(0, 0.4, '0.4'),
  Text(0, 0.6000000000000001, '0.6'),
  Text(0, 0.8, '0.8'),
  Text(0, 1.0, '1.0'),
  Text(0, 1.2000000000000002, '1.2')])

# lower the minutes-played threshold to 0.7 to pull in less experienced pros and rookies that are performing well
df_matrix_70 = calc_productivity_matrix(df, cols_pos, min_thresh=0.7)
# keep only the two players being compared
df_victor_ad = df_matrix_70[df_matrix_70.index.isin(['Victor Wembanyama', 'Anthony Davis'])]
a4_dims= [11.7, 8.27]
fig, ax = plt.subplots(figsize=a4_dims)
df_victor_ad = df_victor_ad.rename(columns={'assists': 'Assists',
                                                          'blocks': 'Blocks',
                                                          'steals': 'Steals',
                                                          'fgm' : 'Field Goals Made',
                                                          'fgp' : 'Field Goal Percentage',
                                                          'ftm' : 'Free Throws Made',
                                                          'ftp' : 'Free Throw Percentage',
                                                          'totReb' : 'Total Rebounds',
                                                          'tpm' : 'Three Pointers Made',
                                                          'tpp' : 'Three Pointer Percentage',
                                                          'points': 'Points',
                                                          'min': 'Minutes Played',
                                                          'plusMinus': 'Plus Minus Score'})
ax = sns.lineplot(ax=ax, data=df_victor_ad.T, markers=True, dashes=False)
sns.move_legend(ax, "upper left", bbox_to_anchor=(1,1), fontsize=19)
plt.xticks(rotation = "vertical", fontsize=19)
plt.yticks(fontsize=19)

(array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ]),
 [Text(0, 0.1, '0.1'),
  Text(0, 0.2, '0.2'),
  Text(0, 0.30000000000000004, '0.3'),
  Text(0, 0.4, '0.4'),
  Text(0, 0.5, '0.5'),
  Text(0, 0.6, '0.6'),
  Text(0, 0.7000000000000001, '0.7'),
  Text(0, 0.8, '0.8'),
  Text(0, 0.9, '0.9'),
  Text(0, 1.0, '1.0')])

plt.pcolor(df_team_pos_players)

<matplotlib.collections.PolyQuadMesh at 0x15f815670>

sns.heatmap(df_team_pos_players, annot=True, xticklabels=True, yticklabels=True)

<Axes: ylabel='player_name'>

stat_cols = ['Assists', 'Blocks', 'Steals', 'Field Goals Made', 'Field Goal Percentage', 'Free Throws Made', 
             'Free Throw Percentage', 'Total Rebounds', 'Three Pointers Made', 'Three Pointer Percentage', 'Points',
             'Minutes Played', 'Plus Minus Score']
colours = ["limegreen", "red", "navy", "lightblue", "darkorange", "pink", "lightgreen", "orange", "darkgreen",
           "purple", "blue", "darkred", "yellow"]

stats_colours = dict(zip(stat_cols, colours))
stats_colours

{'Assists': 'limegreen',
 'Blocks': 'red',
 'Steals': 'navy',
 'Field Goals Made': 'lightblue',
 'Field Goal Percentage': 'darkorange',
 'Free Throws Made': 'pink',
 'Free Throw Percentage': 'lightgreen',
 'Total Rebounds': 'orange',
 'Three Pointers Made': 'darkgreen',
 'Three Pointer Percentage': 'purple',
 'Points': 'blue',
 'Minutes Played': 'darkred',
 'Plus Minus Score': 'yellow'}

fig = plt.figure(figsize=(22, 22))
# two charts per row; never fewer than the original 5 rows so the layout is unchanged
# for up to 10 players, but enough rows that add_subplot does not fail beyond that
n_rows = max(5, -(-len(df_team_pos_players.index) // 2))
for i, player in enumerate(df_team_pos_players.index):
    # create the sub plot
    axc = fig.add_subplot(n_rows, 2, i+1)
    ax = df_team_pos_players[df_team_pos_players.index == player].plot.bar(ax=axc, 
                                                                           legend=False,
                                                                           y=stat_cols, 
                                                                           color=stats_colours, 
                                                                           fontsize=fontsize, 
                                                                           zorder=3)
    # configure axis ticks / labels
    xticklabels = ""
    ax.xaxis.set_major_formatter(ticker.FixedFormatter(xticklabels))
    ax.set_title("Player Stats Split by Category", fontsize=fontsize)
    ax.set_xlabel("Player: %s" % player, fontsize=fontsize)
    ax.set_ylabel("Percentile", fontsize=fontsize)
    ax.set_ylim([.0, 1.05])
    # call grid() to switch the horizontal grid lines on; assigning to the attribute
    # would only replace the method and draw nothing
    ax.yaxis.grid(True)
fig.tight_layout()

result_bc = df_team_pos_players.plot(backend='plotly', kind='bar', labels=dict(variable='Stat Category', 
                                                                               value='Normalised Proportion of Productivity'))
result_bc.layout.xaxis.title.text = 'Player Name'
result_bc

	assists	blocks	defReb	fga	fgm	fgp	fta	ftm	ftp	offReb	pFouls	points	steals	totReb	tpa	tpm	tpp	turnovers	age	meters	min	plusMinus	pounds	years_pro	rookie_year	jersey_number
assists	1.000000	0.079840	0.341542	0.548966	0.484388	0.193481	0.369765	0.374962	0.311134	0.099026	0.245023	0.506260	0.316305	0.304469	0.390774	0.302927	0.203165	0.452770	0.126561	-0.252423	0.567290	0.159905	-0.166412	0.219776	0.106663	-0.147452
blocks	0.079840	1.000000	0.336355	0.195425	0.220408	0.196177	0.191473	0.159980	0.140813	0.278480	0.211656	0.208109	0.111982	0.367662	0.024140	0.017648	0.004017	0.141856	0.075148	0.251926	0.240309	0.085996	0.243935	0.110978	0.068239	0.012307
defReb	0.341542	0.336355	1.000000	0.510386	0.510206	0.327576	0.409690	0.370083	0.322418	0.420608	0.365465	0.504478	0.236909	0.940426	0.228649	0.181592	0.117611	0.376868	0.137243	0.261249	0.569548	0.162405	0.312349	0.224570	0.120884	-0.014604
fga	0.548966	0.195425	0.510386	1.000000	0.897029	0.322502	0.538305	0.538127	0.465845	0.287690	0.381801	0.897463	0.359770	0.507344	0.708842	0.565154	0.315441	0.494552	0.156592	-0.128543	0.794232	0.081852	-0.062387	0.262914	0.151250	-0.148484
fgm	0.484388	0.220408	0.510206	0.897029	1.000000	0.554352	0.513336	0.503563	0.435286	0.302518	0.362593	0.968215	0.326853	0.512760	0.566037	0.585903	0.396349	0.452816	0.148391	-0.029965	0.728871	0.181727	0.029610	0.245894	0.144353	-0.114200
fgp	0.193481	0.196177	0.327576	0.322502	0.554352	1.000000	0.226268	0.202018	0.253894	0.233494	0.297128	0.512236	0.178603	0.343931	0.145002	0.297741	0.422328	0.215789	0.087033	0.130382	0.263894	0.169360	0.146847	0.127607	0.077829	-0.015409
fta	0.369765	0.191473	0.409690	0.538305	0.513336	0.226268	1.000000	0.953071	0.629833	0.258113	0.273107	0.662380	0.238647	0.417436	0.268027	0.203644	0.110822	0.381546	0.117704	0.044996	0.486043	0.085496	0.108121	0.213502	0.106079	-0.071535
ftm	0.374962	0.159980	0.370083	0.538127	0.503563	0.202018	0.953071	1.000000	0.705585	0.203123	0.251555	0.671765	0.229993	0.365823	0.300367	0.230797	0.129817	0.371399	0.113480	0.000897	0.477959	0.095969	0.055910	0.206695	0.100236	-0.077106
ftp	0.311134	0.140813	0.322418	0.465845	0.435286	0.253894	0.629833	0.705585	1.000000	0.184509	0.265014	0.544077	0.215306	0.321535	0.292309	0.229177	0.157550	0.301711	0.094410	-0.023133	0.435448	0.083457	0.009371	0.161579	0.086646	-0.067285
offReb	0.099026	0.278480	0.420608	0.287690	0.302518	0.233494	0.258113	0.203123	0.184509	1.000000	0.266119	0.269383	0.131953	0.704013	-0.045042	-0.049622	-0.051849	0.180521	0.063036	0.330219	0.283937	0.040610	0.374211	0.109273	0.061528	0.040387
pFouls	0.245023	0.211656	0.365465	0.381801	0.362593	0.297128	0.273107	0.251555	0.265014	0.266119	1.000000	0.364967	0.218776	0.385816	0.234468	0.187246	0.148717	0.332724	0.088719	0.092152	0.432809	0.008543	0.116605	0.130562	0.075931	-0.029604
points	0.506260	0.208109	0.504478	0.897463	0.968215	0.512236	0.662380	0.671765	0.544077	0.269383	0.364967	1.000000	0.334354	0.495859	0.631962	0.656948	0.442276	0.472023	0.153958	-0.062424	0.744084	0.186617	-0.002027	0.258023	0.145995	-0.122387
steals	0.316305	0.111982	0.236909	0.359770	0.326853	0.178603	0.238647	0.229993	0.215306	0.131953	0.218776	0.334354	1.000000	0.234903	0.253356	0.196634	0.138973	0.262363	0.083595	-0.096721	0.359245	0.113339	-0.054177	0.124488	0.075410	-0.084824
totReb	0.304469	0.367662	0.940426	0.507344	0.512760	0.343931	0.417436	0.365823	0.321535	0.704013	0.385816	0.495859	0.234903	1.000000	0.162106	0.123553	0.072635	0.362664	0.131186	0.328686	0.557822	0.142350	0.385286	0.216941	0.117785	0.003703
tpa	0.390774	0.024140	0.228649	0.708842	0.566037	0.145002	0.268027	0.300367	0.292309	-0.045042	0.234468	0.631962	0.253356	0.162106	1.000000	0.824836	0.435104	0.309413	0.103647	-0.318287	0.585642	0.071578	-0.303864	0.156840	0.098521	-0.137527
tpm	0.302927	0.017648	0.181592	0.565154	0.585903	0.297741	0.203644	0.230797	0.229177	-0.049622	0.187246	0.656948	0.196634	0.123553	0.824836	1.000000	0.705648	0.241962	0.087459	-0.248232	0.474144	0.165116	-0.238167	0.132337	0.078425	-0.106122
tpp	0.203165	0.004017	0.117611	0.315441	0.396349	0.422328	0.110822	0.129817	0.157550	-0.051849	0.148717	0.442276	0.138973	0.072635	0.435104	0.705648	1.000000	0.155573	0.043729	-0.201655	0.272089	0.145718	-0.199015	0.066599	0.035838	-0.080371
turnovers	0.452770	0.141856	0.376868	0.494552	0.452816	0.215789	0.381546	0.371399	0.301711	0.180521	0.332724	0.472023	0.262363	0.362664	0.309413	0.241962	0.155573	1.000000	0.111076	-0.059444	0.468933	-0.019882	0.014308	0.201057	0.098182	-0.098600
age	0.126561	0.075148	0.137243	0.156592	0.148391	0.087033	0.117704	0.113480	0.094410	0.063036	0.088719	0.153958	0.083595	0.131186	0.103647	0.087459	0.043729	0.111076	1.000000	0.015987	0.103249	0.041099	0.177507	0.662742	0.898160	-0.110687
meters	-0.252423	0.251926	0.261249	-0.128543	-0.029965	0.130382	0.044996	0.000897	-0.023133	0.330219	0.092152	-0.062424	-0.096721	0.328686	-0.318287	-0.248232	-0.201655	-0.059444	0.015987	1.000000	-0.085559	0.003383	0.801738	0.022226	0.006351	0.337993
min	0.567290	0.240309	0.569548	0.794232	0.728871	0.263894	0.486043	0.477959	0.435448	0.283937	0.432809	0.744084	0.359245	0.557822	0.585642	0.474144	0.272089	0.468933	0.103249	-0.085559	1.000000	0.089483	-0.049958	0.146762	0.125697	-0.075726
plusMinus	0.159905	0.085996	0.162405	0.081852	0.181727	0.169360	0.085496	0.095969	0.083457	0.040610	0.008543	0.186617	0.113339	0.142350	0.071578	0.165116	0.145718	-0.019882	0.041099	0.003383	0.089483	1.000000	0.017134	0.067028	0.021486	-0.004002
pounds	-0.166412	0.243935	0.312349	-0.062387	0.029610	0.146847	0.108121	0.055910	0.009371	0.374211	0.116605	-0.002027	-0.054177	0.385286	-0.303864	-0.238167	-0.199015	0.014308	0.177507	0.801738	-0.049958	0.017134	1.000000	0.201739	0.018154	0.275634
years_pro	0.219776	0.110978	0.224570	0.262914	0.245894	0.127607	0.213502	0.206695	0.161579	0.109273	0.130562	0.258023	0.124488	0.216941	0.156840	0.132337	0.066599	0.201057	0.662742	0.022226	0.146762	0.067028	0.201739	1.000000	0.402829	-0.098223
rookie_year	0.106663	0.068239	0.120884	0.151250	0.144353	0.077829	0.106079	0.100236	0.086646	0.061528	0.075931	0.145995	0.075410	0.117785	0.098521	0.078425	0.035838	0.098182	0.898160	0.006351	0.125697	0.021486	0.018154	0.402829	1.000000	-0.168386
jersey_number	-0.147452	0.012307	-0.014604	-0.148484	-0.114200	-0.015409	-0.071535	-0.077106	-0.067285	0.040387	-0.029604	-0.122387	-0.084824	0.003703	-0.137527	-0.106122	-0.080371	-0.098600	-0.110687	0.337993	-0.075726	-0.004002	0.275634	-0.098223	-0.168386	1.000000

	player_name	pounds	meters	feet
252086	Boban Marjanovic	290	2.21	7"3
83808	Jusuf Nurkic	290	2.11	6"11
84580	Nikola Jokic	284	2.11	6"11
216527	Zion Williamson	284	1.98	6"6
89767	Joel Embiid	280	2.13	7"0

	player_name	pounds	meters	feet
292988	Tyrell Terry	160	1.88	6"2
298355	Isaiah Joe	165	1.93	6"4
327276	Xavier Moon	165	1.88	6"2
183692	Kyle Guy	167	1.85	6"1
391592	Bones Hyland	169	1.88	6"2

	mean	sum
player_name
Taurean Prince	24.39	19028.0
Josh Hart	33.61	15258.0
Norman Powell	27.07	15105.0
Domantas Sabonis	34.75	15012.0
Pascal Siakam	34.28	14810.0
Nikola Vucevic	33.15	13858.0
Alex Caruso	24.87	12981.0
Kyrie Irving	36.13	12718.0
Kevin Durant	35.98	12450.0
Jarrett Allen	31.46	12334.0
Malik Beasley	26.21	11796.0
Tim Hardaway Jr.	26.84	11060.0
Mason Plumlee	20.11	10920.0
Terry Rozier	32.66	10844.0
Jerami Grant	33.70	10448.0
Kelly Oubre Jr.	31.12	10268.0
Russell Westbrook	25.76	10048.0
Kyle Lowry	26.95	9862.0
Eric Gordon	26.01	9260.0
Georges Niang	19.93	9090.0

	mean	sum
player_name
Kevin Durant	27.01	35170.0
Russell Westbrook	21.16	33398.0
Kyrie Irving	24.24	29574.0
Andre Drummond	12.56	27846.0
Nikola Vucevic	18.16	27172.0

2. Data Preparation and Analysis¶

Data Preprocessing¶

Data Parsing and Merging¶

Data Verification¶

Merge player performance statistics and individual player information¶

Data Analysis¶

Data Verification Task¶

3. Discussion¶

	mean	sum
player_name
Luka Doncic	28.23	13974.0
Kevin Durant	27.01	35170.0
Stephen Curry	26.92	19465.0
Giannis Antetokounmpo	26.45	20475.0
Joel Embiid	26.41	14181.0

	mean	sum
player_name
Russell Westbrook	8.58	13532.0
Kyle Lowry	6.23	9276.0
Eric Bledsoe	4.98	7404.0
James Harden	8.71	6952.0
Kyrie Irving	5.46	6658.0

	mean	sum
player_name
Trae Young	9.38	4757.0
James Harden	8.71	6952.0
John Wall	8.64	6204.0
Russell Westbrook	8.58	13532.0
Tyrese Haliburton	8.57	2802.0

	mean	sum
player_name
JaVale McGee	52.96	146170.4
Mason Plumlee	57.75	134266.2
Andre Drummond	53.20	117955.2
Taurean Prince	40.98	100804.0
Doug McDermott	43.51	93841.5

	mean	sum
player_name
Kevin Durant	85.17	110896.6
Norman Powell	50.71	110602.5
Gordon Hayward	66.28	106380.6
Russell Westbrook	66.24	104525.6
Mason Plumlee	44.33	103078.5

	mean	sum
player_name
Taurean Prince	34.07	83812.8
Doug McDermott	34.46	74325.0
Norman Powell	32.36	70580.7
Evan Fournier	35.05	62773.5
Alec Burks	32.77	61352.7
Gordon Hayward	34.49	55351.2

	mean	sum
player_name
Andre Drummond	3.94	8742.0
Steven Adams	3.87	5160.0
Mason Plumlee	2.10	4878.0
JaVale McGee	1.39	3848.0
Domantas Sabonis	2.62	3528.0

	mean	sum
player_name
Andre Drummond	8.16	18099.0
Nikola Vucevic	8.20	12262.0
Mason Plumlee	4.74	11016.0
Russell Westbrook	6.59	10396.0
Domantas Sabonis	7.63	10276.0

	mean	sum
player_name
Andre Drummond	12.11	26841.0
Rudy Gobert	12.00	9447.0
Giannis Antetokounmpo	10.79	8349.0
Karl-Anthony Towns	10.74	7354.0
Anthony Davis	10.74	7369.0

	mean	sum
player_name
JaVale McGee	0.96	2648.0
Andre Drummond	1.12	2478.0
Mason Plumlee	0.80	1866.0
Kevin Durant	1.26	1640.0
Rudy Gobert	2.02	1586.0

	mean	sum
player_name
Selom Mawugbe	4.50	9.0
Victor Wembanyama	3.64	393.0
Walker Kessler	2.34	422.0
Chet Holmgren	2.32	255.0
Myles Turner	2.17	1499.0
Anthony Davis	2.15	1472.0

	mean	sum
player_name
DMitrik Trice	10.00	10.0
LeBron James	9.86	7521.0
Giannis Antetokounmpo	9.70	7505.0
Luka Doncic	9.64	4772.0
Kevin Durant	9.52	12392.0
Anthony Davis	9.11	6251.0

	mean	sum
player_name
Luka Doncic	20.58	10185.0
Donovan Mitchell	19.28	11493.0
Stephen Curry	18.98	13723.0
Damian Lillard	18.90	13588.0
LeBron James	18.84	14378.0

	mean	sum
player_name
Kevin Durant	6.47	8420.0
Stephen Curry	7.54	5453.0
Kyrie Irving	4.29	5234.0
Danny Green	3.85	5004.0
Kyle Lowry	3.26	4852.0
Steven Adams	3.48	4632.0
Draymond Green	5.81	4431.0
Pascal Siakam	2.92	4078.0
Nikola Jokic	4.84	4036.0
Jayson Tatum	5.75	3995.0

	mean	sum
player_name
Andre Drummond	1.29	2871.0
Russell Westbrook	1.50	2364.0
Eric Bledsoe	1.34	2001.0
Taurean Prince	0.80	1960.0
Kyle Lowry	1.28	1900.0

	mean	sum
player_name
Russell Westbrook	17.56	27712.0
Kevin Durant	18.35	23886.0
Nikola Vucevic	15.57	23294.0
Kyrie Irving	18.73	22846.0
Andre Drummond	9.81	21741.0

	sum
country
USA	949700.0
Canada	28047.0
Australia	22018.0
France	21078.0
Germany	20187.0
Cameroon	19487.0
Lithuania	15012.0
Montenegro	13888.0
Bahamas	11729.0
Croatia	11405.0
Turkey	10133.0
Serbia	8326.0
Slovenia	7689.0
Dominican Republic	7636.0
Japan	6688.0
United Kingdom	6671.0
Saint Lucia	5888.0
Greece	5868.0
Latvia	5702.0
Austria	4481.0
Bosnia and Herzegovina	4312.0
Nigeria	4090.0
Jamaica	3611.0
Spain	3479.0
Ukraine	3317.0
New Zealand	3040.0
Sudan	2261.0
DRC	1766.0
Angola	1463.0
South Sudan	1248.0
Italy	715.0
Brazil	577.0
Republic of the Congo	234.0
Argentina	105.0
Gabon	28.0
Mali	20.0

	player_name	min	game_id
season
2023	Cheick Diallo	10.0	12508
2023	Cheick Diallo	5.0	12514
2023	Cheick Diallo	5.0	12546

	player_name	assists
season
2017	Rajon Rondo	25.0
2018	Russell Westbrook	24.0
2020	Russell Westbrook	24.0
2018	Russell Westbrook	24.0
2020	Russell Westbrook	24.0
2023	Tyrese Haliburton	23.0
2016	Russell Westbrook	22.0
2016	Russell Westbrook	22.0
2024	Trae Young	22.0

	points	assists
count	374176.00	374176.00
mean	9.24	2.01
std	8.44	2.45
min	0.00	0.00
25%	2.00	0.00
50%	8.00	1.00
75%	14.00	3.00
max	73.00	25.00

	player_name	tpm
season
2018	Klay Thompson	14.0
2019	Zach LaVine	13.0
2016	Stephen Curry	13.0
2022	Damian Lillard	13.0
2020	Damian Lillard	12.0
2015	Stephen Curry	12.0

	assists	blocks	steals	fgm	fgp	ftm	ftp	totReb	tpm	tpp	points	min	plusMinus
player_name
James Harden	0.93	0.14	0.38	0.78	0.43	0.93	0.84	0.52	0.70	0.35	0.93	0.94	0.58
Luka Doncic	0.86	0.10	0.31	0.96	0.46	0.74	0.72	0.71	0.66	0.33	1.00	1.00	0.55
Kevin Durant	0.54	0.28	0.21	0.95	0.53	0.73	0.85	0.58	0.48	0.40	0.96	0.97	0.63
Stephen Curry	0.62	0.07	0.35	0.89	0.47	0.56	0.83	0.42	1.00	0.41	0.95	0.88	0.66
Joel Embiid	0.36	0.35	0.22	0.87	0.48	1.00	0.80	0.88	0.24	0.31	0.94	0.92	0.61
LeBron James	0.84	0.15	0.31	0.99	0.52	0.56	0.70	0.66	0.42	0.33	0.92	0.95	0.58
Giannis Antetokounmpo	0.57	0.29	0.29	0.97	0.55	0.79	0.67	0.89	0.16	0.23	0.94	0.91	0.59
Damian Lillard	0.72	0.07	0.25	0.83	0.43	0.80	0.87	0.36	0.73	0.35	0.93	0.96	0.55
Victor Wembanyama	0.40	0.81	0.29	0.81	0.47	0.49	0.77	0.86	0.49	0.32	0.79	0.82	0.50
Trae Young	1.00	0.03	0.26	0.78	0.42	0.81	0.82	0.29	0.57	0.33	0.87	0.94	0.49
Anthony Davis	0.29	0.48	0.32	0.91	0.51	0.74	0.75	0.89	0.14	0.22	0.88	0.93	0.54
Nikola Jokic	0.73	0.16	0.30	0.82	0.55	0.48	0.72	0.89	0.24	0.32	0.76	0.94	0.60
Kawhi Leonard	0.39	0.14	0.42	0.84	0.49	0.63	0.80	0.53	0.42	0.38	0.84	0.90	0.63
Jayson Tatum	0.40	0.15	0.28	0.80	0.45	0.58	0.75	0.61	0.55	0.36	0.82	0.99	0.62
Kyrie Irving	0.58	0.11	0.31	0.90	0.48	0.45	0.78	0.34	0.57	0.38	0.86	0.98	0.59
Paul George	0.43	0.09	0.42	0.77	0.43	0.57	0.76	0.53	0.65	0.38	0.81	0.90	0.58
Karl-Anthony Towns	0.33	0.27	0.19	0.81	0.52	0.55	0.78	0.89	0.37	0.37	0.79	0.89	0.53
Donovan Mitchell	0.49	0.07	0.34	0.86	0.44	0.51	0.75	0.35	0.65	0.36	0.86	0.94	0.58
Anthony Edwards	0.44	0.14	0.33	0.83	0.44	0.48	0.72	0.43	0.61	0.35	0.82	0.97	0.53

	player_name	team_name	active	years_pro	affiliation	college	country	age	rookie_year
0	Anthony Davis	New Orleans Pelicans	True	9	Kentucky/USA	Kentucky	USA	31	2012
2314	Anthony Davis	Los Angeles Lakers	True	9	Kentucky/USA	Kentucky	USA	31	2012
187	Damian Lillard	Portland Trail Blazers	True	9	Weber State/USA	Weber State	USA	34	2012
4308	Damian Lillard	Milwaukee Bucks	True	9	Weber State/USA	Weber State	USA	34	2012
1121	Jayson Tatum	Boston Celtics	True	4	Duke/USA	Duke	USA	26	2017
430	Kawhi Leonard	San Antonio Spurs	True	10	San Diego State/USA	San Diego State	USA	33	2011
1613	Kawhi Leonard	Toronto Raptors	True	10	San Diego State/USA	San Diego State	USA	33	2011
2463	Kawhi Leonard	LA Clippers	True	10	San Diego State/USA	San Diego State	USA	33	2011
279	Kyrie Irving	Cleveland Cavaliers	True	10	Duke/Australia	Duke	Australia	32	2011
1152	Kyrie Irving	Boston Celtics	True	10	Duke/Australia	Duke	Australia	32	2011
2411	Kyrie Irving	Brooklyn Nets	True	10	Duke/Australia	Duke	Australia	32	2011
3706	Kyrie Irving	Dallas Mavericks	True	10	Duke/Australia	Duke	Australia	32	2011
80	Stephen Curry	Golden State Warriors	True	12	Davidson/USA	Davidson	USA	36	2009

	assists	blocks	steals	fgm	fgp	ftm	ftp	totReb	tpm	tpp	points	min	plusMinus
player_name
Stephen Curry	0.620000	0.070000	0.350000	0.890000	0.470000	0.560000	0.830000	0.420000	1.000000	0.410000	0.950000	0.880000	0.660000
Damian Lillard	0.720000	0.070000	0.250000	0.830000	0.430000	0.800000	0.870000	0.360000	0.730000	0.350000	0.930000	0.960000	0.550000
Anthony Davis	0.290000	0.480000	0.320000	0.910000	0.510000	0.740000	0.750000	0.890000	0.140000	0.220000	0.880000	0.930000	0.540000
Kawhi Leonard	0.390000	0.140000	0.420000	0.840000	0.490000	0.630000	0.800000	0.530000	0.420000	0.380000	0.840000	0.900000	0.630000
Jayson Tatum	0.400000	0.150000	0.280000	0.800000	0.450000	0.580000	0.750000	0.610000	0.550000	0.360000	0.820000	0.990000	0.620000
Kyrie Irving	0.580000	0.110000	0.310000	0.900000	0.480000	0.450000	0.780000	0.340000	0.570000	0.380000	0.860000	0.980000	0.590000