In [None]:
# Goals scored per club, keyed by club name.
# The insertion order matters: later cells iterate over this dict to build
# aligned arrays for every statistic.
goals_for = {
    "Bodø/Glimt": 48,
    "Viking": 43,
    "Tromsø": 27,
    "Molde": 41,
    "Brann": 35,
    "Lillestrøm": 36,
    "Sarpsborg": 33,
    "Odd": 21,
    "Rosenborg": 23,
    "Strømsgodset": 23,
    "HamKam": 25,
    "Haugesund": 18,
    "Sandefjord": 27,
    "Vålerenga": 22,
    "Stabæk": 17,
    "Aalesund": 13,
}

# Goals conceded per club, keyed by club name.
# Looked up with the keys of goals_for in later cells, so it must contain
# every club that appears there.
goals_against = {
    "Bodø/Glimt": 22,
    "Viking": 25,
    "Tromsø": 15,
    "Molde": 21,
    "Brann": 24,
    "Lillestrøm": 30,
    "Sarpsborg": 33,
    "Odd": 22,
    "Rosenborg": 28,
    "Strømsgodset": 25,
    "HamKam": 39,
    "Haugesund": 29,
    "Sandefjord": 36,
    "Vålerenga": 30,
    "Stabæk": 30,
    "Aalesund": 43,
}

# Number of drawn games per club, keyed by club name.
# Looked up with the keys of goals_for in later cells, so it must contain
# every club that appears there.
draws = {
    "Bodø/Glimt": 2,
    "Viking": 2,
    "Tromsø": 3,
    "Molde": 4,
    "Brann": 3,
    "Lillestrøm": 2,
    "Sarpsborg": 3,
    "Odd": 4,
    "Rosenborg": 4,
    "Strømsgodset": 2,
    "HamKam": 1,
    "Haugesund": 4,
    "Sandefjord": 5,
    "Vålerenga": 4,
    "Stabæk": 4,
    "Aalesund": 1,
}


In [None]:
import numpy as np

# goals for: raw values (in goals_for insertion order) and mean-centered values
goals_for_array = np.asarray([goals_for[club] for club in goals_for])
# Center with a single vectorised subtraction; the mean is computed once
# instead of being re-summed for every club inside a comprehension.
goals_for_array_ctr = goals_for_array - goals_for_array.mean()
print("goals for:\t", goals_for_array, "\n\t\t", goals_for_array_ctr)

# goals against: raw values and mean-centered values
# (clubs are taken in goals_for order so all arrays stay aligned)
goals_against_array = np.asarray([goals_against[club] for club in goals_for])
# Center with a single vectorised subtraction; the mean is computed once
# instead of being re-summed for every club inside a comprehension.
goals_against_array_ctr = goals_against_array - goals_against_array.mean()
print("goals against:\t", goals_against_array, "\n\t\t", goals_against_array_ctr)

# goal difference: raw values and mean-centered values
# Both input arrays are built in goals_for order, so an element-wise
# subtraction yields the per-club difference (goals for minus goals against).
goal_diff_array = goals_for_array - goals_against_array
# Center with a single vectorised subtraction; the mean is computed once
# instead of being re-summed for every club inside a comprehension.
goal_diff_array_ctr = goal_diff_array - goal_diff_array.mean()
print("goal balance:\t", goal_diff_array, "\n\t\t", goal_diff_array_ctr)

# number of draws: raw values and mean-centered values
# (clubs are taken in goals_for order so all arrays stay aligned)
draws_array = np.asarray([draws[club] for club in goals_for])
# Center with a single vectorised subtraction; the mean is computed once
# instead of being re-summed for every club inside a comprehension.
draws_array_ctr = draws_array - draws_array.mean()
print("games drawn:\t", draws_array, "\n\t\t", draws_array_ctr)

In [None]:
# Question: is the number of goals scored significantly correlated with the
# number of goals conceded?
#
import statsmodels.api as sm

# Ordinary least squares on the mean-centered series: goals for is the
# response (y), goals against the single regressor (x). No constant column
# is passed to the model here.
goals_against_vs_goals_for = sm.OLS(
    endog=goals_for_array_ctr,
    exog=goals_against_array_ctr,
).fit()

print("Fit goals against (x) vs. goals for (y):\n", goals_against_vs_goals_for.summary())

In [None]:
# Same regression as the centered fit, but on the raw (uncentered) data with
# an explicit constant term added to the regressor.
#
import statsmodels.api as sm

# goals for is the response (y); the design matrix is goals against (x)
# with a constant column added by sm.add_constant.
goals_against_vs_goals_for_unc = sm.OLS(
    endog=goals_for_array,
    exog=sm.add_constant(goals_against_array),
).fit()

print("Fit goals against (x) vs. goals for (y):\n", goals_against_vs_goals_for_unc.summary())

In [None]:
import seaborn as sbn
import matplotlib.pyplot as plt

# Graphical linear regression using seaborn: scatter of goals for (y) against
# goals against (x) with a first-order fit line and no confidence band.
#
fig, ax = plt.subplots()
fig.set_size_inches(13, 8)

# Style ticks and labels through the Axes object instead of the pyplot
# state machine, so the figure does not depend on which axes is "current".
ax.tick_params(axis="both", labelsize=18, labelcolor="#322300")
ax.set_xlabel("goals against", fontsize=24, color="#322300")
ax.set_ylabel("goals for", fontsize=24, color="#322300")

# Fix the axis limits BEFORE plotting: with truncate=False seaborn extends the
# fitted line to the x-axis limits in effect when regplot is called, so
# setting them afterwards would leave the line covering only the data range.
ax.set(xlim=(0, 50), ylim=(0, 50))

# Draw on the explicitly created axes; the trailing semicolon suppresses the
# returned object's repr in the cell output.
sbn.regplot(x=goals_against_array, y=goals_for_array, color='#002855', order=1,
            scatter_kws={'s': 50}, ci=None, truncate=False, ax=ax);