top of page
Search

2021 Tokyo Olympics data analysis

  • Writer: Steve Kan
    Steve Kan
  • Sep 15, 2021
  • 1 min read

Updated: Sep 23, 2021

The following data analysis is conducted using the 2021 Tokyo Olympics datasets posted on Kaggle by Arjun Prasad Sarkhel. This analysis provides insights into the Olympics that most people don't know about.



ree

# Packages
import pandas as pd
import numpy as np
import matplotlib.pyplot  as plt
import openpyxl
import xlrd
from textwrap import wrap
import os

# Reading data
path = "C:/Users/Stevie/Desktop/Coding/data/"

# Load each Excel workbook from the data directory into its own DataFrame.
# The workbooks are read in the order listed and bound to the names on the
# left in that same order.
medals, Athletes, Coaches, Teams, EntriesGender = (
    pd.read_excel(path + workbook, engine="openpyxl")
    for workbook in (
        "Medals.xlsx",
        "Athletes.xlsx",
        "Coaches.xlsx",
        "Teams.xlsx",
        "EntriesGender.xlsx",
    )
)

Studying the number of Olympic medals obtained by the top-10 countries:

# Keep only the rows whose "Rank" is 10 or lower, and only the team name
# plus the three medal-count columns.
df = medals.loc[medals["Rank"] <= 10]
df = df[["Team/NOC", "Gold", "Silver", "Bronze"]]

# The original code read from an undefined `df1`; the filtered frame is `df`.
countries = df.iloc[:, 0]
indx = np.arange(len(df))

# Horizontal stacked bars: one bar per team, one segment per medal type.
df.plot(kind='barh', stacked=True, figsize=(14, 6))

# Wrap long team names at 20 characters so the tick labels stay readable.
countries = ['\n'.join(wrap(country, 20)) for country in countries]
plt.yticks(indx, countries)
plt.xlabel("Number of Medals")
plt.ylabel("Countries")
plt.title("Number of Olympics Medals obtained by the Top-10 Countries")
plt.show()

# Total Olympic participants of each type of sport - a random 25% sample of the original data
EntriesGender1 = EntriesGender.sample(frac=.25)

# Drop incomplete rows from both columns together. Calling dropna() on each
# column separately could leave labels and values with different lengths.
complete_rows = EntriesGender1.dropna(subset=["Discipline", "Total"])
names = complete_rows["Discipline"]
values = complete_rows["Total"]

plt.figure(figsize=(14,6))
plt.pie(x=values,
        labels=names,
        autopct='%.1f%%',)
plt.title("Total Olympic participants of each type of sport")
plt.show()

# Pie chart over the full (unsampled) data. A DataFrame pie plot needs an
# explicit `y` column; indexing by "Discipline" supplies the wedge labels.
# The original call passed an undefined `label_p` and no `y`.
EntriesGender.set_index("Discipline").plot(kind="pie", y="Total", figsize=(14,6), legend=False)

ree

Female vs. male participants of each discipline:

# Order the disciplines by total participants (ascending). The sort returns
# a new frame instead of sorting a derived frame in place, which may trigger
# SettingWithCopyWarning in some pandas versions.
df2 = EntriesGender[["Discipline", "Female", "Male", "Total"]].sort_values("Total")

# "Total" was only needed for ordering; plot just the two gender columns.
df2 = df2[["Discipline", "Female", "Male"]]
indx2 = np.arange(len(df2))
dis_name = df2.iloc[:, 0]

# Horizontal stacked bars: one bar per discipline, split into Female/Male.
df2.plot(kind="barh", stacked=True, figsize=(14, 14))
plt.yticks(indx2, dis_name)  # label each bar with its discipline name
plt.xlabel("Number of people")
plt.ylabel("Discipline")
plt.title("Male/Female participants of each discipline")
plt.show()

ree

Total number of participants of each discipline - 25% of the data

# Draw a random 25% sample of the rows.
EntriesGender1 = EntriesGender.sample(frac=.25)

# Drop incomplete rows from both columns together. Calling dropna() on each
# column separately could leave labels and values with different lengths.
complete_rows = EntriesGender1.dropna(subset=["Discipline", "Total"])
names = complete_rows["Discipline"]
values = complete_rows["Total"]

plt.figure(figsize=(14,6))
plt.pie(x=values,
        labels=names,
        autopct='%.1f%%',)
plt.title("Total Olympic participants of each type of sport")
plt.show()

# Pie chart over the full (unsampled) data. A DataFrame pie plot needs an
# explicit `y` column; indexing by "Discipline" supplies the wedge labels.
# The original call passed an undefined `label_p` and no `y`.
EntriesGender.set_index("Discipline").plot(kind="pie", y="Total", figsize=(14,6), legend=False)

ree

- WORK IN PROGRESS


 
 
 

Comments


bottom of page