Assignment - Ipynb - Colaboratory
Assignment - Ipynb - Colaboratory
Assignment - Ipynb - Colaboratory
ipynb - Colaboratory
Files
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.impute import SimpleImputer
bin
boot
#Read the data:
content
d = pd.read_csv(r"/owid-covid-data.csv")
datalab
dev
df = pd.DataFrame(d)
etc
home
#View the Data:
lib
lib32
df.shape
lib64
(231176, 67) media
mnt
df.head() opt
proc
iso_code continent location date total_cas python-apt
2020- root
0 AFG Asia Afghanistan 5
02-24 run
2020- sbin
1 AFG Asia Afghanistan 5
02-25 srv
2020- sys
2 AFG Asia Afghanistan 5
02-26 tmp
tools
2020-
3 AFG Asia Afghanistan 5
02-27 usr
var
2020-
4 AFG Asia Afghanistan 5 NGC-DL-CONTAINER-LICE…
02-28
owid-covid-data.csv
5 rows × 67 columns
df.dtypes
iso_code object
continent object
location object
date object
total_cases float64
...
https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 1/14
11/1/22, 12:48 PM assignment.ipynb - Colaboratory
population float64
excess_mortality_cumulative_absolute float64
excess_mortality_cumulative float64
excess_mortality float64
excess_mortality_cumulative_per_million float64
df.describe(include = "all")
2021-
top MEX Europe Mexico
09-14
11 rows × 67 columns
df.columns
'new_cases_smoothed', 'total_deaths',
'new_deaths',
'new_deaths_smoothed',
'total_cases_per_million',
'new_cases_per_million',
'new_cases_smoothed_per_million',
'total_deaths_per_million',
'new_deaths_per_million',
'new_deaths_smoothed_per_million',
'reproduction_rate', 'icu_patients',
'icu_patients_per_million', 'hosp_patients',
'hosp_patients_per_million',
https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 2/14
11/1/22, 12:48 PM assignment.ipynb - Colaboratory
'weekly_icu_admissions',
'weekly_icu_admissions_per_million',
'weekly_hosp_admissions',
'weekly_hosp_admissions_per_million',
'total_tests', 'new_tests',
'total_tests_per_thousand',
'new_tests_per_thousand',
'new_tests_smoothed',
'new_tests_smoothed_per_thousand',
'positive_rate', 'tests_per_case',
'tests_units', 'total_vaccinations',
'people_vaccinated',
'people_fully_vaccinated', 'total_boosters',
'new_vaccinations',
'new_vaccinations_smoothed',
'total_vaccinations_per_hundred',
'people_vaccinated_per_hundred',
'people_fully_vaccinated_per_hundred',
'total_boosters_per_hundred',
'new_vaccinations_smoothed_per_million',
'new_people_vaccinated_smoothed',
'new_people_vaccinated_smoothed_per_hundred',
'stringency_index',
'population_density', 'median_age',
'aged_65_older', 'aged_70_older',
'gdp_per_capita', 'extreme_poverty',
'cardiovasc_death_rate',
'diabetes_prevalence', 'female_smokers',
'male_smokers',
'handwashing_facilities',
'hospital_beds_per_thousand',
'life_expectancy',
'human_development_index', 'population',
'excess_mortality_cumulative_absolute',
'excess_mortality_cumulative',
'excess_mortality',
'excess_mortality_cumulative_per_million'],
dtype='object')
#Dropping the columns:
df.drop(['new_cases_smoothed','new_deaths_smoothed','new_c
# Shape of the table after dropping some columns
df.shape
(231176, 63)
#Renaming the columns:
https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 3/14
11/1/22, 12:48 PM assignment.ipynb - Colaboratory
#We can rename column name, row name, or index. In our dat
df.rename(columns = {'date': 'Date','location':'Country',
#List the continent names:
continent = list(df.Continent.unique())
continent
#simple imputer:
#Simple imputer helps with missing values in a dataset. In
imputer = SimpleImputer(strategy='constant')
df2 = pd.DataFrame(imputer.fit_transform(df),columns=df.co
df2
https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 4/14
11/1/22, 12:48 PM assignment.ipynb - Colaboratory
2020-
0 AFG Asia Afghanistan
02-24
2020-
1 AFG Asia Afghanistan
02-25
2020-
2 AFG Asia Afghanistan
02-26
df2.groupby(['Date','Country'])[['Date','Country','total_c
2020-
3 AFG Asia Afghanistan
02-27
df2
2020-
4 AFG Asia Afghanistan
ISO_code Continent Country 02-28
Date tota
... ... ... ... ...
2020-
0 AFG Asia Afghanistan
02-24
2022-
231171 ZWE Africa Zimbabwe 2
10-25
2020-
1 AFG Asia Afghanistan
02-25
2022-
231172 ZWE Africa Zimbabwe 2
10-26
2020-
2 AFG Asia Afghanistan
02-26
2022-
231173 ZWE Africa Zimbabwe 2
10-27
2020-
3 AFG Asia Afghanistan
02-27
2022-
231174 ZWE Africa Zimbabwe 2
10-28
2020-
4 AFG Asia Afghanistan
02-28
2022-
231175 ZWE Africa Zimbabwe 2
... ... ... ... 10-29
...
231176 rows × 63 columns 2022-
231171 ZWE Africa Zimbabwe 2
10-25
2022-
231172 ZWE Africa Zimbabwe 2
10-26
2022-
231173 ZWE Africa Zimbabwe 2
10-27
2022-
231174 ZWE Africa Zimbabwe 2
10-28
2022-
231175 ZWE Africa Zimbabwe 2
10-29
df3 = df2.groupby(['Date','Country'])[['Date','Country','t
df3 tail(10)
https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 5/14
11/1/22, 12:48 PM assignment.ipynb - Colaboratory
df3.tail(10)
2022-
231167 World 629985701.0 6588602.0
10-29
2022-
231168 Yemen 11939.0 2158.0
10-29
2022-
231169 Zambia 333674.0 4017.0
10-29
2022-
231170 Zimbabwe 257893.0 5606.0
10-29
2022-
231171 Austria missing_value missing_value
10-30
2022-
231172 Germany missing_value missing_value
10-30
2022-
231173 Israel missing_value missing_value
10-30
2022-
231174 Malaysia missing_value missing_value
10-30
2022-
231175 Russia missing_value missing_value
10-30
#change missing_value to 0
df3['total_cases'].replace({'missing_value':0},inplace=Tru
df3['total_deaths'].replace({'missing_value':0},inplace=Tr
df3['total_vaccinations'].replace({'missing_value':0},inpl
df3
https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 6/14
11/1/22, 12:48 PM assignment.ipynb - Colaboratory
2020-
0 Argentina 0.0 0.0
01-01
2020-
1 Mexico 0.0 0.0
01-01
2020-
2 Argentina 0.0 0.0
01-02
2020-
3 Mexico 0.0 0.0
01-02
2020-
4 Argentina 0.0 0.0
#select rows where total_deaths is greater than 1000000
01-03
df4=df3[df3['total_deaths']>1000000]
... ... ... ... ...
2022-
231171 Austria 0.0 0.0
10-30
df4
2022-
231172 Germany 0.0 0.0
10-30
https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 7/14
11/1/22, 12:48 PM assignment.ipynb - Colaboratory
2020-
45117 World 29933149.0 1004634.0
09-16
#unique countries where total_deaths is greater than 1000000
2020-
countries = df4['Country'].unique()
45345 World 30248717.0 1010285.0
09-17
print(len(countries))
2020-
print()45573 09-18 World 30575342.0 1016140.0
print("conuntry_deaths_greater_than_1000000 : ")
print() 2020-
45801 World 30868423.0 1021413.0
09-19
conuntry_deaths_greater_than_1000000 = list(df4['Country']
2020-
conuntry_deaths_greater_than_1000000
46029 World 31123528.0 1025401.0
09-20
10
... ... ... ... ...
conuntry_deaths_greater_than_1000000
2022- North :
2022-
'High income',
South
231132 64273975.0 1332168.0
10-29 income',
2022-
'South America',
United
231158 97447532.0 1070264.0
'Asia',
10-29 States
'Lower middle income',
Upper
'North America',
2022-
231159 middle 138798900.0 2497566.0
'European Union',
10-29
income
'United States']
2022-
231167 World 629985701.0 6588602.0
10-29
New Section
4770 rows × 5 columns
#plotting the trend
for idx in range(0, len(countries)):
C = df4[df4['Country']==countries[idx]].reset_index()
plt.scatter(np.arange(0, len(C)),C['total_cases'],color="
plt.scatter(np.arange(0, len(C)),C['total_deaths'],color=
plt.scatter(np.arange(0, len(C)),C['total_vaccinations'],
plt.title(countries[ idx])
plt.xlabel("Number of days since first suspect")
plt.ylabel("Number of cases")
plt.legend()
plt.show()
https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 8/14
11/1/22, 12:48 PM assignment.ipynb - Colaboratory
https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 9/14
11/1/22, 12:48 PM assignment.ipynb - Colaboratory
https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 10/14
11/1/22, 12:48 PM assignment.ipynb - Colaboratory
#group the countries
df5 = df4.groupby(['Country']) [['Country', 'total_cases',
df5
https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 11/14
11/1/22, 12:48 PM assignment.ipynb - Colaboratory
C = df5
plt.xlabel("Number of days since first suspect")
plt.ylabel("Number of cases")
plt.legend()
plt.show()
#analysis by date where total_deaths is greater than 1000000
date = df4['Date'].unique()
len (date)
774
https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 12/14
11/1/22, 12:48 PM assignment.ipynb - Colaboratory
df6 = df4.groupby(['Date']) [[ 'Date', 'total_cases', 'tot
df6
#graph plotting by Date
C = df6
plt.scatter (np.arange(0,len (C)),C['total_cases'], color=
plt.scatter (np.arange(0,len (C)),C['total_deaths' ], colo
plt.title("World")
plt.xlabel("Number of days since first suspect")
plt.ylabel("Number of cases")
plt.legend()
plt.show()
https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 13/14
11/1/22, 12:48 PM assignment.ipynb - Colaboratory
https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 14/14