Assignment - Ipynb - Colaboratory

Download as pdf or txt
Download as pdf or txt
You are on page 1 of 14

11/1/22, 12:48 PM assignment.ipynb - Colaboratory

Files
import numpy as np

import pandas as pd

import matplotlib.pyplot  as plt

from sklearn.impute  import SimpleImputer

bin
boot
#Read the data:

content
d = pd.read_csv(r"/owid-covid-data.csv")
datalab
dev
df =  pd.DataFrame(d)
etc
home
#View the Data:
lib
lib32
df.shape

lib64
(231176, 67) media
mnt
df.head() opt
proc
iso_code continent location date total_cas python-apt

2020- root
0 AFG Asia Afghanistan 5
02-24 run

2020- sbin
1 AFG Asia Afghanistan 5
02-25 srv

2020- sys
2 AFG Asia Afghanistan 5
02-26 tmp
tools
2020-
3 AFG Asia Afghanistan 5
02-27 usr
var
2020-
4 AFG Asia Afghanistan 5 NGC-DL-CONTAINER-LICE…
02-28
owid-covid-data.csv
5 rows × 67 columns

df.dtypes

iso_code object

continent object

location object

date object

total_cases float64

...

https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 1/14
11/1/22, 12:48 PM assignment.ipynb - Colaboratory

population float64

excess_mortality_cumulative_absolute float64

excess_mortality_cumulative float64

excess_mortality float64

excess_mortality_cumulative_per_million float64

Length: 67, dtype: object

df.describe(include = "all")

iso_code continent location date tota

count 231176 218126 231176 231176 2.180

unique 248 6 248 1034

2021-
top MEX Europe Mexico
09-14

freq 1033 52934 1033 247

mean NaN NaN NaN NaN 4.479

std NaN NaN NaN NaN 2.798

min NaN NaN NaN NaN 1.000

25% NaN NaN NaN NaN 4.427

50% NaN NaN NaN NaN 4.975

75% NaN NaN NaN NaN 5.259

max NaN NaN NaN NaN 6.299

11 rows × 67 columns

df.columns

Index(['iso_code', 'continent', 'location', 'date',


'total_cases', 'new_cases',

'new_cases_smoothed', 'total_deaths',
'new_deaths',

'new_deaths_smoothed',
'total_cases_per_million',

'new_cases_per_million',
'new_cases_smoothed_per_million',
'total_deaths_per_million',
'new_deaths_per_million',

'new_deaths_smoothed_per_million',
'reproduction_rate', 'icu_patients',

'icu_patients_per_million', 'hosp_patients',

'hosp_patients_per_million',
https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 2/14
11/1/22, 12:48 PM assignment.ipynb - Colaboratory

'weekly_icu_admissions',

'weekly_icu_admissions_per_million',
'weekly_hosp_admissions',

'weekly_hosp_admissions_per_million',
'total_tests', 'new_tests',

'total_tests_per_thousand',
'new_tests_per_thousand',

'new_tests_smoothed',
'new_tests_smoothed_per_thousand',

'positive_rate', 'tests_per_case',
'tests_units', 'total_vaccinations',

'people_vaccinated',
'people_fully_vaccinated', 'total_boosters',

'new_vaccinations',
'new_vaccinations_smoothed',

'total_vaccinations_per_hundred',
'people_vaccinated_per_hundred',

'people_fully_vaccinated_per_hundred',
'total_boosters_per_hundred',

'new_vaccinations_smoothed_per_million',

'new_people_vaccinated_smoothed',

'new_people_vaccinated_smoothed_per_hundred',
'stringency_index',

'population_density', 'median_age',
'aged_65_older', 'aged_70_older',
'gdp_per_capita', 'extreme_poverty',
'cardiovasc_death_rate',

'diabetes_prevalence', 'female_smokers',
'male_smokers',

'handwashing_facilities',
'hospital_beds_per_thousand',

'life_expectancy',
'human_development_index', 'population',

'excess_mortality_cumulative_absolute',
'excess_mortality_cumulative',

'excess_mortality',
'excess_mortality_cumulative_per_million'],

dtype='object')

#Dropping the columns:

df.drop(['new_cases_smoothed','new_deaths_smoothed','new_c

# Shape of the table after dropping some columns

df.shape

(231176, 63)

#Renaming the column name:

https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 3/14
11/1/22, 12:48 PM assignment.ipynb - Colaboratory

#We can rename column name, row name, or index. In our dat

df.rename(columns = {'date': 'Date','location':'Country',

#List the continent name:

continent =  list(df.Continent.unique())

continent

['Asia', nan, 'Europe', 'Africa', 'North America',


'South America', 'Oceania']

#simple imputer:
#Simple imputer helps with missing values in a dataset. In

imputer = SimpleImputer(strategy='constant')

df2 = pd.DataFrame(imputer.fit_transform(df),columns=df.co

df2

https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 4/14
11/1/22, 12:48 PM assignment.ipynb - Colaboratory

ISO_code Continent Country Date tota

2020-
0 AFG Asia Afghanistan
02-24

2020-
1 AFG Asia Afghanistan
02-25

2020-
2 AFG Asia Afghanistan
02-26
df2.groupby(['Date','Country'])[['Date','Country','total_c
2020-
3 AFG Asia Afghanistan
02-27
df2
2020-
4 AFG Asia Afghanistan
ISO_code Continent Country 02-28
Date tota
... ... ... ... ...
2020-
0 AFG Asia Afghanistan
02-24
2022-
231171 ZWE Africa Zimbabwe 2
10-25
2020-
1 AFG Asia Afghanistan
02-25
2022-
231172 ZWE Africa Zimbabwe 2
10-26
2020-
2 AFG Asia Afghanistan
02-26
2022-
231173 ZWE Africa Zimbabwe 2
10-27
2020-
3 AFG Asia Afghanistan
02-27
2022-
231174 ZWE Africa Zimbabwe 2
10-28
2020-
4 AFG Asia Afghanistan
02-28
2022-
231175 ZWE Africa Zimbabwe 2
... ... ... ... 10-29
...
231176 rows × 63 columns 2022-
231171 ZWE Africa Zimbabwe 2
10-25

2022-
231172 ZWE Africa Zimbabwe 2
10-26

2022-
231173 ZWE Africa Zimbabwe 2
10-27

2022-
231174 ZWE Africa Zimbabwe 2
10-28

2022-
231175 ZWE Africa Zimbabwe 2
10-29

231176 rows × 63 columns

df3 = df2.groupby(['Date','Country'])[['Date','Country','t

df3 tail(10)
https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 5/14
11/1/22, 12:48 PM assignment.ipynb - Colaboratory
df3.tail(10)

Date Country total_cases total_deaths

2022- Wallis and


231166 761.0 7.0
10-29 Futuna

2022-
231167 World 629985701.0 6588602.0
10-29

2022-
231168 Yemen 11939.0 2158.0
10-29

2022-
231169 Zambia 333674.0 4017.0
10-29

2022-
231170 Zimbabwe 257893.0 5606.0
10-29

2022-
231171 Austria missing_value missing_value
10-30

2022-
231172 Germany missing_value missing_value
10-30

2022-
231173 Israel missing_value missing_value
10-30

2022-
231174 Malaysia missing_value missing_value
10-30

2022-
231175 Russia missing_value missing_value
10-30

#change missing_value to 0

df3['total_cases'].replace({'missing_value':0},inplace=Tru
df3['total_deaths'].replace({'missing_value':0},inplace=Tr
df3['total_vaccinations'].replace({'missing_value':0},inpl
df3

https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 6/14
11/1/22, 12:48 PM assignment.ipynb - Colaboratory

Date Country total_cases total_deaths

2020-
0 Argentina 0.0 0.0
01-01

2020-
1 Mexico 0.0 0.0
01-01

2020-
2 Argentina 0.0 0.0
01-02

2020-
3 Mexico 0.0 0.0
01-02

2020-
4 Argentina 0.0 0.0
#total countries where total_deaths is greater than 1000000
01-03

df4=df3[df3['total_deaths']>1000000]
... ... ... ... ...

2022-
231171 Austria 0.0 0.0
10-30
df4
2022-
231172 Germany 0.0 0.0
10-30

https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 7/14
11/1/22, 12:48 PM assignment.ipynb - Colaboratory

Date Country total_cases total_deaths t

2020-
45117 World 29933149.0 1004634.0
09-16
#unique countries where total_deaths is greater than 1000000
2020-
countries = df4['Country'].unique()
45345 World 30248717.0 1010285.0
09-17
print(len(countries))
2020-
print()
45573 World 30575342.0 1016140.0
09-18
print("conuntry_deaths_greater_than_1000000 : ")
print() 2020-
45801 World 30868423.0 1021413.0
09-19
conuntry_deaths_greater_than_1000000 = list(df4['Country']
2020-
conuntry_deaths_greater_than_1000000
46029 World 31123528.0 1025401.0
09-20
10
... ... ... ... ...

conuntry_deaths_greater_than_1000000
2022- North :

231093 115473140.0 1525509.0


10-29 America
['World',

2022-
'High income',
South
231132 64273975.0 1332168.0
10-29 income',

'Upper middle America


'Europe',

2022-
'South America',
United
231158 97447532.0 1070264.0
'Asia',
10-29 States
'Lower middle income',

Upper
'North America',

2022-
231159 middle 138798900.0 2497566.0
'European Union',

10-29
income
'United States']
2022-
231167 World 629985701.0 6588602.0
10-29

New Section
4770 rows × 5 columns

#plotting the trend
for idx in range(0, len(countries)):
 C = df4[df4['Country']==countries[idx]].reset_index()
 plt.scatter(np.arange(0, len(C)),C['total_cases'],color="
 plt.scatter(np.arange(0, len(C)),C['total_deaths'],color=
 plt.scatter(np.arange(0, len(C)),C['total_vaccinations'],
 plt.title(countries[ idx])
 plt.xlabel("Number of days since first suspect")
 plt.ylabel("Number of cases")
 plt.legend()
 plt.show()

https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 8/14
11/1/22, 12:48 PM assignment.ipynb - Colaboratory

https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 9/14
11/1/22, 12:48 PM assignment.ipynb - Colaboratory

https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 10/14
11/1/22, 12:48 PM assignment.ipynb - Colaboratory

#group the countries

df5 = df4.groupby(['Country']) [['Country', 'total_cases',
df5

https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 11/14
11/1/22, 12:48 PM assignment.ipynb - Colaboratory

Country total_cases total_deaths

0 Asia 5.521382e+10 5.806319e+08

1 Europe 7.241052e+10 8.742173e+08

2 European Union 3.696045e+10 2.760102e+08

3 High income 1.213026e+11 1.303327e+09

4 Lower middle income 3.455630e+10 5.294324e+08

5 North America 3.665789e+10 5.602120e+08

6 South America 2.429292e+10 6.009055e+08

7 United States 1.583746e+10 1.799059e+08

C = df5

8 Upper middle income 5.491293e+10 1.225825e+09


plt.scatter (np.arange(0,len (C)),C['total_cases'], color=
9 World 2.286620e+11 3.447812e+09
plt.scatter(np.arange(0,len (C)),C['total_deaths'], color=
plt.title("World")

plt.xlabel("Number of days since first suspect")

plt.ylabel("Number of cases")

plt.legend()

plt.show()

#analysis by date where total_deaths is greater than 1000000

date = df4['Date'].unique()
len (date)

774

https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 12/14
11/1/22, 12:48 PM assignment.ipynb - Colaboratory

df6 = df4.groupby(['Date']) [[ 'Date', 'total_cases', 'tot
df6

Date total_cases total_deaths

0 2020-09-16 2.993315e+07 1004634.0

1 2020-09-17 3.024872e+07 1010285.0

2 2020-09-18 3.057534e+07 1016140.0

3 2020-09-19 3.086842e+07 1021413.0

4 2020-09-20 3.112353e+07 1025401.0

... ... ... ...

769 2022-10-25 2.128946e+09 21647461.0

770 2022-10-26 2.130554e+09 21656263.0

771 2022-10-27 2.132099e+09 21665448.0

772 2022-10-28 2.133303e+09 21670280.0

773 2022-10-29 2.133942e+09 21672141.0

774 rows × 3 columns

#graph plotting by Date
C = df6
plt.scatter (np.arange(0,len (C)),C['total_cases'], color=
plt.scatter (np.arange(0,len (C)),C['total_deaths' ], colo
plt.title("World")
plt.xlabel("Number of days since first suspect")
plt.ylabel("Number of cases")
plt.legend()
plt.show()

https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 13/14
11/1/22, 12:48 PM assignment.ipynb - Colaboratory

Disk 85.10 GB available

Colab paid products


-
Cancel contracts here

check 0s completed at 12:36 PM

https://colab.research.google.com/drive/1AJFCKFDfnSSH-YAv3_nCI5B9eA7zmP7v#scrollTo=32WSiJI7AmH2&printMode=true 14/14

You might also like