3/27/24, 11:20 PM Untitled11 - Jupyter Notebook
~import libraries
In [1]: import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
%matplotlib inline
sns.set()
In [2]: Hr = pd.read_csv(r'C:\Users\compucity\Downloads\Book1.csv')
Hr.head()
Out[2]:
E_ID Name Age Address Telephone Salary Department Hire Date
0 11 Aleya 46 Cairo 4218483 3000 Account 24-02-03
1 9 Hassan 25 Cairo 3578283 2000 Sales 21-01-06
2 15 Ramy 57 Alex 3674313 5000 Computer 21-03-00
3 18 Ola 28 Milan 4186473 5000 Sales 04-02-07
4 22 Zeiad 29 Milan 3642303 2000 Sales 01-03-98
task of Data
1- Find the number of employees in each governorate
2- Find the number of employees in each Department
3- Average age of employees in each department
4- Average Salary of employees in each department
5- Retrieving the data of employees who work in the computer department only,
as well as the rest of the employees in other departments
6- Find the number of employees in each department who work in Cairo Governorate only
7- Search for the employee who receives the highest salary and retrieve his complete data+
8- Number of employees by department in each governorate
9- Bonus ... Based on Hire Date
Hire Date1 >= -1-2005 5% of Salary
1-1-2003 10%
1-1-2000 15%
1-1-1995 20%
1-1-1990 25%
Else 30%
Based on Hire Date
10 - Find some suitable graph for the data
localhost:8889/notebooks/mahmoud1/Untitled11.ipynb 1/12
3/27/24, 11:20 PM Untitled11 - Jupyter Notebook
In [4]: Hr.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24 entries, 0 to 23
Data columns (total 8 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 E_ID 24 non-null int64
1 Name 24 non-null object
2 Age 24 non-null int64
3 Address 24 non-null object
4 Telephone 24 non-null int64
5 Salary 24 non-null int64
6 Department 24 non-null object
7 Hire Date 24 non-null object
dtypes: int64(4), object(4)
memory usage: 1.6+ KB
In [5]: Hr.dtypes
Out[5]: E_ID int64
Name object
Age int64
Address object
Telephone int64
Salary int64
Department object
Hire Date object
dtype: object
In [6]: # Total isnull data
Hr.isnull().sum()
Out[6]: E_ID 0
Name 0
Age 0
Address 0
Telephone 0
Salary 0
Department 0
Hire Date 0
dtype: int64
In [7]: # duplicated
Hr.duplicated().sum()
Out[7]: 0
In [8]: Hr['Address'].value_counts()
Out[8]: Cairo 8
Alex 6
Giza 5
Milan 2
Alexandria 1
Alixandria 1
milan 1
Name: Address, dtype: int64
localhost:8889/notebooks/mahmoud1/Untitled11.ipynb 2/12
3/27/24, 11:20 PM Untitled11 - Jupyter Notebook
In [9]: Hr['Department'].value_counts()
Out[9]: Sales 9
Computer 8
Account 7
Name: Department, dtype: int64
In [10]: # AVERAGE Age
Hr.groupby('Department')['Age'].mean()
Out[10]: Department
Account 46.142857
Computer 41.625000
Sales 33.222222
Name: Age, dtype: float64
In [11]: # AVERAGE Salary of Department
Hr.groupby('Department')['Salary'].mean().round()
Out[11]: Department
Account 2143.0
Computer 4375.0
Sales 2778.0
Name: Salary, dtype: float64
In [12]: Hr[Hr['Department']== "Computer"]
Out[12]: E_ID Name Age Address Telephone Salary Department Hire Date
2 15 Ramy 57 Alex 3674313 5000 Computer 21-03-00
5 12 Salwa 34 Alexandria 4090443 4000 Computer 06-12-05
8 27 Yousef 46 Alixandria 3706323 7000 Computer 10-04-03
16 58 Neveen 43 Alex 3834363 4000 Computer 29-06-03
19 61 Yasser 37 Cairo 3962403 3000 Computer 17-09-06
21 79 Maged 33 Cairo 3930393 6000 Computer 28-08-98
22 90 Ahmed 28 Cairo 4250493 4000 Computer 16-03-90
23 94 Dina 55 Cairo 4282503 2000 Computer 05-04-99
In [13]: Hr[Hr['Department']== "Sales"]
Out[13]: E_ID Name Age Address Telephone Salary Department Hire Date
1 9 Hassan 25 Cairo 3578283 2000 Sales 21-01-06
3 18 Ola 28 Milan 4186473 5000 Sales 04-02-07
4 22 Zeiad 29 Milan 3642303 2000 Sales 01-03-98
6 24 Ali 24 Giza 4154463 1000 Sales 15-01-91
7 25 Tahany 39 Alex 3546273 3000 Sales 01-01-00
10 35 Mahmoud 57 Giza 3610293 4000 Sales 10-02-99
12 48 Wagdy 24 Cairo 4058433 5000 Sales 16-11-07
14 55 Samah 38 milan 4026423 1000 Sales 27-10-01
15 57 Rawan 35 Alex 3994413 2000 Sales 07-10-02
localhost:8889/notebooks/mahmoud1/Untitled11.ipynb 3/12
3/27/24, 11:20 PM Untitled11 - Jupyter Notebook
In [14]: Hr[Hr['Department']== "Account"]
Out[14]: E_ID Name Age Address Telephone Salary Department Hire Date
0 11 Aleya 46 Cairo 4218483 3000 Account 24-02-03
9 29 Khaled 29 Alex 3770343 3000 Account 20-05-01
11 47 Talaat 48 Giza 3866373 3000 Account 19-07-01
13 54 Samy 55 Giza 3738333 1000 Account 30-04-04
17 62 Amr 44 Cairo 4122453 2000 Account 26-12-95
18 65 Hala 53 Alex 3802353 2000 Account 09-06-90
20 71 Radwa 48 Giza 3898383 1000 Account 08-08-05
In [15]: # Find employees who work in Alexandria in the computer department
Hr[Hr['Department']== "Computer"]['Address']== "Alex"
Out[15]: 2 True
5 False
8 False
16 True
19 False
21 False
22 False
23 False
Name: Address, dtype: bool
In [16]: # Find the number of employees in each department who work in Cairo Governorate only
Hr[Hr['Address']=='Cairo']['Department'].value_counts()
Out[16]: Computer 4
Account 2
Sales 2
Name: Department, dtype: int64
In [36]: # Search for the employee who receives the highest salary and retrieve his complete data
Hr[Hr['Salary']==Hr['Salary']].max()[['Name']+['Address']+['Department']+['Age']+['Salary']
Out[36]: Name Zeiad
Address milan
Department Sales
Age 57
Salary 7000
dtype: object
In [37]: # Search for the employee who receives the min salary and retrieve his complete data
Hr[Hr['Salary']==Hr['Salary']].min()[['Name']+['Address']+['Department']+['Age']+['Salary']
Out[37]: Name Ahmed
Address Alex
Department Account
Age 24
Salary 1000
dtype: object
localhost:8889/notebooks/mahmoud1/Untitled11.ipynb 4/12
3/27/24, 11:20 PM Untitled11 - Jupyter Notebook
In [18]: # Number of employees by department in each governorate
Hr.groupby('Address')['Department'].value_counts()
Out[18]: Address Department
Alex Account 2
Computer 2
Sales 2
Alexandria Computer 1
Alixandria Computer 1
Cairo Computer 4
Account 2
Sales 2
Giza Account 3
Sales 2
Milan Sales 2
milan Sales 1
Name: Department, dtype: int64
#Two different ways to find the solution and how to deal with history to do the calculation
localhost:8889/notebooks/mahmoud1/Untitled11.ipynb 5/12
3/27/24, 11:20 PM Untitled11 - Jupyter Notebook
In [14]: def an (Bonus):
#for r in range(len(Hr['Hire Date'])):
hire_date = datetime.datetime.strptime(Hr['Hire Date'], '%d-%m-%y')
if hire_date >= datetime.datetime(2005, 1, 1):
Hr['Bonus'] = 5/100 * Hr['Salary']
elif hire_date >= datetime.datetime(2003, 1, 1):
Hr['Bonus']= 10/100 * Hr['Salary']
elif hire_date >= datetime.datetime(2000, 1, 1):
Hr['Bonus'] = 15/100 * Hr['Salary']
elif hire_date >= datetime.datetime(1995, 1, 1):
Hr['Bonus'] = 20/100 * Hr['Salary']
elif hire_date >= datetime.datetime(1990, 1, 1):
Hr['Bonus'] = 25/100 * Hr['Salary']
else :
Hr['Bonus']= 30/100 * Hr['Salary']
#Hr['Bonus'] = Hr.apply(an, axis=1)
print(Hr['Bonus'])
0 300.0
1 100.0
2 750.0
3 250.0
4 400.0
5 200.0
6 250.0
7 450.0
8 700.0
9 450.0
10 800.0
11 450.0
12 250.0
13 100.0
14 150.0
15 300.0
16 400.0
17 400.0
18 500.0
19 150.0
20 50.0
21 1200.0
22 1000.0
23 400.0
Name: Bonus, dtype: float64
localhost:8889/notebooks/mahmoud1/Untitled11.ipynb 6/12
3/27/24, 11:20 PM Untitled11 - Jupyter Notebook
In [7]: def an(row):
hire_date = datetime.datetime.strptime(row['Hire Date'], '%d-%m-%y')
if hire_date >= datetime.datetime(2005, 1, 1):
return 5/100 * row['Salary']
elif hire_date >= datetime.datetime(2003, 1, 1):
return 10/100 * row['Salary']
elif hire_date >= datetime.datetime(2000, 1, 1):
return 15/100 * row['Salary']
elif hire_date >= datetime.datetime(1995, 1, 1):
return 20/100 * row['Salary']
elif hire_date >= datetime.datetime(1990, 1, 1):
return 25/100 * row['Salary']
else:
return 30/100 * row['Salary']
Hr['Bonus'] = Hr.apply(an, axis=1)
print(Hr['Bonus'])
0 300.0
1 100.0
2 750.0
3 250.0
4 400.0
5 200.0
6 250.0
7 450.0
8 700.0
9 450.0
10 800.0
11 450.0
12 250.0
13 100.0
14 150.0
15 300.0
16 400.0
17 400.0
18 500.0
19 150.0
20 50.0
21 1200.0
22 1000.0
23 400.0
Name: Bonus, dtype: float64
In [9]: # Spreadsheet after adding the increment column
Hr.head()
Out[9]:
E_ID Name Age Address Telephone Salary Department Hire Date Bonus
0 11 Aleya 46 Cairo 4218483 3000 Account 24-02-03 300.0
1 9 Hassan 25 Cairo 3578283 2000 Sales 21-01-06 100.0
2 15 Ramy 57 Alex 3674313 5000 Computer 21-03-00 750.0
3 18 Ola 28 Milan 4186473 5000 Sales 04-02-07 250.0
4 22 Zeiad 29 Milan 3642303 2000 Sales 01-03-98 400.0
localhost:8889/notebooks/mahmoud1/Untitled11.ipynb 7/12
3/27/24, 11:20 PM Untitled11 - Jupyter Notebook
In [30]: # boxplot mean of age
sns.boxplot(x=Hr['Age'],palette='YlGn')
Out[30]: <AxesSubplot:xlabel='Age'>
In [18]: xa = sns.countplot(x=Hr['Department'],palette='PuBu')
for bar in xa.containers:
xa.bar_label(bar)
localhost:8889/notebooks/mahmoud1/Untitled11.ipynb 8/12
3/27/24, 11:20 PM Untitled11 - Jupyter Notebook
In [22]: xa = sns.countplot(x=Hr['Address'],palette='Set1')
for bar in xa.containers:
xa.bar_label(bar)
In [19]: plt.figure(figsize=(10,5))
sns.countplot(x='Department',hue='Address',data=Hr,palette='hsv')
Out[19]: <AxesSubplot:xlabel='Department', ylabel='count'>
localhost:8889/notebooks/mahmoud1/Untitled11.ipynb 9/12
3/27/24, 11:20 PM Untitled11 - Jupyter Notebook
In [95]: # Age with Department
#Comparing ages with salaries in different governorates
plt.figure(figsize=(8,5))
sns.boxplot(x=Hr['Address'],hue=Hr['Department'],y=Hr['Age'],palette='Set1')
Out[95]: <AxesSubplot:xlabel='Address', ylabel='Age'>
localhost:8889/notebooks/mahmoud1/Untitled11.ipynb 10/12
3/27/24, 11:20 PM Untitled11 - Jupyter Notebook
In [96]: #Compare ages with salaries in different departments
sns.jointplot(x='Salary',y='Age',hue='Department',data=Hr)
Out[96]: <seaborn.axisgrid.JointGrid at 0x2043cd52d30>
localhost:8889/notebooks/mahmoud1/Untitled11.ipynb 11/12
3/27/24, 11:20 PM Untitled11 - Jupyter Notebook
In [41]: #Compare ages with salaries in different Address
sns.jointplot(x='Salary',y='Age',hue='Address',data=Hr)
Out[41]: <seaborn.axisgrid.JointGrid at 0x1a1bade56d0>
In [ ]:
localhost:8889/notebooks/mahmoud1/Untitled11.ipynb 12/12