Indian Food Analysis 1

Download as pdf or txt
Download as pdf or txt
You are on page 1 of 22

In [45]: import warnings

warnings.filterwarnings('ignore')

import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

In [46]: import pandas as pd


data=pd.read_csv(r"C:\Users\yukta\OneDrive\Desktop\indian_food.csv")
food_raw_df=pd.DataFrame(data)
print(data)
name ingredients \
0 Balu shahi Maida flour, yogurt, oil, sugar
1 Boondi Gram flour, ghee, sugar
2 Gajar ka halwa Carrots, milk, sugar, ghee, cashews, raisins
3 Ghevar Flour, ghee, kewra, milk, clarified butter, su...
4 Gulab jamun Milk powder, plain flour, baking powder, ghee,...
.. ... ...
250 Til Pitha Glutinous rice, black sesame seeds, gur
251 Bebinca Coconut milk, egg yolks, clarified butter, all...
252 Shufta Cottage cheese, dry dates, dried rose petals, ...
253 Mawa Bati Milk powder, dry fruits, arrowroot powder, all...
254 Pinaca Brown rice, fennel seeds, grated coconut, blac...

diet prep_time cook_time flavor_profile course st


ate \
0 vegetarian 45 25.0 sweet dessert West Ben
gal
1 vegetarian 80 30.0 sweet dessert Rajast
han
2 vegetarian 15 60.0 sweet dessert Pun
jab
3 vegetarian 15 30.0 sweet dessert Rajast
han
4 vegetarian 15 40.0 sweet dessert West Ben
gal
.. ... ... ... ... ...
...
250 vegetarian 5 30.0 sweet dessert As
sam
251 vegetarian 20 60.0 sweet dessert
Goa
252 vegetarian NaN NaN sweet dessert Jammu & Kash
mir
253 vegetarian 20 45.0 sweet dessert Madhya Prad
esh
254 vegetarian NaN NaN sweet dessert
Goa

region
0 East
1 NaN
2 North
3 West
4 NaN
.. ...
250 North East
251 West
252 North
253 Central
254 West

[255 rows x 9 columns]

In [49]: food_raw_df.head()
Out[49]: name ingredients diet prep_time cook_time flavor_profile cou

Maida flour,
Balu
0 yogurt, oil, vegetarian 45 25.0 sweet des
shahi
sugar

Gram flour,
1 Boondi vegetarian 80 30.0 sweet des
ghee, sugar

Carrots,
Gajar milk, sugar,
2 ka ghee, vegetarian 15 60.0 sweet des
halwa cashews,
raisins

Flour, ghee,
kewra, milk,
3 Ghevar vegetarian 15 30.0 sweet des
clarified
butter, su...

Milk powder,
plain flour,
Gulab
4 baking vegetarian 15 40.0 sweet des
jamun
powder,
ghee,...

In [51]: food_raw_df.tail()

Out[51]: name ingredients diet prep_time cook_time flavor_profile

Glutinous
rice, black
250 Til Pitha vegetarian 5 30.0 sweet
sesame
seeds, gur

Coconut
milk, egg
251 Bebinca yolks, vegetarian 20 60.0 sweet
clarified
butter, all...

Cottage
cheese, dry
252 Shufta dates, dried vegetarian NaN NaN sweet
rose petals,
...

Milk powder,
Mawa dry fruits,
253 vegetarian 20 45.0 sweet
Bati arrowroot
powder, all...

Brown rice,
fennel
seeds,
254 Pinaca vegetarian NaN NaN sweet
grated
coconut,
blac...

In [15]: food_raw_df.shape

Out[15]: (255, 9)

In [17]: food_raw_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 255 entries, 0 to 254
Data columns (total 9 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 name 255 non-null object
1 ingredients 255 non-null object
2 diet 255 non-null object
3 prep_time 226 non-null object
4 cook_time 227 non-null float64
5 flavor_profile 226 non-null object
6 course 255 non-null object
7 state 231 non-null object
8 region 239 non-null object
dtypes: float64(1), object(8)
memory usage: 18.1+ KB

In [207… food_raw_df.describe()

Out[207… cook_time

count 227.000000

mean 38.911894

std 49.421711

min 2.000000

25% 20.000000

50% 30.000000

75% 45.000000

max 720.000000

In [193… food_raw_df.dtypes

Out[193… name object


ingredients object
diet object
prep_time object
cook_time float64
flavor_profile object
course object
state object
region object
area object
dtype: object

In [195… food_raw_df.isna().sum()
Out[195… name 0
ingredients 0
diet 0
prep_time 29
cook_time 28
flavor_profile 29
course 0
state 24
region 16
area 0
dtype: int64

In [199… food_raw_df.columns

Out[199… Index(['name', 'ingredients', 'diet', 'prep_time', 'cook_time',


'flavor_profile', 'course', 'state', 'region', 'area'],
dtype='object')

In [53]: food_raw_df[food_raw_df.duplicated('ingredients')]

Out[53]: name ingredients diet prep_time cook_time flavor_profile

Gram flour,
10 Laddu vegetarian 10 40.0 sweet
ghee, sugar

Chhena,
25 Ledikeni vegetarian 45 45.0 sweet
sugar, ghee

Arbi ke
patte,
sesame
199 Patra vegetarian 10 40.0 spicy
seeds, gur,
bengal gram
...

In [31]: food_raw_df.loc[0:5,['name','prep_time','cook_time']]

Out[31]: name prep_time cook_time

0 Balu shahi 45 25.0

1 Boondi 80 30.0

2 Gajar ka halwa 15 60.0

3 Ghevar 15 30.0

4 Gulab jamun 15 40.0

5 Imarti 10 50.0

In [33]: food_raw_df.loc[3,['name','prep_time']]

Out[33]: name Ghevar


prep_time 15
Name: 3, dtype: object

In [35]: food_raw_df.iloc[0:3,1:5]
Out[35]: ingredients diet prep_time cook_time

0 Maida flour, yogurt, oil, sugar vegetarian 45 25.0

1 Gram flour, ghee, sugar vegetarian 80 30.0

Carrots, milk, sugar, ghee, cashews,


2 vegetarian 15 60.0
raisins

In [57]: food_raw_df['cook_time'].dropna().tail()

Out[57]: 247 50.0


249 20.0
250 30.0
251 60.0
253 45.0
Name: cook_time, dtype: float64

In [55]: food_raw_df[['state','region']].dropna()

Out[55]: state region

0 West Bengal East

2 Punjab North

3 Rajasthan West

5 West Bengal East

6 Uttar Pradesh North

... ... ...

250 Assam North East

251 Goa West

252 Jammu & Kashmir North

253 Madhya Pradesh Central

254 Goa West

228 rows × 2 columns

In [41]: food_raw_df[['state','region']].fillna("currentry unavailable")


Out[41]: state region

0 West Bengal East

1 Rajasthan currentry unavailable

2 Punjab North

3 Rajasthan West

4 West Bengal currentry unavailable

... ... ...

250 Assam North East

251 Goa West

252 Jammu & Kashmir North

253 Madhya Pradesh Central

254 Goa West

255 rows × 2 columns

In [43]: food_raw_df['area']=food_raw_df['state'].astype(str)+food_raw_df['region'
food_raw_df
Out[43]: name ingredients diet prep_time cook_time flavor_profile

Maida flour,
Balu
0 yogurt, oil, vegetarian 45 25.0 sweet
shahi
sugar

Gram flour,
1 Boondi vegetarian 80 30.0 sweet
ghee, sugar

Carrots,
Gajar milk, sugar,
2 ka ghee, vegetarian 15 60.0 sweet
halwa cashews,
raisins

Flour, ghee,
kewra, milk,
3 Ghevar vegetarian 15 30.0 sweet
clarified
butter, su...

Milk powder,
plain flour,
Gulab
4 baking vegetarian 15 40.0 sweet
jamun
powder,
ghee,...

... ... ... ... ... ... ...

Glutinous
rice, black
250 Til Pitha vegetarian 5 30.0 sweet
sesame
seeds, gur

Coconut
milk, egg
251 Bebinca yolks, vegetarian 20 60.0 sweet
clarified
butter, all...

Cottage
cheese, dry
252 Shufta dates, dried vegetarian NaN NaN sweet
rose petals,
...

Milk powder,
Mawa dry fruits,
253 vegetarian 20 45.0 sweet
Bati arrowroot
powder, all...

Brown rice,
fennel
seeds,
254 Pinaca vegetarian NaN NaN sweet
grated
coconut,
blac...

255 rows × 10 columns

In [45]: food_raw_df[food_raw_df.cook_time <= 25.0].count()


Out[45]: name 72
ingredients 72
diet 72
prep_time 72
cook_time 72
flavor_profile 59
course 72
state 61
region 63
area 72
dtype: int64

In [47]: food_raw_df[food_raw_df.cook_time <= 25.0].count().tail()

Out[47]: flavor_profile 59
course 72
state 61
region 63
area 72
dtype: int64

In [49]: food_raw_df.flavor_profile.unique()

Out[49]: array(['sweet', 'spicy', 'bitter', nan, 'sour'], dtype=object)

In [51]: food_raw_df.region.unique()

Out[51]: array(['East', nan, 'North', 'West', 'North East', 'South', 'Central'],


dtype=object)

In [61]: food_raw_df.state.nunique()

Out[61]: 24

In [99]: region_counts = food_raw_df.region.value_counts()


region_counts

Out[99]: region
West 73
South 59
North 49
East 30
North East 25
Central 3
Name: count, dtype: int64

In [213… food_raw_df['cook_time'].mean()

Out[213… 38.91189427312775

In [215… food_raw_df['cook_time'].min()

Out[215… 2.0

In [217… food_raw_df['cook_time'].max()

Out[217… 720.0
In [117… plt.figure(figsize=(12,6))
plt.title("Distribution of Dishes Across Different Regions of India")
plt.pie(region_counts, labels=region_counts.index, autopct='%1.1f%%', sta

In [121… top_states = food_raw_df.state.value_counts().head(15)


top_states

Out[121… state
Gujarat 35
Punjab 32
Maharashtra 30
West Bengal 24
Assam 21
Tamil Nadu 20
Andhra Pradesh 10
Uttar Pradesh 9
Kerala 8
Odisha 7
Karnataka 6
Rajasthan 6
Telangana 5
Bihar 3
Goa 3
Name: count, dtype: int64

In [135… sns.countplot(y=food_raw_df.flavor_profile)
plt.xticks(rotation=50);
plt.title("Flavor Profiles")plt.ylabel(None);
In [225… plt.figure(figsize=(16,8))
sns.distplot(food_raw_df['cook_time'],color='g')
plt.show()

In [229… food_raw_df[food_raw_df['cook_time']==2]

Out[229… name ingredients diet prep_time cook_time flavor_profile c

Kala chana,
mashed
Pani
109 potato, vegetarian 15 2.0 spicy
puri
boondi, sev,
lemon

In [231… food_raw_df[food_raw_df['cook_time']==720]
Out[231… name ingredients diet prep_time cook_time flavor_profile

Curd, sugar,
62 Shrikhand saffron, vegetarian 10 720.0 sweet
cardamom

In [235… food_raw_df['cook_time'].fillna(value=38.9,inplace=True)

In [239… food_raw_df.isna().sum()

Out[239… name 0
ingredients 0
diet 0
prep_time 29
cook_time 0
flavor_profile 29
course 0
state 24
region 16
area 0
dtype: int64

In [243… food_raw_df[food_raw_df['region'].isna()] #to see null values rows


Out[243… name ingredients diet prep_time cook_time flavor_profile

Gram flour,
1 Boondi vegetarian 80 30.0 swee
ghee, sugar

Milk powder,
plain flour,
Gulab
4 baking vegetarian 15 40.0 swee
jamun
powder,
ghee,...

Cashews,
ghee,
7 Kaju katli vegetarian 10 20.0 swee
cardamom,
sugar

Milk, rice,
9 Kheer sugar, dried vegetarian 10 40.0 swee
fruits

Gram flour,
10 Laddu vegetarian 10 40.0 swee
ghee, sugar

Refined
flour, besan,
12 Nankhatai ghee, vegetarian 20 30.0 swee
powdered
sugar, yo...

Moong dal,
green peas,
94 Khichdi ginger, vegetarian 40 20.0 spicy
tomato,
green c...

Rose syrup,
Kulfi falooda sev,
96 vegetarian 45 25.0 swee
falooda mixed nuts,
saffron, ...

Bottle gourd,
coconut oil,
Lauki ki
98 garam vegetarian 10 20.0 spicy
subji
masala,
ginge...

Kala chana,
mashed
109 Pani puri potato, vegetarian 15 2.0 spicy
boondi, sev,
lemon

Whole wheat
flour, musk
110 Panjeeri melon vegetarian 10 25.0 swee
seeds,
poppy see...

Urad dal,
sev, lemon
111 Papad juice, vegetarian 5 5.0 spicy
chopped
tomatoes

Potatoes,
green peas,
garam
117 Samosa vegetarian 30 30.0 spicy
masala,
ginger,
dough
name ingredients diet prep_time cook_time flavor_profile

Chana dal,
urad dal,
164 Upma ginger, curry vegetarian 10 20.0 spicy
leaves,
sugar

Brown rice,
Brown
231 soy sauce, vegetarian 15 25.0 NaN
Rice
olive oil

Red pepper,
red onion,
248 Red Rice butter, vegetarian NaN 38.9 NaN
watercress,
oli...

In [245… food_raw_df['region'].value_counts()

Out[245… region
West 73
South 59
North 49
East 30
North East 25
Central 3
Name: count, dtype: int64

In [247… food_raw_df[food_raw_df['flavor_profile'].isna()] #to see null values ro


Out[247… name ingredients diet prep_time cook_time flavor_profile

Whole wheat
flour, olive
78 Chapati oil, hot vegetarian 10 10.0 NaN
water, all
p...

Whole wheat
104 Naan flour, honey, vegetarian 60 30.0 NaN
butter, garlic

Garam
masala
powder,
116 Rongi vegetarian 10 30.0 NaN
tomato,
kasuri
methi, cin...

Carrot,
yellow
131 Kanji mustard, red vegetarian 10 45.0 NaN
chilli, black
salt

Coconut oil,
cucumber,
145 Pachadi curd, curry vegetarian 10 25.0 NaN
leaves,
mus...

Yogurt,
ginger, curry
146 Paniyaram leaves, vegetarian 10 20.0 NaN
baking soda,
gre...

Arhar dal,
sambar
Paruppu powder,
150 vegetarian 10 20.0 NaN
sadam tomato,
curry
leaves...

Urad dal,
lemon,
Puli
153 tamarind, vegetarian 10 20.0 NaN
sadam
cooked rice,
curry ...

Brown rice
flour, sugar,
155 Puttu vegetarian 495 40.0 NaN
grated
coconut

Thin rice
flakes, black
157 Sandige sesame vegetarian 120 60.0 NaN
seeds, curry
le...

Sevai,
parboiled
158 Sevai vegetarian 120 30.0 NaN
rice,
steamer
name ingredients diet prep_time cook_time flavor_profile

Urad dal,
curd,
Thayir
159 sesame oil, vegetarian 10 20.0 NaN
sadam
ginger, curry
leav...

Coconut,
whole red
beans,
160 Theeyal vegetarian 15 20.0 NaN
masala,
sesame oil,
...

Jowar flour,
171 Bhakri sesame vegetarian 20 25.0 NaN
seeds

Condensed
milk, nestle
Copra
176 cream, vegetarian 20 30.0 NaN
paak
coconut ice,
red...

Urad dal,
bhuna
chana,
179 Dahi vada vegetarian 30 30.0 NaN
garam
masala,
dates, ta...

Arhar dal,
coconut oil,
180 Dalithoy vegetarian 5 20.0 NaN
curry leaves,
mustard ...

Wheat flour,
189 Kansar cashews, vegetarian 10 40.0 NaN
rapeseed oil

Semolina,
clarified
216 Farsi Puri butter, oil, vegetarian NaN 38.9 NaN
white flour,
...

Raw papaya,
panch
phoran
222 Khar vegetarian 10 20.0 NaN
masala,
nigella
seeds...

Maida,
224 Luchi vegetarian 20 30.0 NaN
vegetable oil

Brinjal,
onions, salt,
Bengena
227 sesame vegetarian v 38.9 NaN
Pitika
seeds,
coriander

Potatoes,
garam
Bilahi masala, non
228 10 20.0 NaN
Maas tomatoes, vegetarian
mustard
oil,...
name ingredients diet prep_time cook_time flavor_profile

Forbidden
black rice,
non
229 Black rice chicken, NaN 38.9 NaN
vegetarian
olive oil,
sliv...

Brown rice,
Brown
231 soy sauce, vegetarian 15 25.0 NaN
Rice
olive oil

Coconut,
prawns,
Chingri curd, non
236 15 30.0 NaN
Bhape mustard vegetarian
seed, green
chili

Curd,
cooked rice,
244 Pakhala vegetarian NaN 38.9 NaN
curry leaves,
dry chilli

Tea leaves,
white
245 Pani Pitha sesame vegetarian 10 20.0 NaN
seeds, dry
coconut, s...

Red pepper,
red onion,
248 Red Rice butter, vegetarian NaN 38.9 NaN
watercress,
oli...

In [251… sns.countplot(food_raw_df['diet'])
food_raw_df['diet'].value_counts()

Out[251… diet
vegetarian 226
non vegetarian 29
Name: count, dtype: int64

In [257… sns.countplot(food_raw_df['flavor_profile'])
food_raw_df['flavor_profile'].value_counts()
plt.title("counts of flavour")

Out[257… Text(0.5, 1.0, 'counts of flavour')

In [259… sns.countplot(food_raw_df['course'])
food_raw_df['course'].value_counts()
plt.title("counts of course")

Out[259… Text(0.5, 1.0, 'counts of course')

In [263… plt.figure(figsize=(24,10))
sns.countplot(food_raw_df['state'])
food_raw_df['state'].value_counts()
Out[263… state
Gujarat 35
Punjab 32
Maharashtra 30
West Bengal 24
Assam 21
Tamil Nadu 20
Andhra Pradesh 10
Uttar Pradesh 9
Kerala 8
Odisha 7
Karnataka 6
Rajasthan 6
Telangana 5
Bihar 3
Goa 3
Manipur 2
Jammu & Kashmir 2
Madhya Pradesh 2
Uttarakhand 1
Tripura 1
Nagaland 1
NCT of Delhi 1
Chhattisgarh 1
Haryana 1
Name: count, dtype: int64

In [265… state_reciepies=food_raw_df.groupby('state')

In [267… state_reciepies['diet'].value_counts()
Out[267… state diet
Andhra Pradesh vegetarian 10
Assam vegetarian 11
non vegetarian 10
Bihar vegetarian 3
Chhattisgarh vegetarian 1
Goa vegetarian 2
non vegetarian 1
Gujarat vegetarian 35
Haryana vegetarian 1
Jammu & Kashmir vegetarian 2
Karnataka vegetarian 6
Kerala vegetarian 7
non vegetarian 1
Madhya Pradesh vegetarian 2
Maharashtra vegetarian 28
non vegetarian 2
Manipur non vegetarian 1
vegetarian 1
NCT of Delhi non vegetarian 1
Nagaland non vegetarian 1
Odisha vegetarian 7
Punjab vegetarian 28
non vegetarian 4
Rajasthan vegetarian 6
Tamil Nadu vegetarian 19
non vegetarian 1
Telangana vegetarian 4
non vegetarian 1
Tripura non vegetarian 1
Uttar Pradesh vegetarian 9
Uttarakhand vegetarian 1
West Bengal vegetarian 19
non vegetarian 5
Name: count, dtype: int64

In [271… food_raw_df[food_raw_df['state']=='Karnataka']
Out[271… name ingredients diet prep_time cook_time flavor_profile

Milk, Sugar,
Dharwad
40 Dharwadi vegetarian 20 60.0 swee
pedha
buffalo milk

Besan flour,
semolina,
Mysore
45 mung bean, vegetarian 5 20.0 swee
pak
jaggery,
coc...

Maida flour,
turmeric,
Obbattu
46 coconut, vegetarian 180 60.0 swee
holige
chickpeas,
jag...

Split pigeon
Bisi bele peas, chana
126 vegetarian 30 45.0 spic
bath dal, urad
dal, green ...

Moong dal,
cucumber,
140 Koshambri vegetarian 10 20.0 spic
curry leaves,
green chili...

Thin rice
flakes, black
157 Sandige sesame vegetarian 120 60.0 NaN
seeds, curry
le...

In [273… food_raw_df[food_raw_df['state']=='Telangana']

Out[273… name ingredients diet prep_time cook_time flavor_profile

Double
Loaf bread,
41 ka vegetarian 20 30.0 sweet
milk
meetha

Qubani
Apricots,
51 ka vegetarian 10 20.0 sweet
sugar syrup
meetha

Vermicelli
Sheer
52 pudding, vegetarian 10 20.0 sweet
korma
milk

Chicken
thighs,
non
75 Biryani basmati rice, 30 120.0 spicy
vegetarian
star anise,
swee...

Rose water,
Shahi milk, white
120 vegetarian 10 30.0 sweet
tukra bread slices,
saffron,...

In [309… plt.hist(food_raw_df.cook_time)
Out[309… (array([243., 11., 0., 0., 0., 0., 0., 0., 0., 1.]),
array([ 2. , 73.8, 145.6, 217.4, 289.2, 361. , 432.8, 504.6, 576.4,
648.2, 720. ]),
<BarContainer object of 10 artists>)

You might also like