Exploring Factors Influencing Mood Swings in Women: A Comprehensive Analysis
Exploring Factors Influencing Mood Swings in Women: A Comprehensive Analysis
Exploring Factors Influencing Mood Swings in Women: A Comprehensive Analysis
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
# Load the survey data from a CSV file located in the working directory.
df = pd.read_csv('women_mood_swings_dataset.csv')
# Preview the first five rows (shown as the notebook cell output).
df.head()
Positive_Interaction_Frequency Weekly_Self_Care_Activities \
0 3 1
1 10 2
2 9 4
3 0 3
4 1 4
Mood_Tracking_App_Usage
0 No
1 No
2 Yes
3 No
4 No
[5 rows x 32 columns]
# Preview the last five rows to check that the file was read to the end.
df.tail()
Participant_ID Age Relationship_Role Relationship_Duration
Mood_Level \
4995 P4996 33 Girlfriend 16.6
6
4996 P4997 37 Wife 16.8
10
4997 P4998 46 Girlfriend 11.9
4
4998 P4999 20 Girlfriend 8.8
2
4999 P5000 40 Wife 9.5
8
Positive_Interaction_Frequency Weekly_Self_Care_Activities \
4995 8 5
4996 3 1
4997 9 0
4998 1 3
4999 6 5
Mood_Tracking_App_Usage
4995 No
4996 Yes
4997 No
4998 No
4999 Yes
[5 rows x 32 columns]
# (number of rows, number of columns) of the loaded frame.
df.shape
(5000, 32)
# Column labels of the frame.
df.columns
# Number of rows that are exact duplicates of an earlier row.
df.duplicated().sum()
# Number of missing values in each column.
df.isnull().sum()
Participant_ID 0
Age 0
Relationship_Role 0
Relationship_Duration 0
Mood_Level 0
Time_of_Day 0
Menstrual_Phase 0
PMS_Symptoms 0
Sleep_Quality 0
Sleep_Duration 0
Recent_Activity 0
Stress_Level 0
Recent_Argument 0
Conflict_Intensity 0
Partner_Support_Level 0
Self_Esteem_Level 0
Physical_Activity_Level 0
Social_Interaction_Level 0
Diet_Quality 0
Caffeine_Intake 0
Alcohol_Intake 0
Hormonal_Birth_Control 0
Mental_Health_Status 0
Health_Conditions 1662
Use_of_Therapy 0
Weather 0
Temperature 0
Perceived_Financial_Stress 0
Partner_Presence 0
Positive_Interaction_Frequency 0
Weekly_Self_Care_Activities 0
Mood_Tracking_App_Usage 0
dtype: int64
# Print per-column dtype and non-null count, plus overall memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 32 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Participant_ID 5000 non-null object
1 Age 5000 non-null int64
2 Relationship_Role 5000 non-null object
3 Relationship_Duration 5000 non-null float64
4 Mood_Level 5000 non-null int64
5 Time_of_Day 5000 non-null object
6 Menstrual_Phase 5000 non-null object
7 PMS_Symptoms 5000 non-null object
8 Sleep_Quality 5000 non-null int64
9 Sleep_Duration 5000 non-null float64
10 Recent_Activity 5000 non-null object
11 Stress_Level 5000 non-null int64
12 Recent_Argument 5000 non-null object
13 Conflict_Intensity 5000 non-null int64
14 Partner_Support_Level 5000 non-null int64
15 Self_Esteem_Level 5000 non-null int64
16 Physical_Activity_Level 5000 non-null object
17 Social_Interaction_Level 5000 non-null int64
18 Diet_Quality 5000 non-null int64
19 Caffeine_Intake 5000 non-null int64
20 Alcohol_Intake 5000 non-null int64
21 Hormonal_Birth_Control 5000 non-null object
22 Mental_Health_Status 5000 non-null int64
23 Health_Conditions 3338 non-null object
24 Use_of_Therapy 5000 non-null object
25 Weather 5000 non-null object
26 Temperature 5000 non-null float64
27 Perceived_Financial_Stress 5000 non-null int64
28 Partner_Presence 5000 non-null object
29 Positive_Interaction_Frequency 5000 non-null int64
30 Weekly_Self_Care_Activities 5000 non-null int64
31 Mood_Tracking_App_Usage 5000 non-null object
dtypes: float64(3), int64(15), object(14)
memory usage: 1.2+ MB
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()
Partner_Support_Level Self_Esteem_Level
Social_Interaction_Level \
count 5000.000000 5000.000000
5000.00000
mean 5.552400 5.560600
5.43100
std 2.863371 2.857194
2.89346
min 1.000000 1.000000
1.00000
25% 3.000000 3.000000
3.00000
50% 6.000000 6.000000
5.00000
75% 8.000000 8.000000
8.00000
max 10.000000 10.000000
10.00000
Temperature Perceived_Financial_Stress \
count 5000.000000 5000.00000
mean 25.136460 5.55460
std 5.787268 2.85192
min 15.000000 1.00000
25% 20.100000 3.00000
50% 25.200000 6.00000
75% 30.100000 8.00000
max 35.000000 10.00000
Positive_Interaction_Frequency Weekly_Self_Care_Activities
count 5000.000000 5000.000000
mean 5.049000 3.032400
std 3.178399 1.993626
min 0.000000 0.000000
25% 2.000000 1.000000
50% 5.000000 3.000000
75% 8.000000 5.000000
max 10.000000 6.000000
# Health_Conditions is the only column with missing values; replace them with
# an explicit "Not Available" label so the column can be treated as an
# ordinary category. (The literal was previously split across two lines,
# which is a syntax error.)
df['Health_Conditions'] = df['Health_Conditions'].fillna("Not Available")
# Number of distinct values in each column.
df.nunique()
Participant_ID 5000
Age 32
Relationship_Role 2
Relationship_Duration 296
Mood_Level 10
Time_of_Day 4
Menstrual_Phase 4
PMS_Symptoms 2
Sleep_Quality 10
Sleep_Duration 61
Recent_Activity 6
Stress_Level 10
Recent_Argument 2
Conflict_Intensity 11
Partner_Support_Level 10
Self_Esteem_Level 10
Physical_Activity_Level 4
Social_Interaction_Level 10
Diet_Quality 10
Caffeine_Intake 5
Alcohol_Intake 10
Hormonal_Birth_Control 2
Mental_Health_Status 10
Health_Conditions 3
Use_of_Therapy 2
Weather 5
Temperature 201
Perceived_Financial_Stress 10
Partner_Presence 2
Positive_Interaction_Frequency 11
Weekly_Self_Care_Activities 7
Mood_Tracking_App_Usage 2
dtype: int64
# Split the column labels by storage dtype: `object` columns versus
# int64/float64 columns.
object_columns = df.select_dtypes(include=['object']).columns
numerical_columns = df.select_dtypes(include=['int64', 'float64']).columns

# Each heading is followed by the corresponding Index on its own line.
print("Object type columns:", object_columns, sep="\n")
print("\nNumerical type columns:", numerical_columns, sep="\n")
def classify_features(df, max_unique=10):
    """Group the columns of *df* by dtype and cardinality.

    NOTE(review): the original body was truncated after the four list
    initialisations. The classification rule below was reconstructed from the
    outputs further down this file (numeric columns with fewer than 10
    distinct values are listed as discrete; Participant_ID is absent from the
    categorical listing) -- confirm against the original notebook.

    Args:
        df: DataFrame whose columns are to be classified.
        max_unique: A column with fewer than this many distinct values is
            treated as categorical (text columns) or discrete (numeric
            columns).

    Returns:
        A 4-tuple of lists of column names, in this order:
        ``(categorical, non_categorical, discrete, continuous)``.
        Columns that are neither text/object nor numeric are left out.
    """
    categorical_features = []
    non_categorical_features = []
    discrete_features = []
    continuous_features = []

    for column in df.columns:
        series = df[column]
        low_cardinality = series.nunique() < max_unique

        if pd.api.types.is_numeric_dtype(series):
            if low_cardinality:
                discrete_features.append(column)
            else:
                continuous_features.append(column)
        elif (pd.api.types.is_object_dtype(series)
              or pd.api.types.is_string_dtype(series)):
            if low_cardinality:
                categorical_features.append(column)
            else:
                # High-cardinality text such as identifiers.
                non_categorical_features.append(column)

    return (categorical_features, non_categorical_features,
            discrete_features, continuous_features)
# Histogram (20 bins) with a KDE overlay for every continuous feature.
# `palette` was dropped: seaborn ignores it when no `hue` is assigned.
for col in continuous:
    plt.figure(figsize=(15, 6))
    sns.histplot(df[col], bins=20, kde=True)
    plt.xticks(rotation=90)
    plt.show()
# Density-scaled histogram with a KDE curve for every continuous feature.
# `sns.distplot` is deprecated; `histplot(stat="density", kde=True)` with
# `kde_kws={"cut": 3}` is the replacement recommended by seaborn.
for col in continuous:
    plt.figure(figsize=(15, 6))
    sns.histplot(df[col], bins=20, kde=True, stat="density",
                 kde_kws={"cut": 3})
    plt.xticks(rotation=90)
    plt.show()
# Horizontal box plot for every continuous feature, to eyeball spread and
# outliers.
for col in continuous:
    plt.figure(figsize=(15, 6))
    sns.boxplot(x=col, data=df, palette='hls')
    plt.xticks(rotation=90)
    plt.show()
# Print the distinct values of every discrete feature, one feature per
# paragraph.
for col in discrete:
    print(col)
    print(df[col].unique())
    print()
Caffeine_Intake
[4 0 1 2 3]
Weekly_Self_Care_Activities
[1 2 4 3 6 0 5]
# Print the frequency of each value for every discrete feature.
for col in discrete:
    print(col)
    print(df[col].value_counts())
    print()
Caffeine_Intake
Caffeine_Intake
0 1037
1 1007
4 994
3 982
2 980
Name: count, dtype: int64
Weekly_Self_Care_Activities
Weekly_Self_Care_Activities
5 755
4 727
3 723
1 708
6 705
0 696
2 686
Name: count, dtype: int64
# Count plot for every discrete feature, with each bar labelled by its height.
for col in discrete:
    plt.figure(figsize=(15, 6))
    ax = sns.countplot(x=col, data=df, palette='hls')
    for p in ax.patches:
        height = p.get_height()
        # Place the label 10 points above the centre of the bar's top edge.
        ax.annotate(f'{height}',
                    xy=(p.get_x() + p.get_width() / 2., height),
                    xytext=(0, 10),
                    textcoords='offset points',
                    ha='center', va='center')
    plt.show()
import plotly.express as px
# Interactive pie chart of the value distribution of every discrete feature.
for col in discrete:
    counts = df[col].value_counts()
    fig = px.pie(counts, values=counts.values, names=counts.index,
                 title=f'Distribution of {col}')
    fig.show()
# Print the distinct labels of every categorical feature, one feature per
# paragraph.
for col in categorical:
    print(col)
    print(df[col].unique())
    print()
Relationship_Role
['Wife' 'Girlfriend']
Time_of_Day
['Morning' 'Evening' 'Night' 'Afternoon']
Menstrual_Phase
['Ovulation' 'Follicular' 'Menstrual' 'Luteal']
PMS_Symptoms
['Yes' 'No']
Recent_Activity
['Socializing' 'Reading' 'Exercising' 'Working' 'Watching TV'
'Resting']
Recent_Argument
['Yes' 'No']
Physical_Activity_Level
['Sedentary' 'Light' 'Intense' 'Moderate']
Hormonal_Birth_Control
['No' 'Yes']
Health_Conditions
['Not Available' 'PCOS' 'Thyroid']
Use_of_Therapy
['No' 'Yes']
Weather
['Cloudy' 'Stormy' 'Snowy' 'Sunny' 'Rainy']
Partner_Presence
['No' 'Yes']
Mood_Tracking_App_Usage
['No' 'Yes']
# Print the frequency of each label for every categorical feature.
for col in categorical:
    print(col)
    print(df[col].value_counts())
    print()
Relationship_Role
Relationship_Role
Wife 2544
Girlfriend 2456
Name: count, dtype: int64
Time_of_Day
Time_of_Day
Evening 1283
Night 1283
Morning 1221
Afternoon 1213
Name: count, dtype: int64
Menstrual_Phase
Menstrual_Phase
Menstrual 1284
Luteal 1274
Follicular 1227
Ovulation 1215
Name: count, dtype: int64
PMS_Symptoms
PMS_Symptoms
No 2529
Yes 2471
Name: count, dtype: int64
Recent_Activity
Recent_Activity
Working 876
Resting 859
Watching TV 853
Reading 828
Exercising 825
Socializing 759
Name: count, dtype: int64
Recent_Argument
Recent_Argument
Yes 2519
No 2481
Name: count, dtype: int64
Physical_Activity_Level
Physical_Activity_Level
Sedentary 1282
Light 1263
Moderate 1229
Intense 1226
Name: count, dtype: int64
Hormonal_Birth_Control
Hormonal_Birth_Control
No 2518
Yes 2482
Name: count, dtype: int64
Health_Conditions
Health_Conditions
PCOS 1694
Not Available 1662
Thyroid 1644
Name: count, dtype: int64
Use_of_Therapy
Use_of_Therapy
Yes 2517
No 2483
Name: count, dtype: int64
Weather
Weather
Stormy 1016
Rainy 1011
Cloudy 1002
Sunny 998
Snowy 973
Name: count, dtype: int64
Partner_Presence
Partner_Presence
Yes 2547
No 2453
Name: count, dtype: int64
Mood_Tracking_App_Usage
Mood_Tracking_App_Usage
Yes 2503
No 2497
Name: count, dtype: int64
# Count plot for every categorical feature, with each bar labelled by its
# height.
for col in categorical:
    plt.figure(figsize=(15, 6))
    ax = sns.countplot(x=col, data=df, palette='hls')
    for p in ax.patches:
        height = p.get_height()
        # Place the label 10 points above the centre of the bar's top edge.
        ax.annotate(f'{height}',
                    xy=(p.get_x() + p.get_width() / 2., height),
                    xytext=(0, 10),
                    textcoords='offset points',
                    ha='center', va='center')
    plt.show()
# Interactive pie chart of the label distribution of every categorical
# feature.
for col in categorical:
    counts = df[col].value_counts()
    fig = px.pie(counts, values=counts.values, names=counts.index,
                 title=f'Distribution of {col}')
    fig.show()
# Use seaborn's "whitegrid" style for the plots that follow.
sns.set(style="whitegrid")
# NOTE(review): lineplot_with_hue is not defined anywhere in the visible part
# of this file; presumably its defining cell was lost in the export -- confirm
# before running.
lineplot_with_hue('Sleep_Duration', 'Mood_Level',
'Hormonal_Birth_Control')
def barplot_with_hue(x, y, hue):
    """Draw a grouped bar plot of column *y* against column *x*, split by *hue*.

    All three arguments are column names looked up in the module-level ``df``.
    Bars show seaborn's default aggregate of *y* (the mean) and are drawn
    without error bars.

    Args:
        x: Column placed on the x axis.
        y: Column aggregated into the bar heights.
        hue: Column used to split each x group into coloured bars.
    """
    plt.figure(figsize=(12, 6))
    # `errorbar=None` replaces the deprecated `ci=None` (seaborn >= 0.12).
    sns.barplot(x=x, y=y, hue=hue, data=df, errorbar=None)
    plt.title(f'Bar Plot of {y} by {x} with Hue: {hue}')
    plt.xlabel(x)
    plt.ylabel(y)
    plt.legend(title=hue)
    plt.show()
# Mood level per relationship role, split by PMS symptoms.
barplot_with_hue('Relationship_Role', 'Mood_Level', 'PMS_Symptoms')

# Number of participants with / without PMS symptoms, split by relationship
# role.
plt.figure(figsize=(12, 6))
pms_ax = sns.countplot(data=df, x='PMS_Symptoms', hue='Relationship_Role')
pms_ax.set_title('Count of PMS Symptoms by Relationship Role')
pms_ax.set_xlabel('PMS Symptoms')
pms_ax.set_ylabel('Count')
pms_ax.legend(title='Relationship Role')
plt.show()
# Cross-tabulate therapy use (rows) against health condition (columns).
contingency_table = pd.crosstab(
    index=df['Use_of_Therapy'],
    columns=df['Health_Conditions'],
)
print(
    "Contingency Table for Use of Therapy and Health Conditions:\n",
    contingency_table,
)
# Pairwise correlations between mood level and a selection of lifestyle
# measures, drawn as an annotated heatmap.
heatmap_columns = [
    'Mood_Level',
    'Stress_Level',
    'Sleep_Quality',
    'Sleep_Duration',
    'Caffeine_Intake',
    'Alcohol_Intake',
    'Social_Interaction_Level',
]
correlation_matrix = df[heatmap_columns].corr()

plt.figure(figsize=(14, 10))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f')
plt.title('Heatmap of Correlation Matrix')
plt.show()
ax.set_xlabel(x)
ax.set_ylabel(y)
ax.set_zlabel(z)
ax.set_title(f'3D Surface Plot of {z} based on {x} and {y}')
plt.show()
# Mean mood level for each relationship role (rows) at each time of day
# (columns); combinations with no data are filled with 0.
pivot_mood_relationship_time = pd.pivot_table(
    df,
    index='Relationship_Role',
    columns='Time_of_Day',
    values='Mood_Level',
    aggfunc='mean',
    fill_value=0,
)
print("Average Mood Level by Relationship Role and Time of Day:")
pivot_mood_relationship_time
# Count entries for each Menstrual_Phase (rows) / PMS_Symptoms (columns)
# combination; combinations with no data are filled with 0.
# NOTE(review): no `values=` is passed, so the count is repeated for every
# remaining column (the output below reports 60 result columns). If only a
# participant count is wanted, a single `values=` column or `pd.crosstab`
# would presumably be enough -- confirm intent.
pivot_pms_count = df.pivot_table(
index='Menstrual_Phase',
columns='PMS_Symptoms',
aggfunc='count',
fill_value=0
)
Weekly_Self_Care_Activities
PMS_Symptoms No Yes
Menstrual_Phase
Follicular 615 612
Luteal 666 608
Menstrual 641 643
Ovulation 607 608
[4 rows x 60 columns]
# Mean stress level for each physical-activity level (rows), split by
# hormonal birth control use (columns); empty combinations are filled with 0.
pivot_stress_activity_hbc = pd.pivot_table(
    df,
    index='Physical_Activity_Level',
    columns='Hormonal_Birth_Control',
    values='Stress_Level',
    aggfunc='mean',
    fill_value=0,
)
Hormonal_Birth_Control No Yes
Physical_Activity_Level
Intense 5.381180 5.479132
Light 5.559375 5.382022
Moderate 5.590323 5.586207
Sedentary 5.477021 5.400922
# Mean sleep quality for each time of day (rows), split by the most recent
# activity (columns); empty combinations are filled with 0.
pivot_sleep_quality = pd.pivot_table(
    df,
    index='Time_of_Day',
    columns='Recent_Activity',
    values='Sleep_Quality',
    aggfunc='mean',
    fill_value=0,
)
print("\nAverage Sleep Quality by Time of Day and Recent Activity:")
pivot_sleep_quality
Recent_Activity Working
Time_of_Day
Afternoon 5.435644
Evening 5.340517
Morning 5.239437
Night 5.641921
# Total alcohol intake summed over all participants in each menstrual phase.
pivot_alcohol_intake = pd.pivot_table(
    df,
    index='Menstrual_Phase',
    values='Alcohol_Intake',
    aggfunc='sum',
    fill_value=0,
)
Alcohol_Intake
Menstrual_Phase
Follicular 5316
Luteal 5693
Menstrual 5694
Ovulation 5563
import statsmodels.api as sm
======================================================================
========
Dep. Variable: Mood_Level R-squared:
0.000
Model: OLS Adj. R-squared:
-0.000
Method: Least Squares F-statistic:
0.4977
Date: Tue, 15 Oct 2024 Prob (F-statistic):
0.737
Time: 15:58:07 Log-Likelihood:
-12338.
No. Observations: 5000 AIC:
2.469e+04
Df Residuals: 4995 BIC:
2.472e+04
Df Model: 4
======================================================================
===========
coef std err t P>|t| [0.025
0.975]
----------------------------------------------------------------------
-----------
const 5.4313 0.203 26.692 0.000 5.032
5.830
Age 0.0039 0.004 0.885 0.376 -0.005
0.012
Stress_Level -0.0141 0.014 -1.011 0.312 -0.042
0.013
Sleep_Quality -0.0041 0.014 -0.293 0.770 -0.032
0.023
Diet_Quality 0.0048 0.014 0.335 0.738 -0.023
0.033
======================================================================
========
Omnibus: 5041.377 Durbin-Watson:
1.977
Prob(Omnibus): 0.000 Jarque-Bera (JB):
309.710
Skew: 0.030 Prob(JB):
5.59e-68
Kurtosis: 1.782 Cond. No.
182.
======================================================================
========
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is
correctly specified.
# Drop the participant identifier column before the correlation analysis.
df = df.drop(['Participant_ID'], axis=1)

# Correlation of every numeric feature with Mood_Level. The target's own
# entry is dropped and the rest is sorted from most positive to most
# negative. (The indexing and the assignment below had been split across
# lines by the export, which broke both statements.)
numerical_df = df.select_dtypes(include=[np.number])
correlation_with_target = numerical_df.corr()['Mood_Level'].drop('Mood_Level')
correlation_sorted = correlation_with_target.sort_values(ascending=False)

plt.figure(figsize=(10, 6))
sns.barplot(x=correlation_sorted.index, y=correlation_sorted.values,
            palette='coolwarm')
plt.title('Correlation of Numerical Features with Mood Level')
plt.xlabel('Numerical Features')
plt.ylabel('Correlation Coefficient')
plt.xticks(rotation=90)
# Dashed zero line separates positive from negative correlations.
plt.axhline(0, color='black', linewidth=0.8, linestyle='--')
plt.show()
correlation_sorted
Social_Interaction_Level 0.027553
Temperature 0.024572
Age 0.012562
Perceived_Financial_Stress 0.007309
Mental_Health_Status 0.004247
Diet_Quality 0.004230
Caffeine_Intake 0.002617
Relationship_Duration -0.001332
Sleep_Quality -0.003942
Conflict_Intensity -0.003963
Sleep_Duration -0.006298
Self_Esteem_Level -0.008760
Stress_Level -0.014308
Positive_Interaction_Frequency -0.014825
Partner_Support_Level -0.015594
Weekly_Self_Care_Activities -0.020878
Alcohol_Intake -0.024365
Name: Mood_Level, dtype: float64
# Report the outcome of the wife-vs-girlfriend comparison at the 5% level.
# NOTE(review): `p_value` is not computed anywhere in the visible part of this
# file; presumably the cell running the statistical test was lost in the
# export -- confirm before running.
alpha = 0.05
if p_value < alpha:
    print("There is a significant difference in mood swings between wife and girlfriend.")
else:
    print("No significant difference in mood swings between wife and girlfriend.")
Thanks!