import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
import mysql.connector
import time
import chardet
def detect_encoding(file_path):
    """Guess the character encoding of a file from its raw bytes.

    The file is opened in binary mode, read in full, and the bytes are
    passed to ``chardet.detect``.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        The value stored under the ``'encoding'`` key of chardet's result.
    """
    with open(file_path, 'rb') as raw_file:
        payload = raw_file.read()
    guess = chardet.detect(payload)
    return guess['encoding']
# Default location of the literacy-rate workbook read by load_data().
_DEFAULT_DATA_PATH = "C:\\Users\\user\\Documents\\literacy_rate.csv.xlsx"


def load_data(file_path=_DEFAULT_DATA_PATH):
    """Load the literacy-rate workbook into a DataFrame.

    This replaces two back-to-back definitions of ``load_data``.  The first
    one was dead code (immediately shadowed by the second) and passed an
    ``encoding=`` keyword to ``pd.read_excel``, which that function does not
    accept; only the working variant is kept.

    Args:
        file_path: Path of the Excel file to read.  Defaults to the
            location that was previously hard-coded.

    Returns:
        The DataFrame produced by ``pd.read_excel``.

    Raises:
        SystemExit: With exit status 1 when the file cannot be read.  The
            reason is printed to stderr first.
    """
    try:
        return pd.read_excel(file_path)
    except Exception as e:
        # Top-level boundary for a console tool: report the problem and stop
        # with a non-zero status so the failure is visible to the caller.
        print(f"Error loading data: {e}", file=sys.stderr)
        sys.exit(1)
def main_menu():
    """Print the top-level menu and return what the user typed.

    Returns:
        The string returned by ``input`` for the menu prompt; it is not
        validated or converted here.
    """
    menu_lines = (
        "\nMain Menu:",
        "1. Data Visualization",
        "2. Data Analysis",
        "3. Data Manipulation",
        "4. Exit",
    )
    print("\n".join(menu_lines))
    return input("Enter your choice (1/2/3/4): ")
def data_visualization(df):
    """Run the interactive chart menu until the user chooses to go back.

    Each pass prints the options, reads a choice, and draws the requested
    matplotlib chart (or prints an error for an unknown choice or year).

    Args:
        df: DataFrame with a 'State' column and one column per census year.
            The code indexes with string labels such as '2011'.
            NOTE(review): confirm the workbook headers load as strings
            rather than integers.

    Returns:
        None.  The function returns when option 5 is selected.
    """
    # Defined once, before the loop: option 4 needs this list as well, and it
    # used to exist only after option 2 had been chosen (UnboundLocalError).
    years = ['1951', '1961', '1971', '1981', '1991', '2001', '2011']
    year_prompt = (
        "Enter the year (1951, 1961, 1971, 1981, 1991, 2001, 2011): "
    )

    def is_plottable_year(label):
        # Only accept the advertised census years; a bare column-membership
        # check would also accept names such as 'State'.
        return label in years and label in df.columns

    while True:
        print("\nData Visualization Options:")
        print("1. Line chart of a particular year")
        print("2. Bar chart for different years")
        print("3. Histogram for a particular year")
        print("4. Line chart for average literacy rate year-wise")
        print("5. Back to main menu")
        choice = input("Enter your choice (1/2/3/4/5): ")

        if choice == '1':
            year = input(year_prompt)
            if is_plottable_year(year):
                plt.figure(figsize=(10, 6))
                plt.plot(df['State'], df[year], marker='o')
                plt.title(f"Literacy Rate in India States ({year})")
                plt.xlabel('State')
                plt.ylabel('Literacy Rate (%)')
                plt.xticks(rotation=90)
                plt.grid(True)
                plt.show()
            else:
                print("Invalid year. Please try again.")
        elif choice == '2':
            # Transpose so that years run along the x-axis and every state
            # becomes its own bar series.
            df_years = df[years].transpose()
            df_years.columns = df['State']
            df_years.plot(kind='bar', figsize=(10, 6))
            plt.title("Literacy Rate Comparison Across States (1951-2011)")
            plt.xlabel('Year')
            plt.ylabel('Literacy Rate (%)')
            plt.xticks(rotation=45)
            plt.show()
        elif choice == '3':
            year = input(year_prompt)
            if is_plottable_year(year):
                plt.figure(figsize=(10, 6))
                plt.hist(df[year], bins=20, edgecolor='black')
                plt.title(
                    f"Literacy Rate Distribution in India States ({year})"
                )
                plt.xlabel('Literacy Rate (%)')
                plt.ylabel('Frequency')
                plt.grid(True)
                plt.show()
            else:
                print("Invalid year. Please try again.")
        elif choice == '4':
            avg_literacy = df[years].mean()
            avg_literacy.plot(kind='line', marker='o', figsize=(10, 6))
            plt.title("Average Literacy Rate Year-wise")
            plt.xlabel('Year')
            plt.ylabel('Average Literacy Rate (%)')
            plt.xticks(rotation=45)
            plt.grid(True)
            plt.show()
        elif choice == '5':
            break
        else:
            print("Invalid choice. Please try again.")
# Function for Data Analysis
def data_analysis(df):
    """Print one summary statistic chosen from a three-item menu.

    The menu is shown once and a single choice is read; the matching result
    (or an error message for an unknown choice) is printed.

    Args:
        df: DataFrame providing the 'State', '1951', '2001' and '2011'
            columns that the three options read.

    Returns:
        None.
    """
    menu = (
        "\nData Analysis Options:",
        "1. State with highest literacy rate in 2011",
        "2. State with lowest literacy rate in 1951",
        "3. Average literacy rate across all states in 2001",
    )
    for entry in menu:
        print(entry)
    selection = input("Enter your choice (1/2/3): ")

    if selection == '1':
        rates_2011 = df['2011']
        top_state = df.loc[rates_2011.idxmax(), 'State']
        top_rate = rates_2011.max()
        print(
            f"The state with the highest literacy rate in 2011 is {top_state} "
            f"with a literacy rate of {top_rate}%."
        )
        return

    if selection == '2':
        rates_1951 = df['1951']
        bottom_state = df.loc[rates_1951.idxmin(), 'State']
        bottom_rate = rates_1951.min()
        print(
            f"The state with the lowest literacy rate in 1951 is {bottom_state} "
            f"with a literacy rate of {bottom_rate}%."
        )
        return

    if selection == '3':
        mean_2001 = df['2001'].mean()
        print(
            f"The average literacy rate across all states in 2001 is "
            f"{mean_2001:.2f}%."
        )
        return

    print("Invalid choice. Please try again.")
def data_manipulation(df):
    """Apply one user-selected change to the literacy DataFrame.

    The menu is shown once.  Option 1 adds a 1951-to-2011 difference column
    to ``df`` in place; option 2 appends a user-supplied state as a new row
    (building a new DataFrame); option 3 and unknown choices change nothing.

    Args:
        df: DataFrame with a 'State' column and the seven census-year
            columns '1951' ... '2011'.

    Returns:
        The DataFrame to keep using: the same object for options 1 and 3
        and for rejected input, or a new concatenated frame after a
        successful option 2.
    """
    year_columns = ['1951', '1961', '1971', '1981', '1991', '2001', '2011']

    print("\nData Manipulation Options:")
    print(
        "1. Add a new column for the difference in literacy rate "
        "from 1951 to 2011"
    )
    print("2. Add a new row for a hypothetical state")
    print("3. Back to main menu")
    choice = input("Enter your choice (1/2/3): ")

    if choice == '1':
        df['Literacy Difference (1951-2011)'] = df['2011'] - df['1951']
        print("\nLiteracy Difference Column added successfully.")
        print(df[['State', 'Literacy Difference (1951-2011)']].head())
    elif choice == '2':
        state = input("Enter the name of the new state: ")
        raw_rates = input(
            "Enter the literacy rates for 1951, 1961, 1971, 1981, 1991, "
            "2001, 2011 (separated by commas): "
        )
        # Bad input used to escape as an uncaught ValueError (from float()
        # or from the DataFrame constructor) and end the whole program.
        try:
            literacy_rates = [float(part) for part in raw_rates.split(',')]
        except ValueError:
            print(
                "Invalid literacy rates. Please enter numbers separated "
                "by commas."
            )
            return df
        if len(literacy_rates) != len(year_columns):
            print(
                f"Expected {len(year_columns)} literacy rates but got "
                f"{len(literacy_rates)}. No state was added."
            )
            return df

        new_row = pd.DataFrame([literacy_rates], columns=year_columns)
        new_row['State'] = state
        # concat aligns on column names, so any extra columns already in df
        # are simply left empty for the new row.
        df = pd.concat([df, new_row], ignore_index=True)
        print(f"\nNew state {state} added successfully.")
        print(df.tail())
    elif choice != '3':
        print("Invalid choice. Please try again.")
    return df
def main():
    """Load the data set and drive the top-level menu loop.

    The loop repeats until the user selects option 4, at which point a
    farewell message is printed, the program pauses for two seconds, and
    the process ends through ``sys.exit``.
    """
    df = load_data()
    while True:
        selection = main_menu()

        if selection == '4':
            print("Exiting the program.")
            time.sleep(2)
            sys.exit()

        if selection == '1':
            data_visualization(df)
        elif selection == '2':
            data_analysis(df)
        elif selection == '3':
            # Manipulation may hand back a new frame (e.g. after adding a
            # row), so keep whatever it returns.
            df = data_manipulation(df)
        else:
            print("Invalid choice. Please try again.")
# Start the interactive program only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()