DA lab
DA lab
output:
A B C D
0 1.0 NaN 1.0 1
1 2.0 2.0 2.0 2
2 NaN 3.0 3.0 3
3 4.0 4.0 NaN 4
4 5.0 5.0 5.0 5
output:
A B C D
1 2.0 2.0 2.0 2
4 5.0 5.0 5.0 5
import pandas as pd
# Creating a DataFrame
# NOTE(review): `data` is not defined anywhere in this excerpt — presumably a
# dict with "ID" and "Name" columns, judging by the printed output that
# follows; confirm against the full lab script.
df = pd.DataFrame(data)
Output:
Original Dataset:
ID Name
0 1 Alice
1 2 Bob
2 3 Charlie
3 4 David
4 1 Alice
5 3 Charlie
6 5 Eve
Duplicate Records:
ID Name
4 1 Alice
5 3 Charlie
import pandas as pd
import numpy as np
from scipy import stats
# Wrap the raw values in a DataFrame
df = pd.DataFrame(data)
# Report the dataset first, then the z-scores, each under its own heading
print("Dataset with Noise:", df, sep="\n")
print("\nZ-Scores:", z_scores, sep="\n")
OUTPUT:
Z-Scores:
0 0.412811
1 0.381402
2 0.397107
3 0.365697
4 0.349992
5 2.445459
6 0.538450
import pandas as pd
import numpy as np
# Build the DataFrame from `data`.
# NOTE(review): `data` is not defined in this excerpt — confirm where it comes from.
df = pd.DataFrame(data)
OUTPUT:
A B C D
0 1.0 3.5 1.0 1
1 2.0 2.0 2.0 2
2 3.0 3.0 3.0 3
3 4.0 4.0 1.0 4
4 5.0 5.0 5.0 5
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import numpy as np
# Column labels, named once so the table and the axis titles cannot drift apart
area_label, price_label = "Area (sq ft)", "Price ($)"

# Given data: house areas and the matching sale prices
data = {
    area_label: [2600, 3000, 3200, 3600, 4000],
    price_label: [550000, 565000, 610000, 630000, 725000],
}

# Create DataFrame and echo it under a heading
df = pd.DataFrame(data)
print("Data:", df, sep="\n")

# Scatter plot of price against area
plt.scatter(df[area_label], df[price_label], color="blue", label="Actual Data")
plt.xlabel(area_label)
plt.ylabel(price_label)
plt.title("House Prices vs. Area")
output:
Data:
Area (sq ft) Price ($)
0 2600 550000
1 3000 565000
2 3200 610000
3 3600 630000
4 4000 725000
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
# Train Logistic Regression Model
# X_train / y_train are assumed to hold hours studied and pass/fail labels (0 or 1).
# fit() hands back the estimator itself, so construction and training chain together.
model = LogisticRegression().fit(X_train, y_train)
output:
Predicted Class (Pass=1, Fail=0): 1
Probability of Passing: 0.5019075444554663
Implement an ARIMA model on time series data
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
# Build the frame and use the "Day" column as its index
df = pd.DataFrame(data).set_index("Day")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Convert to DataFrame
df = pd.DataFrame(data)

# Grouping by DiseaseStatus: mean of every numeric column per group
print("\nMean Values by Disease Status:")
by_status = df.groupby("DiseaseStatus")
print(by_status.mean(numeric_only=True))

# 1. Age Distribution: histogram with a KDE overlay.
# histplot returns the Axes it drew on, so the labels are set on that object.
plt.figure(figsize=(6, 4))
age_axes = sns.histplot(df["Age"], bins=10, kde=True, color="blue")
age_axes.set_title("Age Distribution")
age_axes.set_xlabel("Age")
age_axes.set_ylabel("Frequency")
plt.show()
Output:
Descriptive Statistics:
PatientID Age BloodPressure Glucose
count 4.000000 4.000000 4.00000 4.000000
mean 102.500000 53.250000 122.50000 93.333333
std 1.290994 6.184658 10.40833 6.236096
min 101.000000 45.000000 110.00000 85.000000
25% 101.750000 51.750000 117.50000 91.250000
50% 102.500000 54.000000 122.50000 94.166667
75% 103.250000 55.500000 127.50000 96.250000
max 104.000000 60.000000 135.00000 100.000000
import pandas as pd
import numpy as np
# Sales figures: units sold and the revenue recorded for each observation.
# Both lists must stay the same length so they line up row by row.
data = {
    "Quantity Sold": [50, 65, 80, 95, 110, 130, 150, 160, 175, 190, 210, 230],
    "Revenue": [
        5000, 6500, 8000, 9500, 11000, 13000,
        15000, 16000, 17500, 19000, 21000, 23000,
    ],
}
# Convert to DataFrame
df = pd.DataFrame(data)
# Feature matrix (kept 2-D via the double brackets) and target series.
# NOTE(review): `data` as shown in this excerpt has no "Month" key, so this
# lookup would raise KeyError — confirm where the column is added.
X = df[["Month"]]
y = df["Quantity Sold"]
# NOTE(review): LinearRegression is not imported in this section, and
# X_train / y_train are not defined here — presumably they come from a
# train/test split of X and y that is missing from the excerpt; confirm.
model = LinearRegression()
model.fit(X_train, y_train)
# Display Predictions
# NOTE(review): no plotting call with a label is visible before legend(), and
# `plt` is not imported in this section — the plot commands appear to be
# missing from the excerpt; confirm.
plt.figure(figsize=(8, 5))
plt.xlabel("Month")
plt.ylabel("Quantity Sold")
plt.legend()
plt.grid(True)
plt.show()
output:
Apply predictive analytics on weather forecasting
import pandas as pd
import numpy as np
from prophet import Prophet
# Create DataFrame with the two columns the forecast code reads:
# 'ds' = Date, 'y' = Target Variable (Temperature).
# The explanation lives on comment lines only, so no part of it can wrap
# onto a line of its own and be executed as code.
# NOTE(review): `days`, `temperature`, `model` and `future` are not defined in
# this excerpt — presumably created by earlier lines of the script; confirm.
df = pd.DataFrame({'ds': days, 'y': temperature})
# Predict
forecast = model.predict(future)
# Print results
print(forecast[['ds', 'yhat']].tail(10))  # Show last 10 predictions
# Plot results
model.plot(forecast)
output:
ds yhat
120 2024-04-30 14.443973
121 2024-05-01 13.656203
122 2024-05-02 18.774803
123 2024-05-03 19.789734
124 2024-05-04 14.318090
125 2024-05-05 18.549194
126 2024-05-06 11.512856
127 2024-05-07 14.114318
128 2024-05-08 13.326549
129 2024-05-09 18.445149
Create a simple dataset in Python and perform visualization techniques such as types of
maps (bars, column, line, scatter, 3D cubes)
# Sample data: five categories with two value series
categories = ['A', 'B', 'C', 'D', 'E']
values1 = [10, 25, 35, 20, 15]
values2 = [30, 40, 20, 10, 25]

# Bar Chart
_, bar_axes = plt.subplots()
bar_axes.bar(categories, values1, color='blue')
bar_axes.set_title('Bar Chart')
plt.show()

# Line Chart: both series on one set of axes, told apart by marker and dash style
_, line_axes = plt.subplots()
line_axes.plot(categories, values1, marker='o', linestyle='-', color='red', label='Value1')
line_axes.plot(categories, values2, marker='s', linestyle='--', color='blue', label='Value2')
line_axes.set_title('Line Chart')
line_axes.legend()
plt.show()

# Scatter Plot of the first series against the second
_, scatter_axes = plt.subplots()
scatter_axes.scatter(values1, values2, color='purple')
scatter_axes.set_title('Scatter Plot')
scatter_axes.set_xlabel('Value1')
scatter_axes.set_ylabel('Value2')
plt.show()

# 3D Scatter Plot: the category position (0..4) supplies the third coordinate
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
category_positions = np.arange(len(categories))
ax.scatter(values1, values2, category_positions, color='brown')
ax.set_title('3D Scatter Plot')
plt.show()
Output:
Implement Decision Tree Induction for classification.
import pandas as pd
from sklearn.tree import DecisionTreeClassifier, plot_tree
import matplotlib.pyplot as plt
# Simple dataset, written one observation per row: (weather, temperature, play)
_columns = ("Weather", "Temperature", "Play")
_observations = [
    ("Sunny", "Hot", "No"),
    ("Rainy", "Cool", "Yes"),
    ("Overcast", "Mild", "Yes"),
]
# Pivot the rows into a column-name -> list-of-values mapping
data = {name: list(values) for name, values in zip(_columns, zip(*_observations))}
# Convert to DataFrame
df = pd.DataFrame(data)
output:
Will they play outside? No
# Dataset, written one observation per row: (weather, temperature, play)
_columns = ("Weather", "Temperature", "Play")
_observations = [
    ("Sunny", "Hot", "No"),
    ("Rainy", "Cool", "Yes"),
    ("Overcast", "Mild", "Yes"),
]
# Pivot the rows into a column-name -> list-of-values mapping
data = {name: list(values) for name, values in zip(_columns, zip(*_observations))}
# Convert to DataFrame
df = pd.DataFrame(data)
Output:
Perform object segmentation using hierarchical methods.
import numpy as np
import matplotlib.pyplot as plt
from skimage.segmentation import felzenszwalb
from skimage.io import imread
from skimage.color import rgb2gray
# Load the image
image_path = r"C:\Users\VAAAG\Downloads\apple.jpg"  # Replace with your image path
image = imread(image_path)

# Draw the segment labels on the second panel with the 'nipy_spectral'
# colormap and hide its axes
segmented_panel = ax[1]
segmented_panel.imshow(segments, cmap='nipy_spectral')
segmented_panel.set_title("Segmented Image")
segmented_panel.axis("off")
plt.show()
Output: