Python for Data Science Practical Codes
Question 1.a:
# 1.a Creation of different types of NumPy arrays
import numpy as np
# Arrays of rank 1, 2 and 3 built directly from (nested) Python lists.
array_1d = np.array([1, 2, 3, 4, 5])
array_2d = np.array([[1, 2, 3], [4, 5, 6]])
array_3d = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
for label, values in (
    ("1D Array:", array_1d),
    ("2D Array:\n", array_2d),
    ("3D Array:\n", array_3d),
):
    print(label, values)

# Arrays produced by NumPy's constructor helpers instead of literal data.
zeros = np.zeros(shape=(2, 3))
ones = np.ones(shape=(3, 3))
arange_array = np.arange(10)
linspace_array = np.linspace(0, 1, num=5)
for label, values in (
    ("Zeros:\n", zeros),
    ("Ones:\n", ones),
    ("Arange:\n", arange_array),
    ("Linspace:\n", linspace_array),
):
    print(label, values)
Python for Data Science Practical Codes
Question 1.b:
# 1.b Bivariate Analysis on Diabetes Data using Logistic Regression
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
# Dataset location: point this at the right local path or URL before running.
url = 'diabetes.csv'
data = pd.read_csv(url)

# Bivariate analysis: seaborn pair plot coloured by the 'Outcome' column.
sns.pairplot(data, hue='Outcome')
plt.show()

# Logistic regression: 'Outcome' is the target, every other column is a feature.
y = data['Outcome']
X = data.drop(columns='Outcome')
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Report accuracy and the per-class metrics on the held-out rows.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
report = classification_report(y_test, y_pred)
print(report)
Python for Data Science Practical Codes
Question 2.a:
# 2.a Basic Arithmetic Operations with NumPy Arrays
import numpy as np
# Two equal-length operand vectors; each ufunc below works element by element.
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
print("Addition:", np.add(a, b))
print("Subtraction:", np.subtract(a, b))
print("Multiplication:", np.multiply(a, b))
print("Division:", np.true_divide(a, b))
# Square every element of a.
print("Power:", np.power(a, 2))
Python for Data Science Practical Codes
Question 2.b:
# 2.b Density and Contour Plots on Adult Dataset
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Dataset location: point this at the right local path or URL before running.
url = 'adult.csv'
# Load the file and discard every row that contains a missing value.
data = pd.read_csv(url).dropna()

# Swap the 'income' labels for their integer category codes.
income_categories = data['income'].astype('category')
data['income'] = income_categories.cat.codes

# One-dimensional density of 'age', drawn separately for each income code.
sns.kdeplot(data=data, x='age', hue='income', fill=True)
plt.title('Density Plot of Age by Income')
plt.show()

# Two-dimensional filled density contours; all remaining rows are used
# (no sub-sampling is performed here).
sns.kdeplot(data=data, x='age', y='hours-per-week', fill=True)
plt.title('Contour Plot of Age vs Hours-per-week')
plt.show()
Python for Data Science Practical Codes
Question 3.a:
# 3.a Creation of an Array using Built-In NumPy Functions
import numpy as np
# Constant-filled, identity and uniformly random arrays from NumPy helpers.
zeros_array = np.zeros(shape=(2, 2))
ones_array = np.ones(shape=(3, 3))
identity_matrix = np.eye(4)
# Unseeded, so the values differ from run to run.
random_array = np.random.rand(2, 3)

for heading, values in (
    ("Zeros Array:\n", zeros_array),
    ("Ones Array:\n", ones_array),
    ("Identity Matrix:\n", identity_matrix),
    ("Random Array:\n", random_array),
):
    print(heading, values)
Python for Data Science Practical Codes
Question 3.b:
# 3.b Descriptive Analytics with Pandas on Iris Dataset
import pandas as pd
# Dataset location: point this at the right local path or URL before running.
url = 'https://raw.githubusercontent.com/uiuc-cse/data-fa14/gh-pages/data/iris.csv'
data = pd.read_csv(url)

# Preview of the leading rows.
first_rows = data.head(5)
print("First 5 Rows:\n", first_rows)

# describe() summary of the frame.
summary = data.describe()
print("\nSummary Statistics:\n", summary)

# Number of rows for each distinct value in the 'species' column.
species_counts = data['species'].value_counts()
print("\nSpecies Count:\n", species_counts)
Python for Data Science Practical Codes
Question 4.a:
# 4.a Creation of a DataFrame from Dictionary
import pandas as pd
# Column-oriented input: each key becomes a column, each list its values.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
    City=['New York', 'Los Angeles', 'Chicago'],
)
df = pd.DataFrame(data=data)
print(df)
Python for Data Science Practical Codes
Question 4.b:
# 4.b Descriptive Analytics on Iris Dataset from scikit-learn
from sklearn.datasets import load_iris
import pandas as pd
# Load the bundled iris data and wrap the feature matrix in a DataFrame,
# appending the class labels as a trailing 'target' column.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(target=iris.target)

first_rows = df.head(5)
print("First 5 Rows:\n", first_rows)

summary = df.describe()
print("\nSummary Statistics:\n", summary)

# Number of rows for each distinct target label.
target_counts = df['target'].value_counts()
print("\nTarget Count:\n", target_counts)
Python for Data Science Practical Codes
Question 5.a:
# 5.a Creation of a DataFrame from N-Dimensional Arrays
import numpy as np
import pandas as pd
# A 2x3 array becomes a frame with two rows and three named columns.
array = np.array([[1, 2, 3], [4, 5, 6]])
df = pd.DataFrame(
    data=array,
    columns=[f'Column{position}' for position in range(1, 4)],
)
print(df)
Python for Data Science Practical Codes
Question 5.b:
# 5.b Univariate Statistical Analysis on Diabetes Data
import pandas as pd
# Dataset location: point this at the right local path or URL before running.
url = 'diabetes.csv'
data = pd.read_csv(url)

# describe() summary of the frame.
summary = data.describe()
print(summary)

# Number of rows for each distinct value in the 'Outcome' column.
outcome_counts = data['Outcome'].value_counts()
print("\nOutcome Counts:\n", outcome_counts)
Python for Data Science Practical Codes
Question 6.a:
# 6.a Bivariate Analysis on Diabetes Data using Linear Regression
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
# Dataset location: point this at the right local path or URL before running.
url = 'diabetes.csv'
data = pd.read_csv(url)

# Visual check of the chosen pair: BMI on the x-axis, Glucose on the y-axis.
sns.scatterplot(data=data, x='BMI', y='Glucose')
plt.title('BMI vs Glucose')
plt.show()

# Single-feature regression: the double brackets keep X two-dimensional
# (a one-column DataFrame), while y is a plain Series.
X = data[['BMI']]
y = data['Glucose']
model = LinearRegression()
model.fit(X, y)

slope = model.coef_
print("Coefficient:", slope)
offset = model.intercept_
print("Intercept:", offset)
Python for Data Science Practical Codes
Question 6.b:
# 6.b Creation of different types of NumPy arrays and displaying basic information
import numpy as np
# A small 2-D array whose metadata attributes are printed one per line.
a = np.array([[1, 2, 3], [4, 5, 6]])
print("Array:\n", a)
for label, attribute in (
    ("Shape:", "shape"),
    ("Data Type:", "dtype"),
    ("Size:", "size"),
    ("Dimension:", "ndim"),
):
    print(label, getattr(a, attribute))
Python for Data Science Practical Codes
Question 7.a:
# 7.a NumPy File Operations
import numpy as np
# Round trip: write the array to a .npy file in the working directory,
# then read it back from the same file.
array = np.array([1, 2, 3, 4, 5])
np.save(file='my_array.npy', arr=array)
loaded_array = np.load(file='my_array.npy')
print("Loaded Array:", loaded_array)
Python for Data Science Practical Codes
Question 7.b:
# 7.b Descriptive Analytics with Pandas on Iris Dataset (from path or web)
import pandas as pd
# Dataset location: point this at the right local path or URL before running.
url = 'https://raw.githubusercontent.com/uiuc-cse/data-fa14/gh-pages/data/iris.csv'
df = pd.read_csv(url)

# describe() summary of the frame.
summary = df.describe()
print(summary)

# Number of rows for each distinct value in the 'species' column.
species_counts = df['species'].value_counts()
print(species_counts)
Python for Data Science Practical Codes
Question 8.a:
# 8.a 3D Plotting on Adult Dataset
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# Dataset location: point this at the right local path or URL before running.
url = 'adult.csv'
df = pd.read_csv(url)

# The three columns that supply the x, y and z coordinates of each point.
ages = df['age']
weekly_hours = df['hours-per-week']
education_levels = df['education-num']

# A single 3-D axes filling the whole figure.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')
ax.scatter(ages, weekly_hours, education_levels, c='red')

ax.set_xlabel('Age')
ax.set_ylabel('Hours per Week')
ax.set_zlabel('Education Num')
plt.show()
Python for Data Science Practical Codes
Question 8.b:
# 8.b Creation of a DataFrame from Series
import pandas as pd
# Each named Series becomes one column; the Series name is the column label.
s1 = pd.Series(data=[1, 2, 3], name="A")
s2 = pd.Series(data=[4, 5, 6], name="B")
# Concatenate side by side (along the column axis) rather than stacking rows.
df = pd.concat((s1, s2), axis="columns")
print(df)
Python for Data Science Practical Codes
Question 9.a:
# 9.a Histograms on Adult Dataset
import pandas as pd
import matplotlib.pyplot as plt
# Dataset location: point this at the right local path or URL before running.
url = 'adult.csv'
df = pd.read_csv(url)

# Histogram of the 'age' column, split into 20 bins.
ages = df['age']
ages.hist(bins=20)

plt.title('Histogram of Age')
plt.xlabel('Age')
plt.ylabel('Frequency')
plt.show()
Python for Data Science Practical Codes
Question 9.b:
# 9.b NumPy Built-in Array Creation and Operations
import numpy as np
# The integers 1 through 5 (the stop value 6 is excluded).
array = np.arange(1, 6)
print("Array:", array)
print("Squared:", np.square(array))
print("Mean:", array.mean())
# NumPy's default std uses ddof=0, i.e. the population standard deviation.
print("Standard Deviation:", array.std())
Python for Data Science Practical Codes
Question 10.a:
# 10.a Univariate Statistical Analysis on Diabetes Data
import pandas as pd
# Dataset location: point this at the right local path or URL before running.
url = 'diabetes.csv'
df = pd.read_csv(url)

# describe() summary of the frame.
summary = df.describe()
print("Summary Statistics:\n", summary)

# Number of rows for each distinct value in the 'Outcome' column.
outcome_counts = df['Outcome'].value_counts()
print("Outcome Distribution:\n", outcome_counts)
Python for Data Science Practical Codes
Question 10.b:
# 10.b Array Creation using Built-in NumPy Functions
import numpy as np
# Five evenly spaced values from 1 to 10, both endpoints included.
a = np.linspace(1, 10, num=5)
# A 2x2 array in which every entry is 7.
b = np.full(shape=(2, 2), fill_value=7)
for label, values in (("Linspace Array:", a), ("Full Array:\n", b)):
    print(label, values)
Python for Data Science Practical Codes
Question 11.a:
# 11.a Normal Curves and Scatter Plots on UCI Dataset
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Dataset location: point this at the right local path or URL before running.
url = 'adult.csv'
df = pd.read_csv(url)

# Smoothed density of 'age'. Note that kdeplot draws a kernel density
# estimate of the data; it does not fit a normal distribution.
ages = df['age']
sns.kdeplot(ages, fill=True)
plt.title("Normal Curve of Age")
plt.show()

# Point cloud of age against weekly working hours.
sns.scatterplot(x='age', y='hours-per-week', data=df)
plt.title("Scatter Plot: Age vs Hours-per-week")
plt.show()
Python for Data Science Practical Codes
Question 11.b:
# 11.b NumPy Array Types and Info
import numpy as np
# A small 2-D array; its metadata is read once, then reported.
arr = np.array([[10, 20, 30], [40, 50, 60]])
arr_shape, arr_size = arr.shape, arr.size
arr_dtype, arr_ndim = arr.dtype, arr.ndim

print("Array:\n", arr)
print("Shape:", arr_shape)
print("Size:", arr_size)
print("Datatype:", arr_dtype)
print("Dimension:", arr_ndim)