Python Codes
Python Codes
Python Codes
LIST
# create a list
my_list = [1, 2, 3, "four", "five"]
# accessing elements (indexing starts at 0)
print(my_list[0]) # 1
print(my_list[3]) # "four"
# append element (adds to the end of the list)
my_list.append("six")
# insert element (placed at index 2; later items shift right)
my_list.insert(2, "two")
# replace element
my_list[4] = 5
# delete element
del my_list[1]
TUPLE
# build a tuple (the commas make the tuple; parentheses are optional)
my_tuple = 1, 2, 3, "four", "five"
# read items by zero-based position
print(my_tuple[0], my_tuple[3], sep="\n") # 1, then "four"
STRING
# define the string
my_string = "hello world"
# index individual characters by zero-based position
print(my_string[0], my_string[6], sep="\n") # "h", then "w"
# swap one substring for another (replace returns a new string, so rebind the name)
my_string = my_string.replace("world", "python")
DICTIONARY
# build the dictionary from keyword arguments
my_dict = dict(name="John", age=30, city="New York")
# look up values by key
print(my_dict["name"], my_dict["age"], sep="\n") # "John", then 30
# insert a new key/value pair
my_dict.update(country="USA")
# remove a key
my_dict.pop("city")
# create a series with string labels as its index
import pandas as pd
my_series = pd.Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"])
# accessing elements (by index label)
print(my_series["a"]) # 1
print(my_series["d"]) # 4
# add element (assigning to a new label enlarges the series)
my_series["f"] = 6
# delete element (drop returns a new series, so rebind the name)
my_series = my_series.drop("b")
DATAFRAMES
import pandas as pd
# sample data (illustrative values): one list of values per column
my_data = {
    "name": ["John", "Alice", "Bob"],
    "age": [30, 25, 35],
    "country": ["USA", "Canada", "UK"],
}
df = pd.DataFrame(my_data)
# accessing rows
print(df.loc[0]) # first row
# accessing columns
print(df["name"]) # name column
# add row
new_row = {"name": "Mary", "age": 28, "country": "Australia"}
# DataFrame.append was removed in pandas 2.0; concatenate a one-row frame instead
df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
# delete row (drops the row whose index label is 1)
df = df.drop(1)
# read a csv file into a data frame
df = pd.read_csv("my_data.csv")
DATA CLEANING
import pandas as pd
# drop duplicate rows, then rename the "name" column to "full_name"
df = df.drop_duplicates().rename(columns={"name": "full_name"})
DATA MANIPULATION
import pandas as pd
# load both csv files, one data frame per file
df1, df2 = map(pd.read_csv, ("my_data1.csv", "my_data2.csv"))
FREQUENCY TABLE
import pandas as pd
CROSS TABLE
import pandas as pd
DESCRIPTIVE STATISTICS
import pandas as pd
DATA VISUALIZATION
import matplotlib.pyplot as plt
import pandas as pd
df = pd.read_csv("my_data.csv")
# create histogram of the "age" column, split into 10 bins
plt.hist(df["age"], bins=10)
plt.xlabel("Age")
plt.ylabel("Frequency")
plt.show()
Example question:
1. Construct a data frame from a dictionary with the default
python index.
```python
import pandas as pd
# each key is a column name; each list holds that column's values
data = {
    'name': ['John', 'Alice', 'Bob', 'Jane'],
    'age': [32, 25, 45, 19],
    'gender': ['M', 'F', 'M', 'F'],
}
# no index is passed, so pandas assigns the default 0..n-1 index
df = pd.DataFrame(data)
print(df)
```
Output:
```
name age gender
0 John 32 M
1 Alice 25 F
2 Bob 45 M
3 Jane 19 F
```
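2. Construct a series from a dictionary, using the dictionary keys as the
index instead of the default python index.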
```python
# names map to ages; the dictionary keys become the index labels of the series
data = dict(John=32, Alice=25, Bob=45, Jane=19)
s = pd.Series(data)
print(s)
```
Output:
```
John 32
Alice 25
Bob 45
Jane 19
dtype: int64
```
print(df)
```
Output:
```
name age gender
a John 32 M
b Alice 25 F
c Bob 45 M
d Jane 19 F
```
```python
# load cars.csv into a data frame
df = pd.read_csv('cars.csv')
```
print(missing_values_count)
```
Output:
```
car_ID 0
symboling 0
CarName 0
fueltype 0
aspiration 0
doornumber 0
carbody 0
drivewheel 0
enginelocation 1
wheelbase 0
carlength 0
carwidth 0
carheight 0
curbweight 0
enginetype 0
cylindernumber 0
enginesize 0
fuelsystem 0
boreratio 0
stroke 0
compressionratio 0
horsepower 0
peakrpm 0
citympg 0
highwaympg 0
price 0
dtype: int64
```
7. If you find any missing values for numerical
variables, replace with its mean.
```python
# fill missing values in numeric columns with that column's mean;
# numeric_only=True skips text columns, whose mean pandas >= 2.0 refuses to compute
df.fillna(df.mean(numeric_only=True), inplace=True)
```
print(frequency_table)
```
Output:
```
sedan 96
hatchback
# 'df' is assumed to be the data frame holding the 'carbody' and 'enginelocation' columns
# rows: car body type; columns: engine location; each row is normalized to sum to 1
cross_tab = pd.crosstab(
    index=df['carbody'],
    columns=df['enginelocation'],
    normalize='index',
)
print(cross_tab)
```
This will give us a table with the proportion (a fraction between 0 and 1; multiply by 100 for a percentage) of each engine location for each car body type.
```python
# keep only the sedan rows
sedan_df = df[df['carbody'] == 'sedan']
# average price of the sedans for each drive wheel type
rwd_mean_price = sedan_df[sedan_df['drivewheel'] == 'rwd']['price'].mean()
fwd_mean_price = sedan_df[sedan_df['drivewheel'] == 'fwd']['price'].mean()
print("Average price for sedan cars with rwd drive wheel:", rwd_mean_price)
print("Average price for sedan cars with fwd drive wheel:", fwd_mean_price)
```
# group the rows by car body type, then summarize every group
grouped_by_carbody = df.groupby('carbody')
description_by_carbody = grouped_by_carbody.describe()
print(description_by_carbody)
```
This will give us the summary statistics for each column, grouped by car body type.
```python
import matplotlib.pyplot as plt

# tally the cars per engine location and draw the tallies as a bar chart
engine_loc_counts = df['enginelocation'].value_counts()
engine_loc_counts.plot.bar(title='Engine Location')
plt.xlabel('Location')
plt.ylabel('Count')
plt.show()
```
This will give us a bar chart with the count of each engine location.
This will give us a boxplot of the price column grouped by cylinder number.
# Access the second element of tuple1, which is a list, and then access the
# second element of the list
print(tuple1[1][1])
```
Output:
```
20
```
element2 = tuple2[4]
print(element1, element2)
```
Output:
```
44 55
```
print(array)
```
Output:
```
[[100 110]
[120 130]
[140 150]
[160 170]
[180 190]]
```
Array1 = np.array([[10, 20, 30], [40, 50, 60], [70, 80, 90]])
# select every row (:) of the column at index 2, i.e. the third column
third_column = Array1[:, 2]
print(third_column)
```
Output:
```
[30 60 90]
```
Array2 = np.array([[34, 43, 73], [82, 22, 12], [53, 94, 66]])
# remove the column at index 1 (the second column); axis=1 selects columns
new_array = np.delete(Array2, 1, axis=1)
print(new_array)
Output:
```
[[34 73]
[82 12]
[53 66]]
```
print(concatenated_array)
```
Output:
```
[[1 2]
[3 4]
[5 6]
[7 8]]
```
print(List1)
```
Output:
```
[10, 20, 72, 30, 40, 65]
```