Skip to content

Commit 860be30

Browse files
Coder2j authored and committed
feature/add-source-code
* added README for the repo
* added license
* added all the Jupyter notebooks used in the video
1 parent fa5f3ee commit 860be30

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+3360
-0
lines changed

.gitignore

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
share/python-wheels/
24+
*.egg-info/
25+
.installed.cfg
26+
*.egg
27+
MANIFEST
28+
29+
# PyInstaller
30+
# Usually these files are written by a python script from a template
31+
# before PyInstaller builds the exe, so as to inject the date and other information into it.
32+
*.manifest
33+
*.spec
34+
35+
# Installer logs
36+
pip-log.txt
37+
pip-delete-this-directory.txt
38+
39+
# Unit test / coverage reports
40+
htmlcov/
41+
.tox/
42+
.nox/
43+
.coverage
44+
.coverage.*
45+
.cache
46+
nosetests.xml
47+
coverage.xml
48+
*.cover
49+
*.py,cover
50+
.hypothesis/
51+
.pytest_cache/
52+
cover/
53+
54+
# Translations
55+
*.mo
56+
*.pot
57+
58+
# Django stuff:
59+
*.log
60+
local_settings.py
61+
db.sqlite3
62+
db.sqlite3-journal
63+
64+
# Flask stuff:
65+
instance/
66+
.webassets-cache
67+
68+
# Scrapy stuff:
69+
.scrapy
70+
71+
# Sphinx documentation
72+
docs/_build/
73+
74+
# PyBuilder
75+
.pybuilder/
76+
target/
77+
78+
# Jupyter Notebook
79+
.ipynb_checkpoints
80+
81+
# IPython
82+
profile_default/
83+
ipython_config.py
84+
85+
# pyenv
86+
# For a library or package, you might want to ignore these files since the code is
87+
# intended to run in multiple environments; otherwise, check them in:
88+
# .python-version
89+
90+
# pipenv
91+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92+
# However, when collaborating on a project with platform-specific dependencies or dependencies
93+
# that lack cross-platform support, pipenv may install dependencies that don't work, or fail to
94+
# install all needed dependencies.
95+
#Pipfile.lock
96+
97+
# poetry
98+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99+
# This is especially recommended for binary packages to ensure reproducibility, and is more
100+
# commonly ignored for libraries.
101+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102+
#poetry.lock
103+
104+
# pdm
105+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106+
#pdm.lock
107+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108+
# in version control.
109+
# https://pdm.fming.dev/#use-with-ide
110+
.pdm.toml
111+
112+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113+
__pypackages__/
114+
115+
# Celery stuff
116+
celerybeat-schedule
117+
celerybeat.pid
118+
119+
# SageMath parsed files
120+
*.sage.py
121+
122+
# Environments
123+
.env
124+
.venv
125+
env/
126+
venv/
127+
ENV/
128+
env.bak/
129+
venv.bak/
130+
131+
# Spyder project settings
132+
.spyderproject
133+
.spyproject
134+
135+
# Rope project settings
136+
.ropeproject
137+
138+
# mkdocs documentation
139+
/site
140+
141+
# mypy
142+
.mypy_cache/
143+
.dmypy.json
144+
dmypy.json
145+
146+
# Pyre type checker
147+
.pyre/
148+
149+
# pytype static type analyzer
150+
.pytype/
151+
152+
# Cython debug symbols
153+
cython_debug/
154+
155+
# PyCharm
156+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158+
# and can be added to the global gitignore or merged into this file. For a more nuclear
159+
# option (not recommended) you can uncomment the following to ignore the entire .idea folder.
160+
#.idea/

01-PySpark-Get-Started.ipynb

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"id": "fe9fd1c0-db30-47b1-bbe2-0b1cbd97a9e2",
7+
"metadata": {},
8+
"outputs": [],
9+
"source": [
10+
"# Set the PySpark environment variables\n",
11+
"import os\n",
12+
"os.environ['SPARK_HOME'] = \"/Users/coder2j/Apps/Spark\"\n",
13+
"os.environ['PYSPARK_DRIVER_PYTHON'] = 'jupyter'\n",
14+
"os.environ['PYSPARK_DRIVER_PYTHON_OPTS'] = 'lab'\n",
15+
"os.environ['PYSPARK_PYTHON'] = 'python'"
16+
]
17+
},
18+
{
19+
"cell_type": "code",
20+
"execution_count": 2,
21+
"id": "4a915758-1498-4831-820b-a44fd888e87b",
22+
"metadata": {},
23+
"outputs": [],
24+
"source": [
25+
"# Import PySpark\n",
26+
"from pyspark.sql import SparkSession"
27+
]
28+
},
29+
{
30+
"cell_type": "code",
31+
"execution_count": 3,
32+
"id": "bb53020b-1e79-4893-a13a-4968fa120fa3",
33+
"metadata": {},
34+
"outputs": [
35+
{
36+
"name": "stderr",
37+
"output_type": "stream",
38+
"text": [
39+
"Setting default log level to \"WARN\".\n",
40+
"To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
41+
"23/06/25 21:26:04 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n"
42+
]
43+
}
44+
],
45+
"source": [
46+
"# Create a SparkSession\n",
47+
"spark = SparkSession.builder \\\n",
48+
" .appName(\"PySpark-Get-Started\") \\\n",
49+
" .getOrCreate()"
50+
]
51+
},
52+
{
53+
"cell_type": "code",
54+
"execution_count": 4,
55+
"id": "dde43975-a1f5-4ad1-88a3-76eb84215f2b",
56+
"metadata": {},
57+
"outputs": [
58+
{
59+
"name": "stderr",
60+
"output_type": "stream",
61+
"text": [
62+
" \r"
63+
]
64+
},
65+
{
66+
"name": "stdout",
67+
"output_type": "stream",
68+
"text": [
69+
"+-------+---+\n",
70+
"| Name|Age|\n",
71+
"+-------+---+\n",
72+
"| Alice| 25|\n",
73+
"| Bob| 30|\n",
74+
"|Charlie| 35|\n",
75+
"+-------+---+\n",
76+
"\n"
77+
]
78+
}
79+
],
80+
"source": [
81+
"# Test the setup\n",
82+
"data = [(\"Alice\", 25), (\"Bob\", 30), (\"Charlie\", 35)]\n",
83+
"df = spark.createDataFrame(data, [\"Name\", \"Age\"])\n",
84+
"df.show()"
85+
]
86+
},
87+
{
88+
"cell_type": "code",
89+
"execution_count": null,
90+
"id": "096c5a89-058c-488a-9d9e-146fdb6a44dd",
91+
"metadata": {},
92+
"outputs": [],
93+
"source": []
94+
}
95+
],
96+
"metadata": {
97+
"kernelspec": {
98+
"display_name": "Python 3 (ipykernel)",
99+
"language": "python",
100+
"name": "python3"
101+
},
102+
"language_info": {
103+
"codemirror_mode": {
104+
"name": "ipython",
105+
"version": 3
106+
},
107+
"file_extension": ".py",
108+
"mimetype": "text/x-python",
109+
"name": "python",
110+
"nbconvert_exporter": "python",
111+
"pygments_lexer": "ipython3",
112+
"version": "3.10.8"
113+
}
114+
},
115+
"nbformat": 4,
116+
"nbformat_minor": 5
117+
}

0 commit comments

Comments
 (0)