Colab Spark Initialize Step

Download as txt, pdf, or txt
Download as txt, pdf, or txt
You are on page 1 of 1

# Open Colab, paste the following into a cell, and execute it.

!apt-get update # Update apt-get repository.


!apt-get install openjdk-8-jdk-headless -qq > /dev/null # Install Java.
!wget -q http://archive.apache.org/dist/spark/spark-3.1.1/spark-3.1.1-bin-
hadoop3.2.tgz # Download Apache Sparks.
!tar xf spark-3.1.1-bin-hadoop3.2.tgz # Unzip the tgz file.
!pip install -q findspark # Install findspark. Adds PySpark to the System path
during runtime.

# Set environment variables


import os

# Register the Java and Spark install locations in the process environment
# in a single call.
os.environ.update(
    {
        "JAVA_HOME": "/usr/lib/jvm/java-8-openjdk-amd64",
        "SPARK_HOME": "/content/spark-3.1.1-bin-hadoop3.2",
    }
)

# List the current working directory (presumably to confirm the extracted
# Spark folder is present -- verify if this listing is still needed).
!ls

# Initialize findspark
# findspark adds PySpark to the system path at runtime, so init() has to run
# before the pyspark imports that follow.
import findspark
findspark.init()

# Create a PySpark session
#
# Build the session once, with both the master and the application name set
# up front. getOrCreate() returns an already-running session if one exists,
# so the name has to be supplied before the first call for the application
# to actually be started under it.
from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    .master("local[*]")  # Run Spark locally, using all available cores.
    .appName("nik")
    .getOrCreate()
)

# Obtain the SparkContext. getOrCreate() hands back the context that the
# session above already started rather than creating a second one.
from pyspark import SparkContext

sc = SparkContext.getOrCreate()

You might also like