from pyspark.sql import SparkSession
import os

# Point the process at the local JDK and Hadoop installs; raw strings keep
# the Windows backslashes from being parsed as escape sequences
os.environ['JAVA_HOME'] = r'D:\CodeDevelopment\DevelopmentEnvironment\Java\jdk-17.0.5'
os.environ['HADOOP_HOME'] = r'D:\CodeDevelopment\DevelopmentEnvironment\hadoop-2.8.1'
# Create the SparkSession. spark.jars.packages can only be set once per
# session, so the Kafka and Delta coordinates are merged into a single
# comma-separated list (the original second .config() call silently
# replaced the first). Note the connector versions should match the
# installed Spark: spark-sql-kafka-0-10 3.1.2 targets Spark 3.1.x, while
# delta-core 2.4.0 targets Spark 3.4.x, so one of the two will likely
# need to change.
spark = SparkSession \
    .builder \
    .appName("Kafka Example") \
    .master("local[*]") \
    .config("spark.jars.packages",
            "org.apache.spark:spark-sql-kafka-0-10_2.12:3.1.2,"
            "io.delta:delta-core_2.12:2.4.0") \
    .config("spark.executorEnv.PATH", r"D:\CodeDevelopment\DevelopmentEnvironment\Java\jdk-17.0.5") \
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \
    .getOrCreate()
# Read Kafka data as a streaming DataFrame
df = spark.readStream.format("kafka") \
    .option("kafka.bootstrap.servers", "niit-node2:9092") \
    .option("subscribe", "orders") \
    .load()

# selectExpr returns a new DataFrame, so reassign the result
df = df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)")

# Display the data. show() is not supported on a streaming DataFrame;
# instead, start a streaming query that prints each micro-batch to the
# console and block until it is stopped.
query = df.writeStream.format("console").outputMode("append").start()
query.awaitTermination()
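
# The session loads the Delta Lake package and extensions but never uses
# them above. A minimal sketch of persisting the stream to a Delta table
# instead of the console sink; the output path and checkpoint directory
# below are hypothetical placeholders:
#
# query = df.writeStream.format("delta") \
#     .outputMode("append") \
#     .option("checkpointLocation", "D:/tmp/checkpoints/orders") \
#     .start("D:/tmp/delta/orders")
# query.awaitTermination()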