from pyspark.sql import SparkSession
import os

os.environ['JAVA_HOME'] = r'D:\CodeDevelopment\DevelopmentEnvironment\Java\jdk-17.0.5'
os.environ['HADOOP_HOME'] = r'D:\CodeDevelopment\DevelopmentEnvironment\hadoop-2.8.1'

# Create the SparkSession. Both dependencies go into a single spark.jars.packages
# entry; a second .config() call with the same key would overwrite the first.
spark = SparkSession \
    .builder \
    .appName("Kafka Example") \
    .master("local[*]") \
    .config("spark.jars.packages", "org.apache.spark:spark-sql-kafka-0-10_2.12:3.1.2,io.delta:delta-core_2.12:2.4.0") \
    .config("spark.executorEnv.PATH", r'D:\CodeDevelopment\DevelopmentEnvironment\Java\jdk-17.0.5') \
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \
    .getOrCreate()

# Read the orders topic from Kafka as a streaming DataFrame
df = spark.readStream \
    .format("kafka") \
    .option("kafka.bootstrap.servers", "niit-node2:9092") \
    .option("subscribe", "orders") \
    .load()

# Kafka delivers key and value as binary, so cast them to strings.
# selectExpr returns a new DataFrame, so the result must be assigned back.
df = df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)")

# Display the data: show() is not supported on a streaming DataFrame,
# so write the stream to the console sink instead.
query = df.writeStream \
    .format("console") \
    .outputMode("append") \
    .start()
query.awaitTermination()
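
# The session above enables the Delta Lake extension and catalog, but the snippet
# never writes to Delta. Below is a minimal sketch of an alternative sink that
# persists the parsed stream to a Delta table instead of the console; the output
# path and checkpoint directory (D:\delta\orders and D:\delta\orders\_checkpoints)
# are illustrative assumptions, not values from the original code.
delta_query = df.writeStream \
    .format("delta") \
    .outputMode("append") \
    .option("checkpointLocation", r"D:\delta\orders\_checkpoints") \
    .start(r"D:\delta\orders")
delta_query.awaitTermination()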