spark/testnc.py
2025-01-02 17:32:13 +08:00

14 lines
409 B
Python

from pyspark.sql import SparkSession
# Create (or reuse) the SparkSession for this streaming application.
spark = (
    SparkSession.builder
    .appName("StreamingApp")
    .getOrCreate()
)

# Open a streaming read from a TCP socket on niit-node2:9999.
df = (
    spark.readStream
    .format("socket")
    .option("host", "niit-node2")
    .option("port", "9999")
    .load()
)

# Split each incoming `value` into pieces, emit one row per piece as `word`,
# and count occurrences per word. The '\t' escape is interpreted by Python,
# so the SQL expression receives a literal tab character as the separator.
df = (
    df.selectExpr("explode(split(value, '\t'))as word")
    .groupBy("word")
    .count()
)

# Write the counts to the console in "complete" output mode, then block
# until the streaming query terminates.
query = (
    df.writeStream
    .outputMode("complete")
    .format("console")
    .start()
)
query.awaitTermination()