feat:提交代码

This commit is contained in:
samlyy 2025-06-17 20:02:26 +08:00
commit c6ea6ca4ef
12 changed files with 442 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/.venv/

3
.idea/.gitignore generated vendored Normal file
View File

@ -0,0 +1,3 @@
# 默认忽略的文件
/shelf/
/workspace.xml

View File

@ -0,0 +1,16 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredPackages">
<value>
<list size="3">
<item index="0" class="java.lang.String" itemvalue="confluent-kafka" />
<item index="1" class="java.lang.String" itemvalue="pymysql" />
<item index="2" class="java.lang.String" itemvalue="Flask" />
</list>
</value>
</option>
</inspection_tool>
</profile>
</component>

View File

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

7
.idea/misc.xml generated Normal file
View File

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="Python 3.8 (pythonProject) (2)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml generated Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/pythonProject.iml" filepath="$PROJECT_DIR$/.idea/pythonProject.iml" />
</modules>
</component>
</project>

10
.idea/pythonProject.iml generated Normal file
View File

@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/.venv" />
</content>
<orderEntry type="jdk" jdkName="Python 3.12" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

6
.idea/vcs.xml generated Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

74
cf.py Normal file
View File

@ -0,0 +1,74 @@
from confluent_kafka import Consumer, KafkaException
import json
import time
# Kafka consumer settings: broker address, consumer group and offset-reset policy.
conf = {
    'bootstrap.servers': 'niit-node3:9092',
    'group.id': 'recommendation-group',
    'auto.offset.reset': 'earliest',
}

# Create the consumer and subscribe it to the single 'orders' topic.
consumer = Consumer(conf)
consumer.subscribe(['orders'])

# In-memory state: category -> {product -> (total_rating, count)}.
product_ratings: dict = {}
def update_product_rating(category, product, rating):
    """Fold one rating into the running ``(total, count)`` pair of a product.

    The pair is stored in the module-level ``product_ratings`` mapping under
    ``product_ratings[category][product]``; missing category and product
    entries are created on first use.
    """
    per_category = product_ratings.setdefault(category, {})
    try:
        running_total, seen = per_category[product]
    except KeyError:
        # First rating seen for this product in this category.
        per_category[product] = (rating, 1)
    else:
        per_category[product] = (running_total + rating, seen + 1)
def get_top_n_products(ratings_dict, n=3):
    """Return the names of the ``n`` products with the highest average rating.

    ``ratings_dict`` maps product name -> ``(total_rating, count)``. The
    average is ``total_rating / count``; products with equal averages keep
    the order in which they appear in ``ratings_dict``.
    """
    # Average rating per product, in the mapping's own iteration order.
    averages = {
        name: total / cnt for name, (total, cnt) in ratings_dict.items()
    }
    # Stable descending sort of the names by their average.
    ranked = sorted(averages, key=averages.get, reverse=True)
    return ranked[:n]
# The partition-EOF code is defined on KafkaError. KafkaException has no
# ``_PARTITION_EOF`` attribute, so comparing against it raised AttributeError
# for every consumer error and hid the real error.
from confluent_kafka import KafkaError

# Main consume loop: poll the 'orders' topic, fold every valid rating into
# ``product_ratings`` and print the current Top 3 of the affected category.
try:
    while True:
        msg = consumer.poll(timeout=1.0)
        if msg is None:
            # Poll timed out with nothing to read.
            continue

        err = msg.error()
        if err:
            if err.code() == KafkaError._PARTITION_EOF:
                # End of partition is informational, not a failure.
                print(f"Reached end of partition: {msg.topic()} [{msg.partition()}]")
                continue
            raise KafkaException(err)

        # Parse the message payload.
        try:
            # Note: the Java producer sends tab-separated text, not JSON.
            line = msg.value().decode('utf-8').strip()
            parts = line.split('\t')
            if len(parts) != 6:
                # Unexpected field count: skip the record.
                continue
            category, product, quantity, date, rating_str, is_valid = parts
            if is_valid != "Y":
                # Only records flagged as valid contribute to the ratings.
                continue
            rating = int(rating_str)
            # Update the running rating of this product.
            update_product_rating(category, product, rating)
            # Recompute the Top 3 of the category that just changed.
            top3 = get_top_n_products(product_ratings[category])
            print(f"[{category}] Top 3: {', '.join(top3)}")
        except Exception as e:
            # Best effort: report the bad record and keep consuming.
            print("Error parsing message:", str(e))
except KeyboardInterrupt:
    print("Stopping consumer...")
finally:
    consumer.close()

113
cf2.py Normal file
View File

@ -0,0 +1,113 @@
from confluent_kafka import Consumer, KafkaException
import json
import time
import mysql.connector
from mysql.connector import Error
# =================== Kafka configuration ===================
# Broker address, consumer group and offset-reset policy handed to the Consumer.
conf = {
    'bootstrap.servers': 'niit-node3:9092',
    'group.id': 'recommendation-group',
    'auto.offset.reset': 'earliest',
}

# Create the consumer and subscribe it to the single 'orders' topic.
consumer = Consumer(conf)
consumer.subscribe(['orders'])
# =================== MySQL configuration ===================
# NOTE(review): the credentials below are hard-coded (root account with a
# trivial password). Consider loading them from the environment or a secrets
# store before this runs anywhere shared.
mysql_config = {
    'host': '172.16.5.3',
    'port': 3306,
    'user': 'root',
    'password': '123456',
    'database': 'orders_db',
}

# Open the database connection and the cursor used by the insert helper.
# Every failure path terminates the script, so ``conn`` and ``cursor`` are
# guaranteed to exist once this section has run.
try:
    conn = mysql.connector.connect(**mysql_config)
    if not conn.is_connected():
        # Treat "opened but not usable" like any other connection failure
        # instead of continuing without a cursor.
        raise Error("connection was opened but is not active")
    print("✅ 成功连接到 MySQL 数据库")
    cursor = conn.cursor()
except Error as e:
    print("❌ 连接 MySQL 失败:", e)
    # ``exit()`` is a helper injected by the ``site`` module for interactive
    # sessions; raising SystemExit works in every runtime configuration.
    raise SystemExit(1)

# =================== State ===================
# category -> {product -> (total_rating, count)}
product_ratings = {}
def update_product_rating(category, product, rating):
    """Add ``rating`` to the running ``(total, count)`` pair of ``product``.

    State lives in the module-level ``product_ratings`` mapping, keyed first
    by category and then by product; missing entries are created on demand.
    """
    bucket = product_ratings.setdefault(category, {})
    previous = bucket.get(product)
    if previous is None:
        # First rating recorded for this product.
        bucket[product] = (rating, 1)
        return
    total, cnt = previous
    bucket[product] = (total + rating, cnt + 1)
def get_top_n_products(ratings_dict, n=3):
    """Return up to ``n`` product names ordered by descending average rating.

    ``ratings_dict`` maps product name -> ``(total_rating, count)``. Products
    whose averages are equal stay in the mapping's iteration order.
    """
    def mean_rating(entry):
        _, (total, cnt) = entry
        return total / cnt

    ranked = sorted(ratings_dict.items(), key=mean_rating, reverse=True)
    return [name for name, _ in ranked[:n]]
# =================== MySQL insert helper ===================
def insert_top_products_to_mysql(category, top_products_list):
    """Insert one ``(category, top_products)`` row into ``category_top_products``.

    ``top_products_list`` is stored as a JSON string (non-ASCII characters are
    kept as-is). Uses the module-level ``cursor`` and ``conn``: the insert is
    committed on success, and on a MySQL ``Error`` the failure is printed and
    the transaction is rolled back.
    """
    # Serialise the product list to a JSON string.
    payload = json.dumps(top_products_list, ensure_ascii=False)
    sql = (
        "\n"
        "INSERT INTO category_top_products (category, top_products)\n"
        "VALUES (%s, %s)\n"
    )
    try:
        cursor.execute(sql, (category, payload))
        conn.commit()
    except Error as db_err:
        print(f"🔴 插入数据库失败 [{category}]:", db_err)
        conn.rollback()
    else:
        print(f"🟢 已插入数据库:[{category}] {payload}")
# =================== Main loop ===================
# The partition-EOF code is defined on KafkaError. KafkaException has no
# ``_PARTITION_EOF`` attribute, so comparing against it raised AttributeError
# for every consumer error and hid the real error.
from confluent_kafka import KafkaError

# Poll the 'orders' topic, fold every valid rating into ``product_ratings``,
# then print and persist the current Top 3 of the affected category.
try:
    while True:
        msg = consumer.poll(timeout=1.0)
        if msg is None:
            # Poll timed out with nothing to read.
            continue

        err = msg.error()
        if err:
            if err.code() == KafkaError._PARTITION_EOF:
                # End of partition is informational, not a failure.
                print(f"Reached end of partition: {msg.topic()} [{msg.partition()}]")
                continue
            raise KafkaException(err)

        try:
            # Records are expected as six tab-separated fields.
            line = msg.value().decode('utf-8').strip()
            parts = line.split('\t')
            if len(parts) != 6:
                continue
            category, product, quantity, date, rating_str, is_valid = parts
            if is_valid != "Y":
                # Only records flagged as valid contribute to the ratings.
                continue
            rating = int(rating_str)
            update_product_rating(category, product, rating)
            top3 = get_top_n_products(product_ratings[category])
            print(f"[{category}] Top 3: {', '.join(top3)}")
            # Persist the refreshed ranking to MySQL.
            insert_top_products_to_mysql(category, top3)
        except Exception as e:
            # Best effort: report the bad record and keep consuming.
            print("⚠️ 解析消息错误:", str(e))
except KeyboardInterrupt:
    print("\n🛑 停止消费者...")
finally:
    # Nested try/finally so a failure while closing one resource does not
    # leave the remaining ones open.
    try:
        consumer.close()
    finally:
        try:
            cursor.close()
        finally:
            conn.close()
    print("🔌 数据库和 Kafka 连接已关闭")

121
cf3.py Normal file
View File

@ -0,0 +1,121 @@
from confluent_kafka import Consumer, KafkaException
import json
import time
import mysql.connector
from mysql.connector import Error
from collections import defaultdict
# Kafka configuration: broker address, consumer group and offset-reset policy.
conf = {
    'bootstrap.servers': 'niit-node3:9092',
    'group.id': 'recommendation-group',
    'auto.offset.reset': 'earliest',
}

# Create the consumer and subscribe it to the single 'orders' topic.
consumer = Consumer(conf)
consumer.subscribe(['orders'])
# MySQL configuration.
# NOTE(review): the credentials below are hard-coded (root account with a
# trivial password). Consider loading them from the environment or a secrets
# store before this runs anywhere shared.
mysql_config = {
    'host': '172.16.5.3',
    'port': 3306,
    'user': 'root',
    'password': '123456',
    'database': 'orders_db',
}

# Open the database connection and the cursor used by the insert helper.
# Every failure path terminates the script, so ``conn`` and ``cursor`` are
# guaranteed to exist once this section has run.
try:
    conn = mysql.connector.connect(**mysql_config)
    if not conn.is_connected():
        # Treat "opened but not usable" like any other connection failure
        # instead of continuing without a cursor.
        raise Error("connection was opened but is not active")
    print(" 成功连接到 MySQL 数据库")
    cursor = conn.cursor()
except Error as e:
    print(" 连接 MySQL 失败:", e)
    # ``exit()`` is a helper injected by the ``site`` module for interactive
    # sessions; raising SystemExit works in every runtime configuration.
    raise SystemExit(1)
# =================== State ===================
# Length of one aggregation window, in seconds (5 minutes).
window_duration = 5 * 60
# Epoch second at which the current window started.
current_window_start = int(time.time())


def _new_rating_slot():
    """Return a fresh, mutable ``[total_rating, count]`` accumulator."""
    return [0, 0]


def _new_category_bucket():
    """Return an empty product -> accumulator mapping for one category."""
    return defaultdict(_new_rating_slot)


# Ratings collected in the current window:
# {category -> {product -> [total_rating, count]}}
product_ratings_window = defaultdict(_new_category_bucket)
def get_top_n_products(ratings_dict, n=3):
    """Return up to ``n`` product names, best average rating first.

    ``ratings_dict`` maps product name -> a ``total_rating, count`` pair. The
    average is ``total_rating / count``; products with equal averages keep
    the mapping's iteration order.
    """
    scored = []
    for name, (total, cnt) in ratings_dict.items():
        scored.append((total / cnt, name))
    # Sort on the average only, so ties are not broken by product name.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [name for _, name in scored[:n]]
# =================== MySQL insert helper ===================
def insert_top_products_to_mysql(category, top_products_list, window_end_time):
    """Insert one ranking row, stamped with the window end time.

    ``top_products_list`` is stored as a JSON string (non-ASCII characters are
    kept as-is) and ``window_end_time`` (epoch seconds) is written as a local
    ``YYYY-MM-DD HH:MM:SS`` string. Uses the module-level ``cursor`` and
    ``conn``: the insert is committed on success, and on a MySQL ``Error`` the
    failure is printed and the transaction is rolled back.
    """
    payload = json.dumps(top_products_list, ensure_ascii=False)
    stamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(window_end_time))
    sql = (
        "\n"
        "INSERT INTO category_top_products (category, top_products, timestamp)\n"
        "VALUES (%s, %s, %s)\n"
    )
    row = (category, payload, stamp)
    try:
        cursor.execute(sql, row)
        conn.commit()
    except Error as db_err:
        print(f" 插入数据库失败 [{category}]:", db_err)
        conn.rollback()
    else:
        print(f" 已插入数据库:[{category}] {payload} @ {stamp}")
# =================== Main loop ===================
# The partition-EOF code is defined on KafkaError. KafkaException has no
# ``_PARTITION_EOF`` attribute, so comparing against it raised AttributeError
# for every consumer error and hid the real error.
from confluent_kafka import KafkaError

# Poll the 'orders' topic and accumulate valid ratings for the current window.
# Once ``window_duration`` seconds have elapsed, write each category's Top 3
# to MySQL and start a new window.
try:
    while True:
        msg = consumer.poll(timeout=1.0)
        current_time = int(time.time())

        # The window check runs on every iteration, including poll timeouts,
        # so a quiet topic still flushes on schedule.
        if current_time - current_window_start >= window_duration:
            window_start_text = time.strftime(
                '%Y-%m-%d %H:%M:%S', time.localtime(current_window_start)
            )
            print(f"\n 开始处理从 {window_start_text} 到现在的时间窗口...")
            # Produce the Top 3 recommendation of every category seen.
            for category, products in product_ratings_window.items():
                top3 = get_top_n_products(products)
                insert_top_products_to_mysql(category, top3, current_time)
            # Drop the window cache and start the next window now.
            product_ratings_window.clear()
            current_window_start = current_time
            print(" 窗口已重置\n")

        if msg is None:
            # Poll timed out with nothing to read.
            continue

        err = msg.error()
        if err:
            if err.code() == KafkaError._PARTITION_EOF:
                # End of partition is informational, not a failure.
                print(f"Reached end of partition: {msg.topic()} [{msg.partition()}]")
                continue
            raise KafkaException(err)

        try:
            # Records are expected as six tab-separated fields.
            line = msg.value().decode('utf-8').strip()
            parts = line.split('\t')
            if len(parts) != 6:
                continue
            category, product, quantity, date, rating_str, is_valid = parts
            if is_valid != "Y":
                # Only records flagged as valid contribute to the ratings.
                continue
            rating = int(rating_str)
            # Accumulate into the current window: [total_rating, count].
            product_ratings_window[category][product][0] += rating
            product_ratings_window[category][product][1] += 1
        except Exception as e:
            # Best effort: report the bad record and keep consuming.
            print("⚠ 解析消息错误:", str(e))
except KeyboardInterrupt:
    print("\n 停止消费者...")
finally:
    # Nested try/finally so a failure while closing one resource does not
    # leave the remaining ones open.
    try:
        consumer.close()
    finally:
        try:
            cursor.close()
        finally:
            conn.close()
    print(" 数据库和 Kafka 连接已关闭")

77
cf_test.py Normal file
View File

@ -0,0 +1,77 @@
from collections import defaultdict
import random
# Product catalogue: category name -> product names in that category.
PRODUCT_NAMES = {
    "电器": [
        "电视", "冰箱", "洗衣机", "空调", "吸尘器",
        "电饭煲", "微波炉", "电磁炉", "热水器", "空气净化器",
    ],
    "服饰": [
        "T恤", "牛仔裤", "羽绒服", "衬衫", "运动鞋",
        "夹克", "卫衣", "连衣裙", "短裤", "风衣",
    ],
    "食品": [
        "巧克力", "饼干", "方便面", "牛奶", "饮料",
        "面包", "糖果", "果冻", "薯片", "蛋挞",
    ],
    "玩具": [
        "积木", "拼图", "玩偶", "遥控车", "毛绒玩具",
        "魔方", "乐高", "变形金刚", "洋娃娃", "电子琴",
    ],
    "手机": [
        "华为", "苹果", "小米", "OPPO", "vivo",
        "荣耀", "三星", "魅族", "联想", "努比亚",
    ],
}

# Category names, in catalogue (insertion) order.
CATEGORIES = list(PRODUCT_NAMES)

# User ratings (0, 50, 100) -> dislike, okay, like it a lot.
RATINGS = [0, 50, 100]

# Possible values of an order's validity flag.
IS_VALID = ["Y", "N"]
# ================== Fake data generation ==================
def generate_fake_orders(n=50):
    """Return a list of ``n`` randomly generated order dicts.

    Each order has the keys ``category``, ``product``, ``rating`` and
    ``isValid``, drawn with ``random.choice`` from ``CATEGORIES``,
    ``PRODUCT_NAMES[category]``, ``RATINGS`` and ``IS_VALID`` respectively.
    """
    def make_order():
        # The category is drawn first because the product list depends on it.
        category = random.choice(CATEGORIES)
        return {
            'category': category,
            'product': random.choice(PRODUCT_NAMES[category]),
            'rating': random.choice(RATINGS),
            'isValid': random.choice(IS_VALID),
        }

    return [make_order() for _ in range(n)]
# ================== Recommendation logic ==================
def recommend_top_n(orders, n=3):
    """Return, per category, up to ``n`` products with the best average rating.

    ``orders`` is an iterable of dicts with the keys ``category``,
    ``product``, ``rating`` and ``isValid``; orders whose ``isValid`` is not
    ``'Y'`` are ignored. The result maps each category that has at least one
    valid order to a list of product names, highest average first. Products
    with equal averages keep the order in which they were first seen.
    """
    # category -> product -> [rating sum, number of ratings]
    totals = defaultdict(lambda: defaultdict(lambda: [0, 0]))
    for order in orders:
        if order['isValid'] == 'Y':
            slot = totals[order['category']][order['product']]
            slot[0] += order['rating']
            slot[1] += 1

    def best_names(products):
        # Stable descending sort by average rating.
        ranked = sorted(
            products.items(),
            key=lambda entry: entry[1][0] / entry[1][1],
            reverse=True,
        )
        return [name for name, _ in ranked[:n]]

    return {category: best_names(products) for category, products in totals.items()}
# ================== Script entry point ==================
if __name__ == '__main__':
    sample_orders = generate_fake_orders(100)

    # Show the first 10 generated orders, one per line.
    print("=== 假数据样本 ===")
    print(*sample_orders[:10], sep="\n")

    print("\n=== 开始推荐 Top 3 商品 ===")
    recommendations = recommend_top_n(sample_orders, n=3)
    for category_name, best_products in recommendations.items():
        joined = ', '.join(best_products)
        print(f"[{category_name}] 推荐商品:{joined}")