import requests
import csv
import json
import time
import random
from urllib.parse import unquote
class QQWuxiaRankSpiderPro:
    """Collect a paginated ranking list and store it as CSV.

    Pages are requested one at a time with a form-encoded POST to
    ``API_URL``.  Every entry of the returned ``jRankList`` is normalised
    into a flat dict keyed by the CSV column names and appended to the
    output file.  The start banner printed by :meth:`run` names the list as
    the 天涯明月刀 server-wide power ranking.
    """

    # Endpoint that receives the form-encoded ranking query.
    API_URL = "https://comm.ams.game.qq.com/ide/"

    # CSV column order; the names double as the keys of every decoded row.
    CSV_FIELDS = (
        "排名", "角色名", "门派", "服务器", "帮派",
        "战力值", "记录时间", "留言", "性别", "头像URL",
    )

    def __init__(self):
        """Set up request headers, query constants and the HTTP session."""
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
            "Referer": "https://wuxia.qq.com/",
            "Origin": "https://wuxia.qq.com",
            # Declares the form-encoded body sent by fetch_page_data().
            "Content-Type": "application/x-www-form-urlencoded",
            # Placeholder: paste a fresh cookie here before running.
            "Cookie": "运行时候复制一个新的cookie过来",
        }
        # Constant query fields merged into every request body.
        self.base_params = {
            "iChartId": 286635,
            "iSubChartId": 286635,
            "aid": 999,
            "type": 158,
        }
        self.session = requests.Session()
        # Page count most recently reported by the API (None until known).
        self.total_pages = None
        # Placeholder: paste a fresh sldeToken here before running.
        self.slde_token = "运行时候复制一个新的sldeToken过来"

    def _generate_request_data(self, page):
        """Return the POST form fields for ranking page ``page``.

        The constant fields come from ``self.base_params`` so they are
        defined in exactly one place; form field order is not significant.
        """
        return {
            **self.base_params,
            "sldeToken": self.slde_token,
            "page": page,
            "key": "",  # sent empty; adjust to match the real request if needed
        }

    def _decode_unicode(self, text):
        r"""Decode literal backslash escapes (e.g. ``\u4e2d``) left in ``text``.

        Characters that are already real Unicode are preserved.  Non-string
        input, text without a backslash and text with a malformed escape
        are returned unchanged.
        """
        if not isinstance(text, str) or "\\" not in text:
            return text
        try:
            # The unicode_escape codec reads its input as Latin-1, so
            # characters outside Latin-1 are first turned into escapes
            # themselves; this keeps real non-ASCII text from becoming
            # mojibake.
            decoded = text.encode("latin-1", "backslashreplace").decode("unicode_escape")
        except UnicodeDecodeError:
            return text
        # Escaped surrogate pairs decode to two lone surrogates, which cannot
        # be written as UTF-8.  A UTF-16 round trip joins valid pairs and
        # replaces unpaired ones with U+FFFD.
        return decoded.encode("utf-16", "surrogatepass").decode("utf-16", "replace")

    @staticmethod
    def _int_if_numeric(value):
        """Return ``int(value)`` when its text is all decimal digits, else ``value``."""
        text = str(value)
        # isdecimal() only accepts characters int() can parse, unlike
        # isdigit(), which also accepts e.g. superscript digits.
        return int(text) if text.isdecimal() else value

    @staticmethod
    def _is_success(code):
        """Tell whether an API status field is zero, as a number or as text."""
        return code == 0 or str(code).strip() == "0"

    def _decode_data_item(self, item):
        """Convert one raw ranking entry into a row keyed by ``CSV_FIELDS``.

        Args:
            item: dict holding one entry of the response's ``jRankList``.

        Returns:
            dict with one value per CSV column.  Rank and power value are
            converted to ``int`` when they consist of decimal digits.
        """

        def text_field(key, default=""):
            # Missing or null fields fall back to the default; other
            # non-string values are stringified so the str methods below
            # are always safe.
            value = item.get(key, default)
            if value is None:
                return default
            return value if isinstance(value, str) else str(value)

        decoded = {
            # NOTE(review): the key "rand" looks like it may be a typo for
            # "rank" - confirm against a real API response.
            "排名": item.get("rand", "N/A"),
            # The role name may carry DEL (0x7f) characters; drop them.
            "角色名": self._decode_unicode(text_field("sRoleName")).replace('\x7f', '').strip(),
            "门派": self._decode_unicode(text_field("sMenPaiName", "未知门派")),
            "服务器": self._decode_unicode(text_field("sAreaName")),
            # Unlike the other text fields, this one is percent-decoded.
            "帮派": unquote(text_field("sBangPaiName")),
            "战力值": item.get("iResult", "隐藏数值"),
            "记录时间": item.get("dtRecInsert", "未知时间"),
            "留言": self._decode_unicode(text_field("sIntroduce")).strip(),
            # Compare as text so both 1 and "1" count as female.
            "性别": "女" if str(item.get("gender")) == "1" else "男",
            "头像URL": item.get("sHeadUrl", ""),
        }
        decoded["战力值"] = self._int_if_numeric(decoded["战力值"])
        decoded["排名"] = self._int_if_numeric(decoded["排名"])
        return decoded

    def _parse_payload(self, payload):
        """Extract ``(rows, total_pages)`` from a decoded JSON response.

        Returns ``(None, None)`` when either status field is non-zero or
        ``jRankList`` is not a list.  Entries that fail to decode are
        reported and skipped.
        """
        if not (self._is_success(payload.get("ret")) and self._is_success(payload.get("iRet"))):
            print(f"接口异常:{payload.get('sMsg')}")
            return None, None
        j_data = payload.get("jData") or {}
        total_pages = int(j_data.get("iCountPage") or 0)
        rank_list = j_data.get("jRankList", [])
        if not isinstance(rank_list, list):
            return None, None
        rows = []
        for item in rank_list:
            try:
                rows.append(self._decode_data_item(item))
            except Exception as e:
                # One malformed entry must not discard the whole page.
                print(f"数据解析异常:{str(e)}")
        return rows, total_pages

    def fetch_page_data(self, page):
        """Request one ranking page and decode it.

        Args:
            page: page number to request.

        Returns:
            ``(rows, total_pages)`` on success, ``(None, None)`` on any
            failure.  Failures are printed and never raised, which is what
            the retry loop in :meth:`run` relies on.
        """
        try:
            # Random pause before every request to throttle the crawl.
            time.sleep(random.uniform(3.0, 5.0))
            response = self.session.post(
                self.API_URL,
                headers=self.headers,
                data=self._generate_request_data(page),
                timeout=20,
            )
            if response.status_code != 200:
                print(f"HTTP异常状态码:{response.status_code}")
                return None, None
            try:
                payload = response.json()
            except ValueError as e:
                # ValueError is the common base of the JSON decode errors
                # the different requests versions raise.
                print(f"JSON解析失败:{str(e)}")
                return None, None
            return self._parse_payload(payload)
        except Exception as e:
            # Boundary handler: network errors and unexpected response
            # shapes become a failed page instead of a crash.
            print(f"请求异常:{str(e)}")
            return None, None

    def save_to_csv(self, data, filename="wuxia_rank.csv"):
        """Append ``data`` (a list of row dicts) to ``filename``.

        Does nothing for empty data.  The header row is written only when
        the file is still empty.
        """
        if not data:
            return
        with open(filename, "a", newline="", encoding="utf-8-sig") as f:
            writer = csv.DictWriter(f, fieldnames=self.CSV_FIELDS)
            # In append mode the position starts at the end of the file, so
            # 0 means nothing has been written yet.
            if f.tell() == 0:
                writer.writeheader()
            writer.writerows(data)

    def run(self, filename="wuxia_rank.csv"):
        """Crawl pages from 1 upwards and write them to ``filename``.

        A page that yields no rows is retried up to three times.  The crawl
        stops at the last page reported by the API or at the first page
        that still fails after the retries.

        Args:
            filename: output CSV path; it is truncated before the crawl.
        """
        current_page = 1
        max_retry = 3
        print("=== 开始采集天涯明月刀全服功力榜 ===")
        # Start from an empty file so save_to_csv() writes the header.
        with open(filename, "w", encoding="utf-8-sig"):
            pass
        while True:
            data, total_pages = self.fetch_page_data(current_page)
            retry_count = 0
            while not data and retry_count < max_retry:
                retry_count += 1
                print(f"第{retry_count}次重试...")
                time.sleep(3)
                data, total_pages = self.fetch_page_data(current_page)
            if not data:
                print(f"第{current_page}页采集失败")
                break
            if total_pages:
                self.total_pages = total_pages
            self.save_to_csv(data, filename)
            print(f"第{current_page}页已保存({len(data)}条)")
            if total_pages and current_page >= total_pages:
                print("采集完成")
                break
            current_page += 1
        print("=== 采集结束 ===")
if __name__ == "__main__":
    # Script entry point: build the spider and crawl the whole ranking.
    spider = QQWuxiaRankSpiderPro()
    spider.run()
import csv
import json
import time
import random
from urllib.parse import unquote
class QQWuxiaRankSpiderPro:
    """Collect a paginated ranking list and store it as CSV.

    Pages are requested one at a time with a form-encoded POST to
    ``API_URL``.  Every entry of the returned ``jRankList`` is normalised
    into a flat dict keyed by the CSV column names and appended to the
    output file.  The start banner printed by :meth:`run` names the list as
    the 天涯明月刀 server-wide power ranking.
    """

    # Endpoint that receives the form-encoded ranking query.
    API_URL = "https://comm.ams.game.qq.com/ide/"

    # CSV column order; the names double as the keys of every decoded row.
    CSV_FIELDS = (
        "排名", "角色名", "门派", "服务器", "帮派",
        "战力值", "记录时间", "留言", "性别", "头像URL",
    )

    def __init__(self):
        """Set up request headers, query constants and the HTTP session."""
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
            "Referer": "https://wuxia.qq.com/",
            "Origin": "https://wuxia.qq.com",
            # Declares the form-encoded body sent by fetch_page_data().
            "Content-Type": "application/x-www-form-urlencoded",
            # Placeholder: paste a fresh cookie here before running.
            "Cookie": "运行时候复制一个新的cookie过来",
        }
        # Constant query fields merged into every request body.
        self.base_params = {
            "iChartId": 286635,
            "iSubChartId": 286635,
            "aid": 999,
            "type": 158,
        }
        self.session = requests.Session()
        # Page count most recently reported by the API (None until known).
        self.total_pages = None
        # Placeholder: paste a fresh sldeToken here before running.
        self.slde_token = "运行时候复制一个新的sldeToken过来"

    def _generate_request_data(self, page):
        """Return the POST form fields for ranking page ``page``.

        The constant fields come from ``self.base_params`` so they are
        defined in exactly one place; form field order is not significant.
        """
        return {
            **self.base_params,
            "sldeToken": self.slde_token,
            "page": page,
            "key": "",  # sent empty; adjust to match the real request if needed
        }

    def _decode_unicode(self, text):
        r"""Decode literal backslash escapes (e.g. ``\u4e2d``) left in ``text``.

        Characters that are already real Unicode are preserved.  Non-string
        input, text without a backslash and text with a malformed escape
        are returned unchanged.
        """
        if not isinstance(text, str) or "\\" not in text:
            return text
        try:
            # The unicode_escape codec reads its input as Latin-1, so
            # characters outside Latin-1 are first turned into escapes
            # themselves; this keeps real non-ASCII text from becoming
            # mojibake.
            decoded = text.encode("latin-1", "backslashreplace").decode("unicode_escape")
        except UnicodeDecodeError:
            return text
        # Escaped surrogate pairs decode to two lone surrogates, which cannot
        # be written as UTF-8.  A UTF-16 round trip joins valid pairs and
        # replaces unpaired ones with U+FFFD.
        return decoded.encode("utf-16", "surrogatepass").decode("utf-16", "replace")

    @staticmethod
    def _int_if_numeric(value):
        """Return ``int(value)`` when its text is all decimal digits, else ``value``."""
        text = str(value)
        # isdecimal() only accepts characters int() can parse, unlike
        # isdigit(), which also accepts e.g. superscript digits.
        return int(text) if text.isdecimal() else value

    @staticmethod
    def _is_success(code):
        """Tell whether an API status field is zero, as a number or as text."""
        return code == 0 or str(code).strip() == "0"

    def _decode_data_item(self, item):
        """Convert one raw ranking entry into a row keyed by ``CSV_FIELDS``.

        Args:
            item: dict holding one entry of the response's ``jRankList``.

        Returns:
            dict with one value per CSV column.  Rank and power value are
            converted to ``int`` when they consist of decimal digits.
        """

        def text_field(key, default=""):
            # Missing or null fields fall back to the default; other
            # non-string values are stringified so the str methods below
            # are always safe.
            value = item.get(key, default)
            if value is None:
                return default
            return value if isinstance(value, str) else str(value)

        decoded = {
            # NOTE(review): the key "rand" looks like it may be a typo for
            # "rank" - confirm against a real API response.
            "排名": item.get("rand", "N/A"),
            # The role name may carry DEL (0x7f) characters; drop them.
            "角色名": self._decode_unicode(text_field("sRoleName")).replace('\x7f', '').strip(),
            "门派": self._decode_unicode(text_field("sMenPaiName", "未知门派")),
            "服务器": self._decode_unicode(text_field("sAreaName")),
            # Unlike the other text fields, this one is percent-decoded.
            "帮派": unquote(text_field("sBangPaiName")),
            "战力值": item.get("iResult", "隐藏数值"),
            "记录时间": item.get("dtRecInsert", "未知时间"),
            "留言": self._decode_unicode(text_field("sIntroduce")).strip(),
            # Compare as text so both 1 and "1" count as female.
            "性别": "女" if str(item.get("gender")) == "1" else "男",
            "头像URL": item.get("sHeadUrl", ""),
        }
        decoded["战力值"] = self._int_if_numeric(decoded["战力值"])
        decoded["排名"] = self._int_if_numeric(decoded["排名"])
        return decoded

    def _parse_payload(self, payload):
        """Extract ``(rows, total_pages)`` from a decoded JSON response.

        Returns ``(None, None)`` when either status field is non-zero or
        ``jRankList`` is not a list.  Entries that fail to decode are
        reported and skipped.
        """
        if not (self._is_success(payload.get("ret")) and self._is_success(payload.get("iRet"))):
            print(f"接口异常:{payload.get('sMsg')}")
            return None, None
        j_data = payload.get("jData") or {}
        total_pages = int(j_data.get("iCountPage") or 0)
        rank_list = j_data.get("jRankList", [])
        if not isinstance(rank_list, list):
            return None, None
        rows = []
        for item in rank_list:
            try:
                rows.append(self._decode_data_item(item))
            except Exception as e:
                # One malformed entry must not discard the whole page.
                print(f"数据解析异常:{str(e)}")
        return rows, total_pages

    def fetch_page_data(self, page):
        """Request one ranking page and decode it.

        Args:
            page: page number to request.

        Returns:
            ``(rows, total_pages)`` on success, ``(None, None)`` on any
            failure.  Failures are printed and never raised, which is what
            the retry loop in :meth:`run` relies on.
        """
        try:
            # Random pause before every request to throttle the crawl.
            time.sleep(random.uniform(3.0, 5.0))
            response = self.session.post(
                self.API_URL,
                headers=self.headers,
                data=self._generate_request_data(page),
                timeout=20,
            )
            if response.status_code != 200:
                print(f"HTTP异常状态码:{response.status_code}")
                return None, None
            try:
                payload = response.json()
            except ValueError as e:
                # ValueError is the common base of the JSON decode errors
                # the different requests versions raise.
                print(f"JSON解析失败:{str(e)}")
                return None, None
            return self._parse_payload(payload)
        except Exception as e:
            # Boundary handler: network errors and unexpected response
            # shapes become a failed page instead of a crash.
            print(f"请求异常:{str(e)}")
            return None, None

    def save_to_csv(self, data, filename="wuxia_rank.csv"):
        """Append ``data`` (a list of row dicts) to ``filename``.

        Does nothing for empty data.  The header row is written only when
        the file is still empty.
        """
        if not data:
            return
        with open(filename, "a", newline="", encoding="utf-8-sig") as f:
            writer = csv.DictWriter(f, fieldnames=self.CSV_FIELDS)
            # In append mode the position starts at the end of the file, so
            # 0 means nothing has been written yet.
            if f.tell() == 0:
                writer.writeheader()
            writer.writerows(data)

    def run(self, filename="wuxia_rank.csv"):
        """Crawl pages from 1 upwards and write them to ``filename``.

        A page that yields no rows is retried up to three times.  The crawl
        stops at the last page reported by the API or at the first page
        that still fails after the retries.

        Args:
            filename: output CSV path; it is truncated before the crawl.
        """
        current_page = 1
        max_retry = 3
        print("=== 开始采集天涯明月刀全服功力榜 ===")
        # Start from an empty file so save_to_csv() writes the header.
        with open(filename, "w", encoding="utf-8-sig"):
            pass
        while True:
            data, total_pages = self.fetch_page_data(current_page)
            retry_count = 0
            while not data and retry_count < max_retry:
                retry_count += 1
                print(f"第{retry_count}次重试...")
                time.sleep(3)
                data, total_pages = self.fetch_page_data(current_page)
            if not data:
                print(f"第{current_page}页采集失败")
                break
            if total_pages:
                self.total_pages = total_pages
            self.save_to_csv(data, filename)
            print(f"第{current_page}页已保存({len(data)}条)")
            if total_pages and current_page >= total_pages:
                print("采集完成")
                break
            current_page += 1
        print("=== 采集结束 ===")
if __name__ == "__main__":
    # Script entry point: build the spider and crawl the whole ranking.
    spider = QQWuxiaRankSpiderPro()
    spider.run()