diff --git a/.gitignore b/.gitignore index bcafc10..367f488 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,5 @@ dist-ssr *.local /pywxdump/ui/web/* /pywxdump/ui/web/assets/* +/pywxdump/wxdump_work +test2.py diff --git a/pywxdump/analyzer/utils.py b/pywxdump/analyzer/utils.py index 5de07ed..8f1d285 100644 --- a/pywxdump/analyzer/utils.py +++ b/pywxdump/analyzer/utils.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*-# # ------------------------------------------------------------------------------- -# Name: utils.py +# Name: api_utils.py # Description: # Author: xaoyaoo # Date: 2023/12/03 diff --git a/pywxdump/api/api_utils/__init__.py b/pywxdump/api/api_utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pywxdump/api/api_utils/dify.py b/pywxdump/api/api_utils/dify.py new file mode 100644 index 0000000..f43a193 --- /dev/null +++ b/pywxdump/api/api_utils/dify.py @@ -0,0 +1,14 @@ +# 接入dify工作流,用来制作微信聊天记录可视化 + +class Dify(object): + def __init__(self): + api_key = "" + api_base_url = "" + + #TODO: 文件上传 + def file_upload(self): + pass + + #TODO: 运行工作流 + def run_workflows(self): + pass \ No newline at end of file diff --git a/pywxdump/api/api_utils/html.py b/pywxdump/api/api_utils/html.py new file mode 100644 index 0000000..a1df3c1 --- /dev/null +++ b/pywxdump/api/api_utils/html.py @@ -0,0 +1,939 @@ +# 创建html可视化页面 +import json +import re + + +class HtmlController(object): + def __init__(self): + pass + + # 根据json返回html + def create_html(self,json_data) -> str: + json_data = r"```json\n{\n\"header\": {\n\"title\": \"群聊报告\",\n\"date\": \"2025-04-29\",\n\"metaInfo\": {\n\"totalMessages\": \"35\",\n\"activeUsers\": \"12\",\n\"timeRange\": \"07:03:10 - 15:36:25\"\n}\n},\n\"sections\": {\n\"hotTopics\": {\n\"items\": [\n{\n\"name\": \"AI技术讨论\",\n\"category\": \"科技\",\n\"summary\": \"群内围绕Qwen3开源、Vidu Q1体验、夸克AI相机等AI技术展开热烈讨论,涉及模型性能、应用场景和开发者体验。多位成员分享了相关技术文章和体验报告。\",\n\"keywords\": [\"Qwen3\", \"Vidu\", \"AI相机\"],\n\"mentions\": \"15\"\n},\n{\n\"name\": 
\"熬夜与工作压力\",\n\"category\": \"生活\",\n\"summary\": \"成员们讨论熬夜工作现象,分享各自熬夜经历,对比互联网大厂与普通开发者的工作强度差异,引发关于工作生活平衡的思考。\",\n\"keywords\": [\"熬夜\", \"加班\", \"工作强度\"],\n\"mentions\": \"8\"\n}\n]\n},\n\"tutorials\": {\n\"items\": [\n{\n\"type\": \"TUTORIAL\",\n\"title\": \"体验完刚上线的Vidu Q1,后劲有点大(附AI视频创作教程)\",\n\"sharedBy\": \"苍何\",\n\"time\": \"2025-04-29 09:39:42\",\n\"summary\": \"分享Vidu Q1 AI视频创作工具的体验和教程,介绍其清晰度和一致性的提升。\",\n\"keyPoints\": [\"AI视频清晰度提升\", \"一致性改进\"],\n\"url\": \"http://mp.weixin.qq.com/s?__biz=MzU4NTE1Mjg4MA==&mid=2247493267&idx=1&sn=0189fb501578ce8e27142fbe2f590d03&chksm=fc9a946728c367005c19cb5a335300d05d51a441f9f20424a0a72c904a47bdf003252576318a&mpshare=1&scene=1&srcid=04297l70B2zsuypDfjUh0rh5&sharer_shareinfo=181efb947f938ab90786c776bf7bbda7&sharer_shareinfo_first=181efb947f938ab90786c776bf7bbda7#rd\",\n\"domain\": \"mp.weixin.qq.com\",\n\"category\": \"AI工具\"\n},\n{\n\"type\": \"TUTORIAL\",\n\"title\": \"阿里新出的夸克AI相机,强大到我有点陌生\",\n\"sharedBy\": \"苍何\",\n\"time\": \"2025-04-29 09:42:38\",\n\"summary\": \"介绍夸克AI相机的新奇玩法和功能,展示其强大的AI图像处理能力。\",\n\"keyPoints\": [\"新奇玩法\", \"抽象功能\"],\n\"url\": \"http://mp.weixin.qq.com/s?__biz=MzU4NTE1Mjg4MA==&mid=2247493275&idx=1&sn=93556ddd1da7fb8733a23a7c4adbb76b&chksm=fc2a2d25774cce23c75acd8850b85c585c0bcf78d14b810e157efaec5106abf563cf58e26aef&mpshare=1&scene=1&srcid=0429vDf8NbEzNLBQQyFlABmU&sharer_shareinfo=28b94477ec8201b88aa30338e82e8999&sharer_shareinfo_first=28b94477ec8201b88aa30338e82e8999#rd\",\n\"domain\": \"mp.weixin.qq.com\",\n\"category\": \"AI应用\"\n},\n{\n\"type\": \"RESOURCE\",\n\"title\": \"仅2MB,Windows瞬间超级丝滑!\",\n\"sharedBy\": \"AHapi²⁰²⁵\",\n\"time\": \"2025-04-29 11:13:38\",\n\"summary\": \"分享一款轻量级Windows优化工具,声称能显著提升系统运行速度。\",\n\"keyPoints\": [\"2MB大小\", \"系统优化\"],\n\"url\": \"https://mp.weixin.qq.com/s/es77Jc6Du03ppJD5XJeQUg\",\n\"domain\": \"mp.weixin.qq.com\",\n\"category\": \"系统工具\"\n}\n]\n},\n\"importantMessages\": {\n\"items\": [\n{\n\"time\": \"2025-04-29 10:00:18\",\n\"sender\": \"苍何\",\n\"type\": 
\"NEWS\",\n\"priority\": \"高\",\n\"content\": \"2025年04月29日 AI科技早报:阿里开源8款Qwen3模型,腾讯开源Kuikly跨端框架,OpenAI推出ChatGPT购物功能等11条重要新闻。\",\n\"fullContent\": \"2025年04月29日 AI科技早报1、阿里开源8款Qwen3模型,集成MCP,性能超DeepSeek-R1、OpenAI o1。2、Qafind Labs发布ChatDLM扩散语言模型,推理速度高达2800 tokens/s。3、腾讯开源Kuikly跨端框架,基于Kotlin支持多平台开发,已应用于QQ。4、OpenAI 推出 ChatGPT 购物功能,用户可通过 ChatGPT 便捷购物。5、字节Seed团队提出PHD-Transformer,突破预训练长度扩展瓶颈。6、百度发布文心快码3.5版本与多模态AI智能体Zulu,助力工程师提效。7、Kimi与财新传媒合作,提供专业财经内容,推动AI+传统媒体融合。8、苹果加速「N50」智能眼镜项目,融合AI技术预计2027年亮相。9、研究显示OpenAI o3在病毒学领域超越94%人类专家,生物安全引关注。10、华为测试自研AI芯片Ascend 910D,旨在替代英伟达H100芯片。11、🔥【记得收藏】早报同步更新到开源 AI 知识库:https://u55dyuejxc.feishu.cn/wiki/FkmNwxYHDigJ3akIUGHc8MSTn4d\"\n}\n]\n},\n\"dialogues\": {\n\"items\": [\n{\n\"type\": \"DIALOGUE\",\n\"messages\": [\n{\n\"speaker\": \"好名字\",\n\"time\": \"2025-04-29 08:16:23\",\n\"content\": \"这个我弄完,ai做的小程序有bug,流程走不通,还改不了[捂脸]\"\n},\n{\n\"speaker\": \"贾👦🏻\",\n\"time\": \"2025-04-29 08:54:33\",\n\"content\": \"可以微调 不过源码需要买的\"\n},\n{\n\"speaker\": \"好名字\",\n\"time\": \"2025-04-29 09:13:32\",\n\"content\": \"微调一次,然后再想调就需要开会员了\"\n},\n{\n\"speaker\": \"贾👦🏻\",\n\"time\": \"2025-04-29 09:14:09\",\n\"content\": \"需求变更一个字 就需要重新购买[破涕为笑]\"\n}\n],\n\"highlight\": \"AI小程序开发中的商业化模式讨论\",\n\"relatedTopic\": \"AI开发工具\"\n},\n{\n\"type\": \"DIALOGUE\",\n\"messages\": [\n{\n\"speaker\": \"苍何\",\n\"time\": \"2025-04-29 09:26:49\",\n\"content\": \"我熬不动\"\n},\n{\n\"speaker\": \"AHapi²⁰²⁵\",\n\"time\": \"2025-04-29 09:27:25\",\n\"content\": \"不要卷别人[旺柴]别人写了 就不卷他们了\"\n},\n{\n\"speaker\": \"苍何\",\n\"time\": \"2025-04-29 09:27:55\",\n\"content\": \"新闻得第一时间,做不到写了也没啥用\"\n},\n{\n\"speaker\": \"苍何\",\n\"time\": \"2025-04-29 09:28:03\",\n\"content\": \"还不如写些应用\"\n},\n{\n\"speaker\": \"大风(Wind)\",\n\"time\": \"2025-04-29 09:28:23\",\n\"content\": \"看看哪些是5-7点发推文的,基本都是卷王了\"\n},\n{\n\"speaker\": \"沉默王二\",\n\"time\": \"2025-04-29 09:28:44\",\n\"content\": \"身体能扛住确实离谱\"\n},\n{\n\"speaker\": \"苍何\",\n\"time\": \"2025-04-29 09:29:03\",\n\"content\": \"是啊,太肝了\"\n}\n],\n\"highlight\": 
\"关于工作强度和熬夜文化的讨论\",\n\"relatedTopic\": \"工作生活平衡\"\n}\n]\n},\n\"qa\": {\n\"items\": [\n{\n\"question\": {\n\"asker\": \"银色子弹-捷\",\n\"time\": \"2025-04-29 11:10:26\",\n\"content\": \"问一下win11电脑,你长时间没清理,运行慢,一般用什么来清理电脑? 不要360啊,那个太流氓了,想知道各位大佬有没有优秀的软件推荐一下\",\n\"tags\": [\"Windows优化\", \"系统清理\"]\n},\n\"answers\": [\n{\n\"responder\": \"昏沉沉的\",\n\"time\": \"2025-04-29 11:11:59\",\n\"content\": \"ccclean\",\n\"isAccepted\": false\n},\n{\n\"responder\": \"🤑程序儒\",\n\"time\": \"2025-04-29 11:13:07\",\n\"content\": \"360极速版、Wise Care 365\",\n\"isAccepted\": false\n},\n{\n\"responder\": \"AHapi²⁰²⁵\",\n\"time\": \"2025-04-29 11:13:38\",\n\"content\": \"仅2MB,Windows瞬间超级丝滑!这才是,真神器!\",\n\"isAccepted\": true\n}\n]\n},\n{\n\"question\": {\n\"asker\": \"ಠ_ಠ 闲鱼一条ಠ_ಠ\",\n\"time\": \"2025-04-29 11:37:49\",\n\"content\": \"请问哪位哥还有扣子的邀请码吗?\",\n\"tags\": [\"邀请码\", \"扣子空间\"]\n},\n\"answers\": [\n{\n\"responder\": \"贾👦🏻\",\n\"time\": \"2025-04-29 11:40:37\",\n\"content\": \"RootUser_2105656329 邀请你体验扣子空间,快来和 Agent 一起开始你的工作吧!https://www.coze.cn/space-preview?invite_code=SCL7DAL0\",\n\"isAccepted\": true\n},\n{\n\"responder\": \"9527\",\n\"time\": \"2025-04-29 11:47:43\",\n\"content\": \"RootUser_2106519373 邀请你体验扣子空间,快来和 Agent 一起开始你的工作吧!https://www.coze.cn/space-preview?invite_code=A8IT4MUE\",\n\"isAccepted\": false\n}\n]\n}\n]\n},\n\"analytics\": {\n\"heatmap\": [\n{\n\"topic\": \"AI技术\",\n\"percentage\": \"45%\",\n\"color\": \"#3da9fc\",\n\"count\": \"16\"\n},\n{\n\"topic\": \"工作讨论\",\n\"percentage\": \"25%\",\n\"color\": \"#4361ee\",\n\"count\": \"9\"\n},\n{\n\"topic\": \"工具推荐\",\n\"percentage\": \"15%\",\n\"color\": \"#00b4d8\",\n\"count\": \"5\"\n},\n{\n\"topic\": \"其他\",\n\"percentage\": \"15%\",\n\"color\": \"#7209b7\",\n\"count\": \"5\"\n}\n],\n\"chattyRanking\": [\n{\n\"rank\": 1,\n\"name\": \"苍何\",\n\"count\": \"7\",\n\"characteristics\": [\"技术分享\", \"新闻发布\"],\n\"commonWords\": [\"AI\", \"开源\", \"熬夜\"]\n},\n{\n\"rank\": 2,\n\"name\": \"AHapi²⁰²⁵\",\n\"count\": 
\"6\",\n\"characteristics\": [\"幽默评论\", \"资源分享\"],\n\"commonWords\": [\"旺柴\", \"加班\", \"神器\"]\n},\n{\n\"rank\": 3,\n\"name\": \"贾👦🏻\",\n\"count\": \"3\",\n\"characteristics\": [\"问题解答\", \"邀请码分享\"],\n\"commonWords\": [\"源码\", \"购买\", \"邀请\"]\n}\n],\n\"nightOwl\": {\n\"name\": \"苍何\",\n\"title\": \"熬夜冠军\",\n\"latestTime\": \"09:42:54\",\n\"messageCount\": \"7\",\n\"lastMessage\": \"我熬夜写了这一篇[旺柴]\"\n}\n},\n\"wordCloud\": {\n\"words\": [\n{\n\"text\": \"AI\",\n\"size\": 42,\n\"color\": \"#00b4d8\",\n\"rotation\": 0\n},\n{\n\"text\": \"熬夜\",\n\"size\": 36,\n\"color\": \"#4361ee\",\n\"rotation\": -15\n},\n{\n\"text\": \"开源\",\n\"size\": 32,\n\"color\": \"#00b4d8\",\n\"rotation\": 15\n},\n{\n\"text\": \"Qwen3\",\n\"size\": 28,\n\"color\": \"#3da9fc\",\n\"rotation\": -10\n},\n{\n\"text\": \"Vidu\",\n\"size\": 26,\n\"color\": \"#3da9fc\",\n\"rotation\": 10\n},\n{\n\"text\": \"清理\",\n\"size\": 24,\n\"color\": \"#7209b7\",\n\"rotation\": -5\n},\n{\n\"text\": \"邀请码\",\n\"size\": 22,\n\"color\": \"#7209b7\",\n\"rotation\": 5\n}\n],\n\"legend\": [\n{\"color\": \"#00b4d8\", \"label\": \"技术 相关词汇\"},\n{\"color\": \"#4361ee\", \"label\": \"生活 相关词汇\"},\n{\"color\": \"#7209b7\", \"label\": \"工具 相关词汇\"}\n]\n}\n},\n\"footer\": {\n\"dataSource\": \"群聊聊天记录\",\n\"generationTime\": \"2025-04-29 16:00:00\",\n\"statisticalPeriod\": \"2025-04-29 07:03:10 - 15:36:25\",\n\"disclaimer\": \"本报告内容基于群聊公开讨论,如有不当内容或侵权问题请联系管理员处理。\"\n}\n}\n```" + + # 加载模板 + html = """ + + + + + [群/用户名称]日报 - [日期] + + + +
+

[群/用户名称]日报

+

[日期]

+
+ 总消息数:[数量] + 活跃用户:[数量] + 时间范围:[时间范围] +
+
+ + + + +
+

今日讨论热点

+
+ + + + +
+
+ + +
+

实用教程与资源分享

+
+ + + + +
+
+ + +
+

重要消息汇总

+
+ + + + +
+
+ + +
+

有趣对话或金句

+
+ + + + +
+
+ + +
+

问题与解答

+
+ + + + +
+
+ + +
+

群内数据可视化

+ + +

话题热度

+
+ + + + + + +
+ + + +
+

话唠榜

+
+ + +
+
+ + + +

熬夜冠军

+
+ + + +
+
+ + +
+

热门词云

+
+ +
+ + + + + +
+ +
+ + + +
+
+
+ + + + + + + + """ + + json_data = json_data[7:-3] + # 清洗json_data + # 判断是否是转义的换行符 + if '\n' in json_data: + json_data = json_data.replace('\n', '\n') + + + else: + json_data = json_data.replace(r'\"', '"').replace(r"\n", '\n') + + # print(json_data) + + # 使用正则表达式查找json字符串 + pattern = re.compile('{.*}', flags=re.IGNORECASE | re.MULTILINE | re.S) + print(pattern.search(json_data).group()) + + json_data = json.loads(pattern.search(json_data).group()) + # json_data = json.loads(json_data) + + # print(json_data) + + # print(json.dumps(json_data,indent=4, ensure_ascii=False)) + + # 替换头部信息 + header = json_data['header'] + html = html.replace('[群/用户名称]日报', f"{header['title']}报告") + html = html.replace('[日期]', header['date']) + html = html.replace('总消息数:[数量]', f"总消息数:{header['metaInfo']['totalMessages']}") + html = html.replace('活跃用户:[数量]', f"活跃用户:{header['metaInfo']['activeUsers']}") + html = html.replace('时间范围:[时间范围]', f"时间范围:{header['metaInfo']['timeRange']}") + + # 处理热点话题 + hot_topics = [] + for topic in json_data['sections']['hotTopics']['items']: + keywords = ''.join([f'{kw}' for kw in topic['keywords']]) + hot_topics.append(f""" +
+

{topic['name']}

+
{topic['category']}
+

{topic['summary']}

+
+ {keywords} +
+
提及次数:{topic['mentions']}
+
""") + html = html.replace('', '\n'.join(hot_topics)) + + # 处理教程资源 + tutorials = [] + for tut in json_data['sections']['tutorials']['items']: + points = ''.join([f'
  • {p}
  • ' for p in tut['keyPoints']]) + tutorials.append(f""" +
    +
    {tut['type']}
    +

    {tut['title']}

    +
    + 分享者:{tut['sharedBy']} + +
    +

    {tut['summary']}

    +
    +

    要点:

    + +
    + +
    分类:{tut['category']}
    +
    """) + html = html.replace('', '\n'.join(tutorials)) + + # 处理重要消息 + messages = [] + for msg in json_data['sections']['importantMessages']['items']: + messages.append(f""" +
    +
    + {msg['time']} + {msg['sender']} + {msg['type']} + 优先级:{msg['priority']} +
    +

    {msg['content']}

    +
    +

    {msg['fullContent']}

    +
    +
    """) + html = html.replace('', '\n'.join(messages)) + + # 处理对话 + dialogues = [] + for dia in json_data['sections']['dialogues']['items']: + messages = ''.join([f""" +
    +
    + {m['speaker']} + {m['time']} +
    +

    {m['content']}

    +
    """ for m in dia['messages']]) + dialogues.append(f""" +
    +
    {dia['type']}
    +
    + {messages} +
    +
    {dia['highlight']}
    +
    相关话题:{dia['relatedTopic']}
    +
    """) + html = html.replace('', '\n'.join(dialogues)) + + # 处理问答 + qas = [] + for qa in json_data['sections']['qa']['items']: + tags = ''.join([f'{tag}' for tag in qa['question']['tags']]) + answers = ''.join([f""" +
    +
    + {ans['responder']} + {ans['time']} + {"最佳回答" if ans['isAccepted'] else ""} +
    +

    {ans['content']}

    +
    """ for ans in qa['answers']]) + qas.append(f""" +
    +
    +
    + {qa['question']['asker']} + {qa['question']['time']} +
    +

    {qa['question']['content']}

    +
    + {tags} +
    +
    +
    + {answers} +
    +
    """) + html = html.replace('', '\n'.join(qas)) + + # 处理数据可视化 + heatmap = [] + colors = ['#3da9fc', '#f25f4c', '#7209b7', '#e53170', '#00b4d8', '#4cc9f0'] + for i, topic in enumerate(json_data['sections']['analytics']['heatmap']): + color = colors[i % len(colors)] + heatmap.append(f""" +
    +
    {topic['topic']}
    +
    {topic['percentage']}%
    +
    +
    +
    +
    {topic['count']}条消息
    +
    """) + html = html.replace('', '\n'.join(heatmap)) + + # 处理话唠榜 + chatty = [] + for rank in json_data['sections']['analytics']['chattyRanking']: + words = ''.join([f'{w}' for w in rank['commonWords']]) + characteristics = ''.join([f'{c}' for c in rank['characteristics']]) + chatty.append(f""" +
    +
    {rank['rank']}
    +
    +
    {rank['name']}
    +
    发言数:{rank['count']}
    +
    + {characteristics} +
    +
    + {words} +
    +
    +
    """) + html = html.replace('', '\n'.join(chatty)) + + # 处理熬夜冠军 + nightOwl = json_data['sections']['analytics']['nightOwl'] + + f = f""" +
    +
    👑
    +
    +
    {nightOwl['name']}
    +
    {nightOwl['title']}
    +
    最晚活跃时间:{nightOwl['latestTime']}
    +
    深夜消息数:{nightOwl['messageCount']}
    +
    {nightOwl['lastMessage']}
    +
    注:熬夜时段定义为23:00-06:00,已考虑不同时区
    +
    """ + + html = html.replace('', '\n' + f + '\n') + + # 处理词云 + words = [] + for word in json_data['sections']['wordCloud']['words']: + words.append(f""" + {word['text']}""") + html = html.replace('', '\n'.join(words)) + + # 处理词云的分类 + types = [] + for typ in json_data['sections']['wordCloud']['legend']: + types.append(f"""
    + + {typ['label']} +
    + """ + ) + + html = html.replace('', '\n'.join(types)) + + # 处理页脚 + footer = json_data['footer'] + html = html.replace('[群名称]', footer['dataSource']) + html = html.replace('[当前时间]', footer['generationTime']) + html = html.replace('[日期] [时间范围]', footer['statisticalPeriod']) + + return html \ No newline at end of file diff --git a/pywxdump/api/api_utils/llm.py b/pywxdump/api/api_utils/llm.py new file mode 100644 index 0000000..90db6ea --- /dev/null +++ b/pywxdump/api/api_utils/llm.py @@ -0,0 +1,476 @@ +# LLM api相关 +import enum +import json +import os +import re + +import httpx +import openai +from openai import OpenAI + +from pywxdump.api.remote_server import gc + + + + +class BaseLLMApi(object): + def __init__(self,api_key,base_url=None): + # 设置名字,以供其他函数使用 !!!不使用, + # self.api_key_string = "API_KEY" + # self.base_url_string = "BASE_URL" + # self.env_api_key_string = self.__class__.__name__ + "_" + self.api_key_string + # self.env_base_url_string = self.__class__.__name__ + "_" + self.base_url_string + # self.setting_string = self.__class__.__name__ + "_setting" + + + self.API_KEY = api_key + self.BASE_URL = base_url + + self.module = ( + + )#模型列表 + + + self.HTTP_CLIENT = None + self.isReady = False + self.message = [] + + + # 执行初始化方法 + self.set_default_fn() + + + + + def set_default_fn(self): + if not self.module: + self.set_default_module() + if not self.BASE_URL: + self.set_default_base_url() + if not self.message: + self.set_default_message() + + def set_default_module(self): + self.module = () + + def set_default_base_url(self): + self.BASE_URL = "" + + def set_default_message(self): + """ + 要确保message中至少有两个元素,第一个元素为系统消息,第二个元素为用户消息,且第二个元素中有{{content}} + """ + self.message = [ + {"role": "system", "content": "You are a helpful assistant"}, + {"role": "user", "content": "Hello {{content}}"}, + ] + + + + def set_module(self, module): + self.module = module + + def set_api_key(self, api_key): + self.API_KEY = api_key + + def set_base_url(self, base_url): + 
self.BASE_URL = base_url + + + def set_message(self, message): + self.message = message + + + + + + # def ready(self): + # if not self.API_KEY: + # # 从配置中获取,这个功能必须配合网页API开启后才能使用 + # self.API_KEY = gc.get_conf(gc.at,self.setting_string)[self.api_key_string] + # if not self.API_KEY: + # raise RuntimeError("API_KEY must be set") + # # 设置环境变量 + # os.environ[self.env_api_key_string] = self.API_KEY + # os.environ[self.env_base_url_string] = self.BASE_URL + # + # self.isReady = True + # return + + + def ready(self): + if not self.BASE_URL and not self.API_KEY: + raise RuntimeError("API_KEY or BASE_URL must be set") + self.isReady = True + + + + @property + def http_client(self): + if not self.isReady: + self.ready() + try: + self.HTTP_CLIENT = OpenAI(api_key=self.API_KEY, base_url=self.BASE_URL) + return self.HTTP_CLIENT + except: + raise RuntimeError("HTTP_CLIENT set not successfully,please check!") + + + + + + + + def send_msg(self, message=None, module=None, stream=False): + """ + 向大模型发送信息 + 如果非流式返回,则直接输出内容, + 否则使用openai文档规定格式输出 + """ + + if message is None: + self.message[1]["content"] = self.message[1].get("content").replace("{{content}}", " ") + message = self.message + else: + self.message[1]["content"] = self.message[1].get("content").replace("{{content}}", message) + message = self.message + + response = self.http_client.chat.completions.create( + model=self.module[module], + messages=message, + stream=stream + ) + if not stream: + return self.process_msg(response.choices[0].message.content) + else: + return self.process_msg(response.response.read().decode("utf-8")) + + + def process_msg(self,x): + return x + +class DeepSeekApi(BaseLLMApi): + + + def set_default_module(self): + self.module = ( + "deepseek-chat", + "deepseek-reasoner" + ) + + def set_default_base_url(self): + self.BASE_URL = "https://api.deepseek.com" + + def set_default_message(self): + self.message = [ + {"role": "system", "content": 
"""从内容中提取出以下信息,可以根据内容多少进行列表扩展或增加,请仔细思索怎么填充内容,如果没有给到合理的名称或其他内容,就以合理的方式思考并添加。 + + - 最后的输出值使用严格的json格式 + + - 不要私自添加json块或减少json块 + + - 内容中不要使用换行符,如果内容原本有多个换行符,删掉原本多余的的换行符,只保留一个换行符再加入进去。 + + - 内容中如果有很奇怪的字符,比如''\''或''\\''影响代码编译的字符,删除原本的字符再加入进去。 + + + + { + + "header": { + + "title": "[群名称]报告", + + "date": "[日期]", + + "metaInfo": { + + "totalMessages": "[数量]", + + "activeUsers": "[数量]", + + "timeRange": "[时间范围]" + + } + + }, + + "sections": { + + "hotTopics": { + + "items": [ + + { + + "name": "[热点话题名称]", + + "category": "[话题分类]", + + "summary": "[简要总结(50-100字)]", + + "keywords": ["[关键词1]", "[关键词2]"], + + "mentions": "[次数]" + + } + + ] + + }, + + "tutorials": { + + "items": [ + + { + + "type": "[TUTORIAL | NEWS | RESOURCE]", + + "title": "[分享的教程或资源标题]", + + "sharedBy": "[昵称]", + + "time": "[时间]", + + "summary": "[内容简介]", + + "keyPoints": ["[要点1]", "[要点2]"], + + "url": "[URL]", + + "domain": "[域名]", + + "category": "[分类]" + + } + + ] + + }, + + "importantMessages": { + + "items": [ + + { + + "time": "[消息时间]", + + "sender": "[发送者昵称]", + + "type": "[NOTICE | EVENT | ANNOUNCEMENT | OTHER]", + + "priority": "[高|中|低]", + + "content": "[消息内容]", + + "fullContent": "[完整通知内容]" + + } + + ] + + }, + + "dialogues": { + + "items": [ + + { + + "type": "[DIALOGUE | QUOTE]", + + "messages": [ + + { + + "speaker": "[说话者昵称]", + + "time": "[发言时间]", + + "content": "[消息内容]" + + } + + ], + + "highlight": "[对话中的金句或亮点]", + + "relatedTopic": "[某某话题]" + + } + + ] + + }, + + "qa": { + + "items": [ + + { + + "question": { + + "asker": "[提问者昵称]", + + "time": "[提问时间]", + + "content": "[问题内容]", + + "tags": ["[相关标签1]", "[相关标签2]"] + + }, + + "answers": [ + + { + + "responder": "[回答者昵称]", + + "time": "[回答时间]", + + "content": "[回答内容]", + + "isAccepted": true + + } + + ] + + } + + ] + + }, + + "analytics": { + + "heatmap": [ + + { + + "topic": "[话题名称]", + + "percentage": "[百分比]", + + "color": "#3da9fc", + + "count": "[数量]" + + } + + ], + + "chattyRanking": [ + + { + + "rank": 1, + + "name": "[群友昵称]", + + 
"count": "[数量]", + + "characteristics": ["[特点1]", "[特点2]"], + + "commonWords": ["[常用词1]", "[常用词2]"] + + } + + ], + + "nightOwl": { + + "name": "[熬夜冠军昵称]", + + "title": "[熬夜冠军称号]", + + "latestTime": "[时间]", + + "messageCount": "[数量]", + + "lastMessage": "[最后一条深夜消息内容]" + + } + + }, + + "wordCloud": { + + "words": [ + + { + + "text": "[关键词1]", + + "size": 38, + + "color": "#00b4d8", + + "rotation": -15 + + } + + ], + + "legend": [ + + {"color": "#00b4d8", "label": "[分类1] 相关词汇"}, + + {"color": "#4361ee", "label": "[分类2] 相关词汇"} + + ] + + } + + }, + + "footer": { + + "dataSource": "[群名称]聊天记录", + + "generationTime": "[当前时间]", + + "statisticalPeriod": "[日期] [时间范围]", + + "disclaimer": "本报告内容基于群聊公开讨论,如有不当内容或侵权问题请联系管理员处理。" + + } + + }"""}, + {"role": "user", "content": """你好,以下是我要提取的内容: {{content}}"""}, + + ] + + + def process_msg(self,x): + """ + 识别json格式,并返回字典 + """ + pattern = re.compile('{.*}', flags=re.IGNORECASE | re.MULTILINE | re.S) + # print(pattern.search(json_data).group()) + + json_data = json.loads(pattern.search(x).group()) + return json_data + + + + +# if __name__ == "__main__": +# deepseek_api = DeepSeekApi("sk-2ed4377a895d4ce18e086258c254fc8e") +# +# response = deepseek_api.send_msg(module=0,message="""""") +# print(response) + + + + + + + + + + + diff --git a/pywxdump/api/export/exportJSON.py b/pywxdump/api/export/exportJSON.py index f04c179..587f375 100644 --- a/pywxdump/api/export/exportJSON.py +++ b/pywxdump/api/export/exportJSON.py @@ -5,6 +5,7 @@ # Author: xaoyaoo # Date: 2024/04/20 # ------------------------------------------------------------------------------- +import datetime import json import os from pywxdump.db import DBHandler @@ -39,5 +40,135 @@ def export_json(wxid, outpath, db_config, my_wxid="我", indent=4): return True, f"导出成功: {outpath}" +def export_json_mini(wxid, outpath, db_config, my_wxid="我", indent=4): + # 确保输出目录存在 + if not os.path.exists(outpath): + outpath = os.path.join(os.getcwd(), "export_mini" + os.sep + wxid) + 
os.makedirs(outpath, exist_ok=True) + + db = DBHandler(db_config, my_wxid) + + # 获取消息总数 + count = db.get_msgs_count(wxid) + chatCount = count.get(wxid, 0) + if chatCount == 0: + return False, "没有聊天记录" + + users = {} + page_size = chatCount + 1 # 保持与原函数一致的分页逻辑 + + for i in range(0, chatCount, page_size): + start_index = i + data, users_t = db.get_msgs(wxid, start_index, page_size) + users.update(users_t) # 合并用户信息 + + if not data: + continue + + # 构建简化数据 + mini_data = [] + for msg in data: + # 获取昵称(优先用备注,没有则用昵称,最后用wxid) + user_info = users.get(msg.get("talker"), {}) + nickname = user_info.get("remark") or user_info.get("nickname") or msg.get("talker") + + mini_msg = { + "nickname": nickname, + "message": msg.get("msg", ""), + "time": msg.get("CreateTime", "") + } + mini_data.append(mini_msg) + + # 保存简化后的文件 + save_path = os.path.join(outpath, f"{wxid}_mini_{i}_{i + page_size}.json") + with open(save_path, "w", encoding="utf-8") as f: + json.dump(mini_data, f, ensure_ascii=False, indent=indent) + + return True, f"简化版导出成功: {outpath}" + + +def export_json_mini_time_limit(wxid, outpath, db_config, my_wxid="我", + start_createtime=None, end_createtime=None, indent=4): + """ + 带时间过滤的简化版聊天记录导出 + + :param start_createtime: 开始时间(格式:2025-4-30 16:55:01) + :param end_createtime: 结束时间(格式:2025-4-30 16:55:01) + """ + # 创建输出目录 + if not os.path.exists(outpath): + outpath = os.path.join(os.getcwd(), "export_mini" + os.sep + wxid) + os.makedirs(outpath, exist_ok=True) + + # 初始化数据库连接 + db = DBHandler(db_config, my_wxid) + + # 时间格式转换 + def str_to_timestamp(time_str): + if not time_str: + return None + try: + dt = datetime.datetime.strptime(time_str, "%Y-%m-%d %H:%M:%S") + return int(dt.timestamp()) + except ValueError: + raise ValueError(f"无效时间格式: {time_str},示例: 2025-04-30 16:55:01") + + start_ts = str_to_timestamp(start_createtime) + end_ts = str_to_timestamp(end_createtime) + + # 获取消息数据(带时间过滤) + all_data = [] + users = {} + page_size = 5000 # 每次获取5000条 + start_index = 0 + + while True: + 
# 获取分页数据(自动包含时间过滤条件) + data, users_t = db.get_msgs( + wxid, + start_index=start_index, + page_size=page_size, + start_createtime=start_ts, + end_createtime=end_ts + ) + + if not data: + break + + all_data.extend(data) + users.update(users_t) + start_index += page_size + + if not all_data: + return False, "指定时间段内没有聊天记录" + + # 构建简化数据结构 + mini_data = [] + for msg in all_data: + talker = msg.get("talker") + user_info = users.get(talker, {}) + + mini_msg = { + "sender": user_info.get("remark") or user_info.get("nickname") or talker, + "content": msg.get("msg", ""), + "timestamp": msg.get("CreateTime") + } + mini_data.append(mini_msg) + + # 生成带时间范围的文件名 + time_suffix = "" + if start_createtime or end_createtime: + start_part = start_createtime.replace(" ", "_").replace(":", "-") if start_createtime else "all" + end_part = end_createtime.replace(" ", "_").replace(":", "-") if end_createtime else "now" + time_suffix = f"_{start_part}_to_{end_part}" + filename = f"{wxid}_mini{time_suffix}_ai.json" + save_path = os.path.join(outpath, filename) + with open(save_path, "w", encoding="utf-8") as f: + json.dump(mini_data, f, ensure_ascii=False, indent=indent) + + return True, f"导出成功: {save_path}", filename + + + if __name__ == '__main__': pass diff --git a/pywxdump/api/remote_server.py b/pywxdump/api/remote_server.py index 630d996..b9eed9b 100644 --- a/pywxdump/api/remote_server.py +++ b/pywxdump/api/remote_server.py @@ -5,6 +5,8 @@ # Author: xaoyaoo # Date: 2024/01/02 # ------------------------------------------------------------------------------- +import datetime +import json import os import time import shutil @@ -12,6 +14,7 @@ from collections import Counter from urllib.parse import quote, unquote from typing import List, Optional +import fastapi.requests from pydantic import BaseModel from fastapi import APIRouter, Response, Body, Query, Request from starlette.responses import StreamingResponse, FileResponse @@ -20,8 +23,10 @@ import pywxdump from pywxdump import 
decrypt_merge, get_core_db from pywxdump.db import DBHandler from pywxdump.db.utils import download_file, dat2img +from .api_utils.html import HtmlController from .export import export_csv, export_json, export_html +from .export.exportJSON import export_json_mini, export_json_mini_time_limit from .rjson import ReJson, RqJson from .utils import error9999, gc, asyncError9999, rs_loger @@ -134,11 +139,17 @@ def get_msgs(wxid: str = Body(...), start: int = Body(...), limit: int = Body(.. """ my_wxid = gc.get_conf(gc.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") db_config = gc.get_conf(my_wxid, "db_config") db = DBHandler(db_config, my_wxid=my_wxid) - msgs, users = db.get_msgs(wxids=wxid, start_index=start, page_size=limit) + + start_createtime = datetime.datetime.strptime("2025-04-28 00:54:33", + "%Y-%m-%d %H:%M:%S").timestamp() + end_createtime = datetime.datetime.now().timestamp() + msgs, users = db.get_msgs(wxids=wxid, start_index=start, page_size=limit, ) # + return ReJson(0, {"msg_list": msgs, "user_list": users}) @@ -464,6 +475,51 @@ def get_export_json(wxid: str = Body(..., embed=True)): return ReJson(2001, body=ret) +class ExportJsonMiniRequest(BaseModel): + start_createtime: int + end_createtime: int + + +@rs_api.api_route('/export_json_mini_select_time', methods=["GET", 'POST']) +def get_export_json(wxid: str = Body(..., embed=True), time: ExportJsonMiniRequest = Body(..., embed=True)): + """ + 导出json,选择时间,迷你版本 + :return: + """ + + my_wxid = gc.get_conf(gc.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") + db_config = gc.get_conf(my_wxid, "db_config") + + if not wxid: + return ReJson(1002, body=f"username is required: {wxid}") + + outpath = os.path.join(gc.work_path, "export", my_wxid, "json", wxid) + if not os.path.exists(outpath): + os.makedirs(outpath) + + start_createtime = time.start_createtime / 1000.0 # 格式为 时间戳 + end_createtime = time.end_createtime / 1000.0 + + + start_createtime = 
datetime.datetime.fromtimestamp(float(start_createtime)).strftime("%Y-%m-%d %H:%M:%S") #转换成日期格式 + end_createtime = datetime.datetime.fromtimestamp(float(end_createtime)).strftime("%Y-%m-%d %H:%M:%S") + + code, ret, filename = export_json_mini_time_limit(wxid, outpath, db_config, my_wxid=my_wxid, + start_createtime=start_createtime, end_createtime=end_createtime) + if code: + # 成功创建,执行生成可视化页面的逻辑 + # with open(os.path.join(gc.work_path, "export", my_wxid, "html", wxid, filename), "w", encoding="utf-8") as f: + # f.write( + # #现在是fake + # HtmlController().create_html(json_data=None) + # ) + return ReJson(0, ret) + + else: + return ReJson(2001, body=ret) + + class ExportHtmlRequest(BaseModel): wxid: str @@ -502,6 +558,251 @@ def get_export_html(wxid: str = Body(..., embed=True)): # end 导出聊天记录 ******************************************************************************************************* + +# AI可视化生成 ********************************************** +#TODO:查询当前登录用户文件夹下是否有导出数据,是否已经存在ui界面 + +def recursive_listdir(path,list:List): + """ + 遍历文件夹获取所有文件 包括子目录 + """ + + files = os.listdir(path) + for file in files: + file_path = os.path.join(path, file) + if os.path.isdir(file_path): + recursive_listdir(file_path,list) + elif os.path.isfile(file_path): + list.append(file_path) + + + +def de_weight(l1:List,l2:List): + """ + 列表去重,针对特定对象 + """ + len1 = min(len(l1), len(l2)) + len1 = len1-1 if len1 > 1 else len1 + for i in range(len1): + if l1[i]["wxid"] == l2[i]["wxid"] and l1[i]["start_time"] == l2[i]["start_time"] and l1[i]["end_time"] == l2[i][ + "end_time"]: + l1[i]["flag"] = True + l2.pop(i) + + return l1+l2 + + + + + + +@rs_api.api_route('/ai_ui_json_list', methods=["GET", 'POST']) +def get_ai_ui_json_list(): + """ + 获取可视化json文件列表 + """ + my_wxid = gc.get_conf(gc.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") + + + # 遍历json文件夹,查找最后带_ai的文件 + work_path = os.path.join(gc.work_path, "export", my_wxid, "json") + if not 
os.path.exists(work_path): + os.makedirs(work_path) + file_list:List[str]=[] + recursive_listdir(work_path,list=file_list) + + # 解析文件名 + ui_list = [] + for file in file_list: + if file.split('.')[0].split('_')[-1] == 'ai': + # 可进行ai可视化的文件 + ui_list.append(file) + # print(ui_list) + + # 构造字典对象 + ui_dict_list = [] + for s in ui_list: + wxid = s.split('\\')[-1].split('.')[0].split('_')[0] if "@" in s.split('\\')[-1] else \ + s.split('\\')[-1].split('.')[0].split('_')[1] # wxid + time_start = " ".join(s.split('\\')[-1].split('.')[0].split('_')[2:4]) if "@" in s.split('\\')[ + -1] else " ".join(s.split('\\')[-1].split('.')[0].split('_')[3:5]) # time start + time_end = " ".join(s.split('\\')[-1].split('.')[0].split('_')[5:7]) if "@" in s.split('\\')[-1] else " ".join( + s.split('\\')[-1].split('.')[0].split('_')[6:8]) # time end + ui_dict_list.append({"wxid": wxid, "start_time": time_start, "end_time": time_end, "flag": False}) + + + + # 遍历ai_json文件夹,获取所有文件名 + work_path = os.path.join(gc.work_path, "export", my_wxid, "ai_json") + if not os.path.exists(work_path): + os.makedirs(work_path) + file_list:List[str]=[] + recursive_listdir(work_path,list=file_list) + + # 解析文件名 + ai_list = [] + for file in file_list: + ai_list.append(file) + + ai_dict_list = [] + + # 构造字典对象 + for s in ai_list: + wxid = s.split('\\')[-1].split('.')[0].split('_')[0] if "@" in s.split('\\')[-1] else \ + s.split('\\')[-1].split('.')[0].split('_')[1] # wxid + time_start = " ".join(s.split('\\')[-1].split('.')[0].split('_')[2:4]) if "@" in s.split('\\')[ + -1] else " ".join(s.split('\\')[-1].split('.')[0].split('_')[3:5]) # time start + time_end = " ".join(s.split('\\')[-1].split('.')[0].split('_')[5:7]) if "@" in s.split('\\')[-1] else " ".join( + s.split('\\')[-1].split('.')[0].split('_')[6:8]) # time end + ai_dict_list.append({"wxid": wxid, "start_time": time_start, "end_time": time_end, "flag": True}) + + # # 合并两个字典列表 + # dict_list = ui_dict_list + ai_dict_list + # print(ui_dict_list) + # 
print(ai_dict_list) + + # 去重 + dict_list = de_weight(ui_dict_list,ai_dict_list) + + return ReJson(0,body={"items":dict_list}) + + + + +def get_file_path(work_path: str, file_name: str) -> str | None: + """ + 获取ai_json文件路径 + """ + # 遍历文件夹内的所有文件,找到对应文件名的文件路径 + + + path_list = os.listdir(work_path) + for path in path_list: + full_path = os.path.join(work_path, path) + if os.path.isfile(full_path) and path == file_name: + return full_path + elif os.path.isdir(full_path): + result = get_file_path(full_path, file_name) + if result is not None: + return result + return None + +class FileNameRequest(BaseModel): + wxid: str + start_time: str + end_time: str + +@rs_api.api_route('/db_to_ai_json', methods=["GET", 'POST']) +def db_to_ai_json(file_name: FileNameRequest = Body(..., embed=True)): + """ + 导出聊天记录到ai_json + """ + start_time = file_name.start_time + end_time = file_name.end_time + wxid = file_name.wxid + + + file_name = wxid + '_mini_' + start_time.replace(' ', '_').replace(':', '-') + '_to_' + end_time.replace(' ', '_').replace(':', '-') + '_ai' + # file_name = wxid + '_aiyes_' + start_time.replace(' ', '_').replace(':', '-') + '_' + end_time.replace(' ', '_').replace(':', '-') + file_name = file_name + '.json' + + + + my_wxid = gc.get_conf(gc.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") + + + + result = get_file_path(os.path.join(gc.work_path, "export", my_wxid, "json"), file_name) + + + if result is None: + return ReJson(1002, body=f"file not found: {file_name}") + + # 获取文件内容 + with open(result, "r", encoding="utf-8") as f: + json_data = json.load(f) + if not json_data: + return ReJson(1002, body=f"json_data is empty: {file_name}") + + #通过llm处理,生成ai_json + from .api_utils.llm import DeepSeekApi + # 获取apikey + apikey = gc.get_conf(my_wxid, "deepseek_setting").get("API_KEY") + if not apikey: + return ReJson(1002, body="deepseek_setting.API_KEY is required") + llm_api = DeepSeekApi(api_key=apikey) + json_data = 
llm_api.send_msg(module=0,message=json.dumps(json_data)) + + # 保存到ai_json + ai_json_path = os.path.join(gc.work_path, "export", my_wxid, "ai_json") + if not os.path.exists(ai_json_path): + os.makedirs(ai_json_path) + + assert isinstance(ai_json_path, str) + file_name = wxid + '_aiyes_' + start_time.replace(' ', '_').replace(':', '-') + '_to_' + end_time.replace(' ', + '_').replace( + ':', '-') + file_name = file_name + '.json' + ai_json_file_path = os.path.join(ai_json_path, file_name) + with open(ai_json_file_path, "w", encoding="utf-8") as f: + json.dump(json_data, f, ensure_ascii=False) + + return ReJson(0, body=f"save to {ai_json_file_path}") + + + +class FileNameGetUiRequest(BaseModel): + wxid: str + start_time: str + end_time: str + +# 获取可视化界面json文件 +@rs_api.api_route('/get_ui_json', methods=["GET", 'POST']) +def get_ui_json(file_name: FileNameGetUiRequest = Body(..., embed=True)): + """ + 获取可视化界面json文件 + """ + # print(file_name.wxid) + + start_time = file_name.start_time + end_time = file_name.end_time + wxid = file_name.wxid if "@" in file_name.wxid else "wxid_" + file_name.wxid + + + # start_time = datetime.datetime.fromtimestamp(float(start_time)).strftime("%Y-%m-%d %H:%M:%S") #转换成日期格式 + # end_time = datetime.datetime.fromtimestamp(float(end_time)).strftime("%Y-%m-%d %H:%M:%S") + + file_name = wxid + '_aiyes_' + start_time.replace(' ', '_').replace(':', '-') + '_to_' + end_time.replace(' ', '_').replace(':', '-') + file_name = file_name + '.json' + + + my_wxid = gc.get_conf(gc.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") + + result = get_file_path(os.path.join(gc.work_path, "export", my_wxid, "ai_json"), file_name) + + if result is None: + return ReJson(1002, body=f"file not found: {file_name}") + + # 获取文件内容 + with open(result, "r", encoding="utf-8") as f: + json_data = json.load(f) + if not json_data: + return ReJson(1002, body=f"json_data is empty: {file_name}") + + return ReJson(0, body=json_data) + + + + + + +# AI可视化生成 
******************************************************************************************************* + + + # start 聊天记录分析api ************************************************************************************************** class DateCountRequest(BaseModel): wxid: str = "" @@ -659,4 +960,62 @@ def get_readme(): else: return ReJson(2001, body="status_code is not 200") + +class DifyApiModel(BaseModel): + api_key: str + base_url: str + + +@rs_api.api_route('/dify_setting', methods=["GET", 'POST']) +@error9999 +def dify_setting(request: Request = None, dify: DifyApiModel = Body(None, embed=True)): + """ + dify设置 + """ + + if request.method == "GET": + my_wxid = gc.get_conf(gc.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") + gc.get_conf(my_wxid, "dify_setting") + + return ReJson(0, body=gc.get_conf(my_wxid, "dify_setting")) + + elif request.method == "POST": + my_wxid = gc.get_conf(gc.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") + if not dify.api_key and not dify.base_url: + return ReJson(1002, body="dify_setting is required") + + gc.set_conf(my_wxid, "dify_setting", {"API_KEY": dify.api_key, "BASE_URL": dify.base_url}) + return ReJson(0, body=gc.get_conf(my_wxid, "dify_setting")) + return ReJson(2001, body="status_code is not 200") + + +class DeepSeekApiModel(BaseModel): + api_key: str + + +@rs_api.api_route('/deepseek_setting', methods=["GET", 'POST']) +@error9999 +def deepseek_setting(request: Request = None, deepseek: DeepSeekApiModel = Body(None, embed=True)): + """ + deepseek设置 + """ + if request.method == "GET": + my_wxid = gc.get_conf(gc.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") + gc.get_conf(my_wxid, "deepseek_setting") + + return ReJson(0, body=gc.get_conf(my_wxid, "deepseek_setting")) + + elif request.method == "POST": + my_wxid = gc.get_conf(gc.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") + if not deepseek or not 
deepseek.api_key: + return ReJson(1002, body="deepseek_setting is required") + + gc.set_conf(my_wxid, "deepseek_setting", {"API_KEY": deepseek.api_key}) + return ReJson(0, body=gc.get_conf(my_wxid, "deepseek_setting")) + return ReJson(2001, body="status_code is not 200") + # END 关于、帮助、设置 *************************************************************************************************** diff --git a/pywxdump/api/utils.py b/pywxdump/api/utils.py index a1200a3..a44e01e 100644 --- a/pywxdump/api/utils.py +++ b/pywxdump/api/utils.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*-# # ------------------------------------------------------------------------------- -# Name: utils.py +# Name: api_utils.py # Description: # Author: xaoyaoo # Date: 2024/01/16 diff --git a/pywxdump/db/__init__.py b/pywxdump/db/__init__.py index e20180a..737069b 100644 --- a/pywxdump/db/__init__.py +++ b/pywxdump/db/__init__.py @@ -59,6 +59,8 @@ class DBHandler(MicroHandler, MediaHandler, OpenIMContactHandler, PublicMsgHandl "talker": talker, "room_name": StrTalker, "msg": msg, "src": src, "extra": {}, "CreateTime": CreateTime, } """ + + msgs0, wxid_list0 = self.get_msg_list(wxids=wxids, start_index=start_index, page_size=page_size, msg_type=msg_type, msg_sub_type=msg_sub_type, start_createtime=start_createtime, diff --git a/pywxdump/db/dbMSG.py b/pywxdump/db/dbMSG.py index ed2f075..4d9e0d3 100644 --- a/pywxdump/db/dbMSG.py +++ b/pywxdump/db/dbMSG.py @@ -103,9 +103,16 @@ class MsgHandler(DatabaseBase): f"{sql_sub_type}" f"{sql_start_createtime}" f"{sql_end_createtime}" - f"ORDER BY CreateTime ASC LIMIT ?,?" + f"ORDER BY CreateTime ASC LIMIT ? OFFSET ?" 
) - param = param + (start_index, page_size) + + param = param + ( page_size,start_index) + # # 测试 + # print(sql + "\n" + " ".join([str(i) for i in param])) + # print(sql + "\n" + " ".join([str(i) for i in param])) + # print(sql + "\n" + " ".join([str(i) for i in param])) + + result = self.execute(sql, param) if not result: return [], [] diff --git a/pywxdump/db/utils/__init__.py b/pywxdump/db/utils/__init__.py index ecbb005..90ce641 100644 --- a/pywxdump/db/utils/__init__.py +++ b/pywxdump/db/utils/__init__.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*-# # ------------------------------------------------------------------------------- # Name: __init__.py.py -# Description: db.utils +# Description: db.api_utils # Author: xaoyaoo # Date: 2024/07/23 # ------------------------------------------------------------------------------- diff --git a/pywxdump/ui/__init__.py b/pywxdump/ui/__init__.py deleted file mode 100644 index 6dab261..0000000 --- a/pywxdump/ui/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -# -*- coding: utf-8 -*-# -# ------------------------------------------------------------------------------- -# Name: __init__.py.py -# Description: -# Author: xaoyaoo -# Date: 2023/12/03 -# ------------------------------------------------------------------------------- -# from .view_chat import app_show_chat, get_user_list, export - -if __name__ == '__main__': - pass diff --git a/pywxdump/wx_core/utils/common_utils.py b/pywxdump/wx_core/utils/common_utils.py index 7b899e3..4456e7c 100644 --- a/pywxdump/wx_core/utils/common_utils.py +++ b/pywxdump/wx_core/utils/common_utils.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*-# # ------------------------------------------------------------------------------- -# Name: utils.py +# Name: api_utils.py # Description: # Author: xaoyaoo # Date: 2023/12/25 diff --git a/requirements.txt b/requirements.txt index 9cb47ac..d235db5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,7 @@ +openai==1.77.0 setuptools wheel +pycryptodome 
pycryptodomex pywin32 silk-python @@ -15,4 +17,4 @@ pymem pydantic==2.7.0 fastapi uvicorn -python-dotenv \ No newline at end of file +python-dotenv diff --git a/temp.md b/temp.md new file mode 100644 index 0000000..30d8dac --- /dev/null +++ b/temp.md @@ -0,0 +1,5 @@ +- 导出json,可选时间 +- 单独导航栏 Ai可视化 +- 可以进行可视化的文件列表 +- 已经可视化的列表 +- 查看 diff --git a/test.html b/test.html new file mode 100644 index 0000000..e2700e6 --- /dev/null +++ b/test.html @@ -0,0 +1,864 @@ + + + + + + 恋爱报告报告 - 2025-04-29 至 2025-04-30 + + + +
    +

    恋爱报告报告

    +

    2025-04-29 至 2025-04-30

    +
    + 总消息数:500+ + 活跃用户:2 + 时间范围:00:05:04 至 22:20:26 +
    +
    + + + + +
    +

    今日讨论热点

    +
    + + +
    +

    恋爱沟通

    +
    情感交流
    +

    双方围绕情感需求、沟通方式和相互理解展开多次对话,涉及'控制欲''陪伴需求''表达方式'等核心矛盾点。典型对话如'你就不能说话吗'-'不困了,跟我聊聊天',体现双方对沟通频率的认知差异。

    +
    + 控制陪伴理解 +
    +
    提及次数:30+
    +
    + +
    +

    项目合作

    +
    学业协作
    +

    4月29日上午集中讨论服务设计项目开发事宜,涉及原型设计、比赛规划、老师对接等具体内容。昏沉沉的提出开发支持意愿,要哄宝宝开心则负责团队协调,双方就'高保真原型''互联网+大赛'等专业概念进行多次确认。

    +
    + 原型开发比赛 +
    +
    提及次数:50+
    +
    + + +
    +
    + + +
    +

    实用教程与资源分享

    +
    + + +
    +
    RESOURCE
    +

    产品设计开发流程

    +
    + 分享者:昏沉沉的 + +
    +

    讨论软件产品开发各阶段要点,强调从调研到高保真原型的设计闭环

    +
    +

    要点:

    +
    • 功能闭环
    • UI完善
    • 比赛适配
    +
    + +
    分类:产品设计
    +
    + + +
    +
    + + +
    +

    重要消息汇总

    +
    + + +
    +
    + 2025-04-29 00:29:04 + 要哄宝宝开心,宝宝开心我就开心 + OTHER + 优先级:高 +
    +

    只要你做的不论是我,还是外人看来是爱我的

    +
    +

    完整情感需求表达

    +
    +
    + +
    +
    + 2025-04-29 10:15:17 + 昏沉沉的 + EVENT + 优先级:中 +
    +

    你说你男朋友做开发的 做出来原型的话可以让你男朋友看一下 可以开发

    +
    +

    项目合作具体提案

    +
    +
    + + +
    +
    + + +
    +

    有趣对话或金句

    +
    + + +
    +
    DIALOGUE
    +
    + +
    +
    + 要哄宝宝开心,宝宝开心我就开心 + 2025-04-29 00:07:44 +
    +

    不困了,跟我聊聊天

    +
    +
    +
    + 昏沉沉的 + 2025-04-29 00:07:42 +
    +

    不能跟你说话吗

    +
    +
    +
    沟通需求 vs 陪伴需求
    +
    相关话题:情感表达差异
    +
    + + +
    +
    + + +
    +

    问题与解答

    +
    + + +
    +
    +
    + 要哄宝宝开心,宝宝开心我就开心 + 2025-04-29 10:33:25 +
    +

    你开发要钱吗

    +
    + 项目费用 +
    +
    +
    + +
    +
    + 昏沉沉的 + 2025-04-29 10:45:18 + 最佳回答 +
    +

    那不就完了

    +
    +
    +
    + + +
    +
    + + +
    +

    群内数据可视化

    + + +

    话题热度

    +
    + + + + +
    +
    情感交流
    +
    45%%
    +
    +
    +
    +
    200+条消息
    +
    + +
    +
    项目讨论
    +
    30%%
    +
    +
    +
    +
    150+条消息
    +
    + + +
    + + + +
    +

    话唠榜

    +
    + +
    +
    1
    +
    +
    要哄宝宝开心,宝宝开心我就开心
    +
    发言数:300+
    +
    + 情感表达细节追问 +
    +
    + OK +
    +
    +
    + +
    +
    + + + +

    熬夜冠军

    +
    + + + +
    +
    👑
    +
    +
    昏沉沉的
    +
    深夜程序员
    +
    最晚活跃时间:01:42:38
    +
    深夜消息数:50+
    +
    对不起
    +
    注:熬夜时段定义为23:00-06:00,已考虑不同时区
    +
    + + +
    +
    + + +
    +

    热门词云

    +
    + +
    + + + + 开发 + + 睡觉 + + +
    + +
    + +
    + + 项目 相关词汇 +
    + +
    + + 情感 相关词汇 +
    + + +
    +
    +
    + + + + + + + + \ No newline at end of file diff --git a/test.py b/test.py new file mode 100644 index 0000000..3410376 --- /dev/null +++ b/test.py @@ -0,0 +1,955 @@ +import json +import re + + + +def main(json_data): + # 加载模板 + html = """ + + + + + [群/用户名称]日报 - [日期] + + + +
    +

    [群/用户名称]日报

    +

    [日期]

    +
    + 总消息数:[数量] + 活跃用户:[数量] + 时间范围:[时间范围] +
    +
    + + + + +
    +

    今日讨论热点

    +
    + + + + +
    +
    + + +
    +

    实用教程与资源分享

    +
    + + + + +
    +
    + + +
    +

    重要消息汇总

    +
    + + + + +
    +
    + + +
    +

    有趣对话或金句

    +
    + + + + +
    +
    + + +
    +

    问题与解答

    +
    + + + + +
    +
    + + +
    +

    群内数据可视化

    + + +

    话题热度

    +
    + + + + + + +
    + + + +
    +

    话唠榜

    +
    + + +
    +
    + + + +

    熬夜冠军

    +
    + + + +
    +
    + + +
    +

    热门词云

    +
    + +
    + + + + + +
    + +
    + + + +
    +
    +
    + + + + + + + + """ + + + json_data = json_data[7:-3] + # 清洗json_data + # 判断是否是转义的换行符 + if '\n' in json_data: + json_data = json_data.replace('\n', '\n') + + + else: + json_data = json_data.replace(r'\"','"').replace(r"\n",'\n') + + # print(json_data) + + + + + # 使用正则表达式查找json字符串 + pattern = re.compile('{.*}', flags=re.IGNORECASE | re.MULTILINE | re.S) + # print(pattern.search(json_data).group()) + + json_data = json.loads(pattern.search(json_data).group()) + # json_data = json.loads(json_data) + + + # print(json_data) + + # print(json.dumps(json_data,indent=4, ensure_ascii=False)) + + # 替换头部信息 + header = json_data['header'] + html = html.replace('[群/用户名称]日报', f"{header['title']}报告") + html = html.replace('[日期]', header['date']) + html = html.replace('总消息数:[数量]', f"总消息数:{header['metaInfo']['totalMessages']}") + html = html.replace('活跃用户:[数量]', f"活跃用户:{header['metaInfo']['activeUsers']}") + html = html.replace('时间范围:[时间范围]', f"时间范围:{header['metaInfo']['timeRange']}") + + # 处理热点话题 + hot_topics = [] + for topic in json_data['sections']['hotTopics']['items']: + keywords = ''.join([f'{kw}' for kw in topic['keywords']]) + hot_topics.append(f""" +
    +

    {topic['name']}

    +
    {topic['category']}
    +

    {topic['summary']}

    +
    + {keywords} +
    +
    提及次数:{topic['mentions']}
    +
    """) + html = html.replace('', '\n'.join(hot_topics)) + + # 处理教程资源 + tutorials = [] + for tut in json_data['sections']['tutorials']['items']: + points = ''.join([f'
  • {p}
  • ' for p in tut['keyPoints']]) + tutorials.append(f""" +
    +
    {tut['type']}
    +

    {tut['title']}

    +
    + 分享者:{tut['sharedBy']} + +
    +

    {tut['summary']}

    +
    +

    要点:

    +
      {points}
    +
    + +
    分类:{tut['category']}
    +
    """) + html = html.replace('', '\n'.join(tutorials)) + + # 处理重要消息 + messages = [] + for msg in json_data['sections']['importantMessages']['items']: + messages.append(f""" +
    +
    + {msg['time']} + {msg['sender']} + {msg['type']} + 优先级:{msg['priority']} +
    +

    {msg['content']}

    +
    +

    {msg['fullContent']}

    +
    +
    """) + html = html.replace('', '\n'.join(messages)) + + # 处理对话 + dialogues = [] + for dia in json_data['sections']['dialogues']['items']: + messages = ''.join([f""" +
    +
    + {m['speaker']} + {m['time']} +
    +

    {m['content']}

    +
    """ for m in dia['messages']]) + dialogues.append(f""" +
    +
    {dia['type']}
    +
    + {messages} +
    +
    {dia['highlight']}
    +
    相关话题:{dia['relatedTopic']}
    +
    """) + html = html.replace('', '\n'.join(dialogues)) + + # 处理问答 + qas = [] + for qa in json_data['sections']['qa']['items']: + tags = ''.join([f'{tag}' for tag in qa['question']['tags']]) + answers = ''.join([f""" +
    +
    + {ans['responder']} + {ans['time']} + {"最佳回答" if ans['isAccepted'] else ""} +
    +

    {ans['content']}

    +
    """ for ans in qa['answers']]) + qas.append(f""" +
    +
    +
    + {qa['question']['asker']} + {qa['question']['time']} +
    +

    {qa['question']['content']}

    +
    + {tags} +
    +
    +
    + {answers} +
    +
    """) + html = html.replace('', '\n'.join(qas)) + + # 处理数据可视化 + heatmap = [] + colors = ['#3da9fc', '#f25f4c', '#7209b7', '#e53170', '#00b4d8', '#4cc9f0'] + for i, topic in enumerate(json_data['sections']['analytics']['heatmap']): + color = colors[i % len(colors)] + heatmap.append(f""" +
    +
    {topic['topic']}
    +
    {topic['percentage']}%
    +
    +
    +
    +
    {topic['count']}条消息
    +
    """) + html = html.replace('', '\n'.join(heatmap)) + + # 处理话唠榜 + chatty = [] + for rank in json_data['sections']['analytics']['chattyRanking']: + words = ''.join([f'{w}' for w in rank['commonWords']]) + characteristics = ''.join([f'{c}' for c in rank['characteristics']]) + chatty.append(f""" +
    +
    {rank['rank']}
    +
    +
    {rank['name']}
    +
    发言数:{rank['count']}
    +
    + {characteristics} +
    +
    + {words} +
    +
    +
    """) + html = html.replace('', '\n'.join(chatty)) + + + + # 处理熬夜冠军 + nightOwl = json_data['sections']['analytics']['nightOwl'] + + f = f""" +
    +
    👑
    +
    +
    {nightOwl['name']}
    +
    {nightOwl['title']}
    +
    最晚活跃时间:{nightOwl['latestTime']}
    +
    深夜消息数:{nightOwl['messageCount']}
    +
    {nightOwl['lastMessage']}
    +
    注:熬夜时段定义为23:00-06:00,已考虑不同时区
    +
    """ + + html = html.replace('','\n' + f + '\n') + + + # 处理词云 + words = [] + for word in json_data['sections']['wordCloud']['words']: + + words.append(f""" + {word['text']}""") + html = html.replace('', '\n'.join(words)) + + # 处理词云的分类 + types = [] + for typ in json_data['sections']['wordCloud']['legend']: + types.append(f"""
    + + {typ['label']} +
    + """ + ) + + html = html.replace('', '\n'.join(types)) + + + + + # 处理页脚 + footer = json_data['footer'] + html = html.replace('[群名称]', footer['dataSource']) + html = html.replace('[当前时间]', footer['generationTime']) + html = html.replace('[日期] [时间范围]', footer['statisticalPeriod']) + + return html + + + + + + +if __name__ == '__main__': + + json_data = r"```json\n{\n\"header\": {\n\"title\": \"群聊报告\",\n\"date\": \"2025-04-29\",\n\"metaInfo\": {\n\"totalMessages\": \"35\",\n\"activeUsers\": \"12\",\n\"timeRange\": \"07:03:10 - 15:36:25\"\n}\n},\n\"sections\": {\n\"hotTopics\": {\n\"items\": [\n{\n\"name\": \"AI技术讨论\",\n\"category\": \"科技\",\n\"summary\": \"群内围绕Qwen3开源、Vidu Q1体验、夸克AI相机等AI技术展开热烈讨论,涉及模型性能、应用场景和开发者体验。多位成员分享了相关技术文章和体验报告。\",\n\"keywords\": [\"Qwen3\", \"Vidu\", \"AI相机\"],\n\"mentions\": \"15\"\n},\n{\n\"name\": \"熬夜与工作压力\",\n\"category\": \"生活\",\n\"summary\": \"成员们讨论熬夜工作现象,分享各自熬夜经历,对比互联网大厂与普通开发者的工作强度差异,引发关于工作生活平衡的思考。\",\n\"keywords\": [\"熬夜\", \"加班\", \"工作强度\"],\n\"mentions\": \"8\"\n}\n]\n},\n\"tutorials\": {\n\"items\": [\n{\n\"type\": \"TUTORIAL\",\n\"title\": \"体验完刚上线的Vidu Q1,后劲有点大(附AI视频创作教程)\",\n\"sharedBy\": \"苍何\",\n\"time\": \"2025-04-29 09:39:42\",\n\"summary\": \"分享Vidu Q1 AI视频创作工具的体验和教程,介绍其清晰度和一致性的提升。\",\n\"keyPoints\": [\"AI视频清晰度提升\", \"一致性改进\"],\n\"url\": \"http://mp.weixin.qq.com/s?__biz=MzU4NTE1Mjg4MA==&mid=2247493267&idx=1&sn=0189fb501578ce8e27142fbe2f590d03&chksm=fc9a946728c367005c19cb5a335300d05d51a441f9f20424a0a72c904a47bdf003252576318a&mpshare=1&scene=1&srcid=04297l70B2zsuypDfjUh0rh5&sharer_shareinfo=181efb947f938ab90786c776bf7bbda7&sharer_shareinfo_first=181efb947f938ab90786c776bf7bbda7#rd\",\n\"domain\": \"mp.weixin.qq.com\",\n\"category\": \"AI工具\"\n},\n{\n\"type\": \"TUTORIAL\",\n\"title\": \"阿里新出的夸克AI相机,强大到我有点陌生\",\n\"sharedBy\": \"苍何\",\n\"time\": \"2025-04-29 09:42:38\",\n\"summary\": \"介绍夸克AI相机的新奇玩法和功能,展示其强大的AI图像处理能力。\",\n\"keyPoints\": [\"新奇玩法\", \"抽象功能\"],\n\"url\": 
\"http://mp.weixin.qq.com/s?__biz=MzU4NTE1Mjg4MA==&mid=2247493275&idx=1&sn=93556ddd1da7fb8733a23a7c4adbb76b&chksm=fc2a2d25774cce23c75acd8850b85c585c0bcf78d14b810e157efaec5106abf563cf58e26aef&mpshare=1&scene=1&srcid=0429vDf8NbEzNLBQQyFlABmU&sharer_shareinfo=28b94477ec8201b88aa30338e82e8999&sharer_shareinfo_first=28b94477ec8201b88aa30338e82e8999#rd\",\n\"domain\": \"mp.weixin.qq.com\",\n\"category\": \"AI应用\"\n},\n{\n\"type\": \"RESOURCE\",\n\"title\": \"仅2MB,Windows瞬间超级丝滑!\",\n\"sharedBy\": \"AHapi²⁰²⁵\",\n\"time\": \"2025-04-29 11:13:38\",\n\"summary\": \"分享一款轻量级Windows优化工具,声称能显著提升系统运行速度。\",\n\"keyPoints\": [\"2MB大小\", \"系统优化\"],\n\"url\": \"https://mp.weixin.qq.com/s/es77Jc6Du03ppJD5XJeQUg\",\n\"domain\": \"mp.weixin.qq.com\",\n\"category\": \"系统工具\"\n}\n]\n},\n\"importantMessages\": {\n\"items\": [\n{\n\"time\": \"2025-04-29 10:00:18\",\n\"sender\": \"苍何\",\n\"type\": \"NEWS\",\n\"priority\": \"高\",\n\"content\": \"2025年04月29日 AI科技早报:阿里开源8款Qwen3模型,腾讯开源Kuikly跨端框架,OpenAI推出ChatGPT购物功能等11条重要新闻。\",\n\"fullContent\": \"2025年04月29日 AI科技早报1、阿里开源8款Qwen3模型,集成MCP,性能超DeepSeek-R1、OpenAI o1。2、Qafind Labs发布ChatDLM扩散语言模型,推理速度高达2800 tokens/s。3、腾讯开源Kuikly跨端框架,基于Kotlin支持多平台开发,已应用于QQ。4、OpenAI 推出 ChatGPT 购物功能,用户可通过 ChatGPT 便捷购物。5、字节Seed团队提出PHD-Transformer,突破预训练长度扩展瓶颈。6、百度发布文心快码3.5版本与多模态AI智能体Zulu,助力工程师提效。7、Kimi与财新传媒合作,提供专业财经内容,推动AI+传统媒体融合。8、苹果加速「N50」智能眼镜项目,融合AI技术预计2027年亮相。9、研究显示OpenAI o3在病毒学领域超越94%人类专家,生物安全引关注。10、华为测试自研AI芯片Ascend 910D,旨在替代英伟达H100芯片。11、🔥【记得收藏】早报同步更新到开源 AI 知识库:https://u55dyuejxc.feishu.cn/wiki/FkmNwxYHDigJ3akIUGHc8MSTn4d\"\n}\n]\n},\n\"dialogues\": {\n\"items\": [\n{\n\"type\": \"DIALOGUE\",\n\"messages\": [\n{\n\"speaker\": \"好名字\",\n\"time\": \"2025-04-29 08:16:23\",\n\"content\": \"这个我弄完,ai做的小程序有bug,流程走不通,还改不了[捂脸]\"\n},\n{\n\"speaker\": \"贾👦🏻\",\n\"time\": \"2025-04-29 08:54:33\",\n\"content\": \"可以微调 不过源码需要买的\"\n},\n{\n\"speaker\": \"好名字\",\n\"time\": \"2025-04-29 09:13:32\",\n\"content\": \"微调一次,然后再想调就需要开会员了\"\n},\n{\n\"speaker\": \"贾👦🏻\",\n\"time\": \"2025-04-29 
09:14:09\",\n\"content\": \"需求变更一个字 就需要重新购买[破涕为笑]\"\n}\n],\n\"highlight\": \"AI小程序开发中的商业化模式讨论\",\n\"relatedTopic\": \"AI开发工具\"\n},\n{\n\"type\": \"DIALOGUE\",\n\"messages\": [\n{\n\"speaker\": \"苍何\",\n\"time\": \"2025-04-29 09:26:49\",\n\"content\": \"我熬不动\"\n},\n{\n\"speaker\": \"AHapi²⁰²⁵\",\n\"time\": \"2025-04-29 09:27:25\",\n\"content\": \"不要卷别人[旺柴]别人写了 就不卷他们了\"\n},\n{\n\"speaker\": \"苍何\",\n\"time\": \"2025-04-29 09:27:55\",\n\"content\": \"新闻得第一时间,做不到写了也没啥用\"\n},\n{\n\"speaker\": \"苍何\",\n\"time\": \"2025-04-29 09:28:03\",\n\"content\": \"还不如写些应用\"\n},\n{\n\"speaker\": \"大风(Wind)\",\n\"time\": \"2025-04-29 09:28:23\",\n\"content\": \"看看哪些是5-7点发推文的,基本都是卷王了\"\n},\n{\n\"speaker\": \"沉默王二\",\n\"time\": \"2025-04-29 09:28:44\",\n\"content\": \"身体能扛住确实离谱\"\n},\n{\n\"speaker\": \"苍何\",\n\"time\": \"2025-04-29 09:29:03\",\n\"content\": \"是啊,太肝了\"\n}\n],\n\"highlight\": \"关于工作强度和熬夜文化的讨论\",\n\"relatedTopic\": \"工作生活平衡\"\n}\n]\n},\n\"qa\": {\n\"items\": [\n{\n\"question\": {\n\"asker\": \"银色子弹-捷\",\n\"time\": \"2025-04-29 11:10:26\",\n\"content\": \"问一下win11电脑,你长时间没清理,运行慢,一般用什么来清理电脑? 
不要360啊,那个太流氓了,想知道各位大佬有没有优秀的软件推荐一下\",\n\"tags\": [\"Windows优化\", \"系统清理\"]\n},\n\"answers\": [\n{\n\"responder\": \"昏沉沉的\",\n\"time\": \"2025-04-29 11:11:59\",\n\"content\": \"ccclean\",\n\"isAccepted\": false\n},\n{\n\"responder\": \"🤑程序儒\",\n\"time\": \"2025-04-29 11:13:07\",\n\"content\": \"360极速版、Wise Care 365\",\n\"isAccepted\": false\n},\n{\n\"responder\": \"AHapi²⁰²⁵\",\n\"time\": \"2025-04-29 11:13:38\",\n\"content\": \"仅2MB,Windows瞬间超级丝滑!这才是,真神器!\",\n\"isAccepted\": true\n}\n]\n},\n{\n\"question\": {\n\"asker\": \"ಠ_ಠ 闲鱼一条ಠ_ಠ\",\n\"time\": \"2025-04-29 11:37:49\",\n\"content\": \"请问哪位哥还有扣子的邀请码吗?\",\n\"tags\": [\"邀请码\", \"扣子空间\"]\n},\n\"answers\": [\n{\n\"responder\": \"贾👦🏻\",\n\"time\": \"2025-04-29 11:40:37\",\n\"content\": \"RootUser_2105656329 邀请你体验扣子空间,快来和 Agent 一起开始你的工作吧!https://www.coze.cn/space-preview?invite_code=SCL7DAL0\",\n\"isAccepted\": true\n},\n{\n\"responder\": \"9527\",\n\"time\": \"2025-04-29 11:47:43\",\n\"content\": \"RootUser_2106519373 邀请你体验扣子空间,快来和 Agent 一起开始你的工作吧!https://www.coze.cn/space-preview?invite_code=A8IT4MUE\",\n\"isAccepted\": false\n}\n]\n}\n]\n},\n\"analytics\": {\n\"heatmap\": [\n{\n\"topic\": \"AI技术\",\n\"percentage\": \"45%\",\n\"color\": \"#3da9fc\",\n\"count\": \"16\"\n},\n{\n\"topic\": \"工作讨论\",\n\"percentage\": \"25%\",\n\"color\": \"#4361ee\",\n\"count\": \"9\"\n},\n{\n\"topic\": \"工具推荐\",\n\"percentage\": \"15%\",\n\"color\": \"#00b4d8\",\n\"count\": \"5\"\n},\n{\n\"topic\": \"其他\",\n\"percentage\": \"15%\",\n\"color\": \"#7209b7\",\n\"count\": \"5\"\n}\n],\n\"chattyRanking\": [\n{\n\"rank\": 1,\n\"name\": \"苍何\",\n\"count\": \"7\",\n\"characteristics\": [\"技术分享\", \"新闻发布\"],\n\"commonWords\": [\"AI\", \"开源\", \"熬夜\"]\n},\n{\n\"rank\": 2,\n\"name\": \"AHapi²⁰²⁵\",\n\"count\": \"6\",\n\"characteristics\": [\"幽默评论\", \"资源分享\"],\n\"commonWords\": [\"旺柴\", \"加班\", \"神器\"]\n},\n{\n\"rank\": 3,\n\"name\": \"贾👦🏻\",\n\"count\": \"3\",\n\"characteristics\": [\"问题解答\", \"邀请码分享\"],\n\"commonWords\": [\"源码\", \"购买\", 
\"邀请\"]\n}\n],\n\"nightOwl\": {\n\"name\": \"苍何\",\n\"title\": \"熬夜冠军\",\n\"latestTime\": \"09:42:54\",\n\"messageCount\": \"7\",\n\"lastMessage\": \"我熬夜写了这一篇[旺柴]\"\n}\n},\n\"wordCloud\": {\n\"words\": [\n{\n\"text\": \"AI\",\n\"size\": 42,\n\"color\": \"#00b4d8\",\n\"rotation\": 0\n},\n{\n\"text\": \"熬夜\",\n\"size\": 36,\n\"color\": \"#4361ee\",\n\"rotation\": -15\n},\n{\n\"text\": \"开源\",\n\"size\": 32,\n\"color\": \"#00b4d8\",\n\"rotation\": 15\n},\n{\n\"text\": \"Qwen3\",\n\"size\": 28,\n\"color\": \"#3da9fc\",\n\"rotation\": -10\n},\n{\n\"text\": \"Vidu\",\n\"size\": 26,\n\"color\": \"#3da9fc\",\n\"rotation\": 10\n},\n{\n\"text\": \"清理\",\n\"size\": 24,\n\"color\": \"#7209b7\",\n\"rotation\": -5\n},\n{\n\"text\": \"邀请码\",\n\"size\": 22,\n\"color\": \"#7209b7\",\n\"rotation\": 5\n}\n],\n\"legend\": [\n{\"color\": \"#00b4d8\", \"label\": \"技术 相关词汇\"},\n{\"color\": \"#4361ee\", \"label\": \"生活 相关词汇\"},\n{\"color\": \"#7209b7\", \"label\": \"工具 相关词汇\"}\n]\n}\n},\n\"footer\": {\n\"dataSource\": \"群聊聊天记录\",\n\"generationTime\": \"2025-04-29 16:00:00\",\n\"statisticalPeriod\": \"2025-04-29 07:03:10 - 15:36:25\",\n\"disclaimer\": \"本报告内容基于群聊公开讨论,如有不当内容或侵权问题请联系管理员处理。\"\n}\n}\n```" + with open('text.html', 'w', encoding='utf-8') as f: + f.write(main(json_data)) \ No newline at end of file diff --git a/test2.py b/test2.py new file mode 100644 index 0000000..e9423a8 --- /dev/null +++ b/test2.py @@ -0,0 +1,19 @@ +import os +import site +# s = r"E:\project\wx_db_ui\PyWxDump-master\pywxdump\wxdump_work\export\wxid_7l787uu0sm8e22\ai_json\48805389894@chatroom_aiyes_2025-04-30_00-00-00_2025-05-01_to_23-59-59.json" +# wxid = s.split('\\')[-1].split('.')[0].split('_')[0] if "@" in s.split('\\')[-1] else \ +# s.split('\\')[-1].split('.')[0].split('_')[1] # wxid +# time_start = " ".join(s.split('\\')[-1].split('.')[0].split('_')[2:4]) if "@" in s.split('\\')[ +# -1] else " ".join(s.split('\\')[-1].split('.')[0].split('_')[3:5]) # time start +# time_end = " 
".join(s.split('\\')[-1].split('.')[0].split('_')[5:7]) if "@" in s.split('\\')[-1] else " ".join( +# s.split('\\')[-1].split('.')[0].split('_')[6:8]) # time end +# +# print(wxid) +# print(time_start) +# print(time_end) + +print(site.getsitepackages()) + + + + diff --git a/tests/build_exe.py b/tests/build_exe.py index 209fc42..0deee45 100644 --- a/tests/build_exe.py +++ b/tests/build_exe.py @@ -145,7 +145,8 @@ with open("dist/wxdump_version_info.txt", "w", encoding="utf-8") as f: # 获取安装包的路径 package_path = site.getsitepackages() if package_path: - package_path = package_path[1] # 假设取第一个安装包的路径 + # package_path = site.getsitepackages()[0] # 假设取第一个安装包的路径 + package_path = r"E:\project\wx_db_ui\PyWxDump-master" current_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在路径 require_path = os.path.join(os.path.dirname(current_path), "requirements.txt") # requirements.txt 路径 @@ -153,7 +154,7 @@ if package_path: hidden_imports = f.read().splitlines() hidden_imports = [i.replace('-', '_').split("=")[0].split("~")[0] for i in hidden_imports if i and i not in ["setuptools", "wheel"]] # 去掉setuptools、wheel - hidden_imports += ["pywxdump", "pywxdump.db", "pywxdump.db.__init__.utils"] + hidden_imports += ["win32com",'Crypto' ,"pywxdump", "pywxdump.db", "pywxdump.db.__init__.utils"] # 获取 ui 文件夹下的所有文件 用于打包 root_path = os.path.join(package_path, 'pywxdump') diff --git a/text.html b/text.html new file mode 100644 index 0000000..8490700 --- /dev/null +++ b/text.html @@ -0,0 +1,1078 @@ + + + + + + 群聊报告报告 - 2025-04-29 + + + +
    +

    群聊报告报告

    +

    2025-04-29

    +
    + 总消息数:35 + 活跃用户:12 + 时间范围:07:03:10 - 15:36:25 +
    +
    + + + + +
    +

    今日讨论热点

    +
    + + +
    +

    AI技术讨论

    +
    科技
    +

    群内围绕Qwen3开源、Vidu Q1体验、夸克AI相机等AI技术展开热烈讨论,涉及模型性能、应用场景和开发者体验。多位成员分享了相关技术文章和体验报告。

    +
    + Qwen3ViduAI相机 +
    +
    提及次数:15
    +
    + +
    +

    熬夜与工作压力

    +
    生活
    +

    成员们讨论熬夜工作现象,分享各自熬夜经历,对比互联网大厂与普通开发者的工作强度差异,引发关于工作生活平衡的思考。

    +
    + 熬夜加班工作强度 +
    +
    提及次数:8
    +
    + + +
    +
    + + +
    +

    实用教程与资源分享

    +
    + + +
    +
    TUTORIAL
    +

    体验完刚上线的Vidu Q1,后劲有点大(附AI视频创作教程)

    +
    + 分享者:苍何 + +
    +

    分享Vidu Q1 AI视频创作工具的体验和教程,介绍其清晰度和一致性的提升。

    +
    +

    要点:

    +
    • AI视频清晰度提升
    • 一致性改进
    +
    + +
    分类:AI工具
    +
    + +
    +
    TUTORIAL
    +

    阿里新出的夸克AI相机,强大到我有点陌生

    +
    + 分享者:苍何 + +
    +

    介绍夸克AI相机的新奇玩法和功能,展示其强大的AI图像处理能力。

    +
    +

    要点:

    +
    • 新奇玩法
    • 抽象功能
    +
    + +
    分类:AI应用
    +
    + +
    +
    RESOURCE
    +

    仅2MB,Windows瞬间超级丝滑!

    +
    + 分享者:AHapi²⁰²⁵ + +
    +

    分享一款轻量级Windows优化工具,声称能显著提升系统运行速度。

    +
    +

    要点:

    +
    • 2MB大小
    • 系统优化
    +
    + +
    分类:系统工具
    +
    + + +
    +
    + + +
    +

    重要消息汇总

    +
    + + +
    +
    + 2025-04-29 10:00:18 + 苍何 + NEWS + 优先级:高 +
    +

    2025年04月29日 AI科技早报:阿里开源8款Qwen3模型,腾讯开源Kuikly跨端框架,OpenAI推出ChatGPT购物功能等11条重要新闻。

    +
    +

    2025年04月29日 AI科技早报1、阿里开源8款Qwen3模型,集成MCP,性能超DeepSeek-R1、OpenAI o1。2、Qafind Labs发布ChatDLM扩散语言模型,推理速度高达2800 tokens/s。3、腾讯开源Kuikly跨端框架,基于Kotlin支持多平台开发,已应用于QQ。4、OpenAI 推出 ChatGPT 购物功能,用户可通过 ChatGPT 便捷购物。5、字节Seed团队提出PHD-Transformer,突破预训练长度扩展瓶颈。6、百度发布文心快码3.5版本与多模态AI智能体Zulu,助力工程师提效。7、Kimi与财新传媒合作,提供专业财经内容,推动AI+传统媒体融合。8、苹果加速「N50」智能眼镜项目,融合AI技术预计2027年亮相。9、研究显示OpenAI o3在病毒学领域超越94%人类专家,生物安全引关注。10、华为测试自研AI芯片Ascend 910D,旨在替代英伟达H100芯片。11、🔥【记得收藏】早报同步更新到开源 AI 知识库:https://u55dyuejxc.feishu.cn/wiki/FkmNwxYHDigJ3akIUGHc8MSTn4d

    +
    +
    + + +
    +
    + + +
    +

    有趣对话或金句

    +
    + + +
    +
    DIALOGUE
    +
    + +
    +
    + 好名字 + 2025-04-29 08:16:23 +
    +

    这个我弄完,ai做的小程序有bug,流程走不通,还改不了[捂脸]

    +
    +
    +
    + 贾👦🏻 + 2025-04-29 08:54:33 +
    +

    可以微调 不过源码需要买的

    +
    +
    +
    + 好名字 + 2025-04-29 09:13:32 +
    +

    微调一次,然后再想调就需要开会员了

    +
    +
    +
    + 贾👦🏻 + 2025-04-29 09:14:09 +
    +

    需求变更一个字 就需要重新购买[破涕为笑]

    +
    +
    +
    AI小程序开发中的商业化模式讨论
    +
    相关话题:AI开发工具
    +
    + +
    +
    DIALOGUE
    +
    + +
    +
    + 苍何 + 2025-04-29 09:26:49 +
    +

    我熬不动

    +
    +
    +
    + AHapi²⁰²⁵ + 2025-04-29 09:27:25 +
    +

    不要卷别人[旺柴]别人写了 就不卷他们了

    +
    +
    +
    + 苍何 + 2025-04-29 09:27:55 +
    +

    新闻得第一时间,做不到写了也没啥用

    +
    +
    +
    + 苍何 + 2025-04-29 09:28:03 +
    +

    还不如写些应用

    +
    +
    +
    + 大风(Wind) + 2025-04-29 09:28:23 +
    +

    看看哪些是5-7点发推文的,基本都是卷王了

    +
    +
    +
    + 沉默王二 + 2025-04-29 09:28:44 +
    +

    身体能扛住确实离谱

    +
    +
    +
    + 苍何 + 2025-04-29 09:29:03 +
    +

    是啊,太肝了

    +
    +
    +
    关于工作强度和熬夜文化的讨论
    +
    相关话题:工作生活平衡
    +
    + + +
    +
    + + +
    +

    问题与解答

    +
    + + +
    +
    +
    + 银色子弹-捷 + 2025-04-29 11:10:26 +
    +

    问一下win11电脑,你长时间没清理,运行慢,一般用什么来清理电脑? 不要360啊,那个太流氓了,想知道各位大佬有没有优秀的软件推荐一下

    +
    + Windows优化系统清理 +
    +
    +
    + +
    +
    + 昏沉沉的 + 2025-04-29 11:11:59 + +
    +

    ccclean

    +
    +
    +
    + 🤑程序儒 + 2025-04-29 11:13:07 + +
    +

    360极速版、Wise Care 365

    +
    +
    +
    + AHapi²⁰²⁵ + 2025-04-29 11:13:38 + 最佳回答 +
    +

    仅2MB,Windows瞬间超级丝滑!这才是,真神器!

    +
    +
    +
    + +
    +
    +
    + ಠ_ಠ 闲鱼一条ಠ_ಠ + 2025-04-29 11:37:49 +
    +

    请问哪位哥还有扣子的邀请码吗?

    +
    + 邀请码扣子空间 +
    +
    +
    + +
    +
    + 贾👦🏻 + 2025-04-29 11:40:37 + 最佳回答 +
    +

    RootUser_2105656329 邀请你体验扣子空间,快来和 Agent 一起开始你的工作吧!https://www.coze.cn/space-preview?invite_code=SCL7DAL0

    +
    +
    +
    + 9527 + 2025-04-29 11:47:43 + +
    +

    RootUser_2106519373 邀请你体验扣子空间,快来和 Agent 一起开始你的工作吧!https://www.coze.cn/space-preview?invite_code=A8IT4MUE

    +
    +
    +
    + + +
    +
    + + +
    +

    群内数据可视化

    + + +

    话题热度

    +
    + + + + +
    +
    AI技术
    +
    45%%
    +
    +
    +
    +
    16条消息
    +
    + +
    +
    工作讨论
    +
    25%%
    +
    +
    +
    +
    9条消息
    +
    + +
    +
    工具推荐
    +
    15%%
    +
    +
    +
    +
    5条消息
    +
    + +
    +
    其他
    +
    15%%
    +
    +
    +
    +
    5条消息
    +
    + + +
    + + + +
    +

    话唠榜

    +
    + +
    +
    1
    +
    +
    苍何
    +
    发言数:7
    +
    + 技术分享新闻发布 +
    +
    + AI开源熬夜 +
    +
    +
    + +
    +
    2
    +
    +
    AHapi²⁰²⁵
    +
    发言数:6
    +
    + 幽默评论资源分享 +
    +
    + 旺柴加班神器 +
    +
    +
    + +
    +
    3
    +
    +
    贾👦🏻
    +
    发言数:3
    +
    + 问题解答邀请码分享 +
    +
    + 源码购买邀请 +
    +
    +
    + +
    +
    + + + +

    熬夜冠军

    +
    + + + +
    +
    👑
    +
    +
    苍何
    +
    熬夜冠军
    +
    最晚活跃时间:09:42:54
    +
    深夜消息数:7
    +
    我熬夜写了这一篇[旺柴]
    +
    注:熬夜时段定义为23:00-06:00,已考虑不同时区
    +
    + + +
    +
    + + +
    +

    热门词云

    +
    + +
    + + + + AI + + 熬夜 + + 开源 + + Qwen3 + + Vidu + + 清理 + + 邀请码 + + +
    + +
    + +
    + + 技术 相关词汇 +
    + +
    + + 生活 相关词汇 +
    + +
    + + 工具 相关词汇 +
    + + +
    +
    +
    + + +
    +

    数据来源:群聊聊天记录

    +

    生成时间:2025-04-29 16:00:00

    +

    统计周期:2025-04-29 07:03:10 - 15:36:25

    +

    免责声明:本报告内容基于群聊公开讨论,如有不当内容或侵权问题请联系管理员处理。

    +
    + + + + + \ No newline at end of file diff --git a/微信聊天记录可视化prompt.md b/微信聊天记录可视化prompt.md new file mode 100644 index 0000000..c5109a0 --- /dev/null +++ b/微信聊天记录可视化prompt.md @@ -0,0 +1,762 @@ +任务:根据 提供的微信群聊天记录(json格式)生成今日群/好友日报,输出为风格固定、一致的HTML页面,适合截图分享 +## 日报模式选择 +- 日报模式:[完整版/简化版] (默认为完整版) +- 如果需要简化版,请在提交时注明"生成简化版" + +## 简化版说明 +如选择"简化版",将只生成以下核心部分: +- 今日讨论热点(最多3个) +- 重要消息汇总 +- 话唠榜(仅前3名) +- 简化版词云 +日报内容更精简,适合快速浏览和分享。 + +## 聊天记录格式 +``` json +[ +{ + "nickname": "昏沉沉的", # 发消息人昵称 + "message": "XXX", # 消息内容 + "time": "2025-04-27 11:33:20" #发消息时间 + }, +] +``` + +如未能识别消息格式或未找到有效记录,将显示提示信息并尝试按最佳猜测处理。 + +## 输出要求 +必须使用以下固定的HTML模板和CSS样式,仅更新内容部分,确保每次生成的页面风格完全一致。使用严格定义的深色科技风格。 + + + +## HTML结构模板 + +```html + + + + + [群名称]报告 - [日期] + + + +
    +

    [群名称]报告

    +

    [日期]

    +
    + 总消息数:[数量] + 活跃用户:[数量] + 时间范围:[时间范围] +
    +
    + + +
    +

    今日讨论热点

    +
    + +
    +

    [热点话题名称]

    +
    [话题分类]
    +

    [简要总结(50-100字)]

    +
    + [关键词1] + [关键词2] + +
    +
    提及次数:[次数]
    +
    + +
    +
    + + +
    +

    实用教程与资源分享

    +
    + +
    +
    [TUTORIAL | NEWS | RESOURCE]
    +

    [分享的教程或资源标题]

    +
    + 分享者:[昵称] + +
    +

    [内容简介]

    +
    +

    要点:

    +
      +
    • [要点1]
    • +
    • [要点2]
    • + +
    +
    + +
    分类:[分类]
    +
    + +
    +
    + + +
    +

    重要消息汇总

    +
    + +
    +
    + [消息时间] + [发送者昵称] + [NOTICE | EVENT | ANNOUNCEMENT | OTHER] + 优先级:[高|中|低] +
    +

    [消息内容]

    +
    +

    [完整通知内容]

    +
    +
    + +
    +
    + + +
    +

    有趣对话或金句

    +
    + +
    +
    [DIALOGUE | QUOTE]
    +
    +
    +
    + [说话者昵称] + [发言时间] +
    +

    [消息内容]

    +
    +
    +
    + [说话者昵称] + [发言时间] +
    +

    [消息内容]

    +
    + +
    +
    [对话中的金句或亮点]
    +
    相关话题:[某某话题]
    +
    + +
    +
    + + +
    +

    问题与解答

    +
    + +
    +
    +
    + [提问者昵称] + [提问时间] +
    +

    [问题内容]

    +
    + [相关标签1] + [相关标签2] + +
    +
    +
    +
    +
    + [回答者昵称] + [回答时间] + 最佳回答 +
    +

    [回答内容]

    +
    + +
    +
    + +
    +
    + + +
    +

    群内数据可视化

    + + +

    话题热度

    +
    + +
    +
    [话题名称]
    +
    [百分比]%
    +
    +
    +
    +
    [数量]条消息
    +
    + +
    +
    [话题名称]
    +
    [百分比]%
    +
    +
    +
    +
    [数量]条消息
    +
    + +
    + + +

    话唠榜

    +
    + +
    +
    1
    +
    +
    [群友昵称]
    +
    [数量]条消息
    +
    + [特点1] + [特点2] + +
    +
    + [常用词1] + [常用词2] + +
    +
    +
    + +
    + + +

    熬夜冠军

    +
    + +
    +
    👑
    +
    +
    [熬夜冠军昵称]
    +
    [熬夜冠军称号]
    +
    最晚活跃时间:[时间]
    +
    深夜消息数:[数量]
    +
    [最后一条深夜消息内容]
    +
    注:熬夜时段定义为23:00-06:00,已考虑不同时区
    +
    +
    +
    +
    + + +
    +

    热门词云

    +
    + +
    + + + [关键词1] + + [关键词2] + + [关键词3] + + [关键词4] + + [关键词5] + + +
    + +
    +
    + + [分类1] 相关词汇 +
    +
    + + [分类2] 相关词汇 +
    +
    + + [分类3] 相关词汇 +
    +
    +
    +
    + + + + + \ No newline at end of file