3.1.0,很多更改丢失,重新恢复的文件

This commit is contained in:
xaoyaoo 2024-08-03 00:29:03 +08:00
commit e2b934ebdc
46 changed files with 3435 additions and 2019 deletions

View File

@ -8,24 +8,23 @@
# from .analyzer.db_parsing import read_img_dat, read_emoji, decompress_CompressContent, read_audio_buf, read_audio, \ # from .analyzer.db_parsing import read_img_dat, read_emoji, decompress_CompressContent, read_audio_buf, read_audio, \
# parse_xml_string, read_BytesExtra # parse_xml_string, read_BytesExtra
# from .ui import app_show_chat, get_user_list, export # from .ui import app_show_chat, get_user_list, export
from .wx_info import BiasAddr, read_info, get_wechat_db, batch_decrypt, decrypt, get_core_db from .wx_core import BiasAddr, get_wx_info, get_wx_db, batch_decrypt, decrypt, get_core_db
from .wx_info import merge_copy_db, merge_msg_db, merge_media_msg_db, merge_db, decrypt_merge, merge_real_time_db, \ from .wx_core import merge_db, decrypt_merge, merge_real_time_db, all_merge_real_time_db
all_merge_real_time_db
from .analyzer import DBPool from .analyzer import DBPool
from .dbpreprocess import get_user_list, get_recent_user_list, wxid2userinfo, ParsingMSG, ParsingMicroMsg, \ from .db import MsgHandler, MicroHandler, \
ParsingMediaMSG, ParsingOpenIMContact, ParsingFavorite,ParsingPublicMsg MediaHandler, OpenIMContactHandler, FavoriteHandler, PublicMsgHandler, DBHandler
from .server import start_falsk from .server import start_falsk
import os, json import os, json
try: try:
VERSION_LIST_PATH = os.path.join(os.path.dirname(__file__), "version_list.json") WX_OFFS_PATH = os.path.join(os.path.dirname(__file__), "WX_OFFS.json")
with open(VERSION_LIST_PATH, "r", encoding="utf-8") as f: with open(WX_OFFS_PATH, "r", encoding="utf-8") as f:
VERSION_LIST = json.load(f) WX_OFFS = json.load(f)
except: except:
VERSION_LIST = {} WX_OFFS = {}
VERSION_LIST_PATH = None WX_OFFS_PATH = None
# PYWXDUMP_ROOT_PATH = os.path.dirname(__file__) # PYWXDUMP_ROOT_PATH = os.path.dirname(__file__)
# db_init = DBPool("DBPOOL_INIT") # db_init = DBPool("DBPOOL_INIT")
__version__ = "3.0.42" __version__ = "3.1.0"

View File

@ -10,8 +10,8 @@ import time
from collections import Counter from collections import Counter
import pandas as pd import pandas as pd
from pywxdump.dbpreprocess.utils import xml2dict from pywxdump.db.utils import xml2dict
from pywxdump.dbpreprocess import parsingMSG from pywxdump.db import dbMSG
def date_chat_count(chat_data, interval="W"): def date_chat_count(chat_data, interval="W"):
""" """

View File

@ -5,8 +5,9 @@
# Author: xaoyaoo # Author: xaoyaoo
# Date: 2023/12/14 # Date: 2023/12/14
# ------------------------------------------------------------------------------- # -------------------------------------------------------------------------------
from .api import api from .remote_server import rs_api
from .utils import read_session, save_session from .local_server import ls_api
from .utils import get_conf, set_conf
if __name__ == '__main__': if __name__ == '__main__':
pass pass

View File

@ -1,948 +0,0 @@
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name: chat_api.py
# Description:
# Author: xaoyaoo
# Date: 2024/01/02
# -------------------------------------------------------------------------------
import base64
import json
import logging
import os
import re
import time
import shutil
import pythoncom
import pywxdump
from flask import Flask, request, render_template, g, Blueprint, send_file, make_response, session
from pywxdump import get_core_db, all_merge_real_time_db
from pywxdump.api.rjson import ReJson, RqJson
from pywxdump.api.utils import read_session, get_session_wxids, save_session, error9999, gen_base64, validate_title, \
read_session_local_wxid
from pywxdump import read_info, VERSION_LIST, batch_decrypt, BiasAddr, merge_db, decrypt_merge, merge_real_time_db
from pywxdump.dbpreprocess import wxid2userinfo, ParsingMSG, get_user_list, get_recent_user_list, ParsingMediaMSG, \
download_file, export_csv, export_json, ParsingMicroMsg, ParsingPublicMsg
from pywxdump.dbpreprocess.utils import dat2img
# app = Flask(__name__, static_folder='../ui/web/dist', static_url_path='/')
# Blueprint that carries every HTTP route declared in this module. The template and static
# folders point at the web UI directories (../ui/web and ../ui/web/assets/).
api = Blueprint('api', __name__, template_folder='../ui/web', static_folder='../ui/web/assets/', )
# NOTE(review): this only sets a plain `debug` attribute on the blueprint object;
# confirm whether anything actually reads it.
api.debug = False
# 以下为初始化相关 *******************************************************************************************************
@api.route('/api/init_last_local_wxid', methods=["GET", 'POST'])
@error9999
def init_last_local_wxid():
    """
    Report the local wxids returned by ``read_session_local_wxid`` for ``g.sf``.

    :return: ReJson with code 0 and body ``{"local_wxids": [...]}``; the list is empty
             whenever the lookup yields a falsy value.
    """
    known_wxids = read_session_local_wxid(g.sf)
    # Any falsy lookup result (None, empty list, ...) is normalised to an empty list.
    return ReJson(0, {"local_wxids": known_wxids if known_wxids else []})
@api.route('/api/init_last', methods=["GET", 'POST'])
@error9999
def init_last():
    """
    Tell the caller whether an account has already been initialised.

    The wxid comes from the JSON body (``my_wxid``); when it is missing or not a string,
    the value stored under ("test", "last") in the session is used instead.

    :return: ReJson code 0. The body carries merge_path / wx_path / key / my_wxid with
             ``is_init`` True when both merge_path and wx_path are stored for that wxid,
             otherwise ``{"is_init": False, "my_wxid": ""}``.
    """
    requested = request.json.get("my_wxid", "")
    # Strip surrounding whitespace and stray quote characters from a pasted value.
    my_wxid = requested.strip().strip("'").strip('"') if isinstance(requested, str) else ""
    if not my_wxid:
        my_wxid = read_session(g.sf, "test", "last")

    if not my_wxid:
        return ReJson(0, {"is_init": False, "my_wxid": ""})

    # Remember this wxid as the most recently used one.
    save_session(g.sf, "test", "last", my_wxid)
    merge_path = read_session(g.sf, my_wxid, "merge_path")
    wx_path = read_session(g.sf, my_wxid, "wx_path")
    key = read_session(g.sf, my_wxid, "key")

    if not (merge_path and wx_path):
        return ReJson(0, {"is_init": False, "my_wxid": ""})

    return ReJson(0, {
        "merge_path": merge_path,
        "wx_path": wx_path,
        "key": key,
        "my_wxid": my_wxid,
        "is_init": True,
    })
@api.route('/api/init_key', methods=["GET", 'POST'])
@error9999
def init_key():
    """
    Initialise an account from a WeChat data directory and its database key.

    Reads ``wx_path``, ``key`` and ``my_wxid`` from the JSON body, decrypts and merges the
    databases into a temporary directory, moves the merged file to
    ``<g.tmp_path>/<my_wxid>/merge_all.db`` and stores the settings in the session.

    :return: ReJson code 0 with merge_path / wx_path / key / my_wxid / is_init on success;
             1002 when a required field is missing, 1001 when wx_path does not exist,
             2001 when the work directory cannot be cleared or decrypt_merge fails.
    """
    wx_path = request.json.get("wx_path", "").strip().strip("'").strip('"')
    key = request.json.get("key", "").strip().strip("'").strip('"')
    my_wxid = request.json.get("my_wxid", "").strip().strip("'").strip('"')
    if not wx_path:
        return ReJson(1002, body=f"wx_path is required: {wx_path}")
    if not os.path.exists(wx_path):
        return ReJson(1001, body=f"wx_path not exists: {wx_path}")
    if not key:
        return ReJson(1002, body=f"key is required: {key}")
    if not my_wxid:
        return ReJson(1002, body=f"my_wxid is required: {my_wxid}")

    # Close connections to a previously merged database before it gets replaced.
    old_merge_save_path = read_session(g.sf, my_wxid, "merge_path")
    if isinstance(old_merge_save_path, str) and old_merge_save_path and os.path.exists(old_merge_save_path):
        pmsg = ParsingMSG(old_merge_save_path)
        pmsg.close_all_connection()

    # my_wxid is guaranteed non-empty here, so the work directory is always per-account.
    out_path = os.path.join(g.tmp_path, "decrypted", my_wxid)
    # A leftover work directory may still be locked by another process.
    if os.path.exists(out_path):
        try:
            shutil.rmtree(out_path)
        except PermissionError as e:
            # Log with the stack trace and report the failure to the caller.
            logging.error(f"{e}", exc_info=True)
            return ReJson(2001, body=str(e))

    code, merge_save_path = decrypt_merge(wx_path=wx_path, key=key, outpath=out_path)
    time.sleep(1)
    if not code:
        # On failure the second element carries the error description.
        return ReJson(2001, body=merge_save_path)

    # Move the merged database to <g.tmp_path>/<my_wxid>/merge_all.db.
    account_dir = os.path.join(g.tmp_path, my_wxid)
    os.makedirs(account_dir, exist_ok=True)
    merge_save_path_new = os.path.join(account_dir, "merge_all.db")
    shutil.move(merge_save_path, str(merge_save_path_new))

    # Best-effort cleanup of the work directory; a locked file is only logged.
    if os.path.exists(out_path):
        try:
            shutil.rmtree(out_path)
        except PermissionError as e:
            logging.error(f"{e}", exc_info=True)

    save_session(g.sf, my_wxid, "merge_path", merge_save_path_new)
    save_session(g.sf, my_wxid, "wx_path", wx_path)
    save_session(g.sf, my_wxid, "key", key)
    save_session(g.sf, my_wxid, "my_wxid", my_wxid)
    save_session(g.sf, "test", "last", my_wxid)

    rdata = {
        # Report the final location: the pre-move path no longer exists after shutil.move.
        "merge_path": merge_save_path_new,
        "wx_path": wx_path,
        "key": key,
        "my_wxid": my_wxid,
        "is_init": True,
    }
    return ReJson(0, rdata)
@api.route('/api/init_nokey', methods=["GET", 'POST'])
@error9999
def init_nokey():
    """
    Initialise an account from an already merged database, without supplying a key.

    Reads ``merge_path``, ``wx_path`` and ``my_wxid`` from the JSON body and stores them
    in the session together with whatever key is already stored for that wxid.

    :return: ReJson code 0 with the stored settings (``key`` is returned as an empty
             string); 1002 when a required field is missing, 1001 when wx_path does
             not exist.
    """

    def _field(name):
        # Trim whitespace and stray quote characters from a submitted value.
        return request.json.get(name, "").strip().strip("'").strip('"')

    merge_path = _field("merge_path")
    wx_path = _field("wx_path")
    my_wxid = _field("my_wxid")

    if not wx_path:
        return ReJson(1002, body=f"wx_path is required: {wx_path}")
    if not os.path.exists(wx_path):
        return ReJson(1001, body=f"wx_path not exists: {wx_path}")
    if not merge_path:
        return ReJson(1002, body=f"merge_path is required: {merge_path}")
    if not my_wxid:
        return ReJson(1002, body=f"my_wxid is required: {my_wxid}")

    key = read_session(g.sf, my_wxid, "key")

    settings = (
        ("merge_path", merge_path),
        ("wx_path", wx_path),
        ("key", key),
        ("my_wxid", my_wxid),
    )
    for name, value in settings:
        save_session(g.sf, my_wxid, name, value)
    # Remember this wxid as the most recently used one.
    save_session(g.sf, "test", "last", my_wxid)

    return ReJson(0, {
        "merge_path": merge_path,
        "wx_path": wx_path,
        "key": "",
        "my_wxid": my_wxid,
        "is_init": True,
    })
# END 以上为初始化相关 ***************************************************************************************************
# start 以下为聊天联系人相关api *******************************************************************************************
@api.route('/api/recent_user_list', methods=["GET", 'POST'])
@error9999
def recent_user_list():
    """
    Return the recent-contact list (at most 200 entries) for the last used wxid.

    :return: ReJson code 0 with the result of ``get_recent_user_list``; 1001 when no
             last-used wxid is stored.
    """
    my_wxid = read_session(g.sf, "test", "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")
    db_path = read_session(g.sf, my_wxid, "merge_path")
    # The merged database is passed for both path arguments.
    return ReJson(0, get_recent_user_list(db_path, db_path, limit=200))
@api.route('/api/user_labels_dict', methods=["GET", 'POST'])
@error9999
def user_labels_dict():
    """
    Return the label dictionary read from the merged database of the last used wxid.

    :return: ReJson code 0 with ``ParsingMicroMsg(...).labels_dict()``; 1001 when no
             last-used wxid is stored.
    """
    my_wxid = read_session(g.sf, "test", "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")
    db_path = read_session(g.sf, my_wxid, "merge_path")
    micro_msg = ParsingMicroMsg(db_path)
    return ReJson(0, micro_msg.labels_dict())
@api.route('/api/user_list', methods=["GET", 'POST'])
@error9999
def user_list():
    """
    Return the contact list, optionally filtered by the ``word`` parameter.

    ``word`` is taken from the query string on GET and from the JSON body on POST.

    :return: ReJson code 0 with the result of ``get_user_list``; 1003 for any other
             HTTP method, 1001 when no last-used wxid is stored.
    """
    if request.method not in ("GET", "POST"):
        return ReJson(1003, msg="Unsupported method")
    params = request.args if request.method == "GET" else request.json
    word = params.get("word", "")

    my_wxid = read_session(g.sf, "test", "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")
    db_path = read_session(g.sf, my_wxid, "merge_path")
    # The merged database is passed for both path arguments.
    contacts = get_user_list(db_path, db_path, word)
    return ReJson(0, contacts)
@api.route('/api/wxid2user', methods=["GET", 'POST'])
@error9999
def wxid2user():
    """
    Look up user information for the ``wxid`` parameter.

    ``wxid`` is taken from the query string on GET and from the JSON body on POST.

    :return: ReJson code 0 with the result of ``wxid2userinfo``; 1003 for any other
             HTTP method, 1001 when no last-used wxid is stored.
    """
    if request.method not in ("GET", "POST"):
        return ReJson(1003, msg="Unsupported method")
    params = request.args if request.method == "GET" else request.json
    target_wxid = params.get("wxid", "")

    my_wxid = read_session(g.sf, "test", "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")
    db_path = read_session(g.sf, my_wxid, "merge_path")
    # The merged database is passed for both path arguments.
    return ReJson(0, wxid2userinfo(db_path, db_path, wxid=target_wxid))
@api.route('/api/mywxid', methods=["GET", 'POST'])
@error9999
def mywxid():
    """
    Return the wxid stored under ("test", "last") in the session.

    :return: ReJson code 0 with ``{"my_wxid": ...}``; 1001 when nothing is stored.
    """
    current = read_session(g.sf, "test", "last")
    if current:
        return ReJson(0, {"my_wxid": current})
    return ReJson(1001, body="my_wxid is required")
# end 以上为聊天联系人相关api *********************************************************************************************
# start 以下为聊天记录相关api *********************************************************************************************
@api.route('/api/realtimemsg', methods=["GET", "POST"])
@error9999
def get_real_time_msg():
    """
    Pull real-time messages into the merged database via ``all_merge_real_time_db``.

    :return: ReJson code 0 with the merge result on success; 1001 when no last-used wxid
             is stored, 1002 when merge_path, key or wx_path is missing from the session,
             2001 when the merge reports failure.
    """
    my_wxid = read_session(g.sf, "test", "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")

    merge_path = read_session(g.sf, my_wxid, "merge_path")
    key = read_session(g.sf, my_wxid, "key")
    wx_path = read_session(g.sf, my_wxid, "wx_path")

    # All three stored settings are needed to run the merge.
    if not (merge_path and key and wx_path):
        return ReJson(1002, body="msg_path or media_path or wx_path or key is required")

    code, ret = all_merge_real_time_db(key=key, wx_path=wx_path, merge_path=merge_path)
    return ReJson(0, ret) if code else ReJson(2001, body=ret)
@api.route('/api/msg_count', methods=["GET", 'POST'])
@error9999
def msg_count():
    """
    Return the number of chat records for the ``wxid`` parameter.

    ``wxid`` is taken from the query string on GET and from the JSON body on POST.
    When the result from ``ParsingMSG`` contains ``None``, the count is looked up
    again through ``ParsingPublicMsg``.

    :return: ReJson code 0 with the count result; 1003 for any other HTTP method,
             1001 when no last-used wxid is stored.
    """
    if request.method not in ("GET", "POST"):
        return ReJson(1003, msg="Unsupported method")
    params = request.args if request.method == "GET" else request.json
    wxid = params.get("wxid")

    my_wxid = read_session(g.sf, "test", "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")
    db_path = read_session(g.sf, my_wxid, "merge_path")

    counts = ParsingMSG(db_path).msg_count(wxid)
    if None in counts:
        # Fall back to the public-message parser.
        counts = ParsingPublicMsg(db_path).msg_count(wxid)
    return ReJson(0, counts)
@api.route('/api/imgsrc/<path:imgsrc>', methods=["GET", 'POST'])
def get_imgsrc(imgsrc):
    """
    Serve an image identified by a URL, caching the download on disk.

    Paths starting with "FileStorage" are treated as local images and delegated to
    ``get_img``. Otherwise the URL (plus the request's query string, if any) is turned
    into a file name under ``<g.tmp_path>/<my_wxid>/imgsrc`` and downloaded when the
    cached file is not there yet.

    :param imgsrc: path component captured from the request URL.
    :return: the image file, or ReJson 1002 (empty path), 1001 (no last-used wxid) or
             4004 (file still missing after the download attempt).
    """
    if not imgsrc:
        return ReJson(1002)
    if imgsrc.startswith("FileStorage"):
        # Local image file: handled by get_img.
        return get_img(imgsrc)

    # Re-attach the query string that Flask split off from the captured path.
    if request.query_string:
        imgsrc = imgsrc + "?" + request.query_string.decode("utf-8")

    my_wxid = read_session(g.sf, "test", "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")

    cache_dir = os.path.join(g.tmp_path, my_wxid, "imgsrc")
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)

    cache_name = imgsrc.replace("http://", "").replace("https://", "").replace("/", "_").replace("?", "_")
    cache_name = cache_name + ".jpg"
    # An over-long name is split into a directory part and a file part.
    if len(cache_name) > 255:
        cache_name = cache_name[:255] + "/" + cache_name[255:]
    cached_file = os.path.join(cache_dir, cache_name)

    if not os.path.exists(cached_file):
        download_file(imgsrc, cached_file)
    if os.path.exists(cached_file):
        return send_file(cached_file)
    return ReJson(4004, body=imgsrc)
@api.route('/api/img/<path:img_path>', methods=["GET", 'POST'])
@error9999
def get_img(img_path):
    """
    Convert an image file under the stored wx_path with ``dat2img`` and serve it.

    The converted bytes are written below ``<g.tmp_path>/<my_wxid>/img`` and that copy
    is sent to the client.

    :param img_path: path of the image relative to the stored wx_path.
    :return: the converted image, or ReJson 1002 (empty path) or 1001 (no last-used
             wxid, source file missing, or conversion reported failure).
    """
    if not img_path:
        return ReJson(1002)

    my_wxid = read_session(g.sf, "test", "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")
    wx_path = read_session(g.sf, my_wxid, "wx_path")

    img_path = img_path.replace("\\\\", "\\")
    out_root = os.path.join(g.tmp_path, my_wxid, "img")
    source_file = os.path.join(wx_path, img_path)

    if not os.path.exists(source_file):
        return ReJson(1001, body=source_file)

    rc, fomt, md5, out_bytes = dat2img(source_file)
    if not rc:
        return ReJson(1001, body=source_file)

    # Output name: <relative path>_<md5>.<format>
    target_file = os.path.join(out_root, img_path + "_" + md5 + "." + fomt)
    target_dir = os.path.dirname(target_file)
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    with open(target_file, "wb") as out:
        out.write(out_bytes)
    return send_file(target_file)
@api.route('/api/msgs', methods=["GET", 'POST'])
@error9999
def get_msgs():
    """
    Return one page of messages for a conversation plus the users that appear in it.

    Reads ``wxid``, ``start`` and ``limit`` from the JSON body. Digit-only strings are
    accepted for ``start`` and ``limit`` and converted to int. Messages come from
    ``ParsingMSG``; when that yields nothing, ``ParsingPublicMsg`` is tried.

    :return: ReJson code 0 with ``{"msg_list": ..., "user_list": ...}``; 1001 when no
             last-used wxid is stored, 1002 when wxid is missing or start/limit is
             missing or not an int.
    """
    my_wxid = read_session(g.sf, "test", "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")
    merge_path = read_session(g.sf, my_wxid, "merge_path")

    start = request.json.get("start")
    limit = request.json.get("limit")
    wxid = request.json.get("wxid")

    if not wxid:
        return ReJson(1002, body=f"wxid is required: {wxid}")
    if start and isinstance(start, str) and start.isdigit():
        start = int(start)
    if limit and isinstance(limit, str) and limit.isdigit():
        limit = int(limit)
    if start is None or limit is None:
        return ReJson(1002, body=f"start or limit is required {start} {limit}")
    # Reject the request when EITHER value is not an int; the previous `and` only
    # rejected it when both were wrong, letting one bad value reach the query.
    if not isinstance(start, int) or not isinstance(limit, int):
        return ReJson(1002, body=f"start or limit is not int {start} {limit}")

    parsing_msg = ParsingMSG(merge_path)
    msgs, wxid_list = parsing_msg.msg_list(wxid, start, limit)
    if not msgs:
        # Nothing in the regular message tables: try the public-message parser.
        parsing_public_msg = ParsingPublicMsg(merge_path)
        msgs, wxid_list = parsing_public_msg.msg_list(wxid, start, limit)
    # Include the current account so its user info is returned as well.
    wxid_list.append(my_wxid)
    user_list = wxid2userinfo(merge_path, merge_path, wxid_list)
    return ReJson(0, {"msg_list": msgs, "user_list": user_list})
@api.route('/api/video/<path:videoPath>', methods=["GET", 'POST'])
def get_video(videoPath):
    """
    Serve a video located under the stored wx_path, keeping a copy in the temp folder.

    :param videoPath: path of the video relative to the stored wx_path.
    :return: the video file, or ReJson 1001 (no last-used wxid) or 5002 (source missing).
    """
    my_wxid = read_session(g.sf, "test", "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")
    wx_path = read_session(g.sf, my_wxid, "wx_path")

    videoPath = videoPath.replace("\\\\", "\\")
    cache_root = os.path.join(g.tmp_path, my_wxid, "video")
    source_file = os.path.join(wx_path, videoPath)
    if not os.path.exists(source_file):
        return ReJson(5002)

    # Copy the file into the temporary folder.
    cached_file = os.path.join(cache_root, videoPath)
    cached_dir = os.path.dirname(cached_file)
    if not os.path.exists(cached_dir):
        os.makedirs(cached_dir)
    if os.path.exists(cached_file):
        return send_file(cached_file)
    shutil.copy(source_file, cached_file)
    # On the first request the original file is sent; later requests get the copy.
    return send_file(source_file)
@api.route('/api/audio/<path:savePath>', methods=["GET", 'POST'])
def get_audio(savePath):
    """
    Serve a voice message as a file, extracting it from the merged database on demand.

    The file lives under ``<g.tmp_path>/<my_wxid>/audio``. The message id is the part
    of the path after the last underscore, with ".wav" removed.

    :param savePath: relative audio path captured from the request URL.
    :return: the audio file, or ReJson 1001 (no last-used wxid or no audio data),
             1002, or 4004 (file missing after extraction).
    """
    my_wxid = read_session(g.sf, "test", "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")
    merge_path = read_session(g.sf, my_wxid, "merge_path")

    # Resolve the URL-supplied path inside the per-account audio folder.
    savePath = os.path.join(g.tmp_path, my_wxid, "audio", savePath)
    if os.path.exists(savePath):
        return send_file(savePath)

    msg_svr_id = savePath.split("_")[-1].replace(".wav", "")
    if not savePath:
        return ReJson(1002)

    # Make sure the target folder exists before extraction.
    target_dir = os.path.dirname(savePath)
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    media_db = ParsingMediaMSG(merge_path)
    wave_data = media_db.get_audio(msg_svr_id, is_play=False, is_wave=True, save_path=savePath, rate=24000)
    if not wave_data:
        return ReJson(1001, body="wave_data is required")

    if not os.path.exists(savePath):
        return ReJson(4004, body=savePath)
    return send_file(savePath)
@api.route('/api/file_info', methods=["GET", 'POST'])
def get_file_info():
    """
    Return the name and size of a file located under the stored wx_path.

    ``file_path`` is read from the JSON body, falling back to the query string.

    :return: ReJson code 0 with ``{"file_name": ..., "file_size": "<bytes>"}``;
             1002 when file_path is missing, 1001 when no last-used wxid is stored,
             5002 when the file does not exist.
    """
    from_query = request.args.get("file_path")
    file_path = request.json.get("file_path", from_query)
    if not file_path:
        return ReJson(1002)

    my_wxid = read_session(g.sf, "test", "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")
    wx_path = read_session(g.sf, my_wxid, "wx_path")

    full_path = os.path.join(wx_path, file_path)
    if not os.path.exists(full_path):
        return ReJson(5002)

    info = {
        "file_name": os.path.basename(full_path),
        "file_size": str(os.path.getsize(full_path)),
    }
    return ReJson(0, info)
@api.route('/api/file/<path:filePath>', methods=["GET", 'POST'])
def get_file(filePath):
    """
    Serve a file located under the stored wx_path.

    :param filePath: path of the file relative to the stored wx_path.
    :return: the file, or ReJson 1001 (no last-used wxid) or 5002 (file missing).
    """
    my_wxid = read_session(g.sf, "test", "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")
    wx_path = read_session(g.sf, my_wxid, "wx_path")
    full_path = os.path.join(wx_path, filePath)
    if os.path.exists(full_path):
        return send_file(full_path)
    return ReJson(5002)
# end 以上为聊天记录相关api *********************************************************************************************
# start 导出聊天记录 *****************************************************************************************************
@api.route('/api/export_endb', methods=["GET", 'POST'])
def get_export_endb():
    """
    Export the encrypted databases by copying them into the export folder.

    ``wx_path`` comes from the JSON body, defaulting to the value stored in the session.
    The files reported by ``get_core_db`` are copied to
    ``<g.tmp_path>/export/<my_wxid>/endb``.

    :return: ReJson code 0 with the output folder; 1001 when no last-used wxid is stored
             or wx_path does not exist, 1002 when wx_path is empty, 2001 when
             ``get_core_db`` reports failure.
    """
    my_wxid = read_session(g.sf, "test", "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")

    stored_wx_path = read_session(g.sf, my_wxid, "wx_path")
    wx_path = request.json.get("wx_path", stored_wx_path)
    if not wx_path:
        return ReJson(1002, body=f"wx_path is required: {wx_path}")
    if not os.path.exists(wx_path):
        return ReJson(1001, body=f"wx_path not exists: {wx_path}")

    code, wxdbpaths = get_core_db(wx_path)
    if not code:
        return ReJson(2001, body=wxdbpaths)

    outpath = os.path.join(g.tmp_path, "export", my_wxid, "endb")
    if not os.path.exists(outpath):
        os.makedirs(outpath)

    for db_file in wxdbpaths:
        # Only here to silence an IDE type warning; it has no functional purpose.
        assert isinstance(outpath, str)
        # Copy each database into the export folder under its own file name.
        destination = os.path.join(outpath, os.path.basename(db_file))
        shutil.copy(db_file, destination)
    return ReJson(0, body=outpath)
@api.route('/api/export_dedb', methods=["GET", "POST"])
def get_export_dedb():
    """
    Export a decrypted, merged database into ``<g.tmp_path>/export/<my_wxid>/dedb``.

    ``key`` and ``wx_path`` come from the JSON body, defaulting to the values stored in
    the session.

    :return: ReJson code 0 with the merged database path; 1001 when no last-used wxid
             is stored or wx_path does not exist, 1002 when key or wx_path is empty,
             2001 when ``decrypt_merge`` reports failure.
    """
    my_wxid = read_session(g.sf, "test", "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")

    payload = request.json
    key = payload.get("key", read_session(g.sf, my_wxid, "key"))
    wx_path = payload.get("wx_path", read_session(g.sf, my_wxid, "wx_path"))

    if not key:
        return ReJson(1002, body=f"key is required: {key}")
    if not wx_path:
        return ReJson(1002, body=f"wx_path is required: {wx_path}")
    if not os.path.exists(wx_path):
        return ReJson(1001, body=f"wx_path not exists: {wx_path}")

    outpath = os.path.join(g.tmp_path, "export", my_wxid, "dedb")
    if not os.path.exists(outpath):
        os.makedirs(outpath)

    code, merge_save_path = decrypt_merge(wx_path=wx_path, key=key, outpath=outpath)
    time.sleep(1)
    # On failure the second element carries the error description.
    return ReJson(0, body=merge_save_path) if code else ReJson(2001, body=merge_save_path)
@api.route('/api/export_csv', methods=["GET", 'POST'])
def get_export_csv():
    """
    Export the chat with ``wxid`` as CSV into ``<g.tmp_path>/export/<my_wxid>/csv/<wxid>``.

    :return: ReJson code 0 with the result of ``export_csv``; 1001 when no last-used
             wxid is stored, 1002 when wxid is missing, 2001 when the export fails.
    """
    my_wxid = read_session(g.sf, "test", "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")

    wxid = request.json.get("wxid")
    if not wxid:
        return ReJson(1002, body=f"username is required: {wxid}")

    outpath = os.path.join(g.tmp_path, "export", my_wxid, "csv", wxid)
    if not os.path.exists(outpath):
        os.makedirs(outpath)

    db_path = read_session(g.sf, my_wxid, "merge_path")
    code, ret = export_csv(wxid, outpath, db_path)
    if not code:
        return ReJson(2001, body=ret)
    return ReJson(0, ret)
@api.route('/api/export_json', methods=["GET", 'POST'])
def get_export_json():
    """
    Export the chat with ``wxid`` as JSON into ``<g.tmp_path>/export/<my_wxid>/json/<wxid>``.

    :return: ReJson code 0 with the result of ``export_json``; 1001 when no last-used
             wxid is stored, 1002 when wxid is missing, 2001 when the export fails.
    """
    my_wxid = read_session(g.sf, "test", "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")

    wxid = request.json.get("wxid")
    if not wxid:
        return ReJson(1002, body=f"username is required: {wxid}")

    outpath = os.path.join(g.tmp_path, "export", my_wxid, "json", wxid)
    if not os.path.exists(outpath):
        os.makedirs(outpath)

    db_path = read_session(g.sf, my_wxid, "merge_path")
    code, ret = export_json(wxid, outpath, db_path)
    if not code:
        return ReJson(2001, body=ret)
    return ReJson(0, ret)
# @api.route('/api/export', methods=["GET", 'POST'])
# @error9999
# def export():
# """
# 导出聊天记录
# :return:
# """
# export_type = request.json.get("export_type")
# start_time = request.json.get("start_time", 0)
# end_time = request.json.get("end_time", 0)
# chat_type = request.json.get("chat_type")
# username = request.json.get("username")
#
# wx_path = request.json.get("wx_path", read_session(g.sf, "wx_path"))
# key = request.json.get("key", read_session(g.sf, "key"))
#
# if not export_type or not isinstance(export_type, str):
# return ReJson(1002)
#
# # 导出路径
# outpath = os.path.join(g.tmp_path, "export", export_type)
# if not os.path.exists(outpath):
# os.makedirs(outpath)
#
# if export_type == "endb": # 导出加密数据库
# # 获取微信文件夹路径
# if not wx_path:
# return ReJson(1002)
# if not os.path.exists(wx_path):
# return ReJson(1001, body=wx_path)
#
# # 分割wx_path的文件名和父目录
# code, wxdbpaths = get_core_db(wx_path)
# if not code:
# return ReJson(2001, body=wxdbpaths)
#
# for wxdb in wxdbpaths:
# # 复制wxdb->outpath, os.path.basename(wxdb)
# shutil.copy(wxdb, os.path.join(outpath, os.path.basename(wxdb)))
# return ReJson(0, body=outpath)
#
# elif export_type == "dedb":
# if isinstance(start_time, int) and isinstance(end_time, int):
# msg_path = read_session(g.sf, "msg_path")
# micro_path = read_session(g.sf, "micro_path")
# media_path = read_session(g.sf, "media_path")
# dbpaths = [msg_path, media_path, micro_path]
# dbpaths = list(set(dbpaths))
# mergepath = merge_db(dbpaths, os.path.join(outpath, "merge.db"), start_time, end_time)
# return ReJson(0, body=mergepath)
# # if msg_path == media_path and msg_path == media_path:
# # shutil.copy(msg_path, os.path.join(outpath, "merge.db"))
# # return ReJson(0, body=msg_path)
# # else:
# # dbpaths = [msg_path, msg_path, micro_path]
# # dbpaths = list(set(dbpaths))
# # mergepath = merge_db(dbpaths, os.path.join(outpath, "merge.db"), start_time, end_time)
# # return ReJson(0, body=mergepath)
# else:
# return ReJson(1002, body={"start_time": start_time, "end_time": end_time})
#
# elif export_type == "csv":
# outpath = os.path.join(outpath, username)
# if not os.path.exists(outpath):
# os.makedirs(outpath)
# code, ret = analyzer.export_csv(username, outpath, read_session(g.sf, "msg_path"))
# if code:
# return ReJson(0, ret)
# else:
# return ReJson(2001, body=ret)
# elif export_type == "json":
# outpath = os.path.join(outpath, username)
# if not os.path.exists(outpath):
# os.makedirs(outpath)
# code, ret = analyzer.export_json(username, outpath, read_session(g.sf, "msg_path"))
# if code:
# return ReJson(0, ret)
# else:
# return ReJson(2001, body=ret)
# elif export_type == "html":
# outpath = os.path.join(outpath, username)
# if os.path.exists(outpath):
# shutil.rmtree(outpath)
# if not os.path.exists(outpath):
# os.makedirs(outpath)
# # chat_type_tups = []
# # for ct in chat_type:
# # tup = analyzer.get_name_typeid(ct)
# # if tup:
# # chat_type_tups += tup
# # if not chat_type_tups:
# # return ReJson(1002)
#
# # 复制文件 html
# export_html = os.path.join(os.path.dirname(pywxdump.VERSION_LIST_PATH), "ui", "export")
# indexhtml_path = os.path.join(export_html, "index.html")
# assets_path = os.path.join(export_html, "assets")
# if not os.path.exists(indexhtml_path) or not os.path.exists(assets_path):
# return ReJson(1001)
# js_path = ""
# css_path = ""
# for file in os.listdir(assets_path):
# if file.endswith('.js'):
# js_path = os.path.join(assets_path, file)
# elif file.endswith('.css'):
# css_path = os.path.join(assets_path, file)
# else:
# continue
# # 读取html,js,css
# with open(indexhtml_path, 'r', encoding='utf-8') as f:
# html = f.read()
# with open(js_path, 'r', encoding='utf-8') as f:
# js = f.read()
# with open(css_path, 'r', encoding='utf-8') as f:
# css = f.read()
#
# html = re.sub(r'<script .*?></script>', '', html) # 删除所有的script标签
# html = re.sub(r'<link rel="stylesheet" .*?>', '', html) # 删除所有的link标签
#
# html = html.replace('</head>', f'<style>{css}</style></head>')
# html = html.replace('</head>', f'<script type="module" crossorigin>{js}</script></head>')
# # END 生成index.html
#
# rdata = func_get_msgs(0, 10000000, username, "", "")
#
# msg_list = rdata["msg_list"]
# for i in range(len(msg_list)):
# if msg_list[i]["type_name"] == "语音":
# savePath = msg_list[i]["content"]["src"]
# MsgSvrID = savePath.split("_")[-1].replace(".wav", "")
# if not savePath:
# continue
# media_path = read_session(g.sf, "media_path")
# wave_data = read_audio(MsgSvrID, is_wave=True, DB_PATH=media_path)
# if not wave_data:
# continue
# # 判断savePath路径的文件夹是否存在
# savePath = os.path.join(outpath, savePath)
# if not os.path.exists(os.path.dirname(savePath)):
# os.makedirs(os.path.dirname(savePath))
# with open(savePath, "wb") as f:
# f.write(wave_data)
# elif msg_list[i]["type_name"] == "图片":
# img_path = msg_list[i]["content"]["src"]
# wx_path = read_session(g.sf, "wx_path")
# img_path_all = os.path.join(wx_path, img_path)
#
# if os.path.exists(img_path_all):
# fomt, md5, out_bytes = read_img_dat(img_path_all)
# imgsavepath = os.path.join(outpath, "img", img_path + "_" + ".".join([md5, fomt]))
# if not os.path.exists(os.path.dirname(imgsavepath)):
# os.makedirs(os.path.dirname(imgsavepath))
# with open(imgsavepath, "wb") as f:
# f.write(out_bytes)
# msg_list[i]["content"]["src"] = os.path.join("img", img_path + "_" + ".".join([md5, fomt]))
#
# rdata["msg_list"] = msg_list
# rdata["myuserdata"] = rdata["user_list"][rdata["my_wxid"]]
# rdata["myuserdata"]["chat_count"] = len(rdata["msg_list"])
# save_data = rdata
# save_json_path = os.path.join(outpath, "data")
# if not os.path.exists(save_json_path):
# os.makedirs(save_json_path)
# with open(os.path.join(save_json_path, "msg_user.json"), "w", encoding="utf-8") as f:
# json.dump(save_data, f, ensure_ascii=False)
#
# json_base64 = gen_base64(os.path.join(save_json_path, "msg_user.json"))
# html = html.replace('"./data/msg_user.json"', f'"{json_base64}"')
#
# with open(os.path.join(outpath, "index.html"), 'w', encoding='utf-8') as f:
# f.write(html)
# return ReJson(0, outpath)
#
# elif export_type == "pdf":
# pass
# elif export_type == "docx":
# pass
# else:
# return ReJson(1002)
#
# return ReJson(9999, "")
# end 导出聊天记录 *******************************************************************************************************
# start 聊天记录分析api **************************************************************************************************
@api.route('/api/date_count', methods=["GET", 'POST'])
@error9999
def get_date_count():
    """
    Return per-date message statistics from the merged database of the last used wxid.

    :return: ReJson code 0 with ``ParsingMSG(...).date_count()``; 1001 when no
             last-used wxid is stored.
    """
    my_wxid = read_session(g.sf, "test", "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")
    db_path = read_session(g.sf, my_wxid, "merge_path")
    msg_db = ParsingMSG(db_path)
    return ReJson(0, msg_db.date_count())
@api.route('/api/wordcloud', methods=["GET", 'POST'])
@error9999
def wordcloud():
    """
    Placeholder for a word-cloud endpoint: the body is empty, so the view itself
    returns None.
    """
    pass
# start 这部分为专业工具的api *********************************************************************************************
@api.route('/api/wxinfo', methods=["GET", 'POST'])
@error9999
def get_wxinfo():
    """
    Return the WeChat information collected by ``read_info(VERSION_LIST)``.

    COM is initialised for the duration of the call and always released afterwards,
    even when ``read_info`` raises.

    :return: ReJson code 0 with the collected information.
    """
    import pythoncom
    pythoncom.CoInitialize()
    try:
        wxinfos = read_info(VERSION_LIST)
    finally:
        # Every CoInitialize must be balanced by a CoUninitialize on the same thread.
        pythoncom.CoUninitialize()
    return ReJson(0, wxinfos)
@api.route('/api/biasaddr', methods=["GET", 'POST'])
@error9999
def biasaddr():
    """
    Run ``BiasAddr`` with the account details supplied in the JSON body.

    Required fields: ``mobile``, ``name``, ``account``. Optional: ``key``, ``wxdbPath``.
    COM is initialised for the duration of the run and released afterwards, matching
    what ``get_wxinfo`` does.

    :return: ReJson code 0 with the stringified result; 1002 when a required field
             is missing.
    """
    mobile = request.json.get("mobile")
    name = request.json.get("name")
    account = request.json.get("account")
    key = request.json.get("key", "")
    wxdbPath = request.json.get("wxdbPath", "")
    if not mobile or not name or not account:
        return ReJson(1002)
    pythoncom.CoInitialize()
    try:
        # Stringify while COM is still initialised, in case the result refers to COM state.
        body = str(BiasAddr(account, mobile, name, key, wxdbPath).run())
    finally:
        # Previously never released: balance the CoInitialize call above.
        pythoncom.CoUninitialize()
    return ReJson(0, body)
@api.route('/api/decrypt', methods=["GET", 'POST'])
@error9999
def decrypt():
    """
    Decrypt the database(s) at ``wxdbPath`` with ``key`` via ``batch_decrypt``.

    ``outPath`` is optional and defaults to ``g.tmp_path``.

    :return: ReJson code 0 with the stringified result; 1002 when ``key`` or
             ``wxdbPath`` is missing.
    """
    payload = request.json
    key = payload.get("key")
    if not key:
        return ReJson(1002)
    wxdb_path = request.json.get("wxdbPath")
    if not wxdb_path:
        return ReJson(1002)
    # Fall back to the temp folder when no output folder is given.
    out_path = request.json.get("outPath") or g.tmp_path
    result = batch_decrypt(key, wxdb_path, out_path=out_path)
    return ReJson(0, str(result))
@api.route('/api/merge', methods=["GET", 'POST'])
@error9999
def merge():
    """
    Merge the database(s) given by ``dbPath`` into ``outPath`` via ``merge_db``.

    :return: ReJson code 0 with the stringified result; 1002 when ``dbPath`` or
             ``outPath`` is missing.
    """
    source = request.json.get("dbPath")
    if not source:
        return ReJson(1002)
    target = request.json.get("outPath")
    if not target:
        return ReJson(1002)
    return ReJson(0, str(merge_db(source, target)))
# END 这部分为专业工具的api ***********************************************************************************************
# 关于、帮助、设置 *******************************************************************************************************
@api.route('/api/check_update', methods=["GET", 'POST'])
@error9999
def check_update():
    """
    Check GitHub for a newer release tag.

    The first tag returned by the GitHub tags API is compared (without its leading
    character) with ``pywxdump.__version__``.

    :return: ReJson code 0 with msg / latest_version / latest_url; 2001 when GitHub
             answers with a non-200 status, 9999 on any exception (network error,
             timeout, unexpected payload).
    """
    url = "https://api.github.com/repos/xaoyaoo/PyWxDump/tags"
    try:
        import requests
        # Without a timeout an unreachable host would block this request indefinitely.
        res = requests.get(url, timeout=10)
        if res.status_code == 200:
            data = res.json()
            NEW_VERSION = data[0].get("name")
            # Tag names carry a one-character prefix that the package version lacks.
            if NEW_VERSION[1:] != pywxdump.__version__:
                msg = "有新版本"
            else:
                msg = "已经是最新版本"
            return ReJson(0, body={"msg": msg, "latest_version": NEW_VERSION,
                                   "latest_url": "https://github.com/xaoyaoo/PyWxDump/releases/tag/" + NEW_VERSION})
        else:
            return ReJson(2001, body="status_code is not 200")
    except Exception as e:
        return ReJson(9999, msg=str(e))
@api.route('/api/version', methods=["GET", 'POST'])
@error9999
def version():
    """
    Return the installed package version.

    :return: ReJson code 0 with ``pywxdump.__version__``.
    """
    current_version = pywxdump.__version__
    return ReJson(0, current_version)
# END 关于、帮助、设置 ***************************************************************************************************
@api.route('/')
@error9999
def index():
    """Render the ``index.html`` template for the root URL."""
    page = render_template('index.html')
    return page

View File

@ -0,0 +1,283 @@
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name: local_server.py
# Description:
# Author: xaoyaoo
# Date: 2024/08/01
# -------------------------------------------------------------------------------
import base64
import json
import logging
import os
import re
import time
import shutil
import pythoncom
import pywxdump
from flask import Flask, request, render_template, g, Blueprint, send_file, make_response, session
from pywxdump import get_core_db, all_merge_real_time_db
from pywxdump.api.rjson import ReJson, RqJson
from pywxdump.api.utils import get_conf, get_conf_wxids, set_conf, error9999, gen_base64, validate_title, \
get_conf_local_wxid, ls_loger
from pywxdump import get_wx_info, WX_OFFS, batch_decrypt, BiasAddr, merge_db, decrypt_merge, merge_real_time_db
from pywxdump.db import DBHandler, download_file, export_csv, export_json
ls_api = Blueprint('ls_api', __name__, template_folder='../ui/web', static_folder='../ui/web/assets/', )
ls_api.debug = False
# 以下为初始化相关 *******************************************************************************************************
@ls_api.route('/api/ls/init_last_local_wxid', methods=["GET", 'POST'])
@error9999
def init_last_local_wxid():
    """
    List the top-level keys of the config file, excluding the ``g.at`` entry.

    :return: ReJson(0) whose body is ``{"local_wxids": [...]}``; the list is
             empty when the config cannot be read or holds no other key.
    """
    # get_conf_local_wxid returns None when the config file is missing or is
    # not valid JSON; treat that as "no accounts" rather than failing.
    local_wxid = get_conf_local_wxid(g.caf) or []
    # Filter instead of list.remove(), which raises ValueError when the
    # g.at entry has not been written yet.
    local_wxid = [wxid for wxid in local_wxid if wxid != g.at]
    return ReJson(0, {"local_wxids": local_wxid})
@ls_api.route('/api/ls/init_last', methods=["GET", 'POST'])
@error9999
def init_last():
    """
    Report whether an account is already initialised.

    Uses ``my_wxid`` from the JSON body when it is a non-empty string,
    otherwise the "last" entry stored under "auto_setting". The chosen id is
    written back as the new "last" entry.

    :return: ReJson(0) with the stored ``merge_path``/``wx_path``/``key`` and
             ``is_init=True`` when both paths are configured, otherwise
             ReJson(0) with ``is_init=False``.
    """
    my_wxid = request.json.get("my_wxid", "")
    if isinstance(my_wxid, str):
        # Drop surrounding whitespace and quote characters.
        my_wxid = my_wxid.strip().strip("'").strip('"')
    else:
        my_wxid = ""
    if not my_wxid:
        my_wxid = get_conf(g.caf, "auto_setting", "last")

    if not my_wxid:
        return ReJson(0, {"is_init": False, "my_wxid": ""})

    set_conf(g.caf, "auto_setting", "last", my_wxid)
    merge_path = get_conf(g.caf, my_wxid, "merge_path")
    wx_path = get_conf(g.caf, my_wxid, "wx_path")
    key = get_conf(g.caf, my_wxid, "key")
    if not (merge_path and wx_path):
        return ReJson(0, {"is_init": False, "my_wxid": ""})

    return ReJson(0, {
        "merge_path": merge_path,
        "wx_path": wx_path,
        "key": key,
        "my_wxid": my_wxid,
        "is_init": True,
    })
@ls_api.route('/api/ls/init_key', methods=["GET", 'POST'])
@error9999
def init_key():
    """
    Initialise an account from a WeChat folder and its key.

    Decrypts and merges the databases under ``wx_path`` via ``decrypt_merge``,
    moves the merged file to ``<work_path>/<my_wxid>/merge_all.db`` and stores
    the resulting settings in the config file.

    :return: ReJson(0) with the stored settings on success; ReJson(1002) for a
             missing parameter; ReJson(1001) when ``wx_path`` does not exist;
             ReJson(2001) when the output folder is locked or the merge fails.
    """
    payload = request.json
    wx_path = payload.get("wx_path", "").strip().strip("'").strip('"')
    key = payload.get("key", "").strip().strip("'").strip('"')
    my_wxid = payload.get("my_wxid", "").strip().strip("'").strip('"')

    if not wx_path:
        return ReJson(1002, body=f"wx_path is required: {wx_path}")
    if not os.path.exists(wx_path):
        return ReJson(1001, body=f"wx_path not exists: {wx_path}")
    if not key:
        return ReJson(1002, body=f"key is required: {key}")
    if not my_wxid:
        return ReJson(1002, body=f"my_wxid is required: {my_wxid}")

    # my_wxid is guaranteed non-empty here, so the folder is always per-account.
    out_path = os.path.join(g.work_path, "decrypted", my_wxid)

    # Clear leftovers of a previous run; a locked file aborts the request.
    if os.path.exists(out_path):
        try:
            shutil.rmtree(out_path)
        except PermissionError as e:
            ls_loger.error(f"{e}", exc_info=True)
            return ReJson(2001, body=str(e))

    code, merge_save_path = decrypt_merge(wx_path=wx_path, key=key, outpath=str(out_path))
    time.sleep(1)
    if not code:
        # On failure the second element is returned to the client as the body.
        return ReJson(2001, body=merge_save_path)

    # Move the merged database into <work_path>/<my_wxid>/.
    account_dir = os.path.join(g.work_path, my_wxid)
    if not os.path.exists(account_dir):
        os.makedirs(account_dir)
    merge_save_path_new = os.path.join(g.work_path, my_wxid, "merge_all.db")
    shutil.move(merge_save_path, str(merge_save_path_new))

    # Best-effort cleanup of the temporary folder; failure is only logged.
    if os.path.exists(out_path):
        try:
            shutil.rmtree(out_path)
        except PermissionError as e:
            ls_loger.error(f"{e}", exc_info=True)

    db_config = {
        "key": "merge_all",
        "type": "sqlite",
        "path": merge_save_path_new
    }
    set_conf(g.caf, my_wxid, "db_config", db_config)
    set_conf(g.caf, my_wxid, "merge_path", merge_save_path_new)
    set_conf(g.caf, my_wxid, "wx_path", wx_path)
    set_conf(g.caf, my_wxid, "key", key)
    set_conf(g.caf, my_wxid, "my_wxid", my_wxid)
    set_conf(g.caf, "auto_setting", "last", my_wxid)

    return ReJson(0, {
        "merge_path": merge_save_path_new,
        "wx_path": wx_path,
        "key": key,
        "my_wxid": my_wxid,
        "is_init": True,
    })
@ls_api.route('/api/ls/init_nokey', methods=["GET", 'POST'])
@error9999
def init_nokey():
    """
    Initialise an account from an already merged database, without a key.

    Stores ``merge_path`` and ``wx_path`` for ``my_wxid`` and marks that
    account as the last one used.

    :return: ReJson(0) with the stored settings (``key`` is always returned
             as an empty string); ReJson(1002) for a missing parameter;
             ReJson(1001) when ``wx_path`` does not exist.
    """
    merge_path = request.json.get("merge_path", "").strip().strip("'").strip('"')
    wx_path = request.json.get("wx_path", "").strip().strip("'").strip('"')
    my_wxid = request.json.get("my_wxid", "").strip().strip("'").strip('"')

    if not wx_path:
        return ReJson(1002, body=f"wx_path is required: {wx_path}")
    if not os.path.exists(wx_path):
        return ReJson(1001, body=f"wx_path not exists: {wx_path}")
    if not merge_path:
        return ReJson(1002, body=f"merge_path is required: {merge_path}")
    if not my_wxid:
        return ReJson(1002, body=f"my_wxid is required: {my_wxid}")

    # Keep whatever key was stored before for this account (may be None).
    key = get_conf(g.caf, my_wxid, "key")

    # The chat routes read "db_config"; write it here in the same shape
    # init_key uses so an account initialised without a key is usable too.
    db_config = {
        "key": "merge_all",
        "type": "sqlite",
        "path": merge_path
    }
    set_conf(g.caf, my_wxid, "db_config", db_config)
    set_conf(g.caf, my_wxid, "merge_path", merge_path)
    set_conf(g.caf, my_wxid, "wx_path", wx_path)
    set_conf(g.caf, my_wxid, "key", key)
    set_conf(g.caf, my_wxid, "my_wxid", my_wxid)
    # Was written to a "test" section, which init_last/init_key never read;
    # use the same "auto_setting" section they use.
    set_conf(g.caf, "auto_setting", "last", my_wxid)

    rdata = {
        "merge_path": merge_path,
        "wx_path": wx_path,
        "key": "",
        "my_wxid": my_wxid,
        "is_init": True,
    }
    return ReJson(0, rdata)
# END 以上为初始化相关 ***************************************************************************************************
@ls_api.route('/api/ls/realtimemsg', methods=["GET", "POST"])
@error9999
def get_real_time_msg():
    """
    Merge real-time messages into the merged database.

    Reads ``merge_path``, ``key`` and ``wx_path`` of the last used account
    from the config and passes them to ``all_merge_real_time_db``.

    :return: ReJson(0) with the merge result on success; ReJson(1001) when no
             account is selected; ReJson(1002) when a setting is missing;
             ReJson(2001) when the merge reports failure.
    """
    my_wxid = get_conf(g.caf, g.at, "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")

    merge_path = get_conf(g.caf, my_wxid, "merge_path")
    key = get_conf(g.caf, my_wxid, "key")
    wx_path = get_conf(g.caf, my_wxid, "wx_path")

    # The original tested wx_path twice and named parameters that are not
    # read here; report the three settings that are actually required.
    if not (merge_path and key and wx_path):
        return ReJson(1002, body="merge_path or wx_path or key is required")

    code, ret = all_merge_real_time_db(key=key, wx_path=wx_path, merge_path=merge_path)
    if code:
        return ReJson(0, ret)
    return ReJson(2001, body=ret)
# start 这部分为专业工具的api *********************************************************************************************
@ls_api.route('/api/ls/wxinfo', methods=["GET", 'POST'])
@error9999
def get_wxinfo():
    """
    Collect WeChat information via ``get_wx_info(WX_OFFS)``.

    :return: ReJson(0) whose body is the value returned by ``get_wx_info``.
    """
    import pythoncom
    pythoncom.CoInitialize()
    try:
        wxinfos = get_wx_info(WX_OFFS)
    finally:
        # Balance CoInitialize even when get_wx_info raises.
        pythoncom.CoUninitialize()
    return ReJson(0, wxinfos)
@ls_api.route('/api/ls/biasaddr', methods=["GET", 'POST'])
@error9999
def biasaddr():
    """
    Run ``BiasAddr`` with the account details given in the JSON body.

    ``mobile``, ``name`` and ``account`` are required; ``key`` and
    ``wxdbPath`` default to an empty string.

    :return: ReJson(0) whose body is ``str()`` of the ``BiasAddr.run()``
             result; ReJson(1002) when a required field is missing.
    """
    mobile = request.json.get("mobile")
    name = request.json.get("name")
    account = request.json.get("account")
    key = request.json.get("key", "")
    wxdbPath = request.json.get("wxdbPath", "")
    if not mobile or not name or not account:
        return ReJson(1002)

    pythoncom.CoInitialize()
    try:
        rdata = BiasAddr(account, mobile, name, key, wxdbPath).run()
    finally:
        # The original never released COM; pair the call as get_wxinfo does.
        pythoncom.CoUninitialize()
    return ReJson(0, str(rdata))
@ls_api.route('/api/ls/decrypt', methods=["GET", 'POST'])
@error9999
def decrypt():
    """
    Decrypt databases with ``batch_decrypt``.

    JSON body: ``key`` and ``wxdbPath`` are required; ``outPath`` falls back
    to ``g.tmp_path`` when absent or empty.

    :return: ReJson(0) whose body is ``str()`` of the ``batch_decrypt``
             result; ReJson(1002) when ``key`` or ``wxdbPath`` is missing.
    """
    payload = request.json
    key = payload.get("key")
    if not key:
        return ReJson(1002)
    wxdb_path = payload.get("wxdbPath")
    if not wxdb_path:
        return ReJson(1002)
    out_path = payload.get("outPath") or g.tmp_path
    result = batch_decrypt(key, wxdb_path, out_path=out_path)
    return ReJson(0, str(result))
@ls_api.route('/api/ls/merge', methods=["GET", 'POST'])
@error9999
def merge():
    """
    Merge databases with ``merge_db``.

    JSON body: ``dbPath`` and ``outPath`` are both required.

    :return: ReJson(0) whose body is ``str()`` of the ``merge_db`` result;
             ReJson(1002) when either path is missing.
    """
    payload = request.json
    wxdb_path = payload.get("dbPath")
    if not wxdb_path:
        return ReJson(1002)
    out_path = payload.get("outPath")
    if not out_path:
        return ReJson(1002)
    merged = merge_db(wxdb_path, out_path)
    return ReJson(0, str(merged))
# END 这部分为专业工具的api ***********************************************************************************************

View File

@ -0,0 +1,514 @@
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name: chat_api.py
# Description:
# Author: xaoyaoo
# Date: 2024/01/02
# -------------------------------------------------------------------------------
import base64
import json
import logging
import os
import re
import time
import shutil
import pythoncom
import pywxdump
from flask import Flask, request, render_template, g, Blueprint, send_file, make_response, session
from pywxdump import get_core_db, all_merge_real_time_db
from pywxdump.api.rjson import ReJson, RqJson
from pywxdump.api.utils import get_conf, get_conf_wxids, set_conf, error9999, gen_base64, validate_title, \
get_conf_local_wxid
from pywxdump import get_wx_info, WX_OFFS, batch_decrypt, BiasAddr, merge_db, decrypt_merge, merge_real_time_db
from pywxdump.db import DBHandler, download_file, export_csv, export_json
from pywxdump.db.utils import dat2img
# app = Flask(__name__, static_folder='../ui/web/dist', static_url_path='/')
rs_api = Blueprint('rs_api', __name__, template_folder='../ui/web', static_folder='../ui/web/assets/', )
rs_api.debug = False
# 是否初始化
@rs_api.route('/api/rs/is_init', methods=["GET", 'POST'])
@error9999
def is_init():
    """
    Report whether the config file holds more than one top-level key.

    :return: ReJson(0, True) when more than one key is present, otherwise
             ReJson(0, False).
    """
    # get_conf_local_wxid returns None when the config file is missing or is
    # not valid JSON; treat that as "not initialised" instead of failing in
    # len().
    local_wxids = get_conf_local_wxid(g.caf) or []
    return ReJson(0, len(local_wxids) > 1)
# start 以下为聊天联系人相关api *******************************************************************************************
@rs_api.route('/api/rs/mywxid', methods=["GET", 'POST'])
@error9999
def mywxid():
    """
    Return the wxid stored as the last used account.

    :return: ReJson(0) with ``{"my_wxid": ...}``, or ReJson(1001) when no
             account is stored.
    """
    last_wxid = get_conf(g.caf, g.at, "last")
    if last_wxid:
        return ReJson(0, {"my_wxid": last_wxid})
    return ReJson(1001, body="my_wxid is required")
@rs_api.route('/api/rs/user_session_list', methods=["GET", 'POST'])
@error9999
def user_session_list():
    """
    Return the session list of the last used account.

    :return: ReJson(0) whose body is the list of values of
             ``DBHandler.get_session_list()``; ReJson(1001) when no account
             is selected.
    """
    my_wxid = get_conf(g.caf, g.at, "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")
    db_config = get_conf(g.caf, my_wxid, "db_config")
    sessions = DBHandler(db_config).get_session_list()
    return ReJson(0, list(sessions.values()))
@rs_api.route('/api/rs/user_labels_dict', methods=["GET", 'POST'])
@error9999
def user_labels_dict():
    """
    Return the labels of the last used account.

    :return: ReJson(0) whose body is the result of ``DBHandler.get_labels()``;
             ReJson(1001) when no account is selected.
    """
    my_wxid = get_conf(g.caf, g.at, "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")
    db_config = get_conf(g.caf, my_wxid, "db_config")
    labels = DBHandler(db_config).get_labels()
    return ReJson(0, labels)
@rs_api.route('/api/rs/user_list', methods=["GET", 'POST'])
@error9999
def user_list():
    """
    Search the contacts of the last used account.

    ``word``, ``wxids`` and ``labels`` are read from the query string on GET
    and from the JSON body on POST; an empty string or ``None`` for ``wxids``
    or ``labels`` is replaced by an empty list.

    :return: ReJson(0) whose body is the result of ``DBHandler.get_user``;
             ReJson(1003) for any other HTTP method; ReJson(1001) when no
             account is selected.
    """
    if request.method == "GET":
        params = request.args
    elif request.method == "POST":
        params = request.json
    else:
        return ReJson(1003, msg="Unsupported method")

    word = params.get("word", "")
    wxids = params.get("wxids", [])
    labels = params.get("labels", [])

    if wxids is None or (isinstance(wxids, str) and wxids == ''):
        wxids = []
    if labels is None or (isinstance(labels, str) and labels == ''):
        labels = []

    my_wxid = get_conf(g.caf, g.at, "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")
    db_config = get_conf(g.caf, my_wxid, "db_config")
    handler = DBHandler(db_config)
    return ReJson(0, handler.get_user(word, wxids, labels))
# end 以上为聊天联系人相关api *********************************************************************************************
# start 以下为聊天记录相关api *********************************************************************************************
@rs_api.route('/api/rs/imgsrc/<path:imgsrc>', methods=["GET", 'POST'])
@error9999
def get_imgsrc(imgsrc):
    """
    Serve an image referenced by a message.

    1. A path starting with "FileStorage" is read from the WeChat folder,
       converted with ``dat2img`` and cached under ``<work_path>/<wxid>/img``.
    2. An http(s) URL is downloaded once into ``<work_path>/<wxid>/imgsrc``
       and served from that cache afterwards.

    :param imgsrc: relative "FileStorage..." path or an http(s) URL.
    :return: the image file, or a ReJson error (1001/1002/4004).
    """
    if not imgsrc:
        return ReJson(1002)

    is_local = imgsrc.startswith("FileStorage")
    is_remote = imgsrc.startswith("http://") or imgsrc.startswith("https://")
    if not is_local and not is_remote:
        return ReJson(1002, body=imgsrc)

    if is_local:
        my_wxid = get_conf(g.caf, g.at, "last")
        if not my_wxid:
            return ReJson(1001, body="my_wxid is required")
        wx_path = get_conf(g.caf, my_wxid, "wx_path")

        img_path = imgsrc.replace("\\\\", "\\")
        img_tmp_path = os.path.join(g.work_path, my_wxid, "img")
        original_img_path = os.path.join(wx_path, img_path)
        if not os.path.exists(original_img_path):
            return ReJson(1001, body=original_img_path)

        rc, fomt, md5, out_bytes = dat2img(original_img_path)
        if not rc:
            return ReJson(1001, body=original_img_path)

        # Cache file name: <relative path>_<md5>.<format>
        imgsavepath = os.path.join(str(img_tmp_path), img_path + "_" + ".".join([md5, fomt]))
        save_dir = os.path.dirname(imgsavepath)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        with open(imgsavepath, "wb") as f:
            f.write(out_bytes)
        return send_file(imgsavepath)

    # Remote image: re-attach the query string that Flask split off the path.
    if request.query_string:
        imgsrc = imgsrc + "?" + request.query_string.decode("utf-8")

    my_wxid = get_conf(g.caf, g.at, "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")

    img_tmp_path = os.path.join(g.work_path, my_wxid, "imgsrc")
    if not os.path.exists(img_tmp_path):
        os.makedirs(img_tmp_path)

    file_name = imgsrc.replace("http://", "").replace("https://", "").replace("/", "_").replace("?", "_")
    file_name = file_name + ".jpg"
    # An over-long name is split into a directory part and a file part.
    if len(file_name) > 255:
        file_name = file_name[:255] + "/" + file_name[255:]

    img_path_all = os.path.join(str(img_tmp_path), file_name)
    if os.path.exists(img_path_all):
        return send_file(img_path_all)

    download_file(imgsrc, img_path_all)
    if os.path.exists(img_path_all):
        return send_file(img_path_all)
    return ReJson(4004, body=imgsrc)
@rs_api.route('/api/rs/msg_count', methods=["GET", 'POST'])
@error9999
def msg_count():
    """
    Return per-contact message counts.

    ``wxids`` is read from the query string on GET and from the JSON body on
    POST. The counts from ``get_msg_count`` and ``get_plc_msg_count`` are
    summed per key.

    :return: ReJson(0) whose body maps each key to its combined count;
             ReJson(1003) for any other HTTP method; ReJson(1001) when no
             account is selected.
    """
    if request.method == "GET":
        params = request.args
    elif request.method == "POST":
        params = request.json
    else:
        return ReJson(1003, msg="Unsupported method")
    wxid = params.get("wxids", [])

    my_wxid = get_conf(g.caf, g.at, "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")
    db_config = get_conf(g.caf, my_wxid, "db_config")
    handler = DBHandler(db_config)
    normal_counts = handler.get_msg_count(wxid)
    public_counts = handler.get_plc_msg_count(wxid)

    # Union of both key sets; a key missing from one side contributes 0.
    all_keys = list(set(list(normal_counts.keys()) + list(public_counts.keys())))
    count = {}
    for k in all_keys:
        count[k] = normal_counts.get(k, 0) + public_counts.get(k, 0)
    return ReJson(0, count)
@rs_api.route('/api/rs/msg_list', methods=["GET", 'POST'])
@error9999
def get_msgs():
    """
    Return one page of messages for a contact.

    JSON body: ``wxid`` (required), ``start`` and ``limit`` (required, int or
    digit string). When ``get_msg_list`` yields no messages the lookup is
    retried with ``get_plc_msg_list``.

    :return: ReJson(0) with ``msg_list`` and the ``user_list`` of the wxids
             involved (plus the current account); ReJson(1002) for a missing
             or non-integer parameter; ReJson(1001) when no account is
             selected.
    """
    my_wxid = get_conf(g.caf, g.at, "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")
    db_config = get_conf(g.caf, my_wxid, "db_config")

    start = request.json.get("start")
    limit = request.json.get("limit")
    wxid = request.json.get("wxid")

    if not wxid:
        return ReJson(1002, body=f"wxid is required: {wxid}")
    if start and isinstance(start, str) and start.isdigit():
        start = int(start)
    if limit and isinstance(limit, str) and limit.isdigit():
        limit = int(limit)
    if start is None or limit is None:
        return ReJson(1002, body=f"start or limit is required {start} {limit}")
    # Was "and", which only rejected the request when BOTH values were
    # non-int and let a single bad value through to the database layer.
    if not isinstance(start, int) or not isinstance(limit, int):
        return ReJson(1002, body=f"start or limit is not int {start} {limit}")

    db = DBHandler(db_config)
    msgs, wxid_list = db.get_msg_list(wxid, start, limit)
    if not msgs:
        msgs, wxid_list = db.get_plc_msg_list(wxid, start, limit)
    wxid_list.append(my_wxid)
    user_list = db.get_user_list(wxids=wxid_list)
    return ReJson(0, {"msg_list": msgs, "user_list": user_list})
@rs_api.route('/api/rs/video/<path:videoPath>', methods=["GET", 'POST'])
def get_video(videoPath):
    """
    Serve a video from the WeChat folder, keeping a copy in the work folder.

    :param videoPath: path of the video relative to the account's ``wx_path``.
    :return: the cached copy when it already exists, otherwise the original
             file (after copying it into the cache); ReJson(5002) when the
             original does not exist; ReJson(1001) when no account is
             selected.
    """
    my_wxid = get_conf(g.caf, g.at, "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")
    wx_path = get_conf(g.caf, my_wxid, "wx_path")

    videoPath = videoPath.replace("\\\\", "\\")
    cache_root = os.path.join(g.work_path, my_wxid, "video")
    source_path = os.path.join(wx_path, videoPath)
    if not os.path.exists(source_path):
        return ReJson(5002)

    # Copy the file into the temporary folder on first access.
    cached_path = os.path.join(cache_root, videoPath)
    cache_dir = os.path.dirname(cached_path)
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
    if os.path.exists(cached_path):
        return send_file(cached_path)
    shutil.copy(source_path, cached_path)
    return send_file(source_path)
@rs_api.route('/api/rs/audio/<path:savePath>', methods=["GET", 'POST'])
def get_audio(savePath):
    """
    Serve a voice message as a wav file, extracting it on first access.

    The file is cached under ``<work_path>/<wxid>/audio/<savePath>``. The
    message id is taken from the text after the last "_" of the path, with
    ".wav" removed.

    :param savePath: relative wav path taken from the URL.
    :return: the wav file; ReJson(1002) for an empty path; ReJson(1001) when
             no account is selected or no audio data is returned;
             ReJson(4004) when the file was not written.
    """
    my_wxid = get_conf(g.caf, g.at, "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")
    # Validate before joining: the joined path is never empty, so the
    # original check placed after the join could not trigger.
    if not savePath:
        return ReJson(1002)
    db_config = get_conf(g.caf, my_wxid, "db_config")

    savePath = os.path.join(g.work_path, my_wxid, "audio", savePath)  # built from the URL part
    if os.path.exists(savePath):
        return send_file(savePath)

    MsgSvrID = savePath.split("_")[-1].replace(".wav", "")

    # Make sure the target folder exists before the audio is written.
    if not os.path.exists(os.path.dirname(savePath)):
        os.makedirs(os.path.dirname(savePath))

    # MediaHandler is not imported in this module, so the original call
    # raised NameError. DBHandler subclasses MediaHandler and is built from
    # the stored db_config like in the other routes.
    # NOTE(review): assumes get_audio works on a DBHandler created from
    # db_config — confirm against the handler implementation.
    db = DBHandler(db_config)
    wave_data = db.get_audio(MsgSvrID, is_play=False, is_wave=True, save_path=savePath, rate=24000)
    if not wave_data:
        return ReJson(1001, body="wave_data is required")

    if os.path.exists(savePath):
        return send_file(savePath)
    return ReJson(4004, body=savePath)
@rs_api.route('/api/rs/file_info', methods=["GET", 'POST'])
def get_file_info():
    """
    Return the name and size of a file inside the WeChat folder.

    ``file_path`` is taken from the JSON body when present, otherwise from
    the query string.

    :return: ReJson(0) with ``file_name`` and ``file_size`` (as a string);
             ReJson(1002) when ``file_path`` is missing; ReJson(1001) when no
             account is selected; ReJson(5002) when the file does not exist.
    """
    # request.json fails for requests without a JSON body, which made the
    # query-string form of this route unusable; parse the body leniently.
    payload = request.get_json(silent=True) or {}
    file_path = payload.get("file_path", request.args.get("file_path"))
    if not file_path:
        return ReJson(1002)

    my_wxid = get_conf(g.caf, g.at, "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")
    wx_path = get_conf(g.caf, my_wxid, "wx_path")

    all_file_path = os.path.join(wx_path, file_path)
    if not os.path.exists(all_file_path):
        return ReJson(5002)
    file_name = os.path.basename(all_file_path)
    file_size = os.path.getsize(all_file_path)
    return ReJson(0, {"file_name": file_name, "file_size": str(file_size)})
@rs_api.route('/api/rs/file/<path:filePath>', methods=["GET", 'POST'])
def get_file(filePath):
    """
    Serve a file from the WeChat folder of the last used account.

    :param filePath: path of the file relative to the account's ``wx_path``.
    :return: the file; ReJson(1001) when no account is selected; ReJson(5002)
             when the file does not exist or resolves outside ``wx_path``.
    """
    my_wxid = get_conf(g.caf, g.at, "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")
    wx_path = get_conf(g.caf, my_wxid, "wx_path")

    # filePath comes straight from the URL; refuse anything ("..", a drive
    # or absolute path) that would resolve outside the WeChat folder.
    root = os.path.abspath(wx_path)
    all_file_path = os.path.abspath(os.path.join(root, filePath))
    try:
        inside_root = os.path.commonpath([root, all_file_path]) == root
    except ValueError:
        # Raised e.g. for paths on different drives.
        inside_root = False
    if not inside_root or not os.path.exists(all_file_path):
        return ReJson(5002)
    return send_file(all_file_path)
# end 以上为聊天记录相关api *********************************************************************************************
# start 导出聊天记录 *****************************************************************************************************
@rs_api.route('/api/rs/export_endb', methods=["GET", 'POST'])
def get_export_endb():
    """
    Export the encrypted databases.

    Copies every database path returned by ``get_core_db(wx_path)`` into
    ``<work_path>/export/<wxid>/endb``. ``wx_path`` comes from the JSON body
    when given, otherwise from the stored config.

    :return: ReJson(0) whose body is the output folder; ReJson(1001) when no
             account is selected or ``wx_path`` does not exist; ReJson(1002)
             when ``wx_path`` is missing; ReJson(2001) when ``get_core_db``
             reports failure.
    """
    my_wxid = get_conf(g.caf, g.at, "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")

    # request.json fails without a JSON body, which made the config fallback
    # unreachable; parse the body leniently instead.
    payload = request.get_json(silent=True) or {}
    wx_path = payload.get("wx_path", get_conf(g.caf, my_wxid, "wx_path"))
    if not wx_path:
        return ReJson(1002, body=f"wx_path is required: {wx_path}")
    if not os.path.exists(wx_path):
        return ReJson(1001, body=f"wx_path not exists: {wx_path}")

    code, wxdbpaths = get_core_db(wx_path)
    if not code:
        return ReJson(2001, body=wxdbpaths)

    outpath = os.path.join(g.work_path, "export", my_wxid, "endb")
    if not os.path.exists(outpath):
        os.makedirs(outpath)

    for wxdb in wxdbpaths:
        # Keep the original file name inside the export folder.
        shutil.copy(wxdb, os.path.join(outpath, os.path.basename(wxdb)))
    return ReJson(0, body=outpath)
@rs_api.route('/api/rs/export_dedb', methods=["GET", "POST"])
def get_export_dedb():
    """
    Export the decrypted, merged database.

    Runs ``decrypt_merge`` into ``<work_path>/export/<wxid>/dedb``. ``key``
    and ``wx_path`` come from the JSON body when given, otherwise from the
    stored config.

    :return: ReJson(0) whose body is the second value returned by
             ``decrypt_merge``; ReJson(1001) when no account is selected or
             ``wx_path`` does not exist; ReJson(1002) when ``key`` or
             ``wx_path`` is missing; ReJson(2001) when the merge fails.
    """
    my_wxid = get_conf(g.caf, g.at, "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")

    # request.json fails without a JSON body, which made the config fallbacks
    # unreachable; parse the body leniently instead.
    payload = request.get_json(silent=True) or {}
    key = payload.get("key", get_conf(g.caf, my_wxid, "key"))
    wx_path = payload.get("wx_path", get_conf(g.caf, my_wxid, "wx_path"))

    if not key:
        return ReJson(1002, body=f"key is required: {key}")
    if not wx_path:
        return ReJson(1002, body=f"wx_path is required: {wx_path}")
    if not os.path.exists(wx_path):
        return ReJson(1001, body=f"wx_path not exists: {wx_path}")

    outpath = os.path.join(g.work_path, "export", my_wxid, "dedb")
    if not os.path.exists(outpath):
        os.makedirs(outpath)

    code, merge_save_path = decrypt_merge(wx_path=wx_path, key=key, outpath=outpath)
    # Pause kept from the original implementation; purpose not visible here.
    time.sleep(1)
    if code:
        return ReJson(0, body=merge_save_path)
    return ReJson(2001, body=merge_save_path)
@rs_api.route('/api/rs/export_csv', methods=["GET", 'POST'])
def get_export_csv():
    """
    Export the chat with one contact as csv.

    Output goes to ``<work_path>/export/<my_wxid>/csv/<wxid>``.

    :return: ReJson(0) with the ``export_csv`` result on success;
             ReJson(1001) when no account is selected; ReJson(1002) when
             ``wxid`` is missing; ReJson(2001) when the export fails.
    """
    my_wxid = get_conf(g.caf, g.at, "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")
    db_config = get_conf(g.caf, my_wxid, "db_config")

    wxid = request.json.get("wxid")
    if not wxid:
        return ReJson(1002, body=f"username is required: {wxid}")

    outpath = os.path.join(g.work_path, "export", my_wxid, "csv", wxid)
    if not os.path.exists(outpath):
        os.makedirs(outpath)

    ok, result = export_csv(wxid, outpath, db_config)
    if not ok:
        return ReJson(2001, body=result)
    return ReJson(0, result)
@rs_api.route('/api/rs/export_json', methods=["GET", 'POST'])
def get_export_json():
    """
    Export the chat with one contact as json.

    Output goes to ``<work_path>/export/<my_wxid>/json/<wxid>``.

    :return: ReJson(0) with the ``export_json`` result on success;
             ReJson(1001) when no account is selected; ReJson(1002) when
             ``wxid`` is missing; ReJson(2001) when the export fails.
    """
    my_wxid = get_conf(g.caf, g.at, "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")
    db_config = get_conf(g.caf, my_wxid, "db_config")

    wxid = request.json.get("wxid")
    if not wxid:
        return ReJson(1002, body=f"username is required: {wxid}")

    outpath = os.path.join(g.work_path, "export", my_wxid, "json", wxid)
    if not os.path.exists(outpath):
        os.makedirs(outpath)

    ok, result = export_json(wxid, outpath, db_config)
    if not ok:
        return ReJson(2001, body=result)
    return ReJson(0, result)
# end 导出聊天记录 *******************************************************************************************************
# start 聊天记录分析api **************************************************************************************************
@rs_api.route('/api/rs/date_count', methods=["GET", 'POST'])
@error9999
def get_date_count():
    """
    Return the per-date statistics of the last used account.

    :return: ReJson(0) whose body is the result of ``DBHandler.date_count()``;
             ReJson(1001) when no account is selected.
    """
    my_wxid = get_conf(g.caf, g.at, "last")
    if not my_wxid:
        return ReJson(1001, body="my_wxid is required")
    merge_path = get_conf(g.caf, my_wxid, "merge_path")
    # NOTE(review): the other routes in this module build DBHandler from the
    # stored "db_config"; here the merge path is passed — confirm the
    # constructor accepts it.
    handler = DBHandler(merge_path)
    return ReJson(0, handler.date_count())
@rs_api.route('/api/rs/wordcloud', methods=["GET", 'POST'])
@error9999
def wordcloud():
    """Placeholder route: not implemented yet, the body does nothing."""
    return None
# 关于、帮助、设置 *******************************************************************************************************
@rs_api.route('/api/rs/check_update', methods=["GET", 'POST'])
@error9999
def check_update():
    """
    Check GitHub for a newer PyWxDump release.

    Fetches the repository's tag list and compares the first tag returned,
    with its first character dropped (presumably a leading "v" — confirm),
    against ``pywxdump.__version__``.

    :return: ReJson(0) with ``msg``, ``latest_version`` and ``latest_url`` on
             success; ReJson(2001) when the response is not HTTP 200;
             ReJson(9999) when the request or the parsing raises.
    """
    url = "https://api.github.com/repos/xaoyaoo/PyWxDump/tags"
    try:
        import requests
        # Without a timeout an unreachable GitHub would block this request
        # handler indefinitely; a timeout error falls into the except below.
        res = requests.get(url, timeout=10)
        if res.status_code != 200:
            return ReJson(2001, body="status_code is not 200")
        data = res.json()
        NEW_VERSION = data[0].get("name")
        if NEW_VERSION[1:] != pywxdump.__version__:
            msg = "有新版本"
        else:
            msg = "已经是最新版本"
        return ReJson(0, body={"msg": msg, "latest_version": NEW_VERSION,
                               "latest_url": "https://github.com/xaoyaoo/PyWxDump/releases/tag/" + NEW_VERSION})
    except Exception as e:
        return ReJson(9999, msg=str(e))
@rs_api.route('/api/rs/version', methods=["GET", 'POST'])
@error9999
def version():
    """
    Return the installed PyWxDump version.

    :return: ReJson(0) whose body is ``pywxdump.__version__``.
    """
    current_version = pywxdump.__version__
    return ReJson(0, current_version)
# END 关于、帮助、设置 ***************************************************************************************************
@rs_api.route('/')
@error9999
def index():
    """Render the ``index.html`` template for the root URL."""
    page = render_template('index.html')
    return page

View File

@ -1,5 +1,7 @@
import logging import logging
loger_rjson = logging.getLogger("rjson")
def ReJson(code: int, body: [dict, list] = None, msg: str = None, error: str = None, extra: dict = None) -> dict: def ReJson(code: int, body: [dict, list] = None, msg: str = None, error: str = None, extra: dict = None) -> dict:
""" """
@ -31,13 +33,13 @@ def ReJson(code: int, body: [dict, list] = None, msg: str = None, error: str = N
} }
rjson = situation.get(code, {'code': 9999, 'body': None, 'msg': "code错误", "extra": {}}) rjson = situation.get(code, {'code': 9999, 'body': None, 'msg': "code错误", "extra": {}})
if code != 0: if code != 0:
logging.warning(f"\n{code} \n{rjson['body']}\n{msg if msg else None}") loger_rjson.warning(f"\n{code=}\nbody=\n{rjson['body']}\nmsg={msg if msg else None}\n")
if body: if body:
rjson['body'] = body rjson['body'] = body
if msg: if msg:
rjson['msg'] = msg rjson['msg'] = msg
if error: if error:
logging.error(error) loger_rjson.error(error, exc_info=True)
return rjson return rjson

View File

@ -7,72 +7,74 @@
# ------------------------------------------------------------------------------- # -------------------------------------------------------------------------------
import base64 import base64
import json import json
import logging
import os import os
import re import re
import traceback import traceback
from .rjson import ReJson from .rjson import ReJson
from functools import wraps from functools import wraps
import logging
rs_loger = logging.getLogger("rs_api")
ls_loger = logging.getLogger("ls_api")
def read_session_local_wxid(session_file): def get_conf_local_wxid(conf_file):
try: try:
with open(session_file, 'r') as f: with open(conf_file, 'r') as f:
session = json.load(f) conf = json.load(f)
except FileNotFoundError: except FileNotFoundError:
logging.error(f"Session file not found: {session_file}") logging.error(f"Session file not found: {conf_file}")
return None return None
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
logging.error(f"Error decoding JSON file: {e}") logging.error(f"Error decoding JSON file: {e}")
return None return None
rdata = [k for k in session.keys() if k != "test"] return list(conf.keys())
return rdata
def read_session(session_file, wxid, arg): def get_conf(conf_file, wxid, arg):
try: try:
with open(session_file, 'r') as f: with open(conf_file, 'r') as f:
session = json.load(f) conf = json.load(f)
except FileNotFoundError: except FileNotFoundError:
logging.error(f"Session file not found: {session_file}") logging.error(f"Session file not found: {conf_file}")
return None return None
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
logging.error(f"Error decoding JSON file: {e}") logging.error(f"Error decoding JSON file: {e}")
return None return None
return session.get(wxid, {}).get(arg, None) return conf.get(wxid, {}).get(arg, None)
def get_session_wxids(session_file): def get_conf_wxids(conf_file):
try: try:
with open(session_file, 'r') as f: with open(conf_file, 'r') as f:
session = json.load(f) conf = json.load(f)
except FileNotFoundError: except FileNotFoundError:
logging.error(f"Session file not found: {session_file}") logging.error(f"Session file not found: {conf_file}")
return None return None
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
logging.error(f"Error decoding JSON file: {e}") logging.error(f"Error decoding JSON file: {e}")
return None return None
return list(session.keys()) return list(conf.keys())
def save_session(session_file, wxid, arg, value): def set_conf(conf_file, wxid, arg, value):
try: try:
with open(session_file, 'r') as f: with open(conf_file, 'r') as f:
session = json.load(f) conf = json.load(f)
except FileNotFoundError: except FileNotFoundError:
session = {} conf = {}
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
logging.error(f"Error decoding JSON file: {e}") logging.error(f"Error decoding JSON file: {e}")
return False return False
if wxid not in session: if wxid not in conf:
session[wxid] = {} conf[wxid] = {}
if not isinstance(session[wxid], dict): if not isinstance(conf[wxid], dict):
session[wxid] = {} conf[wxid] = {}
session[wxid][arg] = value conf[wxid][arg] = value
try: try:
with open(session_file, 'w') as f: with open(conf_file, 'w') as f:
json.dump(session, f, indent=4, ensure_ascii=False) json.dump(conf, f, indent=4, ensure_ascii=False)
except Exception as e: except Exception as e:
logging.error(f"Error writing to file: {e}") logging.error(f"Error writing to file: {e}")
return False return False

View File

@ -106,7 +106,7 @@ class MainBiasAddr(BaseSubMainClass):
parser.add_argument("--account", type=str, help="微信账号", metavar="", required=True) parser.add_argument("--account", type=str, help="微信账号", metavar="", required=True)
parser.add_argument("--key", type=str, metavar="", help="(可选)密钥") parser.add_argument("--key", type=str, metavar="", help="(可选)密钥")
parser.add_argument("--db_path", type=str, metavar="", help="(可选)已登录账号的微信文件夹路径") parser.add_argument("--db_path", type=str, metavar="", help="(可选)已登录账号的微信文件夹路径")
parser.add_argument("-vlp", '--version_list_path', type=str, metavar="", parser.add_argument("-vlp", '--WX_OFFS_PATH', type=str, metavar="",
help="(可选)微信版本偏移文件路径,如有,则自动更新", help="(可选)微信版本偏移文件路径,如有,则自动更新",
default=None) default=None)
@ -120,7 +120,7 @@ class MainBiasAddr(BaseSubMainClass):
account = args.account account = args.account
key = args.key key = args.key
db_path = args.db_path db_path = args.db_path
vlp = args.version_list_path vlp = args.WX_OFFS_PATH
# 调用 run 函数,并传入参数 # 调用 run 函数,并传入参数
rdata = BiasAddr(account, mobile, name, key, db_path).run(True, vlp) rdata = BiasAddr(account, mobile, name, key, db_path).run(True, vlp)
return rdata return rdata
@ -132,18 +132,18 @@ class MainWxInfo(BaseSubMainClass):
def init_parses(self, parser): def init_parses(self, parser):
# 添加 'wx_info' 子命令解析器 # 添加 'wx_info' 子命令解析器
parser.add_argument("-vlp", '--version_list_path', metavar="", type=str, parser.add_argument("-vlp", '--WX_OFFS_PATH', metavar="", type=str,
help="(可选)微信版本偏移文件路径", default=VERSION_LIST_PATH) help="(可选)微信版本偏移文件路径", default=WX_OFFS_PATH)
parser.add_argument("-s", '--save_path', metavar="", type=str, help="(可选)保存路径【json文件】") parser.add_argument("-s", '--save_path', metavar="", type=str, help="(可选)保存路径【json文件】")
return parser return parser
def run(self, args): def run(self, args):
print(f"[*] PyWxDump v{pywxdump.__version__}") print(f"[*] PyWxDump v{pywxdump.__version__}")
# 读取微信各版本偏移 # 读取微信各版本偏移
path = args.version_list_path path = args.WX_OFFS_PATH
save_path = args.save_path save_path = args.save_path
version_list = json.load(open(path, "r", encoding="utf-8")) WX_OFFS = json.load(open(path, "r", encoding="utf-8"))
result = read_info(version_list, True, save_path) # 读取微信信息 result = get_wx_info(WX_OFFS, True, save_path) # 读取微信信息
return result return result
@ -153,7 +153,7 @@ class MainWxDbPath(BaseSubMainClass):
def init_parses(self, parser): def init_parses(self, parser):
# 添加 'wx_db_path' 子命令解析器 # 添加 'wx_db_path' 子命令解析器
parser.add_argument("-r", "--require_list", type=str, parser.add_argument("-r", "--db_types", type=str,
help="(可选)需要的数据库名称(eg: -r MediaMSG;MicroMsg;FTSMSG;MSG;Sns;Emotion )", help="(可选)需要的数据库名称(eg: -r MediaMSG;MicroMsg;FTSMSG;MSG;Sns;Emotion )",
default="all", metavar="") default="all", metavar="")
parser.add_argument("-wf", "--wx_files", type=str, help="(可选)'WeChat Files'路径", default=None, parser.add_argument("-wf", "--wx_files", type=str, help="(可选)'WeChat Files'路径", default=None,
@ -164,12 +164,13 @@ class MainWxDbPath(BaseSubMainClass):
def run(self, args): def run(self, args):
# 从命令行参数获取值 # 从命令行参数获取值
require_list = args.require_list db_types = args.require_list
msg_dir = args.wx_files msg_dir = args.wx_files
wxid = args.wxid wxid = args.wxid
user_dirs = get_wechat_db(require_list, msg_dir, wxid, True) # 获取微信数据库路径 ret = get_wx_db(msg_dir=msg_dir, db_types=db_types, wxids=wxid)
return user_dirs for i in ret: print(i)
return ret
class MainDecrypt(BaseSubMainClass): class MainDecrypt(BaseSubMainClass):

48
pywxdump/db/__init__.py Normal file
View File

@ -0,0 +1,48 @@
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name: __init__.py.py
# Description:
# Author: xaoyaoo
# Date: 2024/04/15
# -------------------------------------------------------------------------------
import pandas as pd
from .utils import download_file
from .dbFavorite import FavoriteHandler
from .dbMSG import MsgHandler
from .dbMicro import MicroHandler
from .dbMedia import MediaHandler
from .dbOpenIMContact import OpenIMContactHandler
from .dbPublicMsg import PublicMsgHandler
from .dbOpenIMMedia import OpenIMMediaHandler
from .export.exportCSV import export_csv
from .export.exportJSON import export_json
class DBHandler(MicroHandler, MediaHandler, OpenIMContactHandler, PublicMsgHandler, OpenIMMediaHandler,
                FavoriteHandler):
    """
    Combined handler that mixes all per-database handler classes together.

    On construction it records, one flag per handler, the result of that
    handler's ``*_tables_exist()`` check.
    """
    _class_name = "DBHandler"

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the base handlers, then cache the table checks."""
        super().__init__(*args, **kwargs)

        self.MSG_exist = self.Msg_tables_exist()
        self.Micro_exist = self.Micro_tables_exist()
        self.Media_exist = self.Media_tables_exist()
        self.OpenIMContact_exist = self.OpenIMContact_tables_exist()
        self.PublicMsg_exist = self.PublicMSG_tables_exist()
        self.OpenIMMedia_exist = self.OpenIMMedia_tables_exist()
        self.Favorite_exist = self.Favorite_tables_exist()

    def get_user(self, word=None, wxids=None, labels=None):
        """
        Return the contact list, optionally filtered.

        :param word: passed to the lookups as ``word``.
        :param wxids: passed to the lookups as ``wxids``.
        :param labels: passed to ``get_user_list`` as ``label_ids``.
        :return: the ``get_user_list`` result, updated with the
                 ``get_im_user_list`` result when the OpenIMContact tables exist.
        """
        contacts = self.get_user_list(word=word, wxids=wxids, label_ids=labels)
        if not self.OpenIMContact_exist:
            return contacts
        contacts.update(self.get_im_user_list(word=word, wxids=wxids))
        return contacts

View File

@ -18,28 +18,15 @@ from .utils import timestamp2str, xml2dict
# * FavTags为收藏内容添加的标签 # * FavTags为收藏内容添加的标签
def FavoriteTypeId2Name(Type): class FavoriteHandler(DatabaseBase):
TypeNameDict = {
1: "文本", # 文本 已测试
2: "图片", # 图片 已测试
3: "语音", # 语音
4: "视频", # 视频 已测试
5: "链接", # 链接 已测试
6: "位置", # 位置
7: "小程序", # 小程序
8: "文件", # 文件 已测试
14: "聊天记录", # 聊天记录 已测试
16: "群聊视频", # 群聊中的视频 可能
18: "笔记" # 笔记 已测试
}
return TypeNameDict.get(Type, "未知")
class ParsingFavorite(DatabaseBase):
_class_name = "Favorite" _class_name = "Favorite"
Favorite_required_tables = ["FavItems", "FavDataItem", "FavTagDatas", "FavBindTagDatas"]
def __init__(self, db_path): def Favorite_tables_exist(self):
super().__init__(db_path) """
判断该类所需要的表是否存在
"""
return self.check_tables_exist(self.Favorite_required_tables)
def get_tags(self, LocalID): def get_tags(self, LocalID):
""" """
@ -49,7 +36,7 @@ class ParsingFavorite(DatabaseBase):
sql = "select LocalID, TagName from FavTagDatas order by ServerSeq" sql = "select LocalID, TagName from FavTagDatas order by ServerSeq"
else: else:
sql = "select LocalID, TagName from FavTagDatas where LocalID = '%s' order by ServerSeq " % LocalID sql = "select LocalID, TagName from FavTagDatas where LocalID = '%s' order by ServerSeq " % LocalID
tags = self.execute_sql(sql) # [(1, 797940830, '程序语言类'), (2, 806153863, '账单')] tags = self.execute(sql) # [(1, 797940830, '程序语言类'), (2, 806153863, '账单')]
# 转换为字典 # 转换为字典
tags = {tag[0]: tag[1] for tag in tags} tags = {tag[0]: tag[1] for tag in tags}
return tags return tags
@ -59,7 +46,7 @@ class ParsingFavorite(DatabaseBase):
return: [(FavLocalID, TagName)] return: [(FavLocalID, TagName)]
""" """
sql = "select A.FavLocalID, B.TagName from FavBindTagDatas A, FavTagDatas B where A.TagLocalID = B.LocalID" sql = "select A.FavLocalID, B.TagName from FavBindTagDatas A, FavTagDatas B where A.TagLocalID = B.LocalID"
FavBindTags = self.execute_sql(sql) FavBindTags = self.execute(sql)
return FavBindTags return FavBindTags
def get_favorite(self): def get_favorite(self):
@ -124,8 +111,8 @@ class ParsingFavorite(DatabaseBase):
sql1 = "select " + ",".join(FavItemsFields.keys()) + " from FavItems order by UpdateTime desc" sql1 = "select " + ",".join(FavItemsFields.keys()) + " from FavItems order by UpdateTime desc"
sql2 = "select " + ",".join(FavDataItemFields.keys()) + " from FavDataItem B order by B.RecId asc" sql2 = "select " + ",".join(FavDataItemFields.keys()) + " from FavDataItem B order by B.RecId asc"
FavItemsList = self.execute_sql(sql1) FavItemsList = self.execute(sql1)
FavDataItemList = self.execute_sql(sql2) FavDataItemList = self.execute(sql2)
if FavItemsList is None or len(FavItemsList) == 0: if FavItemsList is None or len(FavItemsList) == 0:
return False return False
@ -152,3 +139,20 @@ class ParsingFavorite(DatabaseBase):
pf = pf.fillna("") # 去掉Nan pf = pf.fillna("") # 去掉Nan
rdata = pf.to_dict(orient="records") rdata = pf.to_dict(orient="records")
return rdata return rdata
def FavoriteTypeId2Name(Type):
    """
    Translate a favorite-item type id into its display name.

    :param Type: numeric type id of the favorite item
    :return: the display name, or "未知" for an id that is not listed
    """
    known_types = (
        (1, "文本"),  # text (verified)
        (2, "图片"),  # image (verified)
        (3, "语音"),  # voice
        (4, "视频"),  # video (verified)
        (5, "链接"),  # link (verified)
        (6, "位置"),  # location
        (7, "小程序"),  # mini program
        (8, "文件"),  # file (verified)
        (14, "聊天记录"),  # chat history (verified)
        (16, "群聊视频"),  # video from a group chat (unconfirmed)
        (18, "笔记"),  # note (verified)
    )
    lookup = dict(known_types)
    if Type in lookup:
        return lookup[Type]
    return "未知"

View File

@ -8,17 +8,288 @@
import json import json
import os import os
import re import re
# import time
import pandas as pd # import pandas as pd
from .dbbase import DatabaseBase from .dbbase import DatabaseBase
from .utils import get_md5, name2typeid, typeid2name, type_converter, timestamp2str, xml2dict, match_BytesExtra from .utils import db_error, timestamp2str, xml2dict, match_BytesExtra, type_converter, \
get_md5, name2typeid, db_loger
import lz4.block import lz4.block
import blackboxprotobuf import blackboxprotobuf
class ParsingMSG(DatabaseBase): class MsgHandler(DatabaseBase):
_class_name = "MSG" _class_name = "MSG"
MSG_required_tables = ["MSG"]
def Msg_tables_exist(self):
    """
    Report whether the tables listed in MSG_required_tables are present,
    as decided by check_tables_exist.
    """
    needed_tables = self.MSG_required_tables
    return self.check_tables_exist(needed_tables)
@db_error
def get_msg_count(self, wxids: list = ""):
    """
    Count chat messages per conversation.

    :param wxids: one wxid (str) or a list of wxids to restrict the per-talker
                  counts to; leave empty to count every conversation
    :return: {"total": <number of rows in MSG>, wxid: message_count, ...},
             or {} when the per-talker query yields nothing
    """
    if isinstance(wxids, str):
        # An empty string means "no filter". Wrapping it into [""] would produce
        # the filter IN ('') and silently match no conversation at all.
        wxids = [wxids] if wxids else []

    if wxids:
        # Bind the ids as parameters instead of splicing them into the SQL text.
        placeholders = ",".join("?" * len(wxids))
        sql = (f"SELECT StrTalker, COUNT(*) FROM MSG WHERE StrTalker IN ({placeholders}) "
               "GROUP BY StrTalker ORDER BY COUNT(*) DESC;")
        result = self.execute(sql, tuple(wxids))
    else:
        sql = "SELECT StrTalker, COUNT(*) FROM MSG GROUP BY StrTalker ORDER BY COUNT(*) DESC;"
        result = self.execute(sql)

    # The total always covers the whole table, regardless of the wxid filter.
    total_ret = self.execute("SELECT COUNT(*) FROM MSG;")

    if not result:
        return {}

    total = total_ret[0][0] if total_ret else 0
    msg_count = {"total": total}
    msg_count.update({talker: count for talker, count in result})
    return msg_count
# 单条消息处理
@db_error
def get_msg_detail(self, row):
    """
    Convert one raw MSG row into a display-ready dictionary.

    :param row: a 27-item sequence in the column order selected by get_msg_list
                (localId ... Reserved2, followed by the running row number)
    :return: dict with the keys id, MsgSvrID, type_name, is_sender, talker,
             room_name, msg, src, extra and CreateTime
    """
    (localId, TalkerId, MsgSvrID, Type, SubType, CreateTime, IsSender, Sequence, StatusEx, FlagEx, Status,
     MsgSequence, StrContent, MsgServerSeq, StrTalker, DisplayContent, Reserved0, Reserved1, Reserved3,
     Reserved4, Reserved5, Reserved6, CompressContent, BytesExtra, BytesTrans, Reserved2, _id) = row

    def _storage_path(marker):
        # Pull the "FileStorage..." paths out of the decoded BytesExtra, prefer one
        # containing `marker`, and rebuild it with the local path separator.
        extra_text = str(get_BytesExtra(BytesExtra))
        candidates = re.findall(r"(FileStorage.*?)'", extra_text)
        if not candidates:
            return ""
        # Stable sort: matching entries move to the front, original order otherwise.
        best = sorted(candidates, key=lambda p: marker in p, reverse=True)[0]
        parts = [seg for seg in best.replace("'", "").split("\\") if seg]
        return os.path.join(*parts)

    CreateTime = timestamp2str(CreateTime)

    type_id = (Type, SubType)
    type_name = type_converter(type_id)

    # Defaults for every type that has no dedicated branch below.
    msg = StrContent
    src = ""

    if type_id == (1, 0):  # text
        msg = StrContent

    elif type_id == (3, 0):  # image
        src = _storage_path("Image")
        msg = "图片"

    elif type_id == (34, 0):  # voice
        tmp_c = xml2dict(StrContent)
        voicelength = tmp_c.get("voicemsg", {}).get("voicelength", "")
        transtext = tmp_c.get("voicetrans", {}).get("transtext", "")
        if voicelength.isdigit():
            # The digits are divided by 1000 and shown with two decimals.
            voicelength = int(voicelength) / 1000
            voicelength = f"{voicelength:.2f}"
        msg = f"语音时长:{voicelength}\n翻译结果:{transtext}" if transtext else f"语音时长:{voicelength}"
        src = os.path.join("audio", f"{StrTalker}",
                           f"{CreateTime.replace(':', '-').replace(' ', '_')}_{IsSender}_{MsgSvrID}.wav")

    elif type_id == (43, 0):  # video
        src = _storage_path("mp4")
        msg = "视频"

    elif type_id == (47, 0):  # animated sticker
        content_tmp = xml2dict(StrContent)
        cdnurl = content_tmp.get("emoji", {}).get("cdnurl", "")
        if not cdnurl:
            # Fall back to whatever match_BytesExtra finds in the decoded BytesExtra.
            cdnurl = match_BytesExtra(get_BytesExtra(BytesExtra))
        if cdnurl:
            # Previously stored in an unused `content` dict, so the returned
            # src/msg never reflected the sticker.
            src = cdnurl
            msg = "表情"

    elif type_id == (48, 0):  # location
        content_tmp = xml2dict(StrContent)
        location = content_tmp.get("location", {})
        # pop() removes the fields shown explicitly so the JSON dump lists only the rest.
        msg = (f"纬度:【{location.pop('x')}】 经度:【{location.pop('y')}\n"
               f"位置:{location.pop('label')} {location.pop('poiname')}\n"
               f"其他信息:{json.dumps(location, ensure_ascii=False, indent=4)}"
               )
        src = ""

    elif type_id == (49, 0):  # file
        url = match_BytesExtra(get_BytesExtra(BytesExtra))
        src = url
        msg = os.path.basename(url)

    elif type_id == (49, 5):  # shared link card
        CompressContent = decompress_CompressContent(CompressContent)
        appmsg = xml2dict(CompressContent).get("appmsg", {})
        title = appmsg.get("title", "")
        des = appmsg.get("des", "")
        url = appmsg.get("url", "")
        msg = f"{title}\n{des}\n\n{url}"
        src = url

    elif type_id == (49, 19):  # merged / forwarded chat history
        CompressContent = decompress_CompressContent(CompressContent)
        appmsg = xml2dict(CompressContent).get("appmsg", {})
        title = appmsg.get("title", "")
        des = appmsg.get("des", "")
        recorditem = xml2dict(appmsg.get("recorditem", ""))
        msg = f"{title}\n{des}"
        src = recorditem

    elif type_id == (49, 57):  # text message quoting another message
        CompressContent = decompress_CompressContent(CompressContent)
        appmsg = xml2dict(CompressContent).get("appmsg", {})
        title = appmsg.get("title", "")
        refermsg = appmsg.get("refermsg", {})
        displayname = refermsg.get("displayname", "")
        display_content = refermsg.get("content", "")
        display_createtime = refermsg.get("createtime", "")
        if display_createtime.isdigit():
            display_createtime = timestamp2str(int(display_createtime))
        if display_content.startswith("<?xml"):
            # The quoted message is itself XML: show its title when it has one.
            display_content = xml2dict(display_content)
            title1 = display_content.get("appmsg", {}).get("title", "")
            if title1:
                display_content = title1
        msg = f"{title}\n\n[引用]({display_createtime}){displayname}:{display_content}"
        src = ""

    elif type_id == (49, 2000):  # money transfer
        CompressContent = decompress_CompressContent(CompressContent)
        wcpayinfo = xml2dict(CompressContent).get("appmsg", {}).get("wcpayinfo", {})
        paysubtype = wcpayinfo.get("paysubtype", "")  # transfer sub type
        feedesc = wcpayinfo.get("feedesc", "")  # amount text
        pay_memo = wcpayinfo.get("pay_memo", "")  # memo
        begintransfertime = wcpayinfo.get("begintransfertime", "")  # transfer start time
        msg = (f"{'已收款' if paysubtype == '3' else '转账'}{feedesc}\n"
               f"转账说明:{pay_memo if pay_memo else ''}\n"
               f"转账时间:{timestamp2str(begintransfertime)}\n"
               )
        src = ""

    elif type_id[0] == 49 and type_id[1] != 0:  # any other type-49 sub type
        src = match_BytesExtra(get_BytesExtra(BytesExtra))
        msg = type_name

    elif type_id == (50, 0):  # voice / video call
        msg = "语音/视频通话[%s]" % DisplayContent

    # Every remaining type keeps msg = StrContent and src = "".

    talker = "未知"
    if IsSender == 1:
        talker = ""
    elif StrTalker.endswith("@chatroom"):
        # In a chat room the sender is read from the decoded BytesExtra.
        bytes_extra = get_BytesExtra(BytesExtra)
        if bytes_extra:
            try:
                talker = bytes_extra['3'][0]['2']
                if "publisher-id" in talker:
                    talker = "系统"
            except (KeyError, IndexError, TypeError):
                # Best effort: an unexpected layout leaves talker as it is.
                pass
    else:
        talker = StrTalker

    row_data = {"id": _id, "MsgSvrID": str(MsgSvrID), "type_name": type_name, "is_sender": IsSender,
                "talker": talker, "room_name": StrTalker, "msg": msg, "src": src, "extra": {},
                "CreateTime": CreateTime, }
    return row_data
@db_error
def get_msg_list(self, wxid="", start_index=0, page_size=500, msg_type: str = "", msg_sub_type: str = "",
                 start_createtime=None, end_createtime=None):
    """
    Fetch one page of messages ordered by CreateTime ascending.

    :param wxid: restrict to this StrTalker when given
    :param start_index: offset of the first row (LIMIT offset)
    :param page_size: maximum number of rows (LIMIT count)
    :param msg_type: restrict to this Type when given
    :param msg_sub_type: restrict to this SubType; only applied together with msg_type
    :param start_createtime: lower CreateTime bound (inclusive) when given
    :param end_createtime: upper CreateTime bound (inclusive) when given
    :return: (list of get_msg_detail results, list of distinct talker values),
             or ([], []) when the query yields nothing
    """
    select_part = ("SELECT localId,TalkerId,MsgSvrID,Type,SubType,CreateTime,IsSender,Sequence,StatusEx,FlagEx,Status,"
                   "MsgSequence,StrContent,MsgServerSeq,StrTalker,DisplayContent,Reserved0,Reserved1,Reserved3,"
                   "Reserved4,Reserved5,Reserved6,CompressContent,BytesExtra,BytesTrans,Reserved2,"
                   "ROW_NUMBER() OVER (ORDER BY CreateTime ASC) AS id "
                   "FROM MSG ")

    # Each active filter adds one SQL fragment and one bound value, in the same order.
    fragments = []
    values = []
    if wxid:
        fragments.append("AND StrTalker=? ")
        values.append(wxid)
    if msg_type:
        fragments.append("AND Type=? ")
        values.append(msg_type)
    if msg_type and msg_sub_type:
        fragments.append("AND SubType=? ")
        values.append(msg_sub_type)
    if start_createtime:
        fragments.append("AND CreateTime>=? ")
        values.append(start_createtime)
    if end_createtime:
        fragments.append("AND CreateTime<=? ")
        values.append(end_createtime)

    query = select_part + " WHERE 1=1 " + "".join(fragments) + "ORDER BY CreateTime ASC LIMIT ?,?"
    values.extend((start_index, page_size))

    rows = self.execute(query, tuple(values))
    if not rows:
        return [], []

    messages = [self.get_msg_detail(item) for item in rows]
    talkers = {entry['talker'] for entry in messages}  # distinct talker values
    return messages, list(talkers)
@db_error
def decompress_CompressContent(data):
    """
    Decode the CompressContent column of a message into text.

    :param data: raw CompressContent bytes
    :return: None when data is not bytes; otherwise the LZ4-decompressed payload
             with NUL bytes removed, decoded as UTF-8 (undecodable bytes dropped).
             If decompression fails, the raw bytes are decoded the same way instead.
    """
    if not isinstance(data, bytes):  # also covers None
        return None
    try:
        expanded = lz4.block.decompress(data, uncompressed_size=len(data) << 8)
        # Leftover 0x00 bytes would break the XML parsing done later on.
        cleaned = expanded.replace(b'\x00', b'')
        return cleaned.decode('utf-8', errors='ignore')
    except Exception:
        return data.decode('utf-8', errors='ignore')
@db_error
def get_BytesExtra(BytesExtra):
BytesExtra_message_type = { BytesExtra_message_type = {
"1": { "1": {
"type": "message", "type": "message",
@ -242,295 +513,10 @@ class ParsingMSG(DatabaseBase):
} }
} }
} }
def __init__(self, db_path):
super().__init__(db_path)
def decompress_CompressContent(self, data):
"""
解压缩MsgCompressContent内容
:param data: CompressContent内容 bytes
:return:
"""
if data is None or not isinstance(data, bytes):
return None
try:
dst = lz4.block.decompress(data, uncompressed_size=len(data) << 8)
dst = dst.replace(b'\x00', b'') # 已经解码完成后还含有0x00的部分要删掉要不后面ET识别的时候会报错
uncompressed_data = dst.decode('utf-8', errors='ignore')
return uncompressed_data
except Exception as e:
return data.decode('utf-8', errors='ignore')
def get_BytesExtra(self, BytesExtra):
if BytesExtra is None or not isinstance(BytesExtra, bytes): if BytesExtra is None or not isinstance(BytesExtra, bytes):
return None return None
try: try:
deserialize_data, message_type = blackboxprotobuf.decode_message(BytesExtra, self.BytesExtra_message_type) deserialize_data, message_type = blackboxprotobuf.decode_message(BytesExtra, BytesExtra_message_type)
return deserialize_data return deserialize_data
except Exception as e: except Exception as e:
return None return None
def msg_count(self, wxid: str = ""):
"""
获取聊天记录数量,根据wxid获取单个联系人的聊天记录数量不传wxid则获取所有联系人的聊天记录数量
:param MSG_db_path: MSG.db 文件路径
:return: 聊天记录数量列表 {wxid: chat_count}
"""
if wxid:
sql = f"SELECT StrTalker, COUNT(*) FROM MSG WHERE StrTalker='{wxid}';"
else:
sql = f"SELECT StrTalker, COUNT(*) FROM MSG GROUP BY StrTalker ORDER BY COUNT(*) DESC;"
result = self.execute_sql(sql)
if not result:
return {}
df = pd.DataFrame(result, columns=["wxid", "msg_count"])
# # 排序
df = df.sort_values(by="msg_count", ascending=False)
# chat_counts {wxid: chat_count}
chat_counts = df.set_index("wxid").to_dict()["msg_count"]
return chat_counts
def msg_count_total(self):
"""
获取聊天记录总数
:return: 聊天记录总数
"""
sql = "SELECT COUNT(*) FROM MSG;"
result = self.execute_sql(sql)
if result and len(result) > 0:
chat_counts = result[0][0]
return chat_counts
return 0
# def room_user_list(self, selected_talker):
# """
# 获取群聊中包含的所有用户列表
# :param MSG_db_path: MSG.db 文件路径
# :param selected_talker: 选中的聊天对象 wxid
# :return: 聊天用户列表
# """
# sql = (
# "SELECT localId, IsSender, StrContent, StrTalker, Sequence, Type, SubType,CreateTime,MsgSvrID,DisplayContent,CompressContent,BytesExtra,ROW_NUMBER() OVER (ORDER BY CreateTime ASC) AS id "
# "FROM MSG WHERE StrTalker=? "
# "ORDER BY CreateTime ASC")
#
# result1 = self.execute_sql(sql, (selected_talker,))
# user_list = []
# read_user_wx_id = []
# for row in result1:
# localId, IsSender, StrContent, StrTalker, Sequence, Type, SubType, CreateTime, MsgSvrID, DisplayContent, CompressContent, BytesExtra, id = row
# bytes_extra = self.get_BytesExtra(BytesExtra)
# if bytes_extra:
# try:
# talker = bytes_extra['3'][0]['2'].decode('utf-8', errors='ignore')
# except:
# continue
# if talker in read_user_wx_id:
# continue
# user = get_contact(MSG_db_path, talker)
# if not user:
# continue
# user_list.append(user)
# read_user_wx_id.append(talker)
# return user_list
# 单条消息处理
def msg_detail(self, row):
"""
获取单条消息详情,格式化输出
"""
(localId, IsSender, StrContent, StrTalker, Sequence, Type, SubType, CreateTime, MsgSvrID,
DisplayContent, CompressContent, BytesExtra, id) = row
CreateTime = timestamp2str(CreateTime)
type_id = (Type, SubType)
type_name = typeid2name(type_id)
content = {"src": "", "msg": StrContent}
if type_id == (1, 0): # 文本
content["msg"] = StrContent
elif type_id == (3, 0): # 图片
DictExtra = self.get_BytesExtra(BytesExtra)
DictExtra_str = str(DictExtra)
img_paths = [i for i in re.findall(r"(FileStorage.*?)'", DictExtra_str)]
img_paths = sorted(img_paths, key=lambda p: "Image" in p, reverse=True)
if img_paths:
img_path = img_paths[0].replace("'", "")
img_path = [i for i in img_path.split("\\") if i]
img_path = os.path.join(*img_path)
content["src"] = img_path
else:
content["src"] = ""
content["msg"] = "图片"
elif type_id == (34, 0): # 语音
tmp_c = xml2dict(StrContent)
voicelength = tmp_c.get("voicemsg", {}).get("voicelength", "")
transtext = tmp_c.get("voicetrans", {}).get("transtext", "")
if voicelength.isdigit():
voicelength = int(voicelength) / 1000
voicelength = f"{voicelength:.2f}"
content[
"msg"] = f"语音时长:{voicelength}\n翻译结果:{transtext}" if transtext else f"语音时长:{voicelength}"
content["src"] = os.path.join("audio", f"{StrTalker}",
f"{CreateTime.replace(':', '-').replace(' ', '_')}_{IsSender}_{MsgSvrID}.wav")
elif type_id == (43, 0): # 视频
DictExtra = self.get_BytesExtra(BytesExtra)
DictExtra = str(DictExtra)
DictExtra_str = str(DictExtra)
video_paths = [i for i in re.findall(r"(FileStorage.*?)'", DictExtra_str)]
video_paths = sorted(video_paths, key=lambda p: "mp4" in p, reverse=True)
if video_paths:
video_path = video_paths[0].replace("'", "")
video_path = [i for i in video_path.split("\\") if i]
video_path = os.path.join(*video_path)
content["src"] = video_path
else:
content["src"] = ""
content["msg"] = "视频"
elif type_id == (47, 0): # 动画表情
content_tmp = xml2dict(StrContent)
cdnurl = content_tmp.get("emoji", {}).get("cdnurl", "")
if not cdnurl:
DictExtra = self.get_BytesExtra(BytesExtra)
cdnurl = match_BytesExtra(DictExtra)
if cdnurl:
content = {"src": cdnurl, "msg": "表情"}
elif type_id == (48, 0): # 地图信息
content_tmp = xml2dict(StrContent)
location = content_tmp.get("location", {})
content["msg"] = (f"纬度:【{location.pop('x')}】 经度:【{location.pop('y')}\n"
f"位置:{location.pop('label')} {location.pop('poiname')}\n"
f"其他信息:{json.dumps(location, ensure_ascii=False, indent=4)}"
)
content["src"] = ""
elif type_id == (49, 0): # 文件
DictExtra = self.get_BytesExtra(BytesExtra)
url = match_BytesExtra(DictExtra)
content["src"] = url
file_name = os.path.basename(url)
content["msg"] = file_name
elif type_id == (49, 5): # (分享)卡片式链接
CompressContent = self.decompress_CompressContent(CompressContent)
CompressContent_tmp = xml2dict(CompressContent)
appmsg = CompressContent_tmp.get("appmsg", {})
title = appmsg.get("title", "")
des = appmsg.get("des", "")
url = appmsg.get("url", "")
content["msg"] = f"{title}\n{des}\n\n{url}"
content["src"] = url
elif type_id == (49, 19): # 合并转发的聊天记录
CompressContent = self.decompress_CompressContent(CompressContent)
content_tmp = xml2dict(CompressContent)
title = content_tmp.get("appmsg", {}).get("title", "")
des = content_tmp.get("appmsg", {}).get("des", "")
recorditem = content_tmp.get("appmsg", {}).get("recorditem", "")
recorditem = xml2dict(recorditem)
content["msg"] = f"{title}\n{des}"
content["src"] = recorditem
elif type_id == (49, 57): # 带有引用的文本消息
CompressContent = self.decompress_CompressContent(CompressContent)
content_tmp = xml2dict(CompressContent)
appmsg = content_tmp.get("appmsg", {})
title = appmsg.get("title", "")
refermsg = appmsg.get("refermsg", {})
displayname = refermsg.get("displayname", "")
display_content = refermsg.get("content", "")
display_createtime = refermsg.get("createtime", "")
display_createtime = timestamp2str(
int(display_createtime)) if display_createtime.isdigit() else display_createtime
if display_content.startswith("<?xml"):
display_content = xml2dict(display_content)
appmsg1 = display_content.get("appmsg", {})
title1 = appmsg1.get("title", "")
if title1: display_content = title1
content["msg"] = f"{title}\n\n[引用]({display_createtime}){displayname}:{display_content}"
content["src"] = ""
elif type_id == (49, 2000): # 转账消息
CompressContent = self.decompress_CompressContent(CompressContent)
content_tmp = xml2dict(CompressContent)
wcpayinfo = content_tmp.get("appmsg", {}).get("wcpayinfo", {})
paysubtype = wcpayinfo.get("paysubtype", "") # 转账类型
feedesc = wcpayinfo.get("feedesc", "") # 转账金额
pay_memo = wcpayinfo.get("pay_memo", "") # 转账备注
begintransfertime = wcpayinfo.get("begintransfertime", "") # 转账开始时间
content["msg"] = (f"{'已收款' if paysubtype == '3' else '转账'}{feedesc}\n"
f"转账说明:{pay_memo if pay_memo else ''}\n"
f"转账时间:{timestamp2str(begintransfertime)}\n"
)
content["src"] = ""
elif type_id[0] == 49 and type_id[1] != 0:
DictExtra = self.get_BytesExtra(BytesExtra)
url = match_BytesExtra(DictExtra)
content["src"] = url
content["msg"] = type_name
elif type_id == (50, 0): # 语音通话
content["msg"] = "语音/视频通话[%s]" % DisplayContent
# elif type_id == (10000, 0):
# content["msg"] = StrContent
# elif type_id == (10000, 4):
# content["msg"] = StrContent
# elif type_id == (10000, 8000):
# content["msg"] = StrContent
talker = "未知"
if IsSender == 1:
talker = ""
else:
if StrTalker.endswith("@chatroom"):
bytes_extra = self.get_BytesExtra(BytesExtra)
if bytes_extra:
try:
talker = bytes_extra['3'][0]['2']
if "publisher-id" in talker:
talker = "系统"
except:
pass
else:
talker = StrTalker
row_data = {"MsgSvrID": str(MsgSvrID), "type_name": type_name, "is_sender": IsSender, "talker": talker,
"room_name": StrTalker, "content": content, "CreateTime": CreateTime, "id": id}
return row_data
def msg_list(self, wxid="", start_index=0, page_size=500, msg_type: str = ""):
if wxid:
sql = (
"SELECT localId, IsSender, StrContent, StrTalker, Sequence, Type, SubType,CreateTime,MsgSvrID,DisplayContent,CompressContent,BytesExtra,ROW_NUMBER() OVER (ORDER BY CreateTime ASC) AS id "
"FROM MSG WHERE StrTalker=? "
"ORDER BY CreateTime ASC LIMIT ?,?")
if msg_type:
sql = sql.replace("ORDER BY CreateTime ASC LIMIT ?,?",
f"AND Type={msg_type} ORDER BY CreateTime ASC LIMIT ?,?")
result1 = self.execute_sql(sql, (wxid, start_index, page_size))
else:
sql = (
"SELECT localId, IsSender, StrContent, StrTalker, Sequence, Type, SubType,CreateTime,MsgSvrID,DisplayContent,CompressContent,BytesExtra,ROW_NUMBER() OVER (ORDER BY CreateTime ASC) AS id "
"FROM MSG ORDER BY CreateTime ASC LIMIT ?,?")
if msg_type:
sql = sql.replace("ORDER BY CreateTime ASC LIMIT ?,?",
f"AND Type={msg_type} ORDER BY CreateTime ASC LIMIT ?,?")
result1 = self.execute_sql(sql, (start_index, page_size))
if not result1:
return [], []
data = []
wxid_list = []
for row in result1:
tmpdata = self.msg_detail(row)
wxid_list.append(tmpdata["talker"])
data.append(tmpdata)
wxid_list = list(set(wxid_list))
return data, wxid_list

View File

@ -9,14 +9,19 @@ from .dbbase import DatabaseBase
from .utils import silk2audio from .utils import silk2audio
class ParsingMediaMSG(DatabaseBase): class MediaHandler(DatabaseBase):
_class_name = "MediaMSG" _class_name = "MediaMSG"
def __init__(self, db_path): Media_required_tables = ["Media"]
super().__init__(db_path)
def Media_tables_exist(self):
"""
判断该类所需要的表是否存在
"""
return self.check_tables_exist(self.Media_required_tables)
def get_audio(self, MsgSvrID, is_play=False, is_wave=False, save_path=None, rate=24000): def get_audio(self, MsgSvrID, is_play=False, is_wave=False, save_path=None, rate=24000):
sql = "select Buf from Media where Reserved0=? " sql = "select Buf from Media where Reserved0=? "
DBdata = self.execute_sql(sql, (MsgSvrID,)) DBdata = self.execute(sql, (MsgSvrID,))
if not DBdata: if not DBdata:
return False return False
if len(DBdata) == 0: if len(DBdata) == 0:

306
pywxdump/db/dbMicro.py Normal file
View File

@ -0,0 +1,306 @@
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name: dbMicro.py
# Description:
# Author: xaoyaoo
# Date: 2024/04/15
# -------------------------------------------------------------------------------
import logging
from .dbbase import DatabaseBase
from .utils import timestamp2str, bytes2str, db_loger, db_error
import blackboxprotobuf
class MicroHandler(DatabaseBase):
_class_name = "MicroMsg"
Micro_required_tables = ["ContactLabel", "Contact", "ContactHeadImgUrl", "Session", "ChatInfo", "ChatRoom",
"ChatRoomInfo"]
def Micro_tables_exist(self):
    """
    Report whether the tables listed in Micro_required_tables are present,
    as decided by check_tables_exist.
    """
    needed_tables = self.Micro_required_tables
    return self.check_tables_exist(needed_tables)
@db_error
def get_labels(self, id_is_key=True):
    """
    Read the contact label table.

    :param id_is_key: True maps LabelId -> LabelName, False maps LabelName -> LabelId
    :return: the requested mapping; an empty dict when the table has no rows
    """
    sql = "SELECT LabelId, LabelName FROM ContactLabel ORDER BY LabelName ASC;"
    result = self.execute(sql)
    if not result:
        # Always hand back a dict: callers in this file call .get() on the result,
        # which an empty list would not support.
        return {}
    if id_is_key:
        return {label_id: label_name for label_id, label_name in result}
    return {label_name: label_id for label_id, label_name in result}
@db_error
def get_session_list(self):
    """
    Build the conversation list.

    For every strUsrName the Session row with the greatest nTime is taken and
    joined with the matching Contact and ContactHeadImgUrl rows.

    :return: {wxid: session info dict}, in descending nOrder; {} when there are no rows
    """
    query = (
        "SELECT S.strUsrName,S.nOrder,S.nUnReadCount, S.strNickName, S.nStatus, S.nIsSend, S.strContent, "
        "S.nMsgLocalID, S.nMsgStatus, S.nTime, S.nMsgType, S.Reserved2 AS nMsgSubType, C.UserName, C.Alias, "
        "C.DelFlag, C.Type, C.VerifyFlag, C.Reserved1, C.Reserved2, C.Remark, C.NickName, C.LabelIDList, "
        "C.ChatRoomType, C.ChatRoomNotify, C.Reserved5, C.Reserved6 as describe, C.ExtraBuf, H.bigHeadImgUrl "
        "FROM (SELECT strUsrName, MAX(nTime) AS MaxnTime FROM Session GROUP BY strUsrName) AS SubQuery "
        "JOIN Session S ON S.strUsrName = SubQuery.strUsrName AND S.nTime = SubQuery.MaxnTime "
        "left join Contact C ON C.UserName = S.strUsrName "
        "LEFT JOIN ContactHeadImgUrl H ON C.UserName = H.usrName "
        "ORDER BY S.nOrder DESC;"
    )
    rows = self.execute(query)
    session_map = {}
    if not rows:
        return session_map

    label_names = self.get_labels()
    for (strUsrName, nOrder, nUnReadCount, strNickName, nStatus, nIsSend, strContent,
         nMsgLocalID, nMsgStatus, nTime, nMsgType, nMsgSubType,
         UserName, Alias, DelFlag, Type, VerifyFlag, Reserved1, Reserved2, Remark, NickName, LabelIDList,
         ChatRoomType, ChatRoomNotify, Reserved5, describe, ExtraBuf, bigHeadImgUrl) in rows:
        extra_info = get_ExtraBuf(ExtraBuf)

        # LabelIDList is comma separated; ids without a known name are kept as they are.
        raw_label_ids = LabelIDList.split(",") if LabelIDList else []
        labels = tuple(label_names.get(int(item), item) for item in raw_label_ids if item)

        last_time = timestamp2str(nTime) if nTime else None

        session_map[strUsrName] = {
            "wxid": strUsrName, "nOrder": nOrder, "nUnReadCount": nUnReadCount, "strNickName": strNickName,
            "nStatus": nStatus, "nIsSend": nIsSend, "strContent": strContent, "nMsgLocalID": nMsgLocalID,
            "nMsgStatus": nMsgStatus, "nTime": last_time, "nMsgType": nMsgType, "nMsgSubType": nMsgSubType,
            "nickname": NickName, "remark": Remark, "account": Alias,
            "describe": describe, "headImgUrl": bigHeadImgUrl if bigHeadImgUrl else "",
            "ExtraBuf": extra_info, "LabelIDList": labels
        }
    return session_map
@db_error
def get_recent_chat_wxid(self):
    """
    List recently read conversations from the ChatInfo table.

    For every Username the row with the greatest LastReadedCreateTime is taken,
    considering only values above 1007911408000.

    :return: list of {"wxid", "LastReadedCreateTime", "LastReadedSvrId"} dicts,
             most recent first; [] when there are no rows
    """
    query = (
        "SELECT A.Username, LastReadedCreateTime, LastReadedSvrId "
        "FROM (   SELECT Username, MAX(LastReadedCreateTime) AS MaxLastReadedCreateTime FROM ChatInfo "
        "WHERE LastReadedCreateTime IS NOT NULL AND LastReadedCreateTime > 1007911408000   GROUP BY Username "
        ") AS SubQuery JOIN ChatInfo A "
        "ON A.Username = SubQuery.Username AND LastReadedCreateTime = SubQuery.MaxLastReadedCreateTime "
        "ORDER BY A.LastReadedCreateTime DESC;"
    )
    rows = self.execute(query)
    if not rows:
        return []
    return [
        {
            "wxid": username,
            "LastReadedCreateTime": timestamp2str(read_time) if read_time else None,
            "LastReadedSvrId": svr_id,
        }
        for username, read_time, svr_id in rows
    ]
@db_error
def get_user_list(self, word: str = None, wxids: list = None, label_ids: list = None):
    """
    Query the contact list.
    [ Note: when changing this function, apply the same change to
      get_im_user_list in dbOpenIMContact.py ]

    :param word: search keyword, matched as a substring against wxid, nickname,
                 remark and alias, and case-insensitively against the pinyin columns
    :param wxids: a single wxid or a list of wxids to restrict the result to
    :param label_ids: a single label id or a list of label ids; a contact matches
                      when it carries at least one of them
    :return: {wxid: contact info dict}; {} when nothing matches
    """
    if isinstance(wxids, str):
        wxids = [wxids]
    if isinstance(label_ids, str):
        label_ids = [label_ids]

    users = {}
    sql = (
        "SELECT A.UserName, A.Alias, A.DelFlag, A.Type, A.VerifyFlag, A.Reserved1, A.Reserved2,"
        "A.Remark, A.NickName, A.LabelIDList, A.ChatRoomType, A.ChatRoomNotify, A.Reserved5,"
        "A.Reserved6 as describe, A.ExtraBuf, B.bigHeadImgUrl "
        "FROM Contact A LEFT JOIN ContactHeadImgUrl B ON A.UserName = B.usrName WHERE 1==1 "
    )
    # All caller-supplied values are bound as parameters rather than spliced into
    # the SQL text, so quotes or semicolons in them cannot alter the statement.
    params = []

    if word:
        sql += ("AND ( A.UserName LIKE ? "
                "OR A.NickName LIKE ? "
                "OR A.Remark LIKE ? "
                "OR A.Alias LIKE ? "
                "OR LOWER(A.QuanPin) LIKE LOWER(?) "
                "OR LOWER(A.PYInitial) LIKE LOWER(?) "
                "OR LOWER(A.RemarkQuanPin) LIKE LOWER(?) "
                "OR LOWER(A.RemarkPYInitial) LIKE LOWER(?) "
                ") ")
        params.extend([f"%{word}%"] * 8)

    if wxids:
        sql += f"AND A.UserName IN ({','.join('?' * len(wxids))}) "
        params.extend(wxids)

    if label_ids:
        # LabelIDList is a comma-separated id list. Wrapping both sides in commas
        # matches whole ids only, so id 1 no longer matches 11 or 21.
        one_label = "(',' || REPLACE(A.LabelIDList, ' ', '') || ',') LIKE ?"
        sql += "AND (" + " OR ".join([one_label] * len(label_ids)) + ") "
        params.extend(f"%,{label_id},%" for label_id in label_ids)

    sql += ";"

    result = self.execute(sql, tuple(params)) if params else self.execute(sql)
    if not result:
        return users

    id2label = self.get_labels()
    for row in result:
        (UserName, Alias, DelFlag, Type, VerifyFlag, Reserved1, Reserved2, Remark, NickName, LabelIDList,
         ChatRoomType, ChatRoomNotify, Reserved5, describe, ExtraBuf, bigHeadImgUrl) = row

        ExtraBuf = get_ExtraBuf(ExtraBuf)
        # Ids without a known name are kept as they are.
        LabelIDList = LabelIDList.split(",") if LabelIDList else []
        LabelIDList = [id2label.get(int(label_id), label_id) for label_id in LabelIDList if label_id]

        users[UserName] = {
            "wxid": UserName, "nickname": NickName, "remark": Remark, "account": Alias,
            "describe": describe, "headImgUrl": bigHeadImgUrl if bigHeadImgUrl else "",
            "ExtraBuf": ExtraBuf, "LabelIDList": tuple(LabelIDList)}
    return users
@db_error
def get_room_list(self, word=None, roomwxids: list = None):
    """
    Query the group chat list.

    :param word: search keyword, matched as a substring of ChatRoomName
    :param roomwxids: a single room wxid or a list of room wxids to restrict the result to
    :return: {room wxid: room info dict}; {} when nothing matches
    """
    if isinstance(roomwxids, str):
        roomwxids = [roomwxids]

    sql = (
        "SELECT A.ChatRoomName,A.UserNameList,A.DisplayNameList,A.ChatRoomFlag,A.IsShowName,"
        "A.SelfDisplayName,A.Reserved2,A.RoomData, "
        "B.Announcement,B.AnnouncementEditor,B.AnnouncementPublishTime "
        "FROM ChatRoom A LEFT JOIN ChatRoomInfo B ON A.ChatRoomName==B.ChatRoomName "
        "WHERE 1==1 ")
    # Caller-supplied values are bound as parameters instead of being spliced in.
    params = []

    if word:
        sql += "AND A.ChatRoomName LIKE ? "
        params.append(f"%{word}%")
    if roomwxids:
        # Rooms are identified by ChatRoomName (it is the join key and the "wxid"
        # of every returned entry), so the wxid filter is applied to that column.
        sql += f"AND A.ChatRoomName IN ({','.join('?' * len(roomwxids))}) "
        params.extend(roomwxids)
    sql += ";"

    rooms = {}
    result = self.execute(sql, tuple(params)) if params else self.execute(sql)
    if not result:
        return rooms

    for row in result:
        (ChatRoomName, UserNameList, DisplayNameList, ChatRoomFlag, IsShowName, SelfDisplayName,
         Reserved2, RoomData,
         Announcement, AnnouncementEditor, AnnouncementPublishTime) = row

        # Member lists are "^G"-separated; a NULL column becomes an empty list.
        UserNameList = UserNameList.split("^G") if UserNameList else []
        DisplayNameList = DisplayNameList.split("^G") if DisplayNameList else []

        RoomData = ChatRoom_RoomData(RoomData)
        wxid2remark = {}
        if RoomData:
            # Gather the items of every list-valued field, then keep the dicts
            # whose field '1' is a string and whose field '2' is non-empty.
            rd = []
            for v in RoomData.values():
                if isinstance(v, list):
                    rd += v
            for i in rd:
                try:
                    if isinstance(i, dict) and isinstance(i.get('1'), str) and i.get('2'):
                        wxid2remark[i['1']] = i["2"]
                except Exception as e:
                    db_loger.error(f"wxid2remark: ChatRoomName:{ChatRoomName}, {i} error:{e}", exc_info=True)

        rooms[ChatRoomName] = {
            "wxid": ChatRoomName, "UserNameList": UserNameList, "DisplayNameList": DisplayNameList,
            "ChatRoomFlag": ChatRoomFlag, "IsShowName": IsShowName, "SelfDisplayName": SelfDisplayName,
            "owner": Reserved2, "wxid2remark": wxid2remark,
            "Announcement": Announcement, "AnnouncementEditor": AnnouncementEditor,
            "AnnouncementPublishTime": AnnouncementPublishTime}
    return rooms
@db_error
def ChatRoom_RoomData(RoomData):
    """
    Decode the RoomData column of a group chat row.

    :param RoomData: raw RoomData bytes
    :return: None when RoomData is not bytes, otherwise the result of get_BytesExtra
    """
    if not isinstance(RoomData, bytes):  # also covers None
        return None
    decoded = get_BytesExtra(RoomData)
    if decoded:
        # The return value is discarded; presumably bytes2str converts the
        # structure in place - TODO confirm against utils.bytes2str.
        bytes2str(decoded)
    return decoded
@db_error
def get_BytesExtra(BytesExtra):
    """
    Decode a protobuf blob with blackboxprotobuf, without a predefined message type.

    :param BytesExtra: raw bytes to decode
    :return: the decoded message, or None when the input is not bytes or
             decoding fails (the failure is logged as a warning)
    """
    if not isinstance(BytesExtra, bytes):  # also covers None
        return None
    try:
        decoded_message, _ = blackboxprotobuf.decode_message(BytesExtra)
    except Exception as e:
        db_loger.warning(f"\nget_BytesExtra: {e}\n{BytesExtra}", exc_info=True)
        return None
    else:
        return decoded_message
@db_error
def get_ExtraBuf(ExtraBuf: bytes):
    """
    Parse the ExtraBuf blob of a contact row into a dict of named fields.

    For every known marker the first occurrence in the blob is located; the byte
    right after the marker is a type tag that decides how the value is read:
    0x04 -> 4-byte little-endian int, 0x18 -> length-prefixed UTF-16 text,
    0x17 -> length-prefixed UTF-8 text, 0x05 -> 8 raw bytes rendered as hex.
    A marker that is absent maps to ""; a marker followed by any other tag
    leaves its field out of the result.

    :param ExtraBuf: raw blob
    :return: dict of field name -> value, or None for an empty/missing blob
    """
    if not ExtraBuf:
        return None
    buf_dict = {
        '74752C06': '性别[1男2女]', '46CF10C4': '个性签名', 'A4D9024A': '', 'E2EAA8D1': '', '1D025BBF': '',
        'F917BCC0': '公司名称', '759378AD': '手机号', '4EB96D85': '企微属性', '81AE19B4': '朋友圈背景',
        '0E719F13': '备注图片', '945f3190': '备注图片2',
        'DDF32683': '0', '88E28FCE': '1', '761A1D2D': '2', '0263A0CB': '3', '0451FF12': '4', '228C66A8': '5',
        '4D6C4570': '6', '4335DFDD': '7', 'DE4CDAEB': '8', 'A72BC20A': '9', '069FED52': '10', '9B0F4299': '11',
        '3D641E22': '12', '1249822C': '13', 'B4F73ACB': '14', '0959EB92': '15', '3CF4A315': '16',
        'C9477AC60201E44CD0E8': '17', 'B7ACF0F5': '18', '57A7B5A8': '19', '695F3170': '20', 'FB083DD9': '21',
        '0240E37F': '22', '315D02A3': '23', '7DEC0BC3': '24', '16791C90': '25'
    }

    rdata = {}
    for marker_hex, field_name in buf_dict.items():
        marker = bytes.fromhex(marker_hex)
        found_at = ExtraBuf.find(marker)
        if found_at == -1:
            rdata[field_name] = ""
            continue

        tag_pos = found_at + len(marker)
        type_id = ExtraBuf[tag_pos: tag_pos + 1]
        value_pos = tag_pos + 1

        if type_id == b"\x04":
            rdata[field_name] = int.from_bytes(ExtraBuf[value_pos: value_pos + 4], "little")
        elif type_id == b"\x18":
            length = int.from_bytes(ExtraBuf[value_pos: value_pos + 4], "little")
            raw = ExtraBuf[value_pos + 4: value_pos + 4 + length]
            rdata[field_name] = raw.decode("utf-16").rstrip("\x00")
        elif type_id == b"\x17":
            length = int.from_bytes(ExtraBuf[value_pos: value_pos + 4], "little")
            raw = ExtraBuf[value_pos + 4: value_pos + 4 + length]
            rdata[field_name] = raw.decode("utf-8", errors="ignore").rstrip("\x00")
        elif type_id == b"\x05":
            rdata[field_name] = f"0x{ExtraBuf[value_pos: value_pos + 8].hex()}"
    return rdata

View File

@ -0,0 +1,111 @@
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name: parsingOpenIMContact.py
# Description:
# Author: xaoyaoo
# Date: 2024/04/16
# -------------------------------------------------------------------------------
from .dbbase import DatabaseBase
from .utils import db_error
class OpenIMContactHandler(DatabaseBase):
    """Read access to the OpenIMContact table."""

    _class_name = "OpenIMContact"
    OpenIMContact_required_tables = ["OpenIMContact"]

    def OpenIMContact_tables_exist(self):
        """
        Check whether every table this handler needs exists.

        :return: result of ``check_tables_exist`` for ``OpenIMContact_required_tables``
        """
        return self.check_tables_exist(self.OpenIMContact_required_tables)

    def get_im_user_list(self, word=None, wxids=None):
        """
        Return OpenIM contacts keyed by wxid.

        [ Note: when changing this function, change get_user_list in dbMicro.py as well ]

        :param word: optional keyword matched against user name, nickname, remark and
                     their pinyin columns
        :param wxids: optional wxid (str) or list of wxids to restrict the result to
        :return: dict mapping wxid to a contact dict; an empty dict when nothing matches
                 or the query fails
        """
        if not wxids:
            wxids = []
        if isinstance(wxids, str):
            wxids = [wxids]

        # All user-supplied values are bound as parameters rather than spliced into
        # the statement, so quotes/semicolons in them cannot change the SQL.
        sql = ("SELECT UserName,NickName,Type,Remark,BigHeadImgUrl,CustomInfoDetail,CustomInfoDetailVisible,"
               "AntiSpamTicket,AppId,Sex,DescWordingId,ExtraBuf "
               "FROM OpenIMContact WHERE 1==1 ")
        params = []
        if word:
            pattern = f"%{word}%"
            sql += ("AND (UserName LIKE ? "
                    "OR NickName LIKE ? "
                    "OR Remark LIKE ? "
                    "OR LOWER(NickNamePYInit) LIKE LOWER(?) "
                    "OR LOWER(NickNameQuanPin) LIKE LOWER(?) "
                    "OR LOWER(RemarkPYInit) LIKE LOWER(?) "
                    "OR LOWER(RemarkQuanPin) LIKE LOWER(?) "
                    ") ")
            params.extend([pattern] * 7)
        if wxids:
            placeholders = ",".join("?" * len(wxids))
            sql += f"AND UserName IN ({placeholders}) "
            params.extend(wxids)
        sql += ";"

        result = self.execute(sql, tuple(params))
        if not result:
            # Same (empty) type as the normal return value; previously this was a list.
            return {}

        users = {}
        for row in result:
            # Only these four columns feed the result; the remaining selected
            # columns are currently unused.
            UserName, NickName, Remark, BigHeadImgUrl = row[0], row[1], row[3], row[4]
            users[UserName] = {
                "wxid": UserName, "nickname": NickName, "remark": Remark, "account": UserName,
                "describe": '', "headImgUrl": BigHeadImgUrl if BigHeadImgUrl else "",
                "ExtraBuf": None, "LabelIDList": tuple()}
        return users
@db_error
def get_ExtraBuf(ExtraBuf: bytes):
    """
    Parse a contact ExtraBuf blob into a dict of named fields.

    Each known marker is searched for once (first occurrence). The byte after the
    marker selects the decoding: 0x04 reads a 4-byte little-endian int, 0x18 a
    length-prefixed UTF-16 string, 0x17 a length-prefixed UTF-8 string (undecodable
    bytes ignored) and 0x05 eight raw bytes shown as hex. Missing markers map to "";
    markers followed by an unknown tag are omitted from the result.

    :param ExtraBuf: raw blob
    :return: dict of field name -> value, or None when the blob is empty/missing
    """
    if not ExtraBuf:
        return None
    buf_dict = {
        '74752C06': '性别[1男2女]', '46CF10C4': '个性签名', 'A4D9024A': '', 'E2EAA8D1': '', '1D025BBF': '',
        'F917BCC0': '公司名称', '759378AD': '手机号', '4EB96D85': '企微属性', '81AE19B4': '朋友圈背景',
        '0E719F13': '备注图片', '945f3190': '备注图片2',
        'DDF32683': '0', '88E28FCE': '1', '761A1D2D': '2', '0263A0CB': '3', '0451FF12': '4', '228C66A8': '5',
        '4D6C4570': '6', '4335DFDD': '7', 'DE4CDAEB': '8', 'A72BC20A': '9', '069FED52': '10', '9B0F4299': '11',
        '3D641E22': '12', '1249822C': '13', 'B4F73ACB': '14', '0959EB92': '15', '3CF4A315': '16',
        'C9477AC60201E44CD0E8': '17', 'B7ACF0F5': '18', '57A7B5A8': '19', '695F3170': '20', 'FB083DD9': '21',
        '0240E37F': '22', '315D02A3': '23', '7DEC0BC3': '24', '16791C90': '25'
    }

    parsed = {}
    for hex_key, label in buf_dict.items():
        key_bytes = bytes.fromhex(hex_key)
        start = ExtraBuf.find(key_bytes)
        if start == -1:
            parsed[label] = ""
            continue

        cursor = start + len(key_bytes)
        type_id = ExtraBuf[cursor: cursor + 1]
        cursor += 1

        if type_id == b"\x04":
            parsed[label] = int.from_bytes(ExtraBuf[cursor: cursor + 4], "little")
        elif type_id == b"\x18":
            size = int.from_bytes(ExtraBuf[cursor: cursor + 4], "little")
            payload = ExtraBuf[cursor + 4: cursor + 4 + size]
            parsed[label] = payload.decode("utf-16").rstrip("\x00")
        elif type_id == b"\x17":
            size = int.from_bytes(ExtraBuf[cursor: cursor + 4], "little")
            payload = ExtraBuf[cursor + 4: cursor + 4 + size]
            parsed[label] = payload.decode("utf-8", errors="ignore").rstrip("\x00")
        elif type_id == b"\x05":
            parsed[label] = f"0x{ExtraBuf[cursor: cursor + 8].hex()}"
    return parsed

View File

@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name: MediaMSG_parsing.py
# Description:
# Author: xaoyaoo
# Date: 2024/04/15
# -------------------------------------------------------------------------------
from .dbbase import DatabaseBase
from .utils import silk2audio, db_loger
class OpenIMMediaHandler(DatabaseBase):
    """Read access to the OpenIMMedia table (voice message buffers)."""

    _class_name = "OpenIMMedia"
    OpenIMMedia_required_tables = ["OpenIMMedia"]

    def OpenIMMedia_tables_exist(self):
        """
        Check whether every table this handler needs exists.
        """
        return self.check_tables_exist(self.OpenIMMedia_required_tables)

    def get_im_audio(self, MsgSvrID, is_play=False, is_wave=False, save_path=None, rate=24000):
        """
        Fetch the audio buffer stored for a message and convert it with silk2audio.

        :param MsgSvrID: value matched against the Reserved0 column
        :param is_play: forwarded to silk2audio
        :param is_wave: forwarded to silk2audio
        :param save_path: forwarded to silk2audio
        :param rate: forwarded to silk2audio
        :return: whatever silk2audio returns, or False when no row is found or
                 the conversion raises
        """
        rows = self.execute("select Buf from OpenIMMedia where Reserved0=? ", (MsgSvrID,))
        if not rows:  # covers both a failed query (None) and an empty result
            return False

        silk_buf = rows[0][0]
        try:
            return silk2audio(buf_data=silk_buf, is_play=is_play, is_wave=is_wave, save_path=save_path, rate=rate)
        except Exception as e:
            db_loger.warning(e, exc_info=True)
            return False

106
pywxdump/db/dbPublicMsg.py Normal file
View File

@ -0,0 +1,106 @@
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name: parsingPublicMsg.py
# Description:
# Author: xaoyaoo
# Date: 2024/07/03
# -------------------------------------------------------------------------------
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name: parsingMSG.py
# Description:
# Author: xaoyaoo
# Date: 2024/04/15
# -------------------------------------------------------------------------------
import json
import os
import re
from typing import Union, Tuple
import pandas as pd
from .dbbase import DatabaseBase
from .dbMSG import MsgHandler
from .utils import get_md5, name2typeid, typeid2name, type_converter, timestamp2str, xml2dict, match_BytesExtra, \
db_error
import lz4.block
import blackboxprotobuf
class PublicMsgHandler(MsgHandler):
    """Read access to the PublicMsg table, reusing MsgHandler's row parsing."""

    _class_name = "PublicMSG"
    PublicMSG_required_tables = ["PublicMsg"]

    @db_error
    def PublicMSG_tables_exist(self):
        """
        Check whether every table this handler needs exists.
        """
        return self.check_tables_exist(self.PublicMSG_required_tables)

    @db_error
    def get_plc_msg_count(self, wxids: list = ""):
        """
        Count PublicMsg rows per talker.

        :param wxids: a wxid (str) or list of wxids; empty/omitted means every talker
        :return: {"total": <rows in PublicMsg>, wxid: count, ...}; an empty dict when the
                 per-talker query returns nothing
        """
        # An empty string (the default) means "no filter". It used to be wrapped into
        # [""], which made the filter branch always run and match nothing.
        if isinstance(wxids, str):
            wxids = [wxids] if wxids else []
        wxids = list(wxids) if wxids else []

        if wxids:
            placeholders = ",".join("?" * len(wxids))
            sql = (f"SELECT StrTalker, COUNT(*) FROM PublicMsg WHERE StrTalker IN ({placeholders}) "
                   "GROUP BY StrTalker ORDER BY COUNT(*) DESC;")
        else:
            sql = "SELECT StrTalker, COUNT(*) FROM PublicMsg GROUP BY StrTalker ORDER BY COUNT(*) DESC;"
        # The total is taken from the same table as the per-talker counts
        # (it was previously read from MSG).
        sql_total = "SELECT COUNT(*) FROM PublicMsg;"

        result = self.execute(sql, tuple(wxids))
        total_ret = self.execute(sql_total)

        if not result:
            return {}
        total = total_ret[0][0] if total_ret else 0

        msg_count = {"total": total}
        msg_count.update({row[0]: row[1] for row in result})
        return msg_count

    @db_error
    def get_plc_msg_list(self, wxid="", start_index=0, page_size=500, msg_type: str = "", msg_sub_type: str = "",
                         start_createtime=None, end_createtime=None):
        """
        Return one page of PublicMsg rows, oldest first.

        :param wxid: optional talker to filter on (StrTalker)
        :param start_index: offset of the first row of the page
        :param page_size: maximum number of rows returned
        :param msg_type: optional Type filter
        :param msg_sub_type: optional SubType filter; only applied together with msg_type
        :param start_createtime: optional lower bound (inclusive) on CreateTime
        :param end_createtime: optional upper bound (inclusive) on CreateTime
        :return: (list of parsed messages, list of distinct talkers in that page);
                 ([], []) when nothing matches
        """
        sql_base = ("SELECT localId,TalkerId,MsgSvrID,Type,SubType,CreateTime,IsSender,Sequence,StatusEx,FlagEx,Status,"
                    "MsgSequence,StrContent,MsgServerSeq,StrTalker,DisplayContent,Reserved0,Reserved1,Reserved3,"
                    "Reserved4,Reserved5,Reserved6,CompressContent,BytesExtra,BytesTrans,Reserved2,"
                    "ROW_NUMBER() OVER (ORDER BY CreateTime ASC) AS id "
                    "FROM PublicMsg ")

        # Each active filter contributes one SQL fragment and one bound value,
        # appended in the same order so placeholders and values line up.
        filters = []
        param = []
        if wxid:
            filters.append("AND StrTalker=? ")
            param.append(wxid)
        if msg_type:
            filters.append("AND Type=? ")
            param.append(msg_type)
        if msg_type and msg_sub_type:
            filters.append("AND SubType=? ")
            param.append(msg_sub_type)
        if start_createtime:
            filters.append("AND CreateTime>=? ")
            param.append(start_createtime)
        if end_createtime:
            filters.append("AND CreateTime<=? ")
            param.append(end_createtime)

        sql = f"{sql_base} WHERE 1=1 {''.join(filters)}ORDER BY CreateTime ASC LIMIT ?,?"
        param += [start_index, page_size]

        result = self.execute(sql, tuple(param))
        if not result:
            return [], []

        rdata = [self.get_msg_detail(row) for row in result]
        wxid_list = {d['talker'] for d in rdata}  # distinct talkers
        return rdata, list(wxid_list)

205
pywxdump/db/dbbase.py Normal file
View File

@ -0,0 +1,205 @@
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name: dbbase.py
# Description:
# Author: xaoyaoo
# Date: 2024/04/15
# -------------------------------------------------------------------------------
import importlib
import os
import sqlite3
import time
from .utils import db_loger
from dbutils.pooled_db import PooledDB
# import logging
#
# db_loger = logging.getLogger("db_prepare")
class DatabaseSingletonBase:
    """Creates and caches one connection pool per database key."""

    _class_name = "DatabaseSingletonBase"
    # db_config["key"] -> connection pool. Defined once on this base class, so the
    # same dict is seen by every subclass that does not override it.
    _db_pool = {}

    @staticmethod
    def _redact_config(db_config):
        """
        Return a copy of ``db_config`` that is safe to write to a log.

        :param db_config: connection settings dict
        :return: shallow copy with a non-empty "password" value masked; non-dict
                 input is returned unchanged
        """
        if not isinstance(db_config, dict):
            return db_config
        return {key: ("******" if key == "password" and value else value)
                for key, value in db_config.items()}

    @classmethod
    def connect(cls, db_config):
        """
        Return the connection pool for ``db_config``, creating it on first use.
        Override this method to support additional database types.

        :param db_config: dict with "key" and "type"; "sqlite" additionally needs "path",
                          "mysql" needs "user", "host", "password", "database" and "port"
        :return: the (possibly cached) connection pool
        :raises ValueError: empty config or unsupported "type"
        :raises FileNotFoundError: the sqlite file does not exist
        """
        if not db_config:
            raise ValueError("db_config 不能为空")
        db_key = db_config["key"]
        db_type = db_config["type"]

        cached = cls._db_pool.get(db_key)
        if cached is not None:
            return cached

        if db_type == "sqlite":
            db_path = db_config["path"]
            if not os.path.exists(db_path):
                raise FileNotFoundError(f"文件不存在: {db_path}")
            pool = PooledDB(
                creator=sqlite3,  # sqlite3 creates the connections
                ping=0,  # never ping the connection
                database=db_path
            )
        elif db_type == "mysql":
            mysql_config = {
                'user': db_config['user'],
                'host': db_config['host'],
                'password': db_config['password'],
                'database': db_config['database'],
                'port': db_config['port']
            }
            pool = PooledDB(
                creator=importlib.import_module('pymysql'),  # imported lazily, only for mysql
                ping=1,  # ping the connection whenever it is fetched from the pool
                **mysql_config
            )
        else:
            raise ValueError(f"不支持的数据库类型: {db_type}")

        # The password is masked before logging so credentials never reach the log.
        db_loger.info(f"{pool} 连接句柄创建 {cls._redact_config(db_config)}")
        cls._db_pool[db_key] = pool
        return pool
class DatabaseBase(DatabaseSingletonBase):
    """Base class of the table handlers: owns a pool and runs SQL through it."""

    _class_name = "DatabaseBase"

    def __init__(self, db_config):
        r"""
        :param db_config: connection settings passed to ``connect``, e.g.
            db_config = {
                "key": "test1",
                "type": "sqlite",
                "path": r"C:\***\wxdump_work\merge_all.db"
            }
        """
        self.config = db_config
        self.pool = self.connect(self.config)

    @staticmethod
    def _fetch_all(connection, sql, params):
        """Run one statement on ``connection`` and return all rows."""
        cursor = connection.cursor()
        if params:
            cursor.execute(sql, params)
        else:
            cursor.execute(sql)
        return cursor.fetchall()

    def execute(self, sql, params=None):
        """
        Execute a SQL statement.

        The statement is tried once as is; if that raises, it is retried with
        ``text_factory = bytes`` set on the connection object.

        :param sql: SQL statement (str)
        :param params: parameters (tuple)
        :return: query result (list), or None when both attempts fail (the error is logged)
        """
        connection = self.pool.connection()
        try:
            try:
                return self._fetch_all(connection, sql, params)
            except Exception as e1:
                try:
                    connection.text_factory = bytes
                    return self._fetch_all(connection, sql, params)
                except Exception as e2:
                    db_loger.error(f"{sql=}\n{params=}\n{e1=}\n{e2=}\n", exc_info=True)
                    return None
                finally:
                    # Restore the default even when the retry failed.
                    connection.text_factory = str
        finally:
            # Hand the connection back explicitly instead of relying on garbage collection.
            connection.close()

    def check_tables_exist(self, required_tables):
        """
        Check whether every table in ``required_tables`` exists (queries sqlite_master).

        :param required_tables: iterable of table names; empty/None means nothing is required
        :return: True when all tables exist, False otherwise or when the lookup fails
        """
        required_tables = list(required_tables or [])
        if not required_tables:
            return True
        placeholders = ",".join("?" * len(required_tables))
        sql = (f"SELECT tbl_name FROM sqlite_master "
               f"WHERE type='table' AND tbl_name in ({placeholders});")
        rows = self.execute(sql, tuple(required_tables))
        if rows is None:  # execute() already logged the failure
            return False
        existing_tables = {row[0] for row in rows}
        return all(table in existing_tables for table in required_tables)

    def close(self):
        """Close the pool held by this instance; a no-op when no pool was created."""
        # `pool` is missing when connect() raised inside __init__.
        pool = getattr(self, "pool", None)
        if pool is None:
            return
        # NOTE(review): the pool object is also cached in _db_pool under the config
        # key, so other handlers created with the same key hold this same pool.
        pool.close()
        config = getattr(self, "config", None)
        if isinstance(config, dict):
            # Mask the password so credentials never reach the log.
            config = {key: ("******" if key == "password" and value else value)
                      for key, value in config.items()}
        db_loger.info(f"关闭数据库 - {config}")

    def __del__(self):
        try:
            self.close()
        except Exception:
            # Best effort only: a finalizer must not raise (it may run during
            # interpreter shutdown when module globals are already gone).
            pass
# class MsgDb(DatabaseBase):
#
# def p(self, *args, **kwargs):
# sel = "select tbl_name from sqlite_master where type='table'"
# data = self.execute(sel)
# # print([i[0] for i in data])
# return data
#
#
# class MsgDb1(DatabaseBase):
# _class_name = "MsgDb1"
#
# def p(self, *args, **kwargs):
# sel = "select tbl_name from sqlite_master where type='table'"
# data = self.execute(sel)
# # print([i[0] for i in data])
# return data
#
#
# if __name__ == '__main__':
# logging.basicConfig(level=logging.INFO,
# style='{',
# datefmt='%Y-%m-%d %H:%M:%S',
# format='[{levelname[0]}] {asctime} [{name}:{levelno}] {pathname}:{lineno} {message}'
# )
#
# config1 = {
# "key": "test1",
# "type": "sqlite",
# "path": r"D:\e_all.db"
# }
# config2 = {
# "key": "test2",
# "type": "sqlite",
# "path": r"D:\_call.db"
# }
#
# t1 = MsgDb(config1)
# t1.p()
# t2 = MsgDb(config2)
# t2.p()
# t3 = MsgDb1(config1)
# t3.p()
# t4 = MsgDb1(config2)
# t4.p()
#
# print(t4._db_pool)
# # 销毁t1
# del t1
# # 销毁t2
# del t2
# del t3
#
# # 销毁t4
# del t4
# import time
# time.sleep(1)
#
# t1 = MsgDb(config1)
# t1.p()
# t2 = MsgDb(config2)
# t2.p()
#
#
# print(t2._db_pool)

View File

@ -8,18 +8,18 @@
import csv import csv
import json import json
import os import os
from ..parsingMSG import ParsingMSG from ..dbMSG import MsgHandler
def export_csv(wxid, outpath, msg_path, page_size=5000): def export_csv(wxid, outpath, db_config, page_size=5000):
if not os.path.exists(outpath): if not os.path.exists(outpath):
outpath = os.path.join(os.getcwd(), "export" + os.sep + wxid) outpath = os.path.join(os.getcwd(), "export" + os.sep + wxid)
if not os.path.exists(outpath): if not os.path.exists(outpath):
os.makedirs(outpath) os.makedirs(outpath)
pmsg = ParsingMSG(msg_path) pmsg = MsgHandler(db_config)
count = pmsg.msg_count(wxid) count = pmsg.get_msg_count(wxid)
chatCount = count.get(wxid, 0) chatCount = count.get(wxid, 0)
if chatCount == 0: if chatCount == 0:
return False, "没有聊天记录" return False, "没有聊天记录"
@ -29,7 +29,7 @@ def export_csv(wxid, outpath, msg_path, page_size=5000):
for i in range(0, chatCount, page_size): for i in range(0, chatCount, page_size):
start_index = i start_index = i
data, wxid_list = pmsg.msg_list(wxid, start_index, page_size) data, wxid_list = pmsg.get_msg_list(wxid, start_index, page_size)
if len(data) == 0: if len(data) == 0:
return False, "没有聊天记录" return False, "没有聊天记录"
@ -39,7 +39,7 @@ def export_csv(wxid, outpath, msg_path, page_size=5000):
with open(save_path, "w", encoding="utf-8", newline='') as f: with open(save_path, "w", encoding="utf-8", newline='') as f:
csv_writer = csv.writer(f, quoting=csv.QUOTE_MINIMAL) csv_writer = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
csv_writer.writerow(["id", "MsgSvrID", "type_name", "is_sender", "talker", "room_name", "content", csv_writer.writerow(["id", "MsgSvrID", "type_name", "is_sender", "talker", "room_name", "msg", "src",
"CreateTime"]) "CreateTime"])
for row in data: for row in data:
id = row.get("id", "") id = row.get("id", "")
@ -48,11 +48,10 @@ def export_csv(wxid, outpath, msg_path, page_size=5000):
is_sender = row.get("is_sender", "") is_sender = row.get("is_sender", "")
talker = row.get("talker", "") talker = row.get("talker", "")
room_name = row.get("room_name", "") room_name = row.get("room_name", "")
content = row.get("content", "") msg = row.get("msg", "")
src = row.get("src", "")
CreateTime = row.get("CreateTime", "") CreateTime = row.get("CreateTime", "")
csv_writer.writerow([id, MsgSvrID, type_name, is_sender, talker, room_name, msg, src, CreateTime])
content = json.dumps(content, ensure_ascii=False)
csv_writer.writerow([id, MsgSvrID, type_name, is_sender, talker, room_name, content, CreateTime])
return True, f"导出成功: {outpath}" return True, f"导出成功: {outpath}"

View File

@ -7,18 +7,18 @@
# ------------------------------------------------------------------------------- # -------------------------------------------------------------------------------
import json import json
import os import os
from ..parsingMSG import ParsingMSG from ..dbMSG import MsgHandler
def export_json(wxid, outpath, msg_path): def export_json(wxid, outpath, db_config):
if not os.path.exists(outpath): if not os.path.exists(outpath):
outpath = os.path.join(os.getcwd(), "export" + os.sep + wxid) outpath = os.path.join(os.getcwd(), "export" + os.sep + wxid)
if not os.path.exists(outpath): if not os.path.exists(outpath):
os.makedirs(outpath) os.makedirs(outpath)
pmsg = ParsingMSG(msg_path) pmsg = MsgHandler(db_config)
count = pmsg.msg_count(wxid) count = pmsg.get_msg_count(wxid)
chatCount = count.get(wxid, 0) chatCount = count.get(wxid, 0)
if chatCount == 0: if chatCount == 0:
return False, "没有聊天记录" return False, "没有聊天记录"
@ -26,7 +26,7 @@ def export_json(wxid, outpath, msg_path):
page_size = chatCount + 1 page_size = chatCount + 1
for i in range(0, chatCount, page_size): for i in range(0, chatCount, page_size):
start_index = i start_index = i
data, wxid_list = pmsg.msg_list(wxid, start_index, page_size) data, wxid_list = pmsg.get_msg_list(wxid, start_index, page_size)
if len(data) == 0: if len(data) == 0:
return False, "没有聊天记录" return False, "没有聊天记录"
save_path = os.path.join(outpath, f"{wxid}_{i}_{i + page_size}.json") save_path = os.path.join(outpath, f"{wxid}_{i}_{i + page_size}.json")

View File

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name: __init__.py
# Description:
# Author: xaoyaoo
# Date: 2024/07/23
# -------------------------------------------------------------------------------
from ._loger import db_loger
from .common_utils import timestamp2str, xml2dict, silk2audio, bytes2str, get_md5, name2typeid, typeid2name, \
type_converter, match_BytesExtra, db_error, download_file,dat2img

View File

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name: _loger.py
# Description:
# Author: xaoyaoo
# Date: 2024/07/23
# -------------------------------------------------------------------------------
import logging
# Module-level logger registered under the name "db_prepare".
db_loger = logging.getLogger("db_prepare")

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-# # -*- coding: utf-8 -*-#
# ------------------------------------------------------------------------------- # -------------------------------------------------------------------------------
# Name: utils.py # Name: common_utils.py
# Description: # Description:
# Author: xaoyaoo # Author: xaoyaoo
# Date: 2024/04/15 # Date: 2024/04/15
@ -17,6 +17,25 @@ import pysilk
import lxml.etree as ET # 这个模块更健壮些微信XML格式有时有非标格式会导致xml.etree.ElementTree处理失败 import lxml.etree as ET # 这个模块更健壮些微信XML格式有时有非标格式会导致xml.etree.ElementTree处理失败
from collections import defaultdict from collections import defaultdict
from ._loger import db_loger
def db_error(func):
    """
    Error-handling decorator: log any exception raised by ``func`` and return None.

    :param func: the callable to protect
    :return: a wrapper with the same name/docstring as ``func`` that returns
             ``func``'s result, or None when ``func`` raises
    """
    import functools  # function-scope import keeps this block self-contained

    @functools.wraps(func)  # keep __name__/__doc__ so logs and introspection stay meaningful
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            db_loger.error(f"db_error: {e}", exc_info=True)
            return None

    return wrapper
def type_converter(type_id_or_name: [str, tuple]): def type_converter(type_id_or_name: [str, tuple]):
""" """

View File

@ -1,90 +0,0 @@
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name: __init__.py.py
# Description:
# Author: xaoyaoo
# Date: 2024/04/15
# -------------------------------------------------------------------------------
import pandas as pd
from .parsingFavorite import ParsingFavorite
from .parsingMSG import ParsingMSG
from .parsingMicroMsg import ParsingMicroMsg
from .parsingMediaMSG import ParsingMediaMSG
from .parsingOpenIMContact import ParsingOpenIMContact
from .parsingPublicMsg import ParsingPublicMsg
from .utils import download_file
from .export.exportCSV import export_csv
from .export.exportJSON import export_json
def get_user_list(MicroMsg_db_path, OpenIMContact_db_path=None, word=None):
    """
    Collect the contacts from MicroMsg.db and, optionally, OpenIMContact.db.

    :param MicroMsg_db_path: path of MicroMsg.db; a falsy value yields an empty list
    :param OpenIMContact_db_path: optional path of OpenIMContact.db
    :param word: optional search keyword forwarded to both parsers
    :return: list of contact dicts with exact duplicates removed (order is not preserved)
    """
    if not MicroMsg_db_path:
        return []

    users = ParsingMicroMsg(MicroMsg_db_path).user_list(word=word)
    if OpenIMContact_db_path:
        users += ParsingOpenIMContact(OpenIMContact_db_path).user_list(word=word)

    # De-duplicate through a set of item tuples, then turn each tuple back into a dict.
    distinct_items = {tuple(user.items()) for user in users}
    return [dict(items) for items in distinct_items]
def get_recent_user_list(MicroMsg_db_path, OpenIMContact_db_path=None, limit=200):
    """
    Return the most recently active contacts, newest first.

    :param MicroMsg_db_path: path of MicroMsg.db; a falsy value yields an empty list
    :param OpenIMContact_db_path: optional path of OpenIMContact.db
    :param limit: maximum number of recent chats considered
    :return: list of contact dicts enriched with LastReadedCreateTime / LastReadedSvrId;
             missing values are replaced by ""
    """
    if not MicroMsg_db_path:
        return []

    # Rows look like {"wxid": ..., "LastReadedCreateTime": ..., "LastReadedSvrId": ...}
    recent_rows = ParsingMicroMsg(MicroMsg_db_path).recent_chat_wxid()
    recent_df = (
        pd.DataFrame(recent_rows, columns=["wxid", "LastReadedCreateTime", "LastReadedSvrId"])
        .sort_values(by="LastReadedCreateTime", ascending=False)
        .drop_duplicates(subset=["wxid"], keep="first")
        .head(limit)
    )

    contacts_df = pd.DataFrame(get_user_list(MicroMsg_db_path, OpenIMContact_db_path))
    # Right join: keep every recent chat even when no contact entry exists for it.
    merged = pd.merge(contacts_df, recent_df, on="wxid", how="right")
    if not merged.empty:
        merged = merged.sort_values(by="LastReadedCreateTime", ascending=False)

    # After sorting, keep="first" keeps the newest row per wxid.
    return (
        merged.drop_duplicates(subset=["wxid"], keep="first")
        .fillna("")
        .to_dict(orient="records")
    )
def wxid2userinfo(MicroMsg_db_path, OpenIMContact_db_path, wxid):
    """
    Look up contact information for one or several wxids.

    :param MicroMsg_db_path: path of MicroMsg.db
    :param OpenIMContact_db_path: path of OpenIMContact.db; skipped when falsy
    :param wxid: a single wxid or a list of wxids
    :return: {wxid: {wxid, nickname, remark, account, describe, headImgUrl, ...}};
             entries found in OpenIMContact.db override those from MicroMsg.db
    """
    users = ParsingMicroMsg(MicroMsg_db_path).wxid2userinfo(wxid)
    if not OpenIMContact_db_path:
        return users

    users.update(ParsingOpenIMContact(OpenIMContact_db_path).wxid2userinfo(wxid))
    return users

View File

@ -1,103 +0,0 @@
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name: dbbase.py
# Description:
# Author: xaoyaoo
# Date: 2024/04/15
# -------------------------------------------------------------------------------
import os
import sqlite3
import logging
class DatabaseBase:
    """
    SQLite access helper; one instance is kept per ``_class_name``.

    Because ``__new__`` hands back the existing instance, constructing the class
    again with another path re-runs ``__init__`` on that same object and points it
    at the new database.
    """

    _singleton_instances = {}  # _class_name -> the single instance of that class
    # db_path -> connection.
    # NOTE(review): nothing in this class ever stores a connection here, so the
    # lookups below never hit; left as is, confirm whether pooling was intended.
    _connection_pool = {}
    _class_name = "DatabaseBase"

    def __new__(cls, db_path):
        if cls._class_name not in cls._singleton_instances:
            cls._singleton_instances[cls._class_name] = super().__new__(cls)
        return cls._singleton_instances[cls._class_name]

    def __init__(self, db_path):
        """
        :param db_path: path of the sqlite file
        :raises FileNotFoundError: the file does not exist
        """
        self._db_path = db_path
        self._db_connection = self._connect_to_database(db_path)

    @classmethod
    def _connect_to_database(cls, db_path):
        """
        Open (or fetch from ``_connection_pool``) a connection to ``db_path``.

        :raises FileNotFoundError: the file does not exist
        """
        if not os.path.exists(db_path):
            raise FileNotFoundError(f"文件不存在: {db_path}")
        pooled = cls._connection_pool.get(db_path)
        if pooled is not None:
            return pooled
        connection = sqlite3.connect(db_path, check_same_thread=False)
        logging.info(f"{connection} 连接句柄创建 {db_path}")
        return connection

    @staticmethod
    def _fetch_all(connection, sql, params):
        """Run one statement on ``connection`` and return all rows."""
        cursor = connection.cursor()
        if params:
            cursor.execute(sql, params)
        else:
            cursor.execute(sql)
        return cursor.fetchall()

    def execute_sql(self, sql, params=None):
        """
        Execute a SQL statement.

        :param sql: SQL statement (str)
        :param params: parameters (tuple)
        :return: query result (list), or None when the statement fails twice
        """
        # Reconnect when the connection was closed. The new connection has to be
        # stored, otherwise every query after close_connection() would run
        # against None and fail.
        if not self._db_connection:
            logging.warning(f"重新连接数据库 - {self._db_path}")
            self._db_connection = self._connect_to_database(self._db_path)
        connection = self._db_connection

        try:
            return self._fetch_all(connection, sql, params)
        except Exception as e1:
            # Second attempt with raw bytes, for text that cannot be decoded as str.
            try:
                connection.text_factory = bytes
                return self._fetch_all(connection, sql, params)
            except Exception as e2:
                logging.error(f"**********\nSQL: {sql}\nparams: {params}\n{e1}\n{e2}\n**********")
                return None
            finally:
                # Restore the default even when the retry failed.
                connection.text_factory = str

    def close_connection(self):
        """Close this instance's connection, if it has one."""
        # The attribute is missing when __init__ raised before connecting.
        connection = getattr(self, "_db_connection", None)
        if connection:
            connection.close()
            logging.info(f"关闭数据库 - {self._db_path}")
            self._db_connection = None

    def close_all_connection(self):
        """Close every connection stored in ``_connection_pool``."""
        for db_path in self._connection_pool:
            if self._connection_pool[db_path]:
                self._connection_pool[db_path].close()
                logging.info(f"关闭数据库 - {db_path}")
                self._connection_pool[db_path] = None

    def show__singleton_instances(self):
        """Print the class-name -> instance registry."""
        print(self._singleton_instances)

    def __del__(self):
        try:
            self.close_connection()
        except Exception:
            # Best effort only: a finalizer must not raise.
            pass
if __name__ == '__main__':
    # Manual smoke test: list the object names recorded in sqlite_master of two databases.
    a = DatabaseBase("test.db")
    b = DatabaseBase("test1.db")

    master_query = "select * from sqlite_master;"
    rows_a = a.execute_sql(master_query)
    rows_b = b.execute_sql(master_query)

    for rows in (rows_a, rows_b):
        print([row[1] for row in rows])

    a.close_connection()
    b.close_connection()

View File

@ -1,267 +0,0 @@
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name: parsingMicroMsg.py
# Description:
# Author: xaoyaoo
# Date: 2024/04/15
# -------------------------------------------------------------------------------
import logging
from .dbbase import DatabaseBase
from .utils import timestamp2str, bytes2str
import blackboxprotobuf
class ParsingMicroMsg(DatabaseBase):
_class_name = "MicroMsg"
def __init__(self, db_path):
    """
    Initialise the handler by delegating to the base-class initialiser.

    :param db_path: forwarded unchanged to the base class
    """
    super().__init__(db_path)
def get_BytesExtra(self, BytesExtra):
    """
    Decode a protobuf blob without a schema via blackboxprotobuf.

    :param BytesExtra: raw blob; anything that is not ``bytes`` yields None
    :return: the decoded message, or None when decoding raises
    """
    if not isinstance(BytesExtra, bytes):
        return None
    try:
        # decode_message returns (message, typedef); only the message is used.
        deserialize_data, _ = blackboxprotobuf.decode_message(BytesExtra)
    except Exception:
        return None
    else:
        return deserialize_data
def get_ExtraBuf(self, ExtraBuf: bytes):
    """
    Parse the ExtraBuf blob of a contact row into a dict of named fields.

    For every known marker the first occurrence in the blob is located; the byte
    right after the marker is a type tag that decides how the value is read:
    0x04 -> 4-byte little-endian int, 0x18 -> length-prefixed UTF-16 text,
    0x17 -> length-prefixed UTF-8 text, 0x05 -> 8 raw bytes rendered as hex.
    A marker that is absent maps to ""; a marker followed by any other tag
    leaves its field out of the result.

    :param ExtraBuf: raw blob
    :return: dict of field name -> value; None for an empty blob or when parsing
             raises (the error is printed)
    """
    if not ExtraBuf:
        return None
    try:
        buf_dict = {
            'DDF32683': '0', '74752C06': '性别[1男2女]', '88E28FCE': '2', '761A1D2D': '3', '0263A0CB': '4',
            '0451FF12': '5',
            '228C66A8': '6', '46CF10C4': '个性签名', 'A4D9024A': '', 'E2EAA8D1': '', '1D025BBF': '',
            '4D6C4570': '11',
            'F917BCC0': '公司名称', '759378AD': '手机号', '4335DFDD': '14', 'DE4CDAEB': '15', 'A72BC20A': '16',
            '069FED52': '17',
            '9B0F4299': '18', '3D641E22': '19', '1249822C': '20', '4EB96D85': '企微属性', 'B4F73ACB': '22',
            '0959EB92': '23',
            '3CF4A315': '24', 'C9477AC60201E44CD0E8': '26', 'B7ACF0F5': '28', '57A7B5A8': '29',
            '81AE19B4': '朋友圈背景',
            '695F3170': '31', 'FB083DD9': '32', '0240E37F': '33', '315D02A3': '34', '7DEC0BC3': '35',
            '0E719F13': '备注图片',
            '16791C90': '37'
        }

        rdata = {}
        for marker_hex, field_name in buf_dict.items():
            marker = bytes.fromhex(marker_hex)
            found_at = ExtraBuf.find(marker)
            if found_at == -1:
                rdata[field_name] = ""
                continue

            tag_pos = found_at + len(marker)
            type_id = ExtraBuf[tag_pos: tag_pos + 1]
            value_pos = tag_pos + 1

            if type_id == b"\x04":
                rdata[field_name] = int.from_bytes(ExtraBuf[value_pos: value_pos + 4], "little")
            elif type_id == b"\x18":
                length = int.from_bytes(ExtraBuf[value_pos: value_pos + 4], "little")
                raw = ExtraBuf[value_pos + 4: value_pos + 4 + length]
                rdata[field_name] = raw.decode("utf-16").rstrip("\x00")
            elif type_id == b"\x17":
                length = int.from_bytes(ExtraBuf[value_pos: value_pos + 4], "little")
                raw = ExtraBuf[value_pos + 4: value_pos + 4 + length]
                rdata[field_name] = raw.decode("utf-8").rstrip("\x00")
            elif type_id == b"\x05":
                rdata[field_name] = f"0x{ExtraBuf[value_pos: value_pos + 8].hex()}"
        return rdata
    except Exception as e:
        print(f'解析错误:\n{e}')
        return None
def ChatRoom_RoomData(self, RoomData):
    """
    Decode a ChatRoom.RoomData blob (mainly member wxids and their nicknames).

    :param RoomData: raw blob; anything that is not ``bytes`` yields None
    :return: the decoded data after bytes2str was applied to it, or None when
             decoding or conversion raises
    """
    if not isinstance(RoomData, bytes):
        return None
    try:
        data = self.get_BytesExtra(RoomData)
        # The return value is discarded, so any conversion must happen in place.
        bytes2str(data)
    except Exception:
        return None
    else:
        return data
def wxid2userinfo(self, wxid):
    """
    Look up contact information for one or several wxids.

    :param wxid: a single wxid (str) or a list of wxids; any other type yields {}
    :return: {wxid: {"wxid", "nickname", "remark", "account", "describe",
             "headImgUrl", "LabelIDList"}}; an empty dict when nothing is found
    """
    if isinstance(wxid, str):
        wxid = [wxid]
    elif not isinstance(wxid, list):
        return {}
    if not wxid:
        return {}

    # The ids are bound as parameters instead of being quoted into the statement,
    # so a quote inside an id cannot change the SQL.
    placeholders = ",".join("?" * len(wxid))
    sql = ("SELECT A.UserName, A.NickName, A.Remark,A.Alias,A.Reserved6,B.bigHeadImgUrl,A.LabelIDList "
           "FROM Contact A,ContactHeadImgUrl B "
           f"WHERE A.UserName = B.usrName AND A.UserName in ({placeholders}) "
           "ORDER BY NickName ASC;")
    result = self.execute_sql(sql, tuple(wxid))
    if not result:
        return {}

    users = {}
    for row in result:
        # wxid, nickname, remark, alias, description, avatar url, label ids
        username, nickname, remark, Alias, describe, headImgUrl, LabelIDList = row
        LabelIDList = LabelIDList.split(",") if LabelIDList else []
        users[username] = {"wxid": username, "nickname": nickname, "remark": remark, "account": Alias,
                           "describe": describe, "headImgUrl": headImgUrl, "LabelIDList": tuple(LabelIDList)}
    return users
def user_list(self, word=None):
    """
    Return the contact list, optionally filtered by a keyword.

    :param word: optional keyword matched against user name, nickname, remark,
                 alias and the pinyin columns
    :return: list of contact dicts; an empty list when nothing matches
    """
    sql = (
        "SELECT A.UserName, A.NickName, A.Remark,A.Alias,A.Reserved6,B.bigHeadImgUrl,A.LabelIDList "
        "FROM Contact A left join ContactHeadImgUrl B on A.UserName==B.usrName ")
    params = ()
    if word:
        # The keyword is bound as a parameter (once per placeholder) instead of
        # being spliced into the statement.
        pattern = f"%{word}%"
        sql += ("where "
                "A.UserName LIKE ? "
                "OR A.NickName LIKE ? "
                "OR A.Remark LIKE ? "
                "OR A.Alias LIKE ? "
                "OR A.QuanPin LIKE LOWER(?) "
                "OR LOWER(A.PYInitial) LIKE LOWER(?) ")
        params = (pattern,) * 6
    sql += "ORDER BY A.NickName DESC;"

    result = self.execute_sql(sql, params)
    if not result:
        return []

    users = []
    for row in result:
        # wxid, nickname, remark, alias, description, avatar url, label ids
        username, nickname, remark, Alias, describe, headImgUrl, LabelIDList = row
        LabelIDList = LabelIDList.split(",") if LabelIDList else []
        users.append(
            {"wxid": username, "nickname": nickname, "remark": remark, "account": Alias,
             "describe": describe, "headImgUrl": headImgUrl if headImgUrl else "",
             "LabelIDList": tuple(LabelIDList)})
    return users
def user_list_by_label(self, label_id):
    """
    List the contacts that carry a given label.

    :param label_id: label id to look for inside the comma-separated LabelIDList
    :return: list of dicts with ``wxid``, ``nickname``, ``remark``, ``account``,
             ``describe``, ``headImgUrl`` ("" when missing) and ``LabelIDList``
             (tuple); ``[]`` when nothing matches
    """
    users = []
    label = str(label_id)
    # LIKE is only a coarse pre-filter (bound as a parameter); the exact match is
    # done below because '%1%' would also hit ids such as 11 or 21.
    sql = (
        "SELECT A.UserName, A.NickName, A.Remark,A.Alias,A.Reserved6,B.bigHeadImgUrl,A.LabelIDList "
        "FROM Contact A left join ContactHeadImgUrl B on A.UserName==B.usrName "
        "where A.LabelIDList LIKE ? "
        "ORDER BY A.NickName DESC;")
    result = self.execute_sql(sql, (f"%{label}%",))
    if not result:
        return []

    for row in result:
        # Columns: wxid, nickname, remark, alias, description, avatar url, label ids
        username, nickname, remark, Alias, describe, headImgUrl, LabelIDList = row
        LabelIDList = LabelIDList.split(",") if LabelIDList else []
        # Keep the row only when the requested id is one of the list's elements.
        if label and label not in (item.strip() for item in LabelIDList):
            continue
        users.append(
            {"wxid": username, "nickname": nickname, "remark": remark, "account": Alias,
             "describe": describe, "headImgUrl": headImgUrl if headImgUrl else "",
             "LabelIDList": tuple(LabelIDList)})
    return users
def recent_chat_wxid(self):
    """
    Return the recently active chats recorded in the ChatInfo table.

    Only rows whose LastReadedCreateTime is non-NULL and greater than
    1007911408000 are selected, newest first.

    :return: list of dicts with ``wxid``, ``LastReadedCreateTime`` (converted
             through ``timestamp2str``, or None when the value is falsy) and
             ``LastReadedSvrId``; ``[]`` when the query yields nothing
    """
    sql = (
        "SELECT C.Username, C.LastReadedCreateTime,C.LastReadedSvrId "
        "FROM ChatInfo C WHERE C.LastReadedCreateTime IS NOT NULL AND C.LastReadedCreateTime > 1007911408000 "
        "ORDER BY C.LastReadedCreateTime DESC;")
    rows = self.execute_sql(sql)
    if not rows:
        return []

    recent = []
    for talker, read_time, svr_id in rows:
        # Format the timestamp only when one is present.
        read_time_text = timestamp2str(read_time) if read_time else None
        recent.append(
            {"wxid": talker, "LastReadedCreateTime": read_time_text, "LastReadedSvrId": svr_id})
    return recent
def chatroom_list(self, roomwxid=None):
    """
    List group chats together with their members and per-member remarks.

    :param roomwxid: optional fragment matched (SQL ``LIKE``) against ChatRoomName
    :return: list of dicts with ``ChatRoomName``, ``UserNameList``,
             ``DisplayNameList``, ``Announcement``, ``AnnouncementEditor`` and
             ``wxid2remark`` (wxid -> remark, extracted from the parsed RoomData);
             ``[]`` when the query yields nothing
    """
    rooms = []
    sql = (
        "SELECT A.ChatRoomName,A.UserNameList, A.DisplayNameList,A.RoomData, B.Announcement,B.AnnouncementEditor "
        "FROM ChatRoom A,ChatRoomInfo B "
        "where A.ChatRoomName==B.ChatRoomName ")
    params = None
    if roomwxid:
        # Bound as a parameter so quotes in the room id cannot alter the SQL.
        sql += "and A.ChatRoomName LIKE ? "
        params = (f"%{roomwxid}%",)
    sql += "ORDER BY A.ChatRoomName ASC;"

    result = self.execute_sql(sql, params) if params else self.execute_sql(sql)
    if not result:
        return []

    for row in result:
        ChatRoomName, UserNameList, DisplayNameList, RoomData, Announcement, AnnouncementEditor = row
        # A NULL column would otherwise raise AttributeError on .split
        UserNameList = UserNameList.split("^G") if UserNameList is not None else []
        DisplayNameList = DisplayNameList.split("^G") if DisplayNameList is not None else []

        RoomData = self.ChatRoom_RoomData(RoomData)
        wxid2remark = {}
        if RoomData:
            # Flatten every list-valued field of the parsed RoomData into one list.
            rd = []
            for v in RoomData.values():
                if isinstance(v, list):
                    rd += v
            for i in rd:
                try:
                    # Entries are kept when key '1' is a str and key '2' is truthy
                    # (presumably wxid and in-room remark - confirm against the RoomData parser).
                    if isinstance(i, dict) and isinstance(i.get('1'), str) and i.get('2'):
                        wxid2remark[i['1']] = i["2"]
                except Exception as e:
                    logging.error(f"wxid2remark: ChatRoomName:{ChatRoomName}, {i} error:{e}")
        rooms.append(
            {"ChatRoomName": ChatRoomName, "UserNameList": UserNameList, "DisplayNameList": DisplayNameList,
             "Announcement": Announcement, "AnnouncementEditor": AnnouncementEditor, "wxid2remark": wxid2remark})
    return rooms
def labels_dict(self, id_is_key=True):
    """
    Read the contact labels as a mapping.

    :param id_is_key: True -> {LabelId: LabelName}; False -> {LabelName: LabelId}
    :return: dict built from the ContactLabel table; an empty dict when the
             table yields no rows (a dict in every case, so callers can use
             mapping operations unconditionally)
    """
    sql = "SELECT LabelId, LabelName FROM ContactLabel ORDER BY LabelName ASC;"
    result = self.execute_sql(sql)
    if not result:
        return {}
    if id_is_key:
        return {label_id: label_name for label_id, label_name in result}
    return {label_name: label_id for label_id, label_name in result}

View File

@ -1,74 +0,0 @@
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name: parsingOpenIMContact.py
# Description:
# Author: xaoyaoo
# Date: 2024/04/16
# -------------------------------------------------------------------------------
from .dbbase import DatabaseBase
class ParsingOpenIMContact(DatabaseBase):
    """Read-only accessors for the OpenIMContact table."""
    _class_name = "OpenIMContact"

    def __init__(self, db_path):
        super().__init__(db_path)

    def wxid2userinfo(self, wxid):
        """
        Look up OpenIM contact details for one or several ids.

        :param wxid: a single id (str) or a list of ids
        :return: dict keyed by id; each value has ``wxid``, ``nickname``, ``remark``,
                 ``headImgUrl`` plus the fixed fields ``account``="" ,
                 ``describe``="" and ``LabelIDList``=(). ``{}`` when ``wxid`` is
                 neither str nor list, is empty, or nothing matches.
        """
        if isinstance(wxid, str):
            wxid = [wxid]
        elif not isinstance(wxid, list):
            return {}
        if not wxid:
            return {}

        # Ids are bound as parameters so quotes inside them cannot alter the SQL.
        placeholders = ",".join("?" * len(wxid))
        sql = ("SELECT A.UserName, A.NickName, A.Remark,A.BigHeadImgUrl "
               "FROM OpenIMContact A "
               f"WHERE A.UserName in ({placeholders}) "
               "ORDER BY NickName ASC;")
        result = self.execute_sql(sql, tuple(wxid))
        if not result:
            return {}

        users = {}
        for row in result:
            # Columns: user name, nickname, remark, avatar url
            username, nickname, remark, headImgUrl = row
            users[username] = {"wxid": username, "nickname": nickname, "remark": remark, "account": "",
                               "describe": "", "headImgUrl": headImgUrl, "LabelIDList": ()}
        return users

    def user_list(self, word=None):
        """
        List OpenIM contacts, optionally filtered by a keyword.

        :param word: optional keyword matched (SQL ``LIKE``) against UserName,
                     NickName and Remark
        :return: list of dicts shaped like the values of ``wxid2userinfo``;
                 ``[]`` when the query yields nothing
        """
        sql = "SELECT A.UserName, A.NickName, A.Remark,A.BigHeadImgUrl FROM OpenIMContact A "
        params = None
        if word:
            # The keyword is bound as a parameter instead of being spliced into the SQL.
            pattern = f"%{word}%"
            sql += ("where "
                    "UserName LIKE ? "
                    "OR NickName LIKE ? "
                    "OR Remark LIKE ? ")
            params = (pattern,) * 3
        sql += "ORDER BY NickName ASC;"

        result = self.execute_sql(sql, params) if params else self.execute_sql(sql)
        if not result:
            return []

        users = []
        for row in result:
            # Columns: user name, nickname, remark, avatar url
            username, nickname, remark, headImgUrl = row
            users.append(
                {"wxid": username, "nickname": nickname, "remark": remark, "account": "", "describe": "",
                 "headImgUrl": headImgUrl, "LabelIDList": ()})
        return users

View File

@ -1,94 +0,0 @@
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name: parsingPublicMsg.py
# Description:
# Author: xaoyaoo
# Date: 2024/07/03
# -------------------------------------------------------------------------------
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name: parsingMSG.py
# Description:
# Author: xaoyaoo
# Date: 2024/04/15
# -------------------------------------------------------------------------------
import json
import os
import re
from typing import Union, Tuple
import pandas as pd
from .dbbase import DatabaseBase
from .parsingMSG import ParsingMSG
from .utils import get_md5, name2typeid, typeid2name, type_converter, timestamp2str, xml2dict, match_BytesExtra
import lz4.block
import blackboxprotobuf
class ParsingPublicMsg(ParsingMSG):
    """Accessors for the PublicMsg table, reusing the message parsing of ParsingMSG."""
    _class_name = "PublicMSG"

    def msg_count(self, wxid: str = ""):
        """
        Count messages per talker.

        :param wxid: when given, count only this talker; otherwise count every talker
        :return: dict {wxid: message_count}, ordered by count descending;
                 ``{}`` when the query yields nothing
        """
        if wxid:
            # wxid is bound as a parameter so quotes inside it cannot alter the SQL.
            sql = "SELECT StrTalker, COUNT(*) FROM PublicMsg WHERE StrTalker=?;"
            result = self.execute_sql(sql, (wxid,))
        else:
            sql = "SELECT StrTalker, COUNT(*) FROM PublicMsg GROUP BY StrTalker ORDER BY COUNT(*) DESC;"
            result = self.execute_sql(sql)
        if not result:
            return {}

        df = pd.DataFrame(result, columns=["wxid", "msg_count"])
        # Largest counts first
        df = df.sort_values(by="msg_count", ascending=False)
        # chat_counts: {wxid: chat_count}
        chat_counts = df.set_index("wxid").to_dict()["msg_count"]
        return chat_counts

    def msg_count_total(self):
        """
        Count all rows of PublicMsg.

        :return: total number of messages, 0 when the query yields nothing
        """
        sql = "SELECT COUNT(*) FROM PublicMsg;"
        result = self.execute_sql(sql)
        if result and len(result) > 0:
            return result[0][0]
        return 0

    def msg_list(self, wxid="", start_index=0, page_size=500, msg_type: str = ""):
        """
        Fetch one page of messages ordered by CreateTime ascending.

        :param wxid: optional talker filter (StrTalker)
        :param start_index: offset of the first row (SQL LIMIT offset)
        :param page_size: maximum number of rows
        :param msg_type: optional filter on the Type column
        :return: (data, wxid_list) - ``data`` holds one ``self.msg_detail(row)``
                 result per row (msg_detail is not defined in this class), and
                 ``wxid_list`` the de-duplicated ``talker`` values found in it;
                 ``([], [])`` when the query yields nothing
        """
        # Build the WHERE clause and its parameters side by side so their
        # order always agrees, instead of patching the statement with str.replace.
        conditions, params = [], []
        if wxid:
            conditions.append("StrTalker=?")
            params.append(wxid)
        if msg_type:
            conditions.append("Type=?")
            params.append(msg_type)
        where = " AND ".join(conditions) if conditions else "1==1"

        sql = (
            "SELECT localId, IsSender, StrContent, StrTalker, Sequence, Type, SubType, CreateTime, MsgSvrID, "
            "DisplayContent, CompressContent, BytesExtra, ROW_NUMBER() OVER (ORDER BY CreateTime ASC) AS id "
            f"FROM PublicMsg WHERE {where} "
            "ORDER BY CreateTime ASC LIMIT ?, ?"
        )
        params += [start_index, page_size]

        result1 = self.execute_sql(sql, tuple(params))
        if not result1:
            return [], []

        data = []
        wxid_list = []
        for row in result1:
            tmpdata = self.msg_detail(row)
            wxid_list.append(tmpdata["talker"])
            data.append(tmpdata)
        wxid_list = list(set(wxid_list))
        return data, wxid_list

View File

@ -9,6 +9,19 @@ import os
import subprocess import subprocess
import sys import sys
import time import time
import logging
server_loger = logging.getLogger("server")
def is_port_in_use(_host, _port):
    """
    Tell whether a TCP port is already taken.

    Tries to bind a fresh IPv4 TCP socket to ``(_host, _port)``.

    :param _host: host/interface address to bind to
    :param _port: TCP port number
    :return: True when the bind fails with a socket error, False when it succeeds
    """
    import socket

    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.bind((_host, _port))
    except socket.error:
        return True
    else:
        return False
    finally:
        # Always release the probe socket, whatever the outcome.
        probe.close()
def start_falsk(merge_path="", wx_path="", key="", my_wxid="", port=5000, online=False, debug=False, def start_falsk(merge_path="", wx_path="", key="", my_wxid="", port=5000, online=False, debug=False,
@ -25,17 +38,17 @@ def start_falsk(merge_path="", wx_path="", key="", my_wxid="", port=5000, online
:param isopenBrowser: 是否自动打开浏览器 :param isopenBrowser: 是否自动打开浏览器
:return: :return:
""" """
tmp_path = os.path.join(os.getcwd(), "wxdump_tmp") # 临时文件夹,用于存放图片等 work_path = os.path.join(os.getcwd(), "wxdump_work") # 临时文件夹,用于存放图片等
if not os.path.exists(tmp_path): if not os.path.exists(work_path):
os.makedirs(tmp_path) os.makedirs(work_path)
print(f"[+] 创建临时文件夹:{tmp_path}") server_loger.info(f"[+] 创建临时文件夹:{work_path}")
print(f"[+] 创建临时文件夹:{work_path}")
session_file = os.path.join(tmp_path, "conf.json") # 用于存放各种基础信息 conf_auto_file = os.path.join(work_path, "conf_auto.json") # 用于存放各种基础信息
from flask import Flask, g from flask import Flask, g
from flask_cors import CORS from flask_cors import CORS
from pywxdump.api import api, read_session, save_session from pywxdump.api import rs_api, ls_api, get_conf, set_conf
import logging
# 检查端口是否被占用 # 检查端口是否被占用
if online: if online:
@ -49,7 +62,7 @@ def start_falsk(merge_path="", wx_path="", key="", my_wxid="", port=5000, online
app.config['TIMEOUT'] = 1000 app.config['TIMEOUT'] = 1000
app.secret_key = 'secret_key' app.secret_key = 'secret_key'
app.logger.setLevel(logging.ERROR) app.logger.setLevel(logging.WARNING)
CORS(app, resources={r"/*": {"origins": "*"}}, supports_credentials=True) # 允许所有域名跨域 CORS(app, resources={r"/*": {"origins": "*"}}, supports_credentials=True) # 允许所有域名跨域
@ -63,18 +76,20 @@ def start_falsk(merge_path="", wx_path="", key="", my_wxid="", port=5000, online
@app.before_request @app.before_request
def before_request(): def before_request():
g.work_path = work_path # 临时文件夹,用于存放图片等-新版本
g.caf = conf_auto_file # 用于存放各种基础信息-新版本
g.at = "auto_setting" # 用于默认设置-新版本
g.tmp_path = tmp_path # 临时文件夹,用于存放图片等 if merge_path: set_conf(conf_auto_file, g.at, "merge_path", merge_path)
g.sf = session_file # 用于存放各种基础信息 if wx_path: set_conf(conf_auto_file, g.at, "wx_path", wx_path)
if key: set_conf(conf_auto_file, g.at, "key", key)
if my_wxid: set_conf(conf_auto_file, g.at, "my_wxid", my_wxid)
if not os.path.exists(conf_auto_file):
set_conf(conf_auto_file, g.at, "last", my_wxid)
if merge_path: save_session(session_file, "test", "merge_path", merge_path) app.register_blueprint(rs_api)
if wx_path: save_session(session_file, "test", "wx_path", wx_path) app.register_blueprint(ls_api)
if key: save_session(session_file, "test", "key", key)
if my_wxid: save_session(session_file, "test", "my_wxid", my_wxid)
if not os.path.exists(session_file):
save_session(session_file, "test", "last", my_wxid)
app.register_blueprint(api)
if isopenBrowser: if isopenBrowser:
try: try:
# 自动打开浏览器 # 自动打开浏览器
@ -87,24 +102,18 @@ def start_falsk(merge_path="", wx_path="", key="", my_wxid="", port=5000, online
elif sys.platform.startswith('linux'): # Linux elif sys.platform.startswith('linux'): # Linux
subprocess.call(['xdg-open', url]) subprocess.call(['xdg-open', url])
else: else:
server_loger.error(f"Unsupported platform, can't open browser automatically.", exc_info=True)
print("Unsupported platform, can't open browser automatically.") print("Unsupported platform, can't open browser automatically.")
except Exception as e: except Exception as e:
pass server_loger.error(f"自动打开浏览器失败:{e}", exc_info=True)
def is_port_in_use(host, port):
import socket
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
try:
s.bind((host, port))
except socket.error:
return True
return False
if is_port_in_use(host, port): if is_port_in_use(host, port):
server_loger.error(f"Port {port} is already in use. Choose a different port.")
print(f"Port {port} is already in use. Choose a different port.") print(f"Port {port} is already in use. Choose a different port.")
input("Press Enter to exit...") input("Press Enter to exit...")
else: else:
time.sleep(1) time.sleep(1)
server_loger.info(f"启动flask服务host:port{host}:{port}")
print("[+] 请使用浏览器访问 http://127.0.0.1:5000/ 查看聊天记录") print("[+] 请使用浏览器访问 http://127.0.0.1:5000/ 查看聊天记录")
app.run(host=host, port=port, debug=debug) app.run(host=host, port=port, debug=debug)

View File

@ -5,8 +5,7 @@
# Author: xaoyaoo # Author: xaoyaoo
# Date: 2023/08/21 # Date: 2023/08/21
# ------------------------------------------------------------------------------- # -------------------------------------------------------------------------------
from .get_wx_info import read_info, get_wechat_db, get_core_db from .wx_info import get_wx_info, get_wx_db, get_core_db
from .get_bias_addr import BiasAddr from .get_bias_addr import BiasAddr
from .decryption import batch_decrypt, decrypt from .decryption import batch_decrypt, decrypt
from .merge_db import merge_msg_db, merge_copy_db, merge_media_msg_db, merge_db, decrypt_merge, merge_real_time_db, \ from .merge_db import merge_db, decrypt_merge, merge_real_time_db, all_merge_real_time_db
all_merge_real_time_db

View File

@ -16,9 +16,10 @@ import hashlib
import os import os
from typing import Union, List from typing import Union, List
from Cryptodome.Cipher import AES from Cryptodome.Cipher import AES
# from Crypto.Cipher import AES # 如果上面的导入失败,可以尝试使用这个 # from Crypto.Cipher import AES # 如果上面的导入失败,可以尝试使用这个
from .utils import wx_core_error, wx_core_loger
SQLITE_FILE_HEADER = "SQLite format 3\x00" # SQLite文件头 SQLITE_FILE_HEADER = "SQLite format 3\x00" # SQLite文件头
KEY_SIZE = 32 KEY_SIZE = 32
@ -26,7 +27,8 @@ DEFAULT_PAGESIZE = 4096
# 通过密钥解密数据库 # 通过密钥解密数据库
def decrypt(key: str, db_path, out_path): @wx_core_error
def decrypt(key: str, db_path: str, out_path: str):
""" """
通过密钥解密数据库 通过密钥解密数据库
:param key: 密钥 64位16进制字符串 :param key: 密钥 64位16进制字符串
@ -72,11 +74,19 @@ def decrypt(key: str, db_path, out_path):
return True, [db_path, out_path, key] return True, [db_path, out_path, key]
@wx_core_error
def batch_decrypt(key: str, db_path: Union[str, List[str]], out_path: str, is_logging: bool = False): def batch_decrypt(key: str, db_path: Union[str, List[str]], out_path: str, is_print: bool = False):
"""
批量解密数据库
:param key: 密钥 64位16进制字符串
:param db_path: 待解密的数据库路径(文件或文件夹)
:param out_path: 解密后的数据库输出路径(文件夹)
:param is_logging: 是否打印日志
:return: (bool, [[input_db_path, output_db_path, key],...])
"""
if not isinstance(key, str) or not isinstance(out_path, str) or not os.path.exists(out_path) or len(key) != 64: if not isinstance(key, str) or not isinstance(out_path, str) or not os.path.exists(out_path) or len(key) != 64:
error = f"[-] (key:'{key}' or out_path:'{out_path}') Error!" error = f"[-] (key:'{key}' or out_path:'{out_path}') Error!"
if is_logging: print(error) wx_core_loger.error(error, exc_info=True)
return False, error return False, error
process_list = [] process_list = []
@ -84,7 +94,7 @@ def batch_decrypt(key: str, db_path: Union[str, List[str]], out_path: str, is_lo
if isinstance(db_path, str): if isinstance(db_path, str):
if not os.path.exists(db_path): if not os.path.exists(db_path):
error = f"[-] db_path:'{db_path}' not found!" error = f"[-] db_path:'{db_path}' not found!"
if is_logging: print(error) wx_core_loger.error(error, exc_info=True)
return False, error return False, error
if os.path.isfile(db_path): if os.path.isfile(db_path):
@ -104,7 +114,7 @@ def batch_decrypt(key: str, db_path: Union[str, List[str]], out_path: str, is_lo
process_list.append([key, inpath, outpath]) process_list.append([key, inpath, outpath])
else: else:
error = f"[-] db_path:'{db_path}' Error " error = f"[-] db_path:'{db_path}' Error "
if is_logging: print(error) wx_core_loger.error(error, exc_info=True)
return False, error return False, error
elif isinstance(db_path, list): elif isinstance(db_path, list):
@ -114,9 +124,9 @@ def batch_decrypt(key: str, db_path: Union[str, List[str]], out_path: str, is_lo
for inpath in db_path: for inpath in db_path:
if not os.path.exists(inpath): if not os.path.exists(inpath):
erreor = f"[-] db_path:'{db_path}' not found!" error = f"[-] db_path:'{db_path}' not found!"
if is_logging: print(erreor) wx_core_loger.error(error, exc_info=True)
return False, erreor return False, error
inpath = os.path.normpath(inpath) inpath = os.path.normpath(inpath)
rel = os.path.relpath(os.path.dirname(inpath), rt_path) rel = os.path.relpath(os.path.dirname(inpath), rt_path)
@ -126,7 +136,7 @@ def batch_decrypt(key: str, db_path: Union[str, List[str]], out_path: str, is_lo
process_list.append([key, inpath, outpath]) process_list.append([key, inpath, outpath])
else: else:
error = f"[-] db_path:'{db_path}' Error " error = f"[-] db_path:'{db_path}' Error "
if is_logging: print(error) wx_core_loger.error(error, exc_info=True)
return False, error return False, error
result = [] result = []
@ -139,7 +149,7 @@ def batch_decrypt(key: str, db_path: Union[str, List[str]], out_path: str, is_lo
if not os.listdir(os.path.join(root, dir)): if not os.listdir(os.path.join(root, dir)):
os.rmdir(os.path.join(root, dir)) os.rmdir(os.path.join(root, dir))
if is_logging: if is_print:
print("=" * 32) print("=" * 32)
success_count = 0 success_count = 0
fail_count = 0 fail_count = 0

View File

@ -151,7 +151,7 @@ class BiasAddr:
return j - module.lpBaseOfDll return j - module.lpBaseOfDll
return 0 return 0
def run(self, logging_path=False, version_list_path=None): def run(self, logging_path=False, WX_OFFS_PATH=None):
if not self.get_process_handle()[0]: if not self.get_process_handle()[0]:
return None return None
mobile_bias = self.search_memory_value(self.mobile, self.module_name) mobile_bias = self.search_memory_value(self.mobile, self.module_name)
@ -164,11 +164,11 @@ class BiasAddr:
rdata = {self.version: [name_bias, account_bias, mobile_bias, 0, key_bias]} rdata = {self.version: [name_bias, account_bias, mobile_bias, 0, key_bias]}
if version_list_path and os.path.exists(version_list_path): if WX_OFFS_PATH and os.path.exists(WX_OFFS_PATH):
with open(version_list_path, "r", encoding="utf-8") as f: with open(WX_OFFS_PATH, "r", encoding="utf-8") as f:
data = json.load(f) data = json.load(f)
data.update(rdata) data.update(rdata)
with open(version_list_path, "w", encoding="utf-8") as f: with open(WX_OFFS_PATH, "w", encoding="utf-8") as f:
json.dump(data, f, ensure_ascii=False, indent=4) json.dump(data, f, ensure_ascii=False, indent=4)
if os.path.exists(logging_path) and isinstance(logging_path, str): if os.path.exists(logging_path) and isinstance(logging_path, str):
with open(logging_path, "a", encoding="utf-8") as f: with open(logging_path, "a", encoding="utf-8") as f:

View File

@ -0,0 +1,471 @@
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name: merge_db.py
# Description:
# Author: xaoyaoo
# Date: 2023/12/03
# -------------------------------------------------------------------------------
import logging
import os
import shutil
import sqlite3
import subprocess
import time
from typing import List
from .decryption import batch_decrypt
from .wx_info import get_core_db
from .utils import wx_core_loger, wx_core_error
@wx_core_error
def execute_sql(connection, sql, params=None):
    """
    Execute a SQL statement and return all rows.

    The statement is first run with the connection's current text factory. If
    that raises, it is retried once with ``text_factory = bytes`` (presumably to
    cope with TEXT values that cannot be decoded as str - the source does not
    say). The connection's original text factory is restored after the retry,
    whether the retry succeeds or fails.

    :param connection: open sqlite3 connection
    :param sql: SQL statement to execute
    :param params: optional parameters bound to the statement
    :return: list of rows, or None when both attempts fail (the error is logged)
    """

    def _fetch_all():
        cursor = connection.cursor()
        if params:
            cursor.execute(sql, params)
        else:
            cursor.execute(sql)
        return cursor.fetchall()

    try:
        return _fetch_all()
    except Exception:
        previous_factory = connection.text_factory
        connection.text_factory = bytes
        try:
            return _fetch_all()
        except Exception as e:
            wx_core_loger.error(f"**********\nSQL: {sql}\nparams: {params}\n{e}\n**********", exc_info=True)
            return None
        finally:
            # Do not leave the connection in bytes mode for later queries.
            connection.text_factory = previous_factory
@wx_core_error
def check_create_sync_log(connection):
    """
    Make sure the ``sync_log`` bookkeeping table exists.

    ``sync_log`` records, per (db_path, tbl_name), the source row count and the
    row count of the merged table, plus creation/update timestamps.

    :param connection: open sqlite3 connection of the merged database
    :return: True once the table is present (errors propagate as exceptions)
    """
    out_cursor = connection.cursor()
    try:
        sync_log_status = execute_sql(connection,
                                      "SELECT name FROM sqlite_master WHERE type='table' AND name='sync_log'")
        # execute_sql returns None on failure; treat that like "not found" and let
        # the idempotent DDL below decide.
        if not sync_log_status:
            # db_path: source database path, tbl_name: table name,
            # src_count: rows in the source table, current_count: rows in the merged table
            sync_record_create_sql = ("CREATE TABLE IF NOT EXISTS sync_log ("
                                      "id INTEGER PRIMARY KEY AUTOINCREMENT,"
                                      "db_path TEXT NOT NULL,"
                                      "tbl_name TEXT NOT NULL,"
                                      "src_count INT,"
                                      "current_count INT,"
                                      "createTime INT DEFAULT (strftime('%s', 'now')), "
                                      "updateTime INT DEFAULT (strftime('%s', 'now'))"
                                      ");")
            out_cursor.execute(sync_record_create_sql)
            # Lookup indexes
            out_cursor.execute("CREATE INDEX IF NOT EXISTS idx_sync_log_db_path ON sync_log (db_path);")
            out_cursor.execute("CREATE INDEX IF NOT EXISTS idx_sync_log_tbl_name ON sync_log (tbl_name);")
            # Composite unique index: one log row per (db_path, tbl_name)
            out_cursor.execute(
                "CREATE UNIQUE INDEX IF NOT EXISTS idx_sync_log_db_tbl ON sync_log (db_path, tbl_name);")
            connection.commit()
    finally:
        out_cursor.close()
    return True
@wx_core_error
def check_create_file_md5(connection):
    """
    Placeholder for a future ``file_md5`` table check (file records intended
    for later de-duplication). Currently unused: the body does nothing.

    :param connection: sqlite3 connection (ignored for now)
    :return: None
    """
    return None
@wx_core_error
def merge_db(db_paths: list[dict], save_path: str = "merge.db", is_merge_data: bool = True,
             startCreateTime: int = 0, endCreateTime: int = 0):
    """
    Merge several SQLite databases into one, ignoring primary keys and duplicate rows.

    Every source table is re-created (without constraints) in the output database
    and protected by a UNIQUE index over all columns, so ``INSERT OR IGNORE``
    drops rows that are already present. Progress is recorded per
    (db_path, table) in the ``sync_log`` table.

    :param db_paths: [{"db_path": "xxx", "de_path": "xxx"}, ...]
                     ``db_path`` is the original path (used as the key in sync_log),
                     ``de_path`` the decrypted file that is actually read
                     (defaults to ``db_path``)
    :param save_path: output file; when it is an existing directory a
                      ``merge_<timestamp>.db`` file is created inside it
    :param is_merge_data: False -> only create tables/indexes, insert no rows
    :param startCreateTime: lower bound (exclusive) on CreateTime, applied to tables
                            that have that column; 0 disables it
    :param endCreateTime: upper bound (exclusive) on CreateTime; 0 disables it
    :return: path of the merged database
    :raises TypeError: when ``db_paths`` is not a list
    """
    if os.path.isdir(save_path):
        save_path = os.path.join(save_path, f"merge_{int(time.time())}.db")

    if isinstance(db_paths, list):
        # alias -> (original path, decrypted path)
        databases = {f"MSG{i}": (db['db_path'],
                                 db.get('de_path', db['db_path'])
                                 ) for i, db in enumerate(db_paths)
                     }
    else:
        raise TypeError("db_paths 类型错误")

    outdb = sqlite3.connect(save_path)
    try:
        is_sync_log = check_create_sync_log(outdb)
        if not is_sync_log:
            wx_core_loger.warning("创建同步记录表失败")

        out_cursor = outdb.cursor()

        # Merge every source database into the output database
        for alias, db in databases.items():
            db_path = db[0]
            de_path = db[1]

            # Attach the source; the path is bound as a parameter so quotes in it are harmless
            out_cursor.execute(f"ATTACH DATABASE ? AS {alias}", (str(de_path),))
            outdb.commit()
            sql_query_tbl_name = f"SELECT name FROM {alias}.sqlite_master WHERE type='table' ORDER BY name;"
            tables = execute_sql(outdb, sql_query_tbl_name)
            # execute_sql yields None on failure; treat that as "no tables"
            for table in tables or []:
                table = table[0]
                if table == "sqlite_sequence":
                    continue
                # Column names, in table order (names may come back as bytes)
                sql_query_columns = f"PRAGMA table_info({table})"
                columns_info = execute_sql(outdb, sql_query_columns)
                columns = []
                for info in columns_info or []:
                    col_name = info[1] if isinstance(info[1], str) else info[1].decode()
                    if col_name not in columns:
                        columns.append(col_name)
                if not columns:
                    continue

                # Create an empty copy of the table in the output database
                sql_create_tbl = f"CREATE TABLE IF NOT EXISTS {table} AS SELECT * FROM {alias}.{table} WHERE 0 = 1;"
                out_cursor.execute(sql_create_tbl)

                # UNIQUE index over all columns; COALESCE makes NULLs compare equal
                index_name = f"{table}_unique_index"
                coalesce_columns = ','.join(f"COALESCE({column}, '')" for column in columns)
                sql = f"CREATE UNIQUE INDEX IF NOT EXISTS {index_name} ON {table} ({coalesce_columns})"
                out_cursor.execute(sql)

                # Make sure a sync_log row exists for this (db_path, table)
                sql_query_sync_log = f"SELECT src_count FROM sync_log WHERE db_path=? AND tbl_name=?"
                sync_log = execute_sql(outdb, sql_query_sync_log, (db_path, table))
                if not sync_log or len(sync_log) < 1:
                    sql_insert_sync_log = "INSERT INTO sync_log (db_path, tbl_name, src_count, current_count) VALUES (?, ?, ?, ?)"
                    out_cursor.execute(sql_insert_sync_log, (db_path, table, 0, 0))
                outdb.commit()

                if is_merge_data:
                    # Skip tables whose source has not grown since the last recorded merge
                    log_src_count = execute_sql(outdb, sql_query_sync_log, (db_path, table))[0][0]
                    src_count = execute_sql(outdb, f"SELECT COUNT(*) FROM {alias}.{table}")[0][0]
                    if src_count <= log_src_count:
                        wx_core_loger.info(f"忽略 {db_path} {de_path} {table} {src_count} {log_src_count}")
                        continue

                    # Build the source query, optionally restricted by CreateTime
                    sql_base = f"SELECT {','.join([i for i in columns])} FROM {alias}.{table} "
                    where_clauses, params = [], []
                    if "CreateTime" in columns:
                        if startCreateTime > 0:
                            where_clauses.append("CreateTime > ?")
                            params.append(startCreateTime)
                        if endCreateTime > 0:
                            where_clauses.append("CreateTime < ?")
                            params.append(endCreateTime)
                    # With a WHERE clause the rows are also ordered by CreateTime
                    sql = f"{sql_base} WHERE {' AND '.join(where_clauses)} ORDER BY CreateTime" if where_clauses else sql_base
                    src_data = execute_sql(outdb, sql, tuple(params))
                    if not src_data or len(src_data) < 1:
                        continue

                    # Insert, silently dropping rows rejected by the unique index
                    sql = f"INSERT OR IGNORE INTO {table} ({','.join([i for i in columns])}) VALUES ({','.join(['?'] * len(columns))})"
                    try:
                        out_cursor.executemany(sql, src_data)

                        # Record the new counts in sync_log
                        sql_update_sync_log = ("UPDATE sync_log "
                                               "SET src_count = ? ,"
                                               f"current_count=(SELECT COUNT(*) FROM {table}) "
                                               "WHERE db_path=? AND tbl_name=?")
                        out_cursor.execute(sql_update_sync_log, (src_count, db_path, table))
                    except Exception as e:
                        wx_core_loger.error(
                            f"error: {db_path}\n{de_path}\n{table}\n{sql}\n{src_data}\n{len(src_data)}\n{e}\n",
                            exc_info=True)

            # Commit pending inserts first: SQLite may refuse to DETACH a database
            # while a transaction is still open.
            outdb.commit()
            sql_detach = f"DETACH DATABASE {alias}"
            out_cursor.execute(sql_detach)
            outdb.commit()
        out_cursor.close()
    finally:
        # Release the output file even when a step above raised.
        outdb.close()
    return save_path
# @wx_core_error
# def merge_db1(db_paths: list[dict], save_path: str = "merge.db", is_merge_data: bool = True,
# startCreateTime: int = 0, endCreateTime: int = 0):
# """
# 合并数据库 会忽略主键以及重复的行。
# :param db_paths: [{"db_path": "xxx", "de_path": "xxx"},...]
# db_path表示初始路径de_path表示解密后的路径初始路径用于保存合并的日志情况解密后的路径用于读取数据
# :param save_path: str 输出文件路径
# :param is_merge_data: bool 是否合并数据(如果为False则只解密并创建表不插入数据)
# :param startCreateTime: 开始时间戳 主要用于MSG数据库的合并
# :param endCreateTime: 结束时间戳 主要用于MSG数据库的合并
# :return:
# """
# if os.path.isdir(save_path):
# save_path = os.path.join(save_path, f"merge_{int(time.time())}.db")
#
# if isinstance(db_paths, list):
# # alias, file_path
# databases = {f"MSG{i}": (db['db_path'],
# db.get('de_path', db['db_path'])
# ) for i, db in enumerate(db_paths)
# }
# else:
# raise TypeError("db_paths 类型错误")
#
# from sqlalchemy import create_engine, MetaData, Table, select, insert, Column, UniqueConstraint
# from sqlalchemy.orm import sessionmaker
# from sqlalchemy import inspect, PrimaryKeyConstraint
#
# outdb = create_engine(f"sqlite:///{save_path}", echo=False)
#
# # 创建Session实例
# Session = sessionmaker()
# Session.configure(bind=outdb)
# session = Session()
#
# # 将MSG_db_paths中的数据合并到out_db_path中
# for alias, db in databases.items():
# db_path = db[0]
# de_path = db[1]
#
# db_engine = create_engine(f"sqlite:///{de_path}", echo=False)
#
# # 反射源数据库的表结构
# metadata = MetaData()
# metadata.reflect(bind=db_engine)
#
# # 创建表
# outdb_metadata = MetaData()
# inspector = inspect(db_engine)
# table_names = [i for i in inspector.get_table_names() if i not in ["sqlite_sequence"]]
# for table_name in table_names:
# # 创建表table
# columns_list_dict = inspector.get_columns(table_name)
# col_names = [i['name'] for i in columns_list_dict]
# columns = [Column(i['name'], i['type'], primary_key=False) for i in columns_list_dict]
# table = Table(table_name, outdb_metadata, *columns)
# if len(columns) > 1: # 联合索引
# unique_constraint = UniqueConstraint(*col_names, name=f"{table_name}_unique_index")
# table.append_constraint(unique_constraint)
# else:
# table.append_constraint(PrimaryKeyConstraint(*col_names))
# table.create(outdb, checkfirst=True)
#
# # 将源数据库中的数据插入目标数据库
# outdb_metadata = MetaData()
# for table_name in metadata.tables:
# source_table = Table(table_name, metadata, autoload_with=db_engine)
# outdb_table = Table(table_name, outdb_metadata, autoload_with=outdb)
#
# # 查询源表中的所有数据
# query = select(source_table)
# with db_engine.connect() as connection:
# result = connection.execute(query).fetchall()
#
# # 插入到目标表中
# for row in result:
# row_data = row._asdict()
#
# # 尝试将所有文本数据转换为 UTF-8
# for key, value in row_data.items():
# if isinstance(value, str):
# row_data[key] = value.encode("utf-8")
#
# insert_stmt = insert(outdb_table).values(row_data)
# try:
# session.execute(insert_stmt)
# except Exception as e:
# pass
# db_engine.dispose()
#
# # 提交事务
# session.commit()
# # 关闭Session
# session.close()
# outdb.dispose()
# return save_path
@wx_core_error
def decrypt_merge(wx_path: str, key: str, outpath: str = "",
                  merge_save_path: str = None,
                  is_merge_data=True, is_del_decrypted: bool = True,
                  startCreateTime: int = 0, endCreateTime: int = 0,
                  db_type=None) -> (bool, str):
    """
    Decrypt the selected WeChat databases and merge them into a single file.

    Existing content of ``<outpath>/decrypted`` is removed before decrypting.

    :param wx_path: WeChat data folder, eg: C:\\*******\\WeChat Files\\wxid_*********
    :param key: decryption key
    :param outpath: working/output folder (defaults to "decrypt_merge_tmp")
    :param merge_save_path: path of the merged database; defaults to
                            ``<outpath>/merge_<timestamp>.db``
    :param is_merge_data: False -> only decrypt and create tables, insert no rows
    :param is_del_decrypted: remove the decrypted intermediate folder afterwards
    :param startCreateTime: start timestamp, forwarded to ``merge_db``
    :param endCreateTime: end timestamp, forwarded to ``merge_db``
    :param db_type: database types passed to ``get_core_db`` (None -> empty list)
    :return: (True, merged database path) or (False, error information)
    """
    db_type = [] if db_type is None else db_type

    outpath = outpath or "decrypt_merge_tmp"
    if merge_save_path is None:
        merge_save_path = os.path.join(outpath, f"merge_{int(time.time())}.db")
    decrypted_path = os.path.join(outpath, "decrypted")

    if not (wx_path and key and os.path.exists(wx_path)):
        wx_core_loger.error("参数错误", exc_info=True)
        return False, "参数错误"

    # Locate the databases to decrypt
    found, wxdbpaths = get_core_db(wx_path, db_type)
    if not found:
        wx_core_loger.error(f"获取数据库路径失败{wxdbpaths}", exc_info=True)
        return False, wxdbpaths

    # Empty the decrypted folder when it already holds something
    if os.path.exists(decrypted_path) and os.listdir(decrypted_path):
        for folder, subfolders, filenames in os.walk(decrypted_path, topdown=False):
            for filename in filenames:
                os.remove(os.path.join(folder, filename))
            for subfolder in subfolders:
                os.rmdir(os.path.join(folder, subfolder))

    if not os.path.exists(decrypted_path):
        os.makedirs(decrypted_path)

    # Index by source path (this also de-duplicates the paths)
    wxdbpaths = {info["db_path"]: info for info in wxdbpaths}

    # Decrypt everything into decrypted_path
    decrypted_ok, ret = batch_decrypt(key=key, db_path=list(wxdbpaths.keys()), out_path=decrypted_path,
                                      is_print=False)
    if not decrypted_ok:
        wx_core_loger.error(f"解密失败{ret}", exc_info=True)
        return False, ret

    # Keep only the databases that decrypted successfully
    out_dbs = [item for item_ok, item in ret if item_ok]
    parpare_merge_db_path = [{"db_path": src_path, "de_path": dec_path} for src_path, dec_path, _ in out_dbs]

    merge_save_path = merge_db(parpare_merge_db_path, merge_save_path, is_merge_data=is_merge_data,
                               startCreateTime=startCreateTime, endCreateTime=endCreateTime)
    if is_del_decrypted:
        shutil.rmtree(decrypted_path, True)

    if not isinstance(merge_save_path, str):
        return False, "未知错误"
    return True, merge_save_path
@wx_core_error
def merge_real_time_db(key, merge_path: str, db_paths: [dict] or dict):
    r"""
    Merge live (real-time) database content by running the bundled
    ``tools/realTime.exe``. Only 64-bit systems are supported.

    :param key: decryption key
    :param merge_path: path of the merged database (made absolute; its folder is
                       created when missing)
    :param db_paths: one dict or a list of dicts, eg: {'wxid': 'wxid_***', 'db_type': 'MicroMsg',
                     'db_path': 'C:\**\wxid_***\Msg\MicroMsg.db', 'wxid_dir': 'C:\***\wxid_***'}.
                     Entries whose ``db_path`` does not exist, or whose path
                     contains none of "MSG" / "MicroMsg" / "MediaMSG", are skipped.
    :return: (True, merge_path) when the tool's stdout contains "SUCCESS",
             otherwise (False, stderr bytes)
    :raises Exception: when the interpreter is not 64-bit
    """
    import platform

    # The helper executable is 64-bit only
    if platform.architecture()[0] != '64bit':
        raise Exception("System is not 64-bit.")

    if isinstance(db_paths, dict):
        db_paths = [db_paths]

    merge_path = os.path.abspath(merge_path)  # must be absolute: the tool runs with another cwd
    merge_path_base = os.path.dirname(merge_path)
    if not os.path.exists(merge_path_base):
        os.makedirs(merge_path_base)

    endbs = []
    for db_info in db_paths:
        db_path = os.path.abspath(db_info['db_path'])
        if not os.path.exists(db_path):
            continue
        if "MSG" not in db_path and "MicroMsg" not in db_path and "MediaMSG" not in db_path:
            continue
        endbs.append(db_path)
    # De-duplicate while keeping a stable order
    endbs = list(dict.fromkeys(endbs))

    current_path = os.path.dirname(__file__)
    real_time_exe_path = os.path.join(current_path, "tools", "realTime.exe")

    # Pass an argument list so that subprocess does the quoting; this also covers
    # an executable path containing spaces. With no database at all a single
    # empty argument is sent, as the former command string did.
    cmd = [real_time_exe_path, str(key), merge_path, *(endbs or [""])]
    p = subprocess.Popen(cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=merge_path_base,
                         creationflags=subprocess.CREATE_NO_WINDOW)
    out, err = p.communicate()

    # Compare on bytes: the tool's output is not guaranteed to be valid UTF-8
    if out and b"SUCCESS" in out:
        wx_core_loger.info(f"合并实时数据库成功{out}")
        return True, merge_path
    if err:
        wx_core_loger.error(f"合并实时数据库失败\n{out}\n{err}")
    return False, err
@wx_core_error
def all_merge_real_time_db(key, wx_path, merge_path: str):
    """
    Merge every real-time message database (MediaMSG, MSG, MicroMsg) of one account.
    This is a full merge, so duplicated rows are possible and must be removed by the caller.

    :param key: decryption key
    :param wx_path: WeChat account directory
    :param merge_path: path of the merged database eg: C:\\*******\\WeChat Files\\wxid_*********\\merge.db
    :return: (True, merge_path) on success, otherwise (False, reason)
    """
    if not merge_path or not key or not wx_path:
        return False, "merge_path, wx_path and key are required"

    try:
        from pywxdump import get_core_db
    except ImportError:
        return False, "未找到模块 pywxdump"

    core_db = get_core_db(wx_path, ["MediaMSG", "MSG", "MicroMsg"])
    if not core_db:
        # a falsy result means get_core_db did not produce its (flag, payload) tuple
        return False, "get_core_db failed"
    if not core_db[0]:
        return False, core_db[1]

    merged = merge_real_time_db(key=key, merge_path=merge_path, db_paths=core_db[1])
    if not merged:
        # no (flag, payload) tuple came back from the merge
        return False, "merge_real_time_db failed"
    # propagate the real outcome instead of unconditionally reporting success
    return merged

View File

@ -0,0 +1,15 @@
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name: __init__.py
# Description:
# Author: xaoyaoo
# Date: 2024/07/23
# -------------------------------------------------------------------------------
from .common_utils import verify_key, get_exe_version, get_exe_bit, wx_core_error
from .ctypes_utils import get_process_list, get_memory_maps, get_process_exe_path, \
get_file_version_info
from .memory_search import search_memory
from ._loger import wx_core_loger
# Database type names treated as "core" databases (file names without the numeric suffix and ".db").
DB_TYPE_CORE = ["MSG", "MediaMSG", "MicroMsg", "OpenIMContact", "OpenIMMedia", "OpenIMMsg", "Favorite", "PublicMsg"]

View File

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name: _loger.py
# Description:
# Author: xaoyaoo
# Date: 2024/07/23
# -------------------------------------------------------------------------------
import logging
# Logger named "wx_core"; handlers and level are not configured here.
wx_core_loger = logging.getLogger("wx_core")

View File

@ -11,12 +11,20 @@ import hmac
import sys import sys
import traceback import traceback
import hashlib import hashlib
<<<<<<< HEAD:pywxdump/wx_info/utils.py
=======
from ._loger import wx_core_loger
>>>>>>> ta:pywxdump/wx_core/utils/common_utils.py
if sys.platform == "win32": if sys.platform == "win32":
from win32com.client import Dispatch from win32com.client import Dispatch
else: else:
Dispatch = None Dispatch = None
<<<<<<< HEAD:pywxdump/wx_info/utils.py
def info_error(func): def info_error(func):
=======
def wx_core_error(func):
>>>>>>> ta:pywxdump/wx_core/utils/common_utils.py
""" """
错误处理装饰器 错误处理装饰器
:param func: :param func:
@ -26,10 +34,15 @@ def info_error(func):
try: try:
return func(*args, **kwargs) return func(*args, **kwargs)
except Exception as e: except Exception as e:
<<<<<<< HEAD:pywxdump/wx_info/utils.py
traceback_data = traceback.format_exc() traceback_data = traceback.format_exc()
rdata = f"{traceback_data}" rdata = f"{traceback_data}"
print(f"info_error: \n{rdata}") print(f"info_error: \n{rdata}")
return "None" return "None"
=======
wx_core_loger.error(f"wx_core_error: {e}", exc_info=True)
return None
>>>>>>> ta:pywxdump/wx_core/utils/common_utils.py
return wrapper return wrapper
@ -53,7 +66,7 @@ def verify_key(key, wx_db_path):
return False return False
return True return True
@info_error @wx_core_error
def get_exe_version(file_path): def get_exe_version(file_path):
""" """
获取 PE 文件的版本号 获取 PE 文件的版本号

View File

@ -0,0 +1,264 @@
import ctypes
import ctypes.wintypes
from collections import namedtuple
# Required Win32 constants
TH32CS_SNAPPROCESS = 0x00000002  # flag passed to CreateToolhelp32Snapshot
MAX_PATH = 260  # buffer length used for path / executable-name buffers
PROCESS_QUERY_INFORMATION = 0x0400  # access right requested from OpenProcess
PROCESS_VM_READ = 0x0010  # access right requested from OpenProcess
# MEMORY_BASIC_INFORMATION structure definition
class MEMORY_BASIC_INFORMATION(ctypes.Structure):
    """Region description that VirtualQueryEx writes into (see get_memory_maps)."""

    _fields_ = [
        # region location
        ('BaseAddress', ctypes.wintypes.LPVOID),
        ('AllocationBase', ctypes.wintypes.LPVOID),
        ('AllocationProtect', ctypes.wintypes.DWORD),
        # region extent
        ('RegionSize', ctypes.c_size_t),
        # region attributes
        ('State', ctypes.wintypes.DWORD),
        ('Protect', ctypes.wintypes.DWORD),
        ('Type', ctypes.wintypes.DWORD),
    ]
class MODULEINFO(ctypes.Structure):
    """Output structure declared for GetModuleInformation."""

    _fields_ = [
        ("lpBaseOfDll", ctypes.c_void_p),  # remote pointer
        ("SizeOfImage", ctypes.c_ulong),
        ("EntryPoint", ctypes.c_void_p),  # remote pointer
    ]
# PROCESSENTRY32 structure definition
class PROCESSENTRY32(ctypes.Structure):
    """Process entry filled in by Process32First / Process32Next (see get_process_list)."""

    _fields_ = [
        ("dwSize", ctypes.wintypes.DWORD),  # set to sizeof(PROCESSENTRY32) by the caller
        ("cntUsage", ctypes.wintypes.DWORD),
        ("th32ProcessID", ctypes.wintypes.DWORD),
        ("th32DefaultHeapID", ctypes.POINTER(ctypes.wintypes.ULONG)),
        ("th32ModuleID", ctypes.wintypes.DWORD),
        ("cntThreads", ctypes.wintypes.DWORD),
        ("th32ParentProcessID", ctypes.wintypes.DWORD),
        ("pcPriClassBase", ctypes.wintypes.LONG),
        ("dwFlags", ctypes.wintypes.DWORD),
        ("szExeFile", ctypes.c_char * MAX_PATH),  # executable name as a byte string
    ]
class VS_FIXEDFILEINFO(ctypes.Structure):
    """Fixed version block that get_file_version_info reads via VerQueryValueW."""

    _fields_ = [
        ('dwSignature', ctypes.wintypes.DWORD),  # compared against 0xFEEF04BD by get_file_version_info
        ('dwStrucVersion', ctypes.wintypes.DWORD),
        # file version, split into most / least significant DWORDs
        ('dwFileVersionMS', ctypes.wintypes.DWORD),
        ('dwFileVersionLS', ctypes.wintypes.DWORD),
        ('dwProductVersionMS', ctypes.wintypes.DWORD),
        ('dwProductVersionLS', ctypes.wintypes.DWORD),
        ('dwFileFlagsMask', ctypes.wintypes.DWORD),
        ('dwFileFlags', ctypes.wintypes.DWORD),
        ('dwFileOS', ctypes.wintypes.DWORD),
        ('dwFileType', ctypes.wintypes.DWORD),
        ('dwFileSubtype', ctypes.wintypes.DWORD),
        ('dwFileDateMS', ctypes.wintypes.DWORD),
        ('dwFileDateLS', ctypes.wintypes.DWORD),
    ]
# Load the DLLs; use_last_error=True keeps the Win32 error code available via ctypes.get_last_error()
kernel32 = ctypes.WinDLL('kernel32', use_last_error=True)
psapi = ctypes.WinDLL('psapi', use_last_error=True)
version = ctypes.WinDLL('version', use_last_error=True)
# Create a process snapshot
CreateToolhelp32Snapshot = kernel32.CreateToolhelp32Snapshot
CreateToolhelp32Snapshot.argtypes = [ctypes.wintypes.DWORD, ctypes.wintypes.DWORD]
CreateToolhelp32Snapshot.restype = ctypes.wintypes.HANDLE
# Get the first process of a snapshot
Process32First = kernel32.Process32First
Process32First.argtypes = [ctypes.wintypes.HANDLE, ctypes.POINTER(PROCESSENTRY32)]
Process32First.restype = ctypes.wintypes.BOOL
# Get the next process of a snapshot
Process32Next = kernel32.Process32Next
Process32Next.argtypes = [ctypes.wintypes.HANDLE, ctypes.POINTER(PROCESSENTRY32)]
Process32Next.restype = ctypes.wintypes.BOOL
# Close a handle
CloseHandle = kernel32.CloseHandle
CloseHandle.argtypes = [ctypes.wintypes.HANDLE]
CloseHandle.restype = ctypes.wintypes.BOOL
# Open a process
OpenProcess = kernel32.OpenProcess
OpenProcess.argtypes = [ctypes.wintypes.DWORD, ctypes.wintypes.BOOL, ctypes.wintypes.DWORD]
OpenProcess.restype = ctypes.wintypes.HANDLE
# Get a module file name (ANSI "A" variant: the result is a byte string)
GetModuleFileNameEx = psapi.GetModuleFileNameExA
GetModuleFileNameEx.argtypes = [ctypes.wintypes.HANDLE, ctypes.wintypes.HANDLE, ctypes.c_char_p, ctypes.wintypes.DWORD]
GetModuleFileNameEx.restype = ctypes.wintypes.DWORD
# Get the size of a file's version information
GetFileVersionInfoSizeW = version.GetFileVersionInfoSizeW
GetFileVersionInfoSizeW.argtypes = [ctypes.wintypes.LPCWSTR, ctypes.POINTER(ctypes.wintypes.DWORD)]
GetFileVersionInfoSizeW.restype = ctypes.wintypes.DWORD
# Get a file's version information
GetFileVersionInfoW = version.GetFileVersionInfoW
GetFileVersionInfoW.argtypes = [ctypes.wintypes.LPCWSTR, ctypes.wintypes.DWORD, ctypes.wintypes.DWORD, ctypes.c_void_p]
GetFileVersionInfoW.restype = ctypes.wintypes.BOOL
# Query a value inside the version information
VerQueryValueW = version.VerQueryValueW
VerQueryValueW.argtypes = [ctypes.c_void_p, ctypes.wintypes.LPCWSTR, ctypes.POINTER(ctypes.c_void_p),
ctypes.POINTER(ctypes.wintypes.UINT)]
VerQueryValueW.restype = ctypes.wintypes.BOOL
# Get module information
GetModuleInformation = psapi.GetModuleInformation
GetModuleInformation.argtypes = [ctypes.wintypes.HANDLE, ctypes.wintypes.HMODULE, ctypes.POINTER(MODULEINFO),
ctypes.wintypes.DWORD]
GetModuleInformation.restype = ctypes.c_bool
# Read process memory
# NOTE(review): taken from ctypes.windll (not the kernel32 object above) and left without argtypes/restype
ReadProcessMemory = ctypes.windll.kernel32.ReadProcessMemory
# VirtualQueryEx prototype
VirtualQueryEx = kernel32.VirtualQueryEx
VirtualQueryEx.argtypes = [ctypes.wintypes.HANDLE, ctypes.wintypes.LPCVOID, ctypes.POINTER(MEMORY_BASIC_INFORMATION),
ctypes.c_size_t]
VirtualQueryEx.restype = ctypes.c_size_t
# Get the name of the file mapped at an address (ANSI and wide variants)
GetMappedFileName = psapi.GetMappedFileNameA
GetMappedFileName.argtypes = [ctypes.wintypes.HANDLE, ctypes.wintypes.LPVOID, ctypes.c_char_p, ctypes.wintypes.DWORD]
GetMappedFileName.restype = ctypes.wintypes.DWORD
GetMappedFileNameW = psapi.GetMappedFileNameW
GetMappedFileNameW.restype = ctypes.wintypes.DWORD
GetMappedFileNameW.argtypes = [ctypes.wintypes.HANDLE, ctypes.c_void_p, ctypes.wintypes.LPWSTR, ctypes.wintypes.DWORD]
def get_memory_maps(pid):
    """
    Enumerate the virtual-memory regions of a process.

    :param pid: process id
    :return: list of MemMap namedtuples (BaseAddress, RegionSize, State, Protect, Type, FileName);
             FileName is None when no mapped file name is reported for the region.
             An empty list is returned when the process cannot be opened.
    """
    MemMap = namedtuple('MemMap', ['BaseAddress', 'RegionSize', 'State', 'Protect', 'Type', 'FileName'])

    access = PROCESS_QUERY_INFORMATION | PROCESS_VM_READ
    hProcess = OpenProcess(access, False, pid)
    if not hProcess:
        return []

    memory_maps = []
    base_address = 0
    mbi = MEMORY_BASIC_INFORMATION()
    max_address = 0x7FFFFFFFFFFFFFFF  # upper bound of the walk (64-bit address space)
    try:
        while base_address < max_address:
            # a zero return means the query failed, which ends the walk
            if VirtualQueryEx(hProcess, base_address, ctypes.byref(mbi), ctypes.sizeof(mbi)) == 0:
                break

            mapped_file_name = ctypes.create_unicode_buffer(ctypes.wintypes.MAX_PATH)
            if GetMappedFileNameW(hProcess, base_address, mapped_file_name, ctypes.wintypes.MAX_PATH) > 0:
                file_name = mapped_file_name.value
            else:
                file_name = None

            memory_maps.append(MemMap(
                BaseAddress=mbi.BaseAddress,
                RegionSize=mbi.RegionSize,
                State=mbi.State,
                Protect=mbi.Protect,
                Type=mbi.Type,
                FileName=file_name,
            ))
            base_address += mbi.RegionSize
    finally:
        # release the process handle even if a call above raises
        CloseHandle(hProcess)
    return memory_maps
def get_process_exe_path(process_id):
    """
    Return the executable path of a process.

    :param process_id: process id
    :return: the path as str, or None when the process cannot be opened or the query fails
    """
    h_process = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, False, process_id)
    if not h_process:
        return None
    try:
        exe_path = ctypes.create_string_buffer(MAX_PATH)
        if GetModuleFileNameEx(h_process, None, exe_path, MAX_PATH) > 0:
            # GetModuleFileNameEx is bound to the ANSI ("A") API, so the bytes are in the
            # Windows ANSI code page; 'mbcs' decodes that code page, whereas UTF-8 would
            # drop the non-ASCII characters of the path.
            return exe_path.value.decode('mbcs', errors='ignore')
        return None
    finally:
        CloseHandle(h_process)
def get_file_version_info(file_path):
    """
    Read the four-part file version of a file.

    :param file_path: path of the file
    :return: "major.minor.build.revision", or None when any version query fails
             or the fixed-info signature is not 0xFEEF04BD
    """
    info_size = GetFileVersionInfoSizeW(file_path, None)
    if info_size == 0:
        return None

    info_buf = ctypes.create_string_buffer(info_size)
    if not GetFileVersionInfoW(file_path, 0, info_size, info_buf):
        return None

    value_len = ctypes.wintypes.UINT()
    value_ptr = ctypes.c_void_p()
    if not VerQueryValueW(info_buf, r'\\', ctypes.byref(value_ptr), ctypes.byref(value_len)):
        return None

    fixed_info = ctypes.cast(value_ptr, ctypes.POINTER(VS_FIXEDFILEINFO)).contents
    if fixed_info.dwSignature != 0xFEEF04BD:
        return None

    # each DWORD packs two 16-bit version components (high word first)
    most, least = fixed_info.dwFileVersionMS, fixed_info.dwFileVersionLS
    parts = ((most >> 16) & 0xffff, most & 0xffff, (least >> 16) & 0xffff, least & 0xffff)
    return ".".join(str(part) for part in parts)
def get_process_list():
    """
    List the running processes from a Toolhelp snapshot.

    :return: list of (pid, executable name) tuples; an empty list when the snapshot
             cannot be created or its first entry cannot be read
    """
    h_process_snap = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0)
    if h_process_snap == ctypes.wintypes.HANDLE(-1).value:  # -1 is the failure value checked for here
        print("Failed to create snapshot")
        return []

    try:
        pe32 = PROCESSENTRY32()
        pe32.dwSize = ctypes.sizeof(PROCESSENTRY32)
        if not Process32First(h_process_snap, ctypes.byref(pe32)):
            print("Failed to get first process")
            return []

        process_list = []
        while True:
            # szExeFile comes from the ANSI Process32First/Next API, so decode it with the
            # Windows ANSI code page ('mbcs') instead of UTF-8.
            process_list.append((pe32.th32ProcessID, pe32.szExeFile.decode('mbcs', errors='ignore')))
            if not Process32Next(h_process_snap, ctypes.byref(pe32)):
                break
        return process_list
    finally:
        # the snapshot handle is released on every path
        CloseHandle(h_process_snap)
if __name__ == "__main__":
    # Manual check: print the first WeChatWin.dll region of every WeChat.exe process.
    for pid, name in get_process_list():
        if name != "WeChat.exe":
            continue
        for module in get_memory_maps(pid):
            if module.FileName and 'WeChatWin.dll' in module.FileName:
                print(module.BaseAddress)
                print(module.FileName)
                break

View File

@ -0,0 +1,117 @@
import ctypes
import ctypes.wintypes as wintypes
import logging
import re
import sys
# Constants
# Process access rights
PROCESS_QUERY_INFORMATION = 0x0400
PROCESS_VM_READ = 0x0010
# Page protection values (compared with MEMORY_BASIC_INFORMATION.Protect in search_memory)
PAGE_EXECUTE = 0x10
PAGE_EXECUTE_READ = 0x20
PAGE_EXECUTE_READWRITE = 0x40
PAGE_EXECUTE_WRITECOPY = 0x80
PAGE_NOACCESS = 0x01
PAGE_READONLY = 0x02
PAGE_READWRITE = 0x04
PAGE_WRITECOPY = 0x08
PAGE_GUARD = 0x100
PAGE_NOCACHE = 0x200
PAGE_WRITECOMBINE = 0x400
# Memory state / allocation values (MEM_COMMIT is compared with MEMORY_BASIC_INFORMATION.State)
MEM_COMMIT = 0x1000
MEM_FREE = 0x10000
MEM_RESERVE = 0x2000
MEM_DECOMMIT = 0x4000
MEM_RELEASE = 0x8000
# Structure definition
class MEMORY_BASIC_INFORMATION(ctypes.Structure):
    """Region description that VirtualQueryEx writes into (see search_memory)."""

    _fields_ = [
        # region location
        ("BaseAddress", ctypes.c_void_p),
        ("AllocationBase", ctypes.c_void_p),
        ("AllocationProtect", wintypes.DWORD),
        # region extent
        ("RegionSize", ctypes.c_size_t),
        # region attributes
        ("State", wintypes.DWORD),
        ("Protect", wintypes.DWORD),
        ("Type", wintypes.DWORD),
    ]
# Load the Windows API functions; use_last_error=True keeps the error code for ctypes.get_last_error()
kernel32 = ctypes.WinDLL('kernel32', use_last_error=True)
OpenProcess = kernel32.OpenProcess
OpenProcess.restype = wintypes.HANDLE
OpenProcess.argtypes = [wintypes.DWORD, wintypes.BOOL, wintypes.DWORD]
# NOTE(review): no argtypes/restype are declared for ReadProcessMemory, so callers wrap its arguments themselves
ReadProcessMemory = kernel32.ReadProcessMemory
VirtualQueryEx = kernel32.VirtualQueryEx
VirtualQueryEx.restype = ctypes.c_size_t
VirtualQueryEx.argtypes = [wintypes.HANDLE, ctypes.c_void_p, ctypes.POINTER(MEMORY_BASIC_INFORMATION), ctypes.c_size_t]
CloseHandle = kernel32.CloseHandle
CloseHandle.restype = wintypes.BOOL
CloseHandle.argtypes = [wintypes.HANDLE]
def search_memory(hProcess, pattern=br'\\Msg\\FTSContact', max_num=100, start_address=0x0,
                  end_address=0x7FFFFFFFFFFFFFFF):
    """
    Search the memory of a process for a bytes regular expression.

    :param hProcess: process handle (an already opened handle, not a PID)
    :param pattern: bytes regular expression to search for
    :param max_num: stop scanning once at least this many matches were collected; the check
                    runs after each region, so more than max_num addresses can be returned
    :param start_address: address where the scan starts
    :param end_address: address where the scan stops
    :return: list of absolute addresses at which the pattern matched
    :raises OSError: (ctypes.WinError) when hProcess is falsy
    """
    if not hProcess:
        raise ctypes.WinError(ctypes.get_last_error())

    # only committed regions with one of these exact protections are read
    allowed_protections = (PAGE_EXECUTE, PAGE_EXECUTE_READ, PAGE_EXECUTE_READWRITE, PAGE_READWRITE, PAGE_READONLY)
    # a 32-bit interpreter is capped at 0x7fff0000, but a smaller end_address is still honoured
    max_address = end_address if sys.maxsize > 2 ** 32 else min(end_address, 0x7fff0000)
    regex = re.compile(pattern)

    result = []
    mbi = MEMORY_BASIC_INFORMATION()
    address = start_address
    while address < max_address:
        if VirtualQueryEx(hProcess, address, ctypes.byref(mbi), ctypes.sizeof(mbi)) == 0:
            break
        if mbi.RegionSize == 0:
            break  # no progress possible

        # The data is read from the region base, so offsets and the next address are
        # computed from it as well (c_void_p fields read back as None for address 0).
        region_base = mbi.BaseAddress or 0
        next_address = region_base + mbi.RegionSize

        if mbi.State == MEM_COMMIT and mbi.Protect in allowed_protections:
            page_bytes = ctypes.create_string_buffer(mbi.RegionSize)
            bytes_read = ctypes.c_size_t()
            # pointer-sized wrappers keep the arguments correct without declared argtypes
            if ReadProcessMemory(hProcess, ctypes.c_void_p(region_base), page_bytes,
                                 ctypes.c_size_t(mbi.RegionSize), ctypes.byref(bytes_read)) != 0:
                # Pattern.finditer takes (string, pos, endpos): scan from offset 0 and only
                # the bytes that were actually read. Flags belong to re.compile, not here.
                result.extend(region_base + match.start()
                              for match in regex.finditer(page_bytes, 0, bytes_read.value))
                if len(result) >= max_num:
                    break
        address = next_address
    return result
if __name__ == '__main__':
    # Example usage
    pid = 29320  # replace this with the id of the process to inspect
    # search_memory expects a process handle, so open the process first
    h_process = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, False, pid)
    try:
        maps = search_memory(h_process)
        print(len(maps))
        for m in maps:
            print(hex(m))
    except Exception as e:
        logging.error(e, exc_info=True)
    finally:
        if h_process:
            CloseHandle(h_process)

438
pywxdump/wx_core/wx_info.py Normal file
View File

@ -0,0 +1,438 @@
# -*- coding: utf-8 -*-#
# -------------------------------------------------------------------------------
# Name: wx_info.py
# Description:
# Author: xaoyaoo
# Date: 2023/08/21
# -------------------------------------------------------------------------------
import ctypes
import json
import os
import re
import winreg
from typing import List, Union
from .utils import verify_key, get_exe_bit, wx_core_error
from .utils import get_process_list, get_memory_maps, get_process_exe_path, get_file_version_info
from .utils import search_memory
from .utils import wx_core_loger, DB_TYPE_CORE
import ctypes.wintypes as wintypes
# Constants: access rights requested from OpenProcess
PROCESS_QUERY_INFORMATION = 0x0400
PROCESS_VM_READ = 0x0010
# Win32 bindings; use_last_error=True keeps the error code for ctypes.get_last_error()
kernel32 = ctypes.WinDLL('kernel32', use_last_error=True)
OpenProcess = kernel32.OpenProcess
OpenProcess.restype = wintypes.HANDLE
OpenProcess.argtypes = [wintypes.DWORD, wintypes.BOOL, wintypes.DWORD]
CloseHandle = kernel32.CloseHandle
CloseHandle.restype = wintypes.BOOL
CloseHandle.argtypes = [wintypes.HANDLE]
# NOTE(review): no argtypes are declared, which is why callers wrap addresses in void_p(...)
ReadProcessMemory = kernel32.ReadProcessMemory
void_p = ctypes.c_void_p
# Read the key from process memory
@wx_core_error
def get_key_by_offs(h_process, address, address_len=8):
    """
    Follow the pointer stored at ``address`` and read the 32 bytes it points to.

    :param h_process: process handle
    :param address: address that holds a little-endian pointer to the key
    :param address_len: pointer size in bytes
    :return: the 32 key bytes as a hex string, or None when either read fails
    """
    pointer_buf = ctypes.create_string_buffer(address_len)
    if ReadProcessMemory(h_process, void_p(address), pointer_buf, address_len, 0) == 0:
        return None

    # the stored bytes are little-endian; convert them into the key's address
    key_address = int.from_bytes(pointer_buf, byteorder='little')

    key_buf = ctypes.create_string_buffer(32)
    if ReadProcessMemory(h_process, void_p(key_address), key_buf, 32, 0) == 0:
        return None
    return bytes(key_buf).hex()
# Read a string (non-key data) from process memory
@wx_core_error
def get_info_string(h_process, address, n_size=64):
    """
    Read a NUL-terminated UTF-8 string stored directly at ``address``.

    :param h_process: process handle
    :param address: address of the string
    :param n_size: number of bytes to read
    :return: the stripped text, or None when the read fails or the text is empty
    """
    buf = ctypes.create_string_buffer(n_size)
    if ReadProcessMemory(h_process, void_p(address), buf, n_size, 0) == 0:
        return None

    # keep everything before the first NUL (the whole buffer when there is none)
    raw = bytes(buf).split(b"\x00", 1)[0]
    text = raw.decode('utf-8', errors='ignore').strip()
    if text != "":
        return text
    return None
# Read the nickname from process memory
@wx_core_error
def get_info_name(h_process, address, address_len=8, n_size=64):
    """
    Read the nickname at ``address``: first treat the leading bytes as a pointer to the
    string, and fall back to the bytes stored inline when that yields nothing.

    :param h_process: process handle
    :param address: address of the nickname field
    :param address_len: pointer size in bytes
    :param n_size: number of bytes to read
    :return: the stripped text, or None when the read fails or no text is found
    """
    buf = ctypes.create_string_buffer(n_size)
    if ReadProcessMemory(h_process, void_p(address), buf, n_size, 0) == 0:
        return None

    # first interpretation: the field holds a little-endian pointer to the text
    pointed_address = int.from_bytes(buf[:address_len], byteorder='little')
    pointed_name = get_info_string(h_process, pointed_address, n_size)
    if pointed_name is not None:
        return pointed_name

    # second interpretation: the text is stored inline in the field itself
    inline = bytes(buf).split(b"\x00", 1)[0].decode('utf-8', errors='ignore').strip()
    if inline != "":
        return inline
    return None
# Read the wxid from process memory
@wx_core_error
def get_info_wxid(h_process):
    """
    Find the wxid by searching memory for paths that contain ``\\Msg\\FTSContact`` and
    taking the path component directly in front of ``\\Msg``.

    :param h_process: process handle
    :return: the most frequent candidate, or None when nothing was found
    """
    find_num = 100
    addrs = search_memory(h_process, br'\\Msg\\FTSContact', max_num=find_num)
    wxids = []
    for addr in addrs:
        array = ctypes.create_string_buffer(80)
        # a single unreadable window must not discard the candidates found elsewhere
        if ReadProcessMemory(h_process, void_p(addr - 30), array, 80, 0) == 0:
            continue
        array = bytes(array)
        array = array.split(b"\\Msg")[0]  # text in front of "\Msg"
        array = array.split(b"\\")[-1]  # last path component = wxid
        wxids.append(array.decode('utf-8', errors='ignore'))
    # majority vote over all hits
    wxid = max(wxids, key=wxids.count) if wxids else None
    return wxid
# Read the account directory from process memory, based on the wxid
@wx_core_error
def get_wx_dir_by_wxid(h_process, wxid=""):
    """
    Find the account directory by searching memory for ``<wxid>\\Msg\\FTSContact`` and
    reading the path text in front of it.

    :param h_process: process handle
    :param wxid: WeChat id
    :return: the most frequent candidate path (ending with the wxid), or None when nothing was found
    """
    find_num = 10
    # the wxid is literal text, so escape it before embedding it in the regular expression
    pattern = re.escape(wxid.encode()) + br'\\Msg\\FTSContact'
    addrs = search_memory(h_process, pattern, max_num=find_num)
    wxid_dir = []
    for addr in addrs:
        win_addr_len = 260
        array = ctypes.create_string_buffer(win_addr_len)
        # a single unreadable window must not discard the candidates found elsewhere
        if ReadProcessMemory(h_process, void_p(addr - win_addr_len + 50), array, win_addr_len, 0) == 0:
            continue
        array = bytes(array).split(b"\\Msg")[0]  # text in front of "\Msg"
        array = array.split(b"\00")[-1]  # start after the last NUL byte
        wxid_dir.append(array.decode('utf-8', errors='ignore'))
    # majority vote over all hits
    wxid_dir = max(wxid_dir, key=wxid_dir.count) if wxid_dir else None
    return wxid_dir
@wx_core_error
def get_wx_dir_by_reg(wxid="all"):
    """
    Locate the "WeChat Files" directory (fast path, no memory search).

    Sources, in order: the ``FileSavePath`` registry value of WeChat, then the
    ``3ebffe94.ini`` config file; the value "MyDocument:" means the user's Documents folder,
    which is resolved from the "User Shell Folders" registry key (falling back to
    ``%USERPROFILE%\\Documents``).

    :param wxid: WeChat id, or "all"
    :return: the "WeChat Files" directory when wxid == "all", otherwise that directory joined
             with wxid; None when wxid is empty or the resulting directory does not exist
    """
    if not wxid:
        return None

    w_dir = "MyDocument:"
    is_w_dir = False

    # 1) WeChat's own registry setting
    try:
        with winreg.OpenKey(winreg.HKEY_CURRENT_USER, r"Software\Tencent\WeChat", 0, winreg.KEY_READ) as key:
            w_dir, _ = winreg.QueryValueEx(key, "FileSavePath")
        is_w_dir = True
    except Exception:
        w_dir = "MyDocument:"

    # 2) WeChat's config file
    if not is_w_dir:
        try:
            user_profile = os.environ.get("USERPROFILE")
            path_3ebffe94 = os.path.join(user_profile, "AppData", "Roaming", "Tencent", "WeChat", "All Users", "config",
                                         "3ebffe94.ini")
            with open(path_3ebffe94, "r", encoding="utf-8") as f:
                w_dir = f.read()
        except Exception:
            w_dir = "MyDocument:"

    # 3) "MyDocument:" -> resolve the real Documents folder
    if w_dir == "MyDocument:":
        try:
            with winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                                r"Software\Microsoft\Windows\CurrentVersion\Explorer\User Shell Folders") as key:
                documents_path = winreg.QueryValueEx(key, "Personal")[0]
            # The value can contain %VAR% references anywhere in the path; expandvars
            # resolves them regardless of how many components follow.
            expanded = os.path.expandvars(documents_path)
            if "%" in expanded:
                # an unresolved variable is left as-is by expandvars -> use the fallback below
                raise ValueError(f"unresolved variable in {documents_path}")
            w_dir = expanded
        except Exception:
            profile = os.environ.get("USERPROFILE")
            w_dir = os.path.join(profile, "Documents")

    wx_dir = os.path.join(w_dir, "WeChat Files")
    if wxid and wxid != "all":
        wxid_dir = os.path.join(wx_dir, wxid)
        return wxid_dir if os.path.exists(wxid_dir) else None
    return wx_dir if os.path.exists(wx_dir) else None
def get_wx_dir(wxid: str = "", Handle=None):
    """
    Resolve the WeChat directory by combining both lookup methods.

    get_wx_dir_by_reg is tried first (fast); when a wxid is given, that lookup returns None
    and a process handle is available, get_wx_dir_by_wxid (slower memory search) is used.

    :param wxid: WeChat id; when empty, the registry lookup is called with its default argument
    :param Handle: process handle for the memory search, optional
    :return: the directory, or None when it cannot be determined
    """
    if not wxid:
        return get_wx_dir_by_reg()

    found_dir = get_wx_dir_by_reg(wxid)
    if found_dir is None and Handle:
        found_dir = get_wx_dir_by_wxid(Handle, wxid=wxid)
    return found_dir
@wx_core_error
def get_key_by_mem_search(pid, db_path, addr_len):
    """
    Find the database key by scanning the WeChatWin.dll regions of a process.

    The addresses of the strings "iphone", "android" and "ipad" (NUL-terminated) are
    collected; the memory in front of each address is then walked backwards in pointer-sized
    steps, every pointer is dereferenced and the 32 bytes it points to are checked with
    verify_key against ``<db_path>/MSG/MicroMsg.db``.

    :param pid: process id
    :param db_path: WeChat account directory
    :param addr_len: pointer size in bytes
    :return: the key as a hex string, or None when no candidate verifies
    """

    def read_key_bytes(h_process, address, address_len=8):
        # dereference the little-endian pointer at `address` and read 32 bytes from its target
        array = ctypes.create_string_buffer(address_len)
        if ReadProcessMemory(h_process, void_p(address), array, address_len, 0) == 0:
            return None
        address = int.from_bytes(array, byteorder='little')
        key = ctypes.create_string_buffer(32)
        if ReadProcessMemory(h_process, void_p(address), key, 32, 0) == 0:
            return None
        return bytes(key)

    phone_types = ("iphone\x00", "android\x00", "ipad\x00")
    MicroMsg_path = os.path.join(db_path, "MSG", "MicroMsg.db")

    # address range covered by all regions that are mapped from WeChatWin.dll
    start_adress = 0x7FFFFFFFFFFFFFFF
    end_adress = 0
    for module in get_memory_maps(pid):
        if module.FileName and 'WeChatWin.dll' in module.FileName:
            s = module.BaseAddress
            e = module.BaseAddress + module.RegionSize
            start_adress = s if s < start_adress else start_adress
            end_adress = e if e > end_adress else end_adress

    hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, False, pid)
    try:
        type_addrs = []
        for phone_type in phone_types:
            addrs = search_memory(hProcess, phone_type.encode(), max_num=2, start_address=start_adress,
                                  end_address=end_adress)
            # a device-type string only counts when it was found at least twice
            if len(addrs) >= 2:
                type_addrs += addrs
        if not type_addrs:
            return None

        type_addrs.sort()
        # highest address first; walk up to 2000 bytes backwards from each hit
        for i in reversed(type_addrs):
            for j in range(i, i - 2000, -addr_len):
                key_bytes = read_key_bytes(hProcess, j, addr_len)
                if key_bytes is None:
                    continue
                if verify_key(key_bytes, MicroMsg_path):
                    return key_bytes.hex()
        return None
    finally:
        # the handle is released on every exit path, including the early returns
        if hProcess:
            CloseHandle(hProcess)
@wx_core_error
def get_wx_key(key: str = "", wx_dir: str = "", pid=0, addrLen=8):
    """
    Return a usable key: keep ``key`` when it verifies against the account's MicroMsg.db,
    otherwise search the process memory for one.

    :param key: candidate key as a hex string
    :param wx_dir: WeChat account directory
    :param pid: process id
    :param addrLen: pointer size in bytes
    :return: the key (the given one, or the result of get_key_by_mem_search)
    """
    if key is not None and wx_dir is not None:
        micro_msg_db = os.path.join(wx_dir, "MSG", "MicroMsg.db")
        key_is_valid = verify_key(bytes.fromhex(key), micro_msg_db)
    else:
        key_is_valid = False

    # the memory search needs the directory, so it is only attempted when one is known
    if wx_dir is not None and not key_is_valid:
        key = get_key_by_mem_search(pid, wx_dir, addrLen)
    return key
@wx_core_error
def get_info_details(pid, WX_OFFS: dict = None):
    """
    Collect the account information of one WeChat process.

    :param pid: process id
    :param WX_OFFS: mapping of version -> offset list [nickname, account, mobile, mail, key, ...];
                    None is treated as an empty mapping
    :return: dict with the keys pid, version, account, mobile, nickname, mail, wxid, key, wx_dir;
             values that could not be read stay None
    """
    if WX_OFFS is None:
        WX_OFFS = {}

    def _is_supported(offsets):
        # at least five offsets (nickname, account, mobile, mail, key) are required
        return isinstance(offsets, list) and len(offsets) > 4

    path = get_process_exe_path(pid)
    rd = {'pid': pid, 'version': get_file_version_info(path),
          "account": None, "mobile": None, "nickname": None, "mail": None,
          "wxid": None, "key": None, "wx_dir": None}
    Handle = None
    try:
        bias_list = WX_OFFS.get(rd['version'], None)
        Handle = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, False, pid)
        addrLen = get_exe_bit(path) // 8  # pointer size in bytes
        if not _is_supported(bias_list):
            wx_core_loger.warning(f"[-] WeChat Current Version Is Not Supported(not get account,mobile,nickname,mail)")
        else:
            wechat_base_address = 0
            for module in get_memory_maps(pid):
                if module.FileName and 'WeChatWin.dll' in module.FileName:
                    wechat_base_address = module.BaseAddress
                    # prefer the DLL's own version when its file is reachable
                    if os.path.exists(module.FileName):
                        rd['version'] = get_file_version_info(module.FileName)
                    bias_list = WX_OFFS.get(rd['version'], None)
                    break

            if not wechat_base_address:
                wx_core_loger.warning(f"[-] WeChat WeChatWin.dll Not Found")
            elif not _is_supported(bias_list):
                # The offsets were looked up again for the DLL version and may be missing;
                # skip the offset reads but still run the wxid / key discovery below.
                wx_core_loger.warning(
                    f"[-] WeChat Current Version Is Not Supported(not get account,mobile,nickname,mail)")
            else:
                name_baseaddr = wechat_base_address + bias_list[0]
                account_baseaddr = wechat_base_address + bias_list[1]
                mobile_baseaddr = wechat_base_address + bias_list[2]
                mail_baseaddr = wechat_base_address + bias_list[3]
                key_baseaddr = wechat_base_address + bias_list[4]

                # an offset of 0 means "not available for this version"
                rd['account'] = get_info_string(Handle, account_baseaddr, 32) if bias_list[1] != 0 else None
                rd['mobile'] = get_info_string(Handle, mobile_baseaddr, 64) if bias_list[2] != 0 else None
                rd['nickname'] = get_info_name(Handle, name_baseaddr, addrLen, 64) if bias_list[0] != 0 else None
                rd['mail'] = get_info_string(Handle, mail_baseaddr, 64) if bias_list[3] != 0 else None
                rd['key'] = get_key_by_offs(Handle, key_baseaddr, addrLen) if bias_list[4] != 0 else None

        rd['wxid'] = get_info_wxid(Handle)
        rd['wx_dir'] = get_wx_dir(rd['wxid'], Handle)
        rd['key'] = get_wx_key(rd['key'], rd['wx_dir'], rd['pid'], addrLen)
    except Exception as e:
        wx_core_loger.error(f"[-] WeChat Get Info Error:{e}", exc_info=True)
    finally:
        # release the process handle on every path
        if Handle:
            CloseHandle(Handle)
    return rd
# Read the WeChat information (account, mobile, nickname, mail, wxid, key)
@wx_core_error
def get_wx_info(WX_OFFS: dict = None, is_print: bool = False, save_path: str = None):
    """
    Read the account information of every running WeChat.exe process.

    :param WX_OFFS: version offsets
    :param is_print: whether to print the result
    :param save_path: JSON file to which the result is appended (optional)
    :return: list of dicts [{"pid": pid, "version": version, "account": account,
             "mobile": mobile, "nickname": nickname, "mail": mail, "wxid": wxid,
             "key": key, "wx_dir": wx_dir}, ...]; empty when WeChat is not running
    """
    if WX_OFFS is None:
        WX_OFFS = {}

    result = []
    wechat_pids = [pid for pid, name in get_process_list() if name == "WeChat.exe"]
    if not wechat_pids:
        wx_core_loger.error("[-] WeChat No Run")
        return result

    for pid in wechat_pids:
        rd = get_info_details(pid, WX_OFFS)
        # skip processes for which no details dict was produced
        if rd:
            result.append(rd)

    if is_print:
        print("=" * 32)
        for i, rlt in enumerate(result):
            for k, v in rlt.items():
                print(f"[+] {k:>8}: {v if v else 'None'}")
            # separator between accounts, but not after the last one
            print(end="-" * 32 + "\n" if i != len(result) - 1 else "")
        print("=" * 32)

    if save_path:
        infos = []
        if os.path.exists(save_path):
            try:
                with open(save_path, "r", encoding="utf-8") as f:
                    infos = json.load(f)
            except (OSError, ValueError):
                # unreadable or invalid JSON: start a fresh list
                infos = []
        if not isinstance(infos, list):
            infos = []
        with open(save_path, "w", encoding="utf-8") as f:
            json.dump(infos + result, f, ensure_ascii=False, indent=4)
    return result
@wx_core_error
def get_wx_db(msg_dir: str = None,
              db_types: Union[List[str], str] = None,
              wxids: Union[List[str], str] = None) -> List[dict]:
    r"""
    Collect the paths of the WeChat database files.

    :param msg_dir: WeChat files directory eg: C:\Users\user\Documents\WeChat Files (not a wxid directory);
                    when missing or non-existent, the directory from get_wx_dir_by_reg is used
    :param db_types: database types to return (list, or ";"-separated string); empty means all
    :param wxids: wxids to scan (list, or ";"-separated string); empty means all
    :return: [{"wxid": wxid, "db_type": db_type, "db_path": db_path, "wxid_dir": wxid_dir}, ...]
    """
    result = []

    if not msg_dir or not os.path.exists(msg_dir):
        wx_core_loger.warning(f"[-] 微信文件目录不存在: {msg_dir}, 将使用默认路径")
        msg_dir = get_wx_dir_by_reg(wxid="all")

    # the fallback can be None as well, so test for that before touching the file system
    if not msg_dir or not os.path.exists(msg_dir):
        wx_core_loger.error(f"[-] 目录不存在: {msg_dir}")
        return result

    wxids = wxids.split(";") if isinstance(wxids, str) else wxids
    if not isinstance(wxids, list) or len(wxids) <= 0:
        wxids = None
    db_types = db_types.split(";") if isinstance(db_types, str) else db_types
    if not isinstance(db_types, list) or len(db_types) <= 0:
        db_types = None

    # account directories: every sub-directory except the known shared ones
    wxid_dirs = {}
    for sub_dir in os.listdir(msg_dir):
        if os.path.isdir(os.path.join(msg_dir, sub_dir)) and sub_dir not in ["All Users", "Applet", "WMPF"]:
            wxid_dirs[os.path.basename(sub_dir)] = os.path.join(msg_dir, sub_dir)

    for wxid, wxid_dir in wxid_dirs.items():
        if wxids and wxid not in wxids:  # a wxid filter was given and this one is not in it
            continue
        for root, dirs, files in os.walk(wxid_dir):
            for file_name in files:
                if not file_name.endswith(".db"):
                    continue
                # "MSG12.db" -> "MSG": drop the trailing digits and the extension
                db_type = re.sub(r"\d*\.db$", "", file_name)
                if db_types and db_type not in db_types:  # a type filter was given and this one is not in it
                    continue
                db_path = os.path.join(root, file_name)
                result.append({"wxid": wxid, "db_type": db_type, "db_path": db_path, "wxid_dir": wxid_dir})
    return result
@wx_core_error
def get_core_db(wx_path: str, db_types: list = None) -> tuple:
    r"""
    Collect the paths of the core chat databases of one account.

    :param wx_path: account directory eg: C:\*****\WeChat Files\wxid*******
    :param db_types: database types, one or more entries of DB_TYPE_CORE (a ";"-separated
                     string is accepted as well); empty means all of DB_TYPE_CORE
    :return: (True, [{"wxid": wxid, "db_type": db_type, "db_path": db_path, "wxid_dir": wxid_dir}, ...])
             on success, otherwise (False, reason)
    """
    if not wx_path or not os.path.exists(wx_path):
        return False, f"[-] 目录不存在: {wx_path}"

    if isinstance(db_types, str):
        db_types = db_types.split(";")
    if not db_types:
        db_types = DB_TYPE_CORE
    db_types = [dt for dt in db_types if dt in DB_TYPE_CORE]
    if not db_types:
        # get_wx_db treats an empty filter as "all types", so stop here instead of
        # silently widening the request
        wx_core_loger.error(f"[-] get_core_db 未指定有效的数据库类型")
        return False, "未指定有效的数据库类型"

    # abspath also removes a trailing separator, which would otherwise yield an empty wxid
    wx_path = os.path.abspath(wx_path)
    msg_dir = os.path.dirname(wx_path)
    my_wxid = os.path.basename(wx_path)

    wxdbpaths = get_wx_db(msg_dir=msg_dir, db_types=db_types, wxids=my_wxid)
    if not wxdbpaths:  # empty list, or no list at all
        wx_core_loger.error(f"[-] get_core_db 未获取到数据库路径")
        return False, "未获取到数据库路径"
    return True, wxdbpaths
if __name__ == '__main__':
    # Manual run: print the information of every running WeChat process.
    from pywxdump import WX_OFFS

    get_wx_info(WX_OFFS, is_print=True)