diff --git a/pywxdump/version_list.json b/pywxdump/WX_OFFS.json similarity index 100% rename from pywxdump/version_list.json rename to pywxdump/WX_OFFS.json diff --git a/pywxdump/__init__.py b/pywxdump/__init__.py index 3d0df06..f79d71a 100644 --- a/pywxdump/__init__.py +++ b/pywxdump/__init__.py @@ -8,24 +8,23 @@ # from .analyzer.db_parsing import read_img_dat, read_emoji, decompress_CompressContent, read_audio_buf, read_audio, \ # parse_xml_string, read_BytesExtra # from .ui import app_show_chat, get_user_list, export -from .wx_info import BiasAddr, read_info, get_wechat_db, batch_decrypt, decrypt, get_core_db -from .wx_info import merge_copy_db, merge_msg_db, merge_media_msg_db, merge_db, decrypt_merge, merge_real_time_db, \ - all_merge_real_time_db +from .wx_core import BiasAddr, get_wx_info, get_wx_db, batch_decrypt, decrypt, get_core_db +from .wx_core import merge_db, decrypt_merge, merge_real_time_db, all_merge_real_time_db from .analyzer import DBPool -from .dbpreprocess import get_user_list, get_recent_user_list, wxid2userinfo, ParsingMSG, ParsingMicroMsg, \ - ParsingMediaMSG, ParsingOpenIMContact, ParsingFavorite,ParsingPublicMsg +from .db import MsgHandler, MicroHandler, \ + MediaHandler, OpenIMContactHandler, FavoriteHandler, PublicMsgHandler, DBHandler from .server import start_falsk import os, json try: - VERSION_LIST_PATH = os.path.join(os.path.dirname(__file__), "version_list.json") - with open(VERSION_LIST_PATH, "r", encoding="utf-8") as f: - VERSION_LIST = json.load(f) + WX_OFFS_PATH = os.path.join(os.path.dirname(__file__), "WX_OFFS.json") + with open(WX_OFFS_PATH, "r", encoding="utf-8") as f: + WX_OFFS = json.load(f) except: - VERSION_LIST = {} - VERSION_LIST_PATH = None + WX_OFFS = {} + WX_OFFS_PATH = None # PYWXDUMP_ROOT_PATH = os.path.dirname(__file__) # db_init = DBPool("DBPOOL_INIT") -__version__ = "3.0.42" +__version__ = "3.1.0" diff --git a/pywxdump/analyzer/chat_analysis.py b/pywxdump/analyzer/chat_analysis.py index fbbeed1..fdbbff8 
100644 --- a/pywxdump/analyzer/chat_analysis.py +++ b/pywxdump/analyzer/chat_analysis.py @@ -10,8 +10,8 @@ import time from collections import Counter import pandas as pd -from pywxdump.dbpreprocess.utils import xml2dict -from pywxdump.dbpreprocess import parsingMSG +from pywxdump.db.utils import xml2dict +from pywxdump.db import dbMSG def date_chat_count(chat_data, interval="W"): """ diff --git a/pywxdump/api/__init__.py b/pywxdump/api/__init__.py index cd594d9..a734459 100644 --- a/pywxdump/api/__init__.py +++ b/pywxdump/api/__init__.py @@ -5,8 +5,9 @@ # Author: xaoyaoo # Date: 2023/12/14 # ------------------------------------------------------------------------------- -from .api import api -from .utils import read_session, save_session +from .remote_server import rs_api +from .local_server import ls_api +from .utils import get_conf, set_conf if __name__ == '__main__': pass diff --git a/pywxdump/api/api.py b/pywxdump/api/api.py deleted file mode 100644 index f496904..0000000 --- a/pywxdump/api/api.py +++ /dev/null @@ -1,948 +0,0 @@ -# -*- coding: utf-8 -*-# -# ------------------------------------------------------------------------------- -# Name: chat_api.py -# Description: -# Author: xaoyaoo -# Date: 2024/01/02 -# ------------------------------------------------------------------------------- -import base64 -import json -import logging -import os -import re -import time -import shutil -import pythoncom -import pywxdump - -from flask import Flask, request, render_template, g, Blueprint, send_file, make_response, session -from pywxdump import get_core_db, all_merge_real_time_db -from pywxdump.api.rjson import ReJson, RqJson -from pywxdump.api.utils import read_session, get_session_wxids, save_session, error9999, gen_base64, validate_title, \ - read_session_local_wxid -from pywxdump import read_info, VERSION_LIST, batch_decrypt, BiasAddr, merge_db, decrypt_merge, merge_real_time_db - -from pywxdump.dbpreprocess import wxid2userinfo, ParsingMSG, get_user_list, 
get_recent_user_list, ParsingMediaMSG, \ - download_file, export_csv, export_json, ParsingMicroMsg, ParsingPublicMsg -from pywxdump.dbpreprocess.utils import dat2img - -# app = Flask(__name__, static_folder='../ui/web/dist', static_url_path='/') - -api = Blueprint('api', __name__, template_folder='../ui/web', static_folder='../ui/web/assets/', ) -api.debug = False - - -# 以下为初始化相关 ******************************************************************************************************* -@api.route('/api/init_last_local_wxid', methods=["GET", 'POST']) -@error9999 -def init_last_local_wxid(): - """ - 初始化,包括key - :return: - """ - local_wxid = read_session_local_wxid(g.sf) - if local_wxid: - return ReJson(0, {"local_wxids": local_wxid}) - return ReJson(0, {"local_wxids": []}) - - -@api.route('/api/init_last', methods=["GET", 'POST']) -@error9999 -def init_last(): - """ - 是否初始化 - :return: - """ - my_wxid = request.json.get("my_wxid", "") - my_wxid = my_wxid.strip().strip("'").strip('"') if isinstance(my_wxid, str) else "" - if not my_wxid: - my_wxid = read_session(g.sf, "test", "last") - if my_wxid: - save_session(g.sf, "test", "last", my_wxid) - merge_path = read_session(g.sf, my_wxid, "merge_path") - wx_path = read_session(g.sf, my_wxid, "wx_path") - key = read_session(g.sf, my_wxid, "key") - rdata = { - "merge_path": merge_path, - "wx_path": wx_path, - "key": key, - "my_wxid": my_wxid, - "is_init": True, - } - if merge_path and wx_path: - return ReJson(0, rdata) - return ReJson(0, {"is_init": False, "my_wxid": ""}) - - -@api.route('/api/init_key', methods=["GET", 'POST']) -@error9999 -def init_key(): - """ - 初始化,包括key - :return: - """ - wx_path = request.json.get("wx_path", "").strip().strip("'").strip('"') - key = request.json.get("key", "").strip().strip("'").strip('"') - my_wxid = request.json.get("my_wxid", "").strip().strip("'").strip('"') - if not wx_path: - return ReJson(1002, body=f"wx_path is required: {wx_path}") - if not os.path.exists(wx_path): - return 
ReJson(1001, body=f"wx_path not exists: {wx_path}") - if not key: - return ReJson(1002, body=f"key is required: {key}") - if not my_wxid: - return ReJson(1002, body=f"my_wxid is required: {my_wxid}") - - old_merge_save_path = read_session(g.sf, my_wxid, "merge_path") - if isinstance(old_merge_save_path, str) and old_merge_save_path and os.path.exists(old_merge_save_path): - pmsg = ParsingMSG(old_merge_save_path) - pmsg.close_all_connection() - - out_path = os.path.join(g.tmp_path, "decrypted", my_wxid) if my_wxid else os.path.join(g.tmp_path, "decrypted") - # 检查文件夹中文件是否被占用 - if os.path.exists(out_path): - try: - shutil.rmtree(out_path) - except PermissionError as e: - # 显示堆栈信息 - logging.error(f"{e}", exc_info=True) - return ReJson(2001, body=str(e)) - - code, merge_save_path = decrypt_merge(wx_path=wx_path, key=key, outpath=out_path) - time.sleep(1) - if code: - # 移动merge_save_path到g.tmp_path/my_wxid - if not os.path.exists(os.path.join(g.tmp_path, my_wxid)): - os.makedirs(os.path.join(g.tmp_path, my_wxid)) - merge_save_path_new = os.path.join(g.tmp_path, my_wxid, "merge_all.db") - shutil.move(merge_save_path, str(merge_save_path_new)) - - # 删除out_path - if os.path.exists(out_path): - try: - shutil.rmtree(out_path) - except PermissionError as e: - # 显示堆栈信息 - logging.error(f"{e}", exc_info=True) - - save_session(g.sf, my_wxid, "merge_path", merge_save_path_new) - save_session(g.sf, my_wxid, "wx_path", wx_path) - save_session(g.sf, my_wxid, "key", key) - save_session(g.sf, my_wxid, "my_wxid", my_wxid) - save_session(g.sf, "test", "last", my_wxid) - rdata = { - "merge_path": merge_save_path, - "wx_path": wx_path, - "key": key, - "my_wxid": my_wxid, - "is_init": True, - } - return ReJson(0, rdata) - else: - return ReJson(2001, body=merge_save_path) - - -@api.route('/api/init_nokey', methods=["GET", 'POST']) -@error9999 -def init_nokey(): - """ - 初始化,包括key - :return: - """ - merge_path = request.json.get("merge_path", "").strip().strip("'").strip('"') - wx_path = 
request.json.get("wx_path", "").strip().strip("'").strip('"') - my_wxid = request.json.get("my_wxid", "").strip().strip("'").strip('"') - - if not wx_path: - return ReJson(1002, body=f"wx_path is required: {wx_path}") - if not os.path.exists(wx_path): - return ReJson(1001, body=f"wx_path not exists: {wx_path}") - if not merge_path: - return ReJson(1002, body=f"merge_path is required: {merge_path}") - if not my_wxid: - return ReJson(1002, body=f"my_wxid is required: {my_wxid}") - - key = read_session(g.sf, my_wxid, "key") - - save_session(g.sf, my_wxid, "merge_path", merge_path) - save_session(g.sf, my_wxid, "wx_path", wx_path) - save_session(g.sf, my_wxid, "key", key) - save_session(g.sf, my_wxid, "my_wxid", my_wxid) - save_session(g.sf, "test", "last", my_wxid) - rdata = { - "merge_path": merge_path, - "wx_path": wx_path, - "key": "", - "my_wxid": my_wxid, - "is_init": True, - } - return ReJson(0, rdata) - - -# END 以上为初始化相关 *************************************************************************************************** - - -# start 以下为聊天联系人相关api ******************************************************************************************* - -@api.route('/api/recent_user_list', methods=["GET", 'POST']) -@error9999 -def recent_user_list(): - """ - 获取联系人列表 - :return: - """ - my_wxid = read_session(g.sf, "test", "last") - if not my_wxid: return ReJson(1001, body="my_wxid is required") - merge_path = read_session(g.sf, my_wxid, "merge_path") - user_list = get_recent_user_list(merge_path, merge_path, limit=200) - return ReJson(0, user_list) - - -@api.route('/api/user_labels_dict', methods=["GET", 'POST']) -@error9999 -def user_labels_dict(): - """ - 获取标签字典 - :return: - """ - my_wxid = read_session(g.sf, "test", "last") - if not my_wxid: return ReJson(1001, body="my_wxid is required") - merge_path = read_session(g.sf, my_wxid, "merge_path") - user_labels_dict = ParsingMicroMsg(merge_path).labels_dict() - return ReJson(0, user_labels_dict) - - 
-@api.route('/api/user_list', methods=["GET", 'POST']) -@error9999 -def user_list(): - """ - 获取联系人列表 - :return: - """ - if request.method == "GET": - word = request.args.get("word", "") - elif request.method == "POST": - word = request.json.get("word", "") - else: - return ReJson(1003, msg="Unsupported method") - my_wxid = read_session(g.sf, "test", "last") - if not my_wxid: return ReJson(1001, body="my_wxid is required") - merge_path = read_session(g.sf, my_wxid, "merge_path") - user_list = get_user_list(merge_path, merge_path, word) - return ReJson(0, user_list) - - -@api.route('/api/wxid2user', methods=["GET", 'POST']) -@error9999 -def wxid2user(): - """ - 获取联系人列表 - :return: - """ - if request.method == "GET": - word = request.args.get("wxid", "") - elif request.method == "POST": - word = request.json.get("wxid", "") - else: - return ReJson(1003, msg="Unsupported method") - - my_wxid = read_session(g.sf, "test", "last") - if not my_wxid: return ReJson(1001, body="my_wxid is required") - merge_path = read_session(g.sf, my_wxid, "merge_path") - user_info = wxid2userinfo(merge_path, merge_path, wxid=word) - return ReJson(0, user_info) - - -@api.route('/api/mywxid', methods=["GET", 'POST']) -@error9999 -def mywxid(): - """ - 获取我的微信id - :return: - """ - my_wxid = read_session(g.sf, "test", "last") - if not my_wxid: return ReJson(1001, body="my_wxid is required") - return ReJson(0, {"my_wxid": my_wxid}) - - -# end 以上为聊天联系人相关api ********************************************************************************************* - -# start 以下为聊天记录相关api ********************************************************************************************* - -@api.route('/api/realtimemsg', methods=["GET", "POST"]) -@error9999 -def get_real_time_msg(): - """ - 获取实时消息 使用 merge_real_time_db()函数 - :return: - """ - my_wxid = read_session(g.sf, "test", "last") - if not my_wxid: return ReJson(1001, body="my_wxid is required") - - merge_path = read_session(g.sf, my_wxid, "merge_path") - key = 
read_session(g.sf, my_wxid, "key") - wx_path = read_session(g.sf, my_wxid, "wx_path") - - if not merge_path or not key or not wx_path or not wx_path: - return ReJson(1002, body="msg_path or media_path or wx_path or key is required") - - code, ret = all_merge_real_time_db(key=key, wx_path=wx_path, merge_path=merge_path) - if code: - return ReJson(0, ret) - else: - return ReJson(2001, body=ret) - - -@api.route('/api/msg_count', methods=["GET", 'POST']) -@error9999 -def msg_count(): - """ - 获取联系人的聊天记录数量 - :return: - """ - if request.method == "GET": - wxid = request.args.get("wxid") - elif request.method == "POST": - wxid = request.json.get("wxid") - else: - return ReJson(1003, msg="Unsupported method") - - my_wxid = read_session(g.sf, "test", "last") - if not my_wxid: return ReJson(1001, body="my_wxid is required") - merge_path = read_session(g.sf, my_wxid, "merge_path") - chat_count = ParsingMSG(merge_path).msg_count(wxid) - if None in chat_count: - chat_count = ParsingPublicMsg(merge_path).msg_count(wxid) - return ReJson(0, chat_count) - - -@api.route('/api/imgsrc/', methods=["GET", 'POST']) -def get_imgsrc(imgsrc): - """ - 获取图片,从网络获取图片,主要功能只是下载图片,缓存到本地 - :return: - """ - if not imgsrc: - return ReJson(1002) - - if imgsrc.startswith("FileStorage"): # 如果是本地图片文件则调用get_img - return get_img(imgsrc) - - # 将?后面的参数连接到imgsrc - imgsrc = imgsrc + "?" 
+ request.query_string.decode("utf-8") if request.query_string else imgsrc - - my_wxid = read_session(g.sf, "test", "last") - if not my_wxid: return ReJson(1001, body="my_wxid is required") - - img_tmp_path = os.path.join(g.tmp_path, my_wxid, "imgsrc") - if not os.path.exists(img_tmp_path): - os.makedirs(img_tmp_path) - file_name = imgsrc.replace("http://", "").replace("https://", "").replace("/", "_").replace("?", "_") - file_name = file_name + ".jpg" - # 如果文件名过长,则将文件明分为目录和文件名 - if len(file_name) > 255: - file_name = file_name[:255] + "/" + file_name[255:] - - img_path_all = os.path.join(img_tmp_path, file_name) - if os.path.exists(img_path_all): - return send_file(img_path_all) - else: - download_file(imgsrc, img_path_all) - if os.path.exists(img_path_all): - return send_file(img_path_all) - else: - return ReJson(4004, body=imgsrc) - - -@api.route('/api/img/', methods=["GET", 'POST']) -@error9999 -def get_img(img_path): - """ - 获取图片 - :return: - """ - - if not img_path: - return ReJson(1002) - - my_wxid = read_session(g.sf, "test", "last") - if not my_wxid: return ReJson(1001, body="my_wxid is required") - wx_path = read_session(g.sf, my_wxid, "wx_path") - - img_path = img_path.replace("\\\\", "\\") - - img_tmp_path = os.path.join(g.tmp_path, my_wxid, "img") - original_img_path = os.path.join(wx_path, img_path) - if os.path.exists(original_img_path): - rc, fomt, md5, out_bytes = dat2img(original_img_path) - if not rc: - return ReJson(1001, body=original_img_path) - imgsavepath = os.path.join(img_tmp_path, img_path + "_" + ".".join([md5, fomt])) - if not os.path.exists(os.path.dirname(imgsavepath)): - os.makedirs(os.path.dirname(imgsavepath)) - with open(imgsavepath, "wb") as f: - f.write(out_bytes) - return send_file(imgsavepath) - else: - return ReJson(1001, body=original_img_path) - - -@api.route('/api/msgs', methods=["GET", 'POST']) -@error9999 -def get_msgs(): - my_wxid = read_session(g.sf, "test", "last") - if not my_wxid: return ReJson(1001, body="my_wxid 
is required") - merge_path = read_session(g.sf, my_wxid, "merge_path") - - start = request.json.get("start") - limit = request.json.get("limit") - wxid = request.json.get("wxid") - - if not wxid: - return ReJson(1002, body=f"wxid is required: {wxid}") - if start and isinstance(start, str) and start.isdigit(): - start = int(start) - if limit and isinstance(limit, str) and limit.isdigit(): - limit = int(limit) - if start is None or limit is None: - return ReJson(1002, body=f"start or limit is required {start} {limit}") - if not isinstance(start, int) and not isinstance(limit, int): - return ReJson(1002, body=f"start or limit is not int {start} {limit}") - - parsing_msg = ParsingMSG(merge_path) - msgs, wxid_list = parsing_msg.msg_list(wxid, start, limit) - if not msgs: - parsing_public_msg = ParsingPublicMsg(merge_path) - msgs, wxid_list = parsing_public_msg.msg_list(wxid, start, limit) - wxid_list.append(my_wxid) - user_list = wxid2userinfo(merge_path, merge_path, wxid_list) - return ReJson(0, {"msg_list": msgs, "user_list": user_list}) - - -@api.route('/api/video/', methods=["GET", 'POST']) -def get_video(videoPath): - my_wxid = read_session(g.sf, "test", "last") - if not my_wxid: return ReJson(1001, body="my_wxid is required") - wx_path = read_session(g.sf, my_wxid, "wx_path") - - videoPath = videoPath.replace("\\\\", "\\") - - video_tmp_path = os.path.join(g.tmp_path, my_wxid, "video") - original_img_path = os.path.join(wx_path, videoPath) - if not os.path.exists(original_img_path): - return ReJson(5002) - # 复制文件到临时文件夹 - video_save_path = os.path.join(video_tmp_path, videoPath) - if not os.path.exists(os.path.dirname(video_save_path)): - os.makedirs(os.path.dirname(video_save_path)) - if os.path.exists(video_save_path): - return send_file(video_save_path) - shutil.copy(original_img_path, video_save_path) - return send_file(original_img_path) - - -@api.route('/api/audio/', methods=["GET", 'POST']) -def get_audio(savePath): - my_wxid = read_session(g.sf, "test", 
"last") - if not my_wxid: return ReJson(1001, body="my_wxid is required") - merge_path = read_session(g.sf, my_wxid, "merge_path") - - savePath = os.path.join(g.tmp_path, my_wxid, "audio", savePath) # 这个是从url中获取的 - if os.path.exists(savePath): - return send_file(savePath) - - MsgSvrID = savePath.split("_")[-1].replace(".wav", "") - if not savePath: - return ReJson(1002) - - # 判断savePath路径的文件夹是否存在 - if not os.path.exists(os.path.dirname(savePath)): - os.makedirs(os.path.dirname(savePath)) - - parsing_media_msg = ParsingMediaMSG(merge_path) - wave_data = parsing_media_msg.get_audio(MsgSvrID, is_play=False, is_wave=True, save_path=savePath, rate=24000) - if not wave_data: - return ReJson(1001, body="wave_data is required") - - if os.path.exists(savePath): - return send_file(savePath) - else: - return ReJson(4004, body=savePath) - - -@api.route('/api/file_info', methods=["GET", 'POST']) -def get_file_info(): - file_path = request.args.get("file_path") - file_path = request.json.get("file_path", file_path) - if not file_path: - return ReJson(1002) - - my_wxid = read_session(g.sf, "test", "last") - if not my_wxid: return ReJson(1001, body="my_wxid is required") - wx_path = read_session(g.sf, my_wxid, "wx_path") - - all_file_path = os.path.join(wx_path, file_path) - if not os.path.exists(all_file_path): - return ReJson(5002) - file_name = os.path.basename(all_file_path) - file_size = os.path.getsize(all_file_path) - return ReJson(0, {"file_name": file_name, "file_size": str(file_size)}) - - -@api.route('/api/file/', methods=["GET", 'POST']) -def get_file(filePath): - my_wxid = read_session(g.sf, "test", "last") - if not my_wxid: return ReJson(1001, body="my_wxid is required") - wx_path = read_session(g.sf, my_wxid, "wx_path") - - all_file_path = os.path.join(wx_path, filePath) - if not os.path.exists(all_file_path): - return ReJson(5002) - return send_file(all_file_path) - - -# end 以上为聊天记录相关api 
********************************************************************************************* - -# start 导出聊天记录 ***************************************************************************************************** - -@api.route('/api/export_endb', methods=["GET", 'POST']) -def get_export_endb(): - """ - 导出加密数据库 - :return: - """ - my_wxid = read_session(g.sf, "test", "last") - if not my_wxid: return ReJson(1001, body="my_wxid is required") - wx_path = read_session(g.sf, my_wxid, "wx_path") - wx_path = request.json.get("wx_path", wx_path) - - if not wx_path: - return ReJson(1002, body=f"wx_path is required: {wx_path}") - if not os.path.exists(wx_path): - return ReJson(1001, body=f"wx_path not exists: {wx_path}") - - # 分割wx_path的文件名和父目录 - code, wxdbpaths = get_core_db(wx_path) - if not code: - return ReJson(2001, body=wxdbpaths) - - outpath = os.path.join(g.tmp_path, "export", my_wxid, "endb") - if not os.path.exists(outpath): - os.makedirs(outpath) - - for wxdb in wxdbpaths: - # 复制wxdb->outpath, os.path.basename(wxdb) - assert isinstance(outpath, str) # 为了解决pycharm的警告, 无实际意义 - shutil.copy(wxdb, os.path.join(outpath, os.path.basename(wxdb))) - return ReJson(0, body=outpath) - - -@api.route('/api/export_dedb', methods=["GET", "POST"]) -def get_export_dedb(): - """ - 导出解密数据库 - :return: - """ - my_wxid = read_session(g.sf, "test", "last") - if not my_wxid: return ReJson(1001, body="my_wxid is required") - - key = request.json.get("key", read_session(g.sf, my_wxid, "key")) - wx_path = request.json.get("wx_path", read_session(g.sf, my_wxid, "wx_path")) - - if not key: - return ReJson(1002, body=f"key is required: {key}") - if not wx_path: - return ReJson(1002, body=f"wx_path is required: {wx_path}") - if not os.path.exists(wx_path): - return ReJson(1001, body=f"wx_path not exists: {wx_path}") - - outpath = os.path.join(g.tmp_path, "export", my_wxid, "dedb") - if not os.path.exists(outpath): - os.makedirs(outpath) - - code, merge_save_path = decrypt_merge(wx_path=wx_path, 
key=key, outpath=outpath) - time.sleep(1) - if code: - return ReJson(0, body=merge_save_path) - else: - return ReJson(2001, body=merge_save_path) - - -@api.route('/api/export_csv', methods=["GET", 'POST']) -def get_export_csv(): - """ - 导出csv - :return: - """ - my_wxid = read_session(g.sf, "test", "last") - if not my_wxid: return ReJson(1001, body="my_wxid is required") - - wxid = request.json.get("wxid") - # st_ed_time = request.json.get("datetime", [0, 0]) - if not wxid: - return ReJson(1002, body=f"username is required: {wxid}") - # if not isinstance(st_ed_time, list) or len(st_ed_time) != 2: - # return ReJson(1002, body=f"datetime is required: {st_ed_time}") - # start, end = st_ed_time - # if not isinstance(start, int) or not isinstance(end, int) or start >= end: - # return ReJson(1002, body=f"datetime is required: {st_ed_time}") - - outpath = os.path.join(g.tmp_path, "export", my_wxid, "csv", wxid) - if not os.path.exists(outpath): - os.makedirs(outpath) - - code, ret = export_csv(wxid, outpath, read_session(g.sf, my_wxid, "merge_path")) - if code: - return ReJson(0, ret) - else: - return ReJson(2001, body=ret) - - -@api.route('/api/export_json', methods=["GET", 'POST']) -def get_export_json(): - """ - 导出json - :return: - """ - my_wxid = read_session(g.sf, "test", "last") - if not my_wxid: return ReJson(1001, body="my_wxid is required") - - wxid = request.json.get("wxid") - if not wxid: - return ReJson(1002, body=f"username is required: {wxid}") - - outpath = os.path.join(g.tmp_path, "export", my_wxid, "json", wxid) - if not os.path.exists(outpath): - os.makedirs(outpath) - - code, ret = export_json(wxid, outpath, read_session(g.sf, my_wxid, "merge_path")) - if code: - return ReJson(0, ret) - else: - return ReJson(2001, body=ret) - - -# @api.route('/api/export', methods=["GET", 'POST']) -# @error9999 -# def export(): -# """ -# 导出聊天记录 -# :return: -# """ -# export_type = request.json.get("export_type") -# start_time = request.json.get("start_time", 0) -# 
end_time = request.json.get("end_time", 0) -# chat_type = request.json.get("chat_type") -# username = request.json.get("username") -# -# wx_path = request.json.get("wx_path", read_session(g.sf, "wx_path")) -# key = request.json.get("key", read_session(g.sf, "key")) -# -# if not export_type or not isinstance(export_type, str): -# return ReJson(1002) -# -# # 导出路径 -# outpath = os.path.join(g.tmp_path, "export", export_type) -# if not os.path.exists(outpath): -# os.makedirs(outpath) -# -# if export_type == "endb": # 导出加密数据库 -# # 获取微信文件夹路径 -# if not wx_path: -# return ReJson(1002) -# if not os.path.exists(wx_path): -# return ReJson(1001, body=wx_path) -# -# # 分割wx_path的文件名和父目录 -# code, wxdbpaths = get_core_db(wx_path) -# if not code: -# return ReJson(2001, body=wxdbpaths) -# -# for wxdb in wxdbpaths: -# # 复制wxdb->outpath, os.path.basename(wxdb) -# shutil.copy(wxdb, os.path.join(outpath, os.path.basename(wxdb))) -# return ReJson(0, body=outpath) -# -# elif export_type == "dedb": -# if isinstance(start_time, int) and isinstance(end_time, int): -# msg_path = read_session(g.sf, "msg_path") -# micro_path = read_session(g.sf, "micro_path") -# media_path = read_session(g.sf, "media_path") -# dbpaths = [msg_path, media_path, micro_path] -# dbpaths = list(set(dbpaths)) -# mergepath = merge_db(dbpaths, os.path.join(outpath, "merge.db"), start_time, end_time) -# return ReJson(0, body=mergepath) -# # if msg_path == media_path and msg_path == media_path: -# # shutil.copy(msg_path, os.path.join(outpath, "merge.db")) -# # return ReJson(0, body=msg_path) -# # else: -# # dbpaths = [msg_path, msg_path, micro_path] -# # dbpaths = list(set(dbpaths)) -# # mergepath = merge_db(dbpaths, os.path.join(outpath, "merge.db"), start_time, end_time) -# # return ReJson(0, body=mergepath) -# else: -# return ReJson(1002, body={"start_time": start_time, "end_time": end_time}) -# -# elif export_type == "csv": -# outpath = os.path.join(outpath, username) -# if not os.path.exists(outpath): -# 
os.makedirs(outpath) -# code, ret = analyzer.export_csv(username, outpath, read_session(g.sf, "msg_path")) -# if code: -# return ReJson(0, ret) -# else: -# return ReJson(2001, body=ret) -# elif export_type == "json": -# outpath = os.path.join(outpath, username) -# if not os.path.exists(outpath): -# os.makedirs(outpath) -# code, ret = analyzer.export_json(username, outpath, read_session(g.sf, "msg_path")) -# if code: -# return ReJson(0, ret) -# else: -# return ReJson(2001, body=ret) -# elif export_type == "html": -# outpath = os.path.join(outpath, username) -# if os.path.exists(outpath): -# shutil.rmtree(outpath) -# if not os.path.exists(outpath): -# os.makedirs(outpath) -# # chat_type_tups = [] -# # for ct in chat_type: -# # tup = analyzer.get_name_typeid(ct) -# # if tup: -# # chat_type_tups += tup -# # if not chat_type_tups: -# # return ReJson(1002) -# -# # 复制文件 html -# export_html = os.path.join(os.path.dirname(pywxdump.VERSION_LIST_PATH), "ui", "export") -# indexhtml_path = os.path.join(export_html, "index.html") -# assets_path = os.path.join(export_html, "assets") -# if not os.path.exists(indexhtml_path) or not os.path.exists(assets_path): -# return ReJson(1001) -# js_path = "" -# css_path = "" -# for file in os.listdir(assets_path): -# if file.endswith('.js'): -# js_path = os.path.join(assets_path, file) -# elif file.endswith('.css'): -# css_path = os.path.join(assets_path, file) -# else: -# continue -# # 读取html,js,css -# with open(indexhtml_path, 'r', encoding='utf-8') as f: -# html = f.read() -# with open(js_path, 'r', encoding='utf-8') as f: -# js = f.read() -# with open(css_path, 'r', encoding='utf-8') as f: -# css = f.read() -# -# html = re.sub(r'', '', html) # 删除所有的script标签 -# html = re.sub(r'', '', html) # 删除所有的link标签 -# -# html = html.replace('', f'') -# html = html.replace('', f'') -# # END 生成index.html -# -# rdata = func_get_msgs(0, 10000000, username, "", "") -# -# msg_list = rdata["msg_list"] -# for i in range(len(msg_list)): -# if 
msg_list[i]["type_name"] == "语音": -# savePath = msg_list[i]["content"]["src"] -# MsgSvrID = savePath.split("_")[-1].replace(".wav", "") -# if not savePath: -# continue -# media_path = read_session(g.sf, "media_path") -# wave_data = read_audio(MsgSvrID, is_wave=True, DB_PATH=media_path) -# if not wave_data: -# continue -# # 判断savePath路径的文件夹是否存在 -# savePath = os.path.join(outpath, savePath) -# if not os.path.exists(os.path.dirname(savePath)): -# os.makedirs(os.path.dirname(savePath)) -# with open(savePath, "wb") as f: -# f.write(wave_data) -# elif msg_list[i]["type_name"] == "图片": -# img_path = msg_list[i]["content"]["src"] -# wx_path = read_session(g.sf, "wx_path") -# img_path_all = os.path.join(wx_path, img_path) -# -# if os.path.exists(img_path_all): -# fomt, md5, out_bytes = read_img_dat(img_path_all) -# imgsavepath = os.path.join(outpath, "img", img_path + "_" + ".".join([md5, fomt])) -# if not os.path.exists(os.path.dirname(imgsavepath)): -# os.makedirs(os.path.dirname(imgsavepath)) -# with open(imgsavepath, "wb") as f: -# f.write(out_bytes) -# msg_list[i]["content"]["src"] = os.path.join("img", img_path + "_" + ".".join([md5, fomt])) -# -# rdata["msg_list"] = msg_list -# rdata["myuserdata"] = rdata["user_list"][rdata["my_wxid"]] -# rdata["myuserdata"]["chat_count"] = len(rdata["msg_list"]) -# save_data = rdata -# save_json_path = os.path.join(outpath, "data") -# if not os.path.exists(save_json_path): -# os.makedirs(save_json_path) -# with open(os.path.join(save_json_path, "msg_user.json"), "w", encoding="utf-8") as f: -# json.dump(save_data, f, ensure_ascii=False) -# -# json_base64 = gen_base64(os.path.join(save_json_path, "msg_user.json")) -# html = html.replace('"./data/msg_user.json"', f'"{json_base64}"') -# -# with open(os.path.join(outpath, "index.html"), 'w', encoding='utf-8') as f: -# f.write(html) -# return ReJson(0, outpath) -# -# elif export_type == "pdf": -# pass -# elif export_type == "docx": -# pass -# else: -# return ReJson(1002) -# -# return 
ReJson(9999, "") - - -# end 导出聊天记录 ******************************************************************************************************* - -# start 聊天记录分析api ************************************************************************************************** - -@api.route('/api/date_count', methods=["GET", 'POST']) -@error9999 -def get_date_count(): - """ - 获取日期统计 - """ - my_wxid = read_session(g.sf, "test", "last") - if not my_wxid: return ReJson(1001, body="my_wxid is required") - merge_path = read_session(g.sf, my_wxid, "merge_path") - date_count = ParsingMSG(merge_path).date_count() - return ReJson(0, date_count) - - -@api.route('/api/wordcloud', methods=["GET", 'POST']) -@error9999 -def wordcloud(): - pass - - -# start 这部分为专业工具的api ********************************************************************************************* - -@api.route('/api/wxinfo', methods=["GET", 'POST']) -@error9999 -def get_wxinfo(): - """ - 获取微信信息 - :return: - """ - import pythoncom - pythoncom.CoInitialize() - wxinfos = read_info(VERSION_LIST) - pythoncom.CoUninitialize() - return ReJson(0, wxinfos) - - -@api.route('/api/biasaddr', methods=["GET", 'POST']) -@error9999 -def biasaddr(): - """ - BiasAddr - :return: - """ - mobile = request.json.get("mobile") - name = request.json.get("name") - account = request.json.get("account") - key = request.json.get("key", "") - wxdbPath = request.json.get("wxdbPath", "") - if not mobile or not name or not account: - return ReJson(1002) - pythoncom.CoInitialize() - rdata = BiasAddr(account, mobile, name, key, wxdbPath).run() - return ReJson(0, str(rdata)) - - -@api.route('/api/decrypt', methods=["GET", 'POST']) -@error9999 -def decrypt(): - """ - 解密 - :return: - """ - key = request.json.get("key") - if not key: - return ReJson(1002) - wxdb_path = request.json.get("wxdbPath") - if not wxdb_path: - return ReJson(1002) - out_path = request.json.get("outPath") - if not out_path: - out_path = g.tmp_path - wxinfos = batch_decrypt(key, wxdb_path, 
out_path=out_path) - return ReJson(0, str(wxinfos)) - - -@api.route('/api/merge', methods=["GET", 'POST']) -@error9999 -def merge(): - """ - 合并 - :return: - """ - wxdb_path = request.json.get("dbPath") - if not wxdb_path: - return ReJson(1002) - out_path = request.json.get("outPath") - if not out_path: - return ReJson(1002) - rdata = merge_db(wxdb_path, out_path) - return ReJson(0, str(rdata)) - - -# END 这部分为专业工具的api *********************************************************************************************** - -# 关于、帮助、设置 ******************************************************************************************************* -@api.route('/api/check_update', methods=["GET", 'POST']) -@error9999 -def check_update(): - """ - 检查更新 - :return: - """ - url = "https://api.github.com/repos/xaoyaoo/PyWxDump/tags" - try: - import requests - res = requests.get(url) - if res.status_code == 200: - data = res.json() - NEW_VERSION = data[0].get("name") - if NEW_VERSION[1:] != pywxdump.__version__: - msg = "有新版本" - else: - msg = "已经是最新版本" - return ReJson(0, body={"msg": msg, "latest_version": NEW_VERSION, - "latest_url": "https://github.com/xaoyaoo/PyWxDump/releases/tag/" + NEW_VERSION}) - else: - return ReJson(2001, body="status_code is not 200") - except Exception as e: - return ReJson(9999, msg=str(e)) - - -@api.route('/api/version', methods=["GET", 'POST']) -@error9999 -def version(): - """ - 版本 - :return: - """ - return ReJson(0, pywxdump.__version__) - - -# END 关于、帮助、设置 *************************************************************************************************** - - -@api.route('/') -@error9999 -def index(): - return render_template('index.html') diff --git a/pywxdump/api/local_server.py b/pywxdump/api/local_server.py new file mode 100644 index 0000000..812eeeb --- /dev/null +++ b/pywxdump/api/local_server.py @@ -0,0 +1,283 @@ +# -*- coding: utf-8 -*-# +# ------------------------------------------------------------------------------- +# Name: local_server.py +# 
Description: +# Author: xaoyaoo +# Date: 2024/08/01 +# ------------------------------------------------------------------------------- +import base64 +import json +import logging +import os +import re +import time +import shutil +import pythoncom +import pywxdump + +from flask import Flask, request, render_template, g, Blueprint, send_file, make_response, session +from pywxdump import get_core_db, all_merge_real_time_db +from pywxdump.api.rjson import ReJson, RqJson +from pywxdump.api.utils import get_conf, get_conf_wxids, set_conf, error9999, gen_base64, validate_title, \ + get_conf_local_wxid, ls_loger +from pywxdump import get_wx_info, WX_OFFS, batch_decrypt, BiasAddr, merge_db, decrypt_merge, merge_real_time_db + +from pywxdump.db import DBHandler, download_file, export_csv, export_json + +ls_api = Blueprint('ls_api', __name__, template_folder='../ui/web', static_folder='../ui/web/assets/', ) +ls_api.debug = False + + +# 以下为初始化相关 ******************************************************************************************************* + +@ls_api.route('/api/ls/init_last_local_wxid', methods=["GET", 'POST']) +@error9999 +def init_last_local_wxid(): + """ + 初始化,包括key + :return: + """ + local_wxid = get_conf_local_wxid(g.caf) + local_wxid.remove(g.at) + if local_wxid: + return ReJson(0, {"local_wxids": local_wxid}) + return ReJson(0, {"local_wxids": []}) + + +@ls_api.route('/api/ls/init_last', methods=["GET", 'POST']) +@error9999 +def init_last(): + """ + 是否初始化 + :return: + """ + my_wxid = request.json.get("my_wxid", "") + my_wxid = my_wxid.strip().strip("'").strip('"') if isinstance(my_wxid, str) else "" + if not my_wxid: + my_wxid = get_conf(g.caf, "auto_setting", "last") + if my_wxid: + set_conf(g.caf, "auto_setting", "last", my_wxid) + merge_path = get_conf(g.caf, my_wxid, "merge_path") + wx_path = get_conf(g.caf, my_wxid, "wx_path") + key = get_conf(g.caf, my_wxid, "key") + rdata = { + "merge_path": merge_path, + "wx_path": wx_path, + "key": key, + "my_wxid": 
my_wxid, + "is_init": True, + } + if merge_path and wx_path: + return ReJson(0, rdata) + return ReJson(0, {"is_init": False, "my_wxid": ""}) + + +@ls_api.route('/api/ls/init_key', methods=["GET", 'POST']) +@error9999 +def init_key(): + """ + 初始化,包括key + :return: + """ + wx_path = request.json.get("wx_path", "").strip().strip("'").strip('"') + key = request.json.get("key", "").strip().strip("'").strip('"') + my_wxid = request.json.get("my_wxid", "").strip().strip("'").strip('"') + if not wx_path: + return ReJson(1002, body=f"wx_path is required: {wx_path}") + if not os.path.exists(wx_path): + return ReJson(1001, body=f"wx_path not exists: {wx_path}") + if not key: + return ReJson(1002, body=f"key is required: {key}") + if not my_wxid: + return ReJson(1002, body=f"my_wxid is required: {my_wxid}") + + # db_config = get_conf(g.caf, my_wxid, "db_config") + # if isinstance(db_config, dict) and db_config and os.path.exists(db_config.get("path")): + # pmsg = DBHandler(db_config) + # # pmsg.close_all_connection() + + out_path = os.path.join(g.work_path, "decrypted", my_wxid) if my_wxid else os.path.join(g.work_path, "decrypted") + # 检查文件夹中文件是否被占用 + if os.path.exists(out_path): + try: + shutil.rmtree(out_path) + except PermissionError as e: + # 显示堆栈信息 + ls_loger.error(f"{e}", exc_info=True) + return ReJson(2001, body=str(e)) + + code, merge_save_path = decrypt_merge(wx_path=wx_path, key=key, outpath=str(out_path)) + time.sleep(1) + if code: + # 移动merge_save_path到g.work_path/my_wxid + if not os.path.exists(os.path.join(g.work_path, my_wxid)): + os.makedirs(os.path.join(g.work_path, my_wxid)) + merge_save_path_new = os.path.join(g.work_path, my_wxid, "merge_all.db") + shutil.move(merge_save_path, str(merge_save_path_new)) + + # 删除out_path + if os.path.exists(out_path): + try: + shutil.rmtree(out_path) + except PermissionError as e: + # 显示堆栈信息 + ls_loger.error(f"{e}", exc_info=True) + db_config = { + "key": "merge_all", + "type": "sqlite", + "path": merge_save_path_new + } + 
set_conf(g.caf, my_wxid, "db_config", db_config) + set_conf(g.caf, my_wxid, "merge_path", merge_save_path_new) + set_conf(g.caf, my_wxid, "wx_path", wx_path) + set_conf(g.caf, my_wxid, "key", key) + set_conf(g.caf, my_wxid, "my_wxid", my_wxid) + set_conf(g.caf, "auto_setting", "last", my_wxid) + rdata = { + "merge_path": merge_save_path_new, + "wx_path": wx_path, + "key": key, + "my_wxid": my_wxid, + "is_init": True, + } + return ReJson(0, rdata) + else: + return ReJson(2001, body=merge_save_path) + + +@ls_api.route('/api/ls/init_nokey', methods=["GET", 'POST']) +@error9999 +def init_nokey(): + """ + 初始化,包括key + :return: + """ + merge_path = request.json.get("merge_path", "").strip().strip("'").strip('"') + wx_path = request.json.get("wx_path", "").strip().strip("'").strip('"') + my_wxid = request.json.get("my_wxid", "").strip().strip("'").strip('"') + + if not wx_path: + return ReJson(1002, body=f"wx_path is required: {wx_path}") + if not os.path.exists(wx_path): + return ReJson(1001, body=f"wx_path not exists: {wx_path}") + if not merge_path: + return ReJson(1002, body=f"merge_path is required: {merge_path}") + if not my_wxid: + return ReJson(1002, body=f"my_wxid is required: {my_wxid}") + + key = get_conf(g.caf, my_wxid, "key") + + set_conf(g.caf, my_wxid, "merge_path", merge_path) + set_conf(g.caf, my_wxid, "wx_path", wx_path) + set_conf(g.caf, my_wxid, "key", key) + set_conf(g.caf, my_wxid, "my_wxid", my_wxid) + set_conf(g.caf, "test", "last", my_wxid) + rdata = { + "merge_path": merge_path, + "wx_path": wx_path, + "key": "", + "my_wxid": my_wxid, + "is_init": True, + } + return ReJson(0, rdata) + + +# END 以上为初始化相关 *************************************************************************************************** + + +@ls_api.route('/api/ls/realtimemsg', methods=["GET", "POST"]) +@error9999 +def get_real_time_msg(): + """ + 获取实时消息 使用 merge_real_time_db()函数 + :return: + """ + my_wxid = get_conf(g.caf, g.at, "last") + if not my_wxid: return ReJson(1001, 
body="my_wxid is required") + + merge_path = get_conf(g.caf, my_wxid, "merge_path") + key = get_conf(g.caf, my_wxid, "key") + wx_path = get_conf(g.caf, my_wxid, "wx_path") + + if not merge_path or not key or not wx_path or not wx_path: + return ReJson(1002, body="msg_path or media_path or wx_path or key is required") + + code, ret = all_merge_real_time_db(key=key, wx_path=wx_path, merge_path=merge_path) + if code: + return ReJson(0, ret) + else: + return ReJson(2001, body=ret) + + +# start 这部分为专业工具的api ********************************************************************************************* + +@ls_api.route('/api/ls/wxinfo', methods=["GET", 'POST']) +@error9999 +def get_wxinfo(): + """ + 获取微信信息 + :return: + """ + import pythoncom + pythoncom.CoInitialize() + wxinfos = get_wx_info(WX_OFFS) + pythoncom.CoUninitialize() + return ReJson(0, wxinfos) + + +@ls_api.route('/api/ls/biasaddr', methods=["GET", 'POST']) +@error9999 +def biasaddr(): + """ + BiasAddr + :return: + """ + mobile = request.json.get("mobile") + name = request.json.get("name") + account = request.json.get("account") + key = request.json.get("key", "") + wxdbPath = request.json.get("wxdbPath", "") + if not mobile or not name or not account: + return ReJson(1002) + pythoncom.CoInitialize() + rdata = BiasAddr(account, mobile, name, key, wxdbPath).run() + return ReJson(0, str(rdata)) + + +@ls_api.route('/api/ls/decrypt', methods=["GET", 'POST']) +@error9999 +def decrypt(): + """ + 解密 + :return: + """ + key = request.json.get("key") + if not key: + return ReJson(1002) + wxdb_path = request.json.get("wxdbPath") + if not wxdb_path: + return ReJson(1002) + out_path = request.json.get("outPath") + if not out_path: + out_path = g.tmp_path + wxinfos = batch_decrypt(key, wxdb_path, out_path=out_path) + return ReJson(0, str(wxinfos)) + + +@ls_api.route('/api/ls/merge', methods=["GET", 'POST']) +@error9999 +def merge(): + """ + 合并 + :return: + """ + wxdb_path = request.json.get("dbPath") + if not wxdb_path: + 
return ReJson(1002) + out_path = request.json.get("outPath") + if not out_path: + return ReJson(1002) + rdata = merge_db(wxdb_path, out_path) + return ReJson(0, str(rdata)) + +# END 这部分为专业工具的api *********************************************************************************************** diff --git a/pywxdump/api/remote_server.py b/pywxdump/api/remote_server.py new file mode 100644 index 0000000..0725e34 --- /dev/null +++ b/pywxdump/api/remote_server.py @@ -0,0 +1,514 @@ +# -*- coding: utf-8 -*-# +# ------------------------------------------------------------------------------- +# Name: chat_api.py +# Description: +# Author: xaoyaoo +# Date: 2024/01/02 +# ------------------------------------------------------------------------------- +import base64 +import json +import logging +import os +import re +import time +import shutil +import pythoncom +import pywxdump + +from flask import Flask, request, render_template, g, Blueprint, send_file, make_response, session +from pywxdump import get_core_db, all_merge_real_time_db +from pywxdump.api.rjson import ReJson, RqJson +from pywxdump.api.utils import get_conf, get_conf_wxids, set_conf, error9999, gen_base64, validate_title, \ + get_conf_local_wxid +from pywxdump import get_wx_info, WX_OFFS, batch_decrypt, BiasAddr, merge_db, decrypt_merge, merge_real_time_db + +from pywxdump.db import DBHandler, download_file, export_csv, export_json +from pywxdump.db.utils import dat2img + +# app = Flask(__name__, static_folder='../ui/web/dist', static_url_path='/') + +rs_api = Blueprint('rs_api', __name__, template_folder='../ui/web', static_folder='../ui/web/assets/', ) +rs_api.debug = False + + +# 是否初始化 +@rs_api.route('/api/rs/is_init', methods=["GET", 'POST']) +@error9999 +def is_init(): + """ + 是否初始化 + :return: + """ + local_wxids = get_conf_local_wxid(g.caf) + if len(local_wxids) > 1: + return ReJson(0, True) + return ReJson(0, False) + + +# start 以下为聊天联系人相关api 
******************************************************************************************* + +@rs_api.route('/api/rs/mywxid', methods=["GET", 'POST']) +@error9999 +def mywxid(): + """ + 获取我的微信id + :return: + """ + my_wxid = get_conf(g.caf, g.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") + return ReJson(0, {"my_wxid": my_wxid}) + + +@rs_api.route('/api/rs/user_session_list', methods=["GET", 'POST']) +@error9999 +def user_session_list(): + """ + 获取联系人列表 + :return: + """ + my_wxid = get_conf(g.caf, g.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") + db_config = get_conf(g.caf, my_wxid, "db_config") + db = DBHandler(db_config) + ret = db.get_session_list() + return ReJson(0, list(ret.values())) + + +@rs_api.route('/api/rs/user_labels_dict', methods=["GET", 'POST']) +@error9999 +def user_labels_dict(): + """ + 获取标签字典 + :return: + """ + my_wxid = get_conf(g.caf, g.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") + db_config = get_conf(g.caf, my_wxid, "db_config") + db = DBHandler(db_config) + user_labels_dict = db.get_labels() + return ReJson(0, user_labels_dict) + + +@rs_api.route('/api/rs/user_list', methods=["GET", 'POST']) +@error9999 +def user_list(): + """ + 获取联系人列表,可用于搜索 + :return: + """ + if request.method == "GET": + word = request.args.get("word", "") + wxids = request.args.get("wxids", []) + labels = request.args.get("labels", []) + elif request.method == "POST": + word = request.json.get("word", "") + wxids = request.json.get("wxids", []) + labels = request.json.get("labels", []) + else: + return ReJson(1003, msg="Unsupported method") + + if isinstance(wxids, str) and wxids == '' or wxids is None: wxids = [] + if isinstance(labels, str) and labels == '' or labels is None: labels = [] + + my_wxid = get_conf(g.caf, g.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") + db_config = get_conf(g.caf, my_wxid, "db_config") + db = 
DBHandler(db_config) + users = db.get_user(word, wxids, labels) + return ReJson(0, users) + + +# end 以上为聊天联系人相关api ********************************************************************************************* + +# start 以下为聊天记录相关api ********************************************************************************************* + + +@rs_api.route('/api/rs/imgsrc/', methods=["GET", 'POST']) +@error9999 +def get_imgsrc(imgsrc): + """ + 获取图片, + 1. 从网络获取图片,主要功能只是下载图片,缓存到本地 + 2. 读取本地图片 + :return: + """ + if not imgsrc: + return ReJson(1002) + if imgsrc.startswith("FileStorage"): # 如果是本地图片文件则调用get_img + my_wxid = get_conf(g.caf, g.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") + wx_path = get_conf(g.caf, my_wxid, "wx_path") + + img_path = imgsrc.replace("\\\\", "\\") + + img_tmp_path = os.path.join(g.work_path, my_wxid, "img") + original_img_path = os.path.join(wx_path, img_path) + if os.path.exists(original_img_path): + rc, fomt, md5, out_bytes = dat2img(original_img_path) + if not rc: + return ReJson(1001, body=original_img_path) + imgsavepath = os.path.join(str(img_tmp_path), img_path + "_" + ".".join([md5, fomt])) + if not os.path.exists(os.path.dirname(imgsavepath)): + os.makedirs(os.path.dirname(imgsavepath)) + with open(imgsavepath, "wb") as f: + f.write(out_bytes) + return send_file(imgsavepath) + else: + return ReJson(1001, body=original_img_path) + elif imgsrc.startswith("http://") or imgsrc.startswith("https://"): + # 将?后面的参数连接到imgsrc + imgsrc = imgsrc + "?" 
+ request.query_string.decode("utf-8") if request.query_string else imgsrc + my_wxid = get_conf(g.caf, g.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") + + img_tmp_path = os.path.join(g.work_path, my_wxid, "imgsrc") + if not os.path.exists(img_tmp_path): + os.makedirs(img_tmp_path) + file_name = imgsrc.replace("http://", "").replace("https://", "").replace("/", "_").replace("?", "_") + file_name = file_name + ".jpg" + # 如果文件名过长,则将文件明分为目录和文件名 + if len(file_name) > 255: + file_name = file_name[:255] + "/" + file_name[255:] + + img_path_all = os.path.join(str(img_tmp_path), file_name) + if os.path.exists(img_path_all): + return send_file(img_path_all) + else: + download_file(imgsrc, img_path_all) + if os.path.exists(img_path_all): + return send_file(img_path_all) + else: + return ReJson(4004, body=imgsrc) + else: + return ReJson(1002, body=imgsrc) + + +@rs_api.route('/api/rs/msg_count', methods=["GET", 'POST']) +@error9999 +def msg_count(): + """ + 获取联系人的聊天记录数量 + :return: + """ + if request.method == "GET": + wxid = request.args.get("wxids", []) + elif request.method == "POST": + wxid = request.json.get("wxids", []) + else: + return ReJson(1003, msg="Unsupported method") + + my_wxid = get_conf(g.caf, g.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") + db_config = get_conf(g.caf, my_wxid, "db_config") + db = DBHandler(db_config) + chat_count = db.get_msg_count(wxid) + chat_count1 = db.get_plc_msg_count(wxid) + # 合并两个字典,相同key,则将value相加 + count = {k: chat_count.get(k, 0) + chat_count1.get(k, 0) for k in + list(set(list(chat_count.keys()) + list(chat_count1.keys())))} + return ReJson(0, count) + + +@rs_api.route('/api/rs/msg_list', methods=["GET", 'POST']) +@error9999 +def get_msgs(): + my_wxid = get_conf(g.caf, g.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") + db_config = get_conf(g.caf, my_wxid, "db_config") + + start = request.json.get("start") + limit = 
request.json.get("limit") + wxid = request.json.get("wxid") + + if not wxid: + return ReJson(1002, body=f"wxid is required: {wxid}") + if start and isinstance(start, str) and start.isdigit(): + start = int(start) + if limit and isinstance(limit, str) and limit.isdigit(): + limit = int(limit) + if start is None or limit is None: + return ReJson(1002, body=f"start or limit is required {start} {limit}") + if not isinstance(start, int) and not isinstance(limit, int): + return ReJson(1002, body=f"start or limit is not int {start} {limit}") + + db = DBHandler(db_config) + msgs, wxid_list = db.get_msg_list(wxid, start, limit) + if not msgs: + msgs, wxid_list = db.get_plc_msg_list(wxid, start, limit) + wxid_list.append(my_wxid) + user_list = db.get_user_list(wxids=wxid_list) + return ReJson(0, {"msg_list": msgs, "user_list": user_list}) + + +@rs_api.route('/api/rs/video/', methods=["GET", 'POST']) +def get_video(videoPath): + my_wxid = get_conf(g.caf, g.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") + wx_path = get_conf(g.caf, my_wxid, "wx_path") + + videoPath = videoPath.replace("\\\\", "\\") + + video_tmp_path = os.path.join(g.work_path, my_wxid, "video") + original_img_path = os.path.join(wx_path, videoPath) + if not os.path.exists(original_img_path): + return ReJson(5002) + # 复制文件到临时文件夹 + video_save_path = os.path.join(video_tmp_path, videoPath) + if not os.path.exists(os.path.dirname(video_save_path)): + os.makedirs(os.path.dirname(video_save_path)) + if os.path.exists(video_save_path): + return send_file(video_save_path) + shutil.copy(original_img_path, video_save_path) + return send_file(original_img_path) + + +@rs_api.route('/api/rs/audio/', methods=["GET", 'POST']) +def get_audio(savePath): + my_wxid = get_conf(g.caf, g.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") + merge_path = get_conf(g.caf, my_wxid, "merge_path") + + savePath = os.path.join(g.work_path, my_wxid, "audio", savePath) # 这个是从url中获取的 + 
if os.path.exists(savePath): + return send_file(savePath) + + MsgSvrID = savePath.split("_")[-1].replace(".wav", "") + if not savePath: + return ReJson(1002) + + # 判断savePath路径的文件夹是否存在 + if not os.path.exists(os.path.dirname(savePath)): + os.makedirs(os.path.dirname(savePath)) + + parsing_media_msg = MediaHandler(merge_path) + wave_data = parsing_media_msg.get_audio(MsgSvrID, is_play=False, is_wave=True, save_path=savePath, rate=24000) + if not wave_data: + return ReJson(1001, body="wave_data is required") + + if os.path.exists(savePath): + return send_file(savePath) + else: + return ReJson(4004, body=savePath) + + +@rs_api.route('/api/rs/file_info', methods=["GET", 'POST']) +def get_file_info(): + file_path = request.args.get("file_path") + file_path = request.json.get("file_path", file_path) + if not file_path: + return ReJson(1002) + + my_wxid = get_conf(g.caf, g.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") + wx_path = get_conf(g.caf, my_wxid, "wx_path") + + all_file_path = os.path.join(wx_path, file_path) + if not os.path.exists(all_file_path): + return ReJson(5002) + file_name = os.path.basename(all_file_path) + file_size = os.path.getsize(all_file_path) + return ReJson(0, {"file_name": file_name, "file_size": str(file_size)}) + + +@rs_api.route('/api/rs/file/', methods=["GET", 'POST']) +def get_file(filePath): + my_wxid = get_conf(g.caf, g.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") + wx_path = get_conf(g.caf, my_wxid, "wx_path") + + all_file_path = os.path.join(wx_path, filePath) + if not os.path.exists(all_file_path): + return ReJson(5002) + return send_file(all_file_path) + + +# end 以上为聊天记录相关api ********************************************************************************************* + +# start 导出聊天记录 ***************************************************************************************************** + +@rs_api.route('/api/rs/export_endb', methods=["GET", 'POST']) +def 
get_export_endb(): + """ + 导出加密数据库 + :return: + """ + my_wxid = get_conf(g.caf, g.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") + wx_path = get_conf(g.caf, my_wxid, "wx_path") + wx_path = request.json.get("wx_path", wx_path) + + if not wx_path: + return ReJson(1002, body=f"wx_path is required: {wx_path}") + if not os.path.exists(wx_path): + return ReJson(1001, body=f"wx_path not exists: {wx_path}") + + # 分割wx_path的文件名和父目录 + code, wxdbpaths = get_core_db(wx_path) + if not code: + return ReJson(2001, body=wxdbpaths) + + outpath = os.path.join(g.work_path, "export", my_wxid, "endb") + if not os.path.exists(outpath): + os.makedirs(outpath) + + for wxdb in wxdbpaths: + # 复制wxdb->outpath, os.path.basename(wxdb) + assert isinstance(outpath, str) # 为了解决pycharm的警告, 无实际意义 + shutil.copy(wxdb, os.path.join(outpath, os.path.basename(wxdb))) + return ReJson(0, body=outpath) + + +@rs_api.route('/api/rs/export_dedb', methods=["GET", "POST"]) +def get_export_dedb(): + """ + 导出解密数据库 + :return: + """ + my_wxid = get_conf(g.caf, g.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") + + key = request.json.get("key", get_conf(g.caf, my_wxid, "key")) + wx_path = request.json.get("wx_path", get_conf(g.caf, my_wxid, "wx_path")) + + if not key: + return ReJson(1002, body=f"key is required: {key}") + if not wx_path: + return ReJson(1002, body=f"wx_path is required: {wx_path}") + if not os.path.exists(wx_path): + return ReJson(1001, body=f"wx_path not exists: {wx_path}") + + outpath = os.path.join(g.work_path, "export", my_wxid, "dedb") + if not os.path.exists(outpath): + os.makedirs(outpath) + + code, merge_save_path = decrypt_merge(wx_path=wx_path, key=key, outpath=outpath) + time.sleep(1) + if code: + return ReJson(0, body=merge_save_path) + else: + return ReJson(2001, body=merge_save_path) + + +@rs_api.route('/api/rs/export_csv', methods=["GET", 'POST']) +def get_export_csv(): + """ + 导出csv + :return: + """ + my_wxid = 
get_conf(g.caf, g.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") + db_config = get_conf(g.caf, my_wxid, "db_config") + + wxid = request.json.get("wxid") + # st_ed_time = request.json.get("datetime", [0, 0]) + if not wxid: + return ReJson(1002, body=f"username is required: {wxid}") + # if not isinstance(st_ed_time, list) or len(st_ed_time) != 2: + # return ReJson(1002, body=f"datetime is required: {st_ed_time}") + # start, end = st_ed_time + # if not isinstance(start, int) or not isinstance(end, int) or start >= end: + # return ReJson(1002, body=f"datetime is required: {st_ed_time}") + + outpath = os.path.join(g.work_path, "export", my_wxid, "csv", wxid) + if not os.path.exists(outpath): + os.makedirs(outpath) + + code, ret = export_csv(wxid, outpath, db_config) + if code: + return ReJson(0, ret) + else: + return ReJson(2001, body=ret) + + +@rs_api.route('/api/rs/export_json', methods=["GET", 'POST']) +def get_export_json(): + """ + 导出json + :return: + """ + my_wxid = get_conf(g.caf, g.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is required") + db_config = get_conf(g.caf, my_wxid, "db_config") + + wxid = request.json.get("wxid") + if not wxid: + return ReJson(1002, body=f"username is required: {wxid}") + + outpath = os.path.join(g.work_path, "export", my_wxid, "json", wxid) + if not os.path.exists(outpath): + os.makedirs(outpath) + + code, ret = export_json(wxid, outpath, db_config) + if code: + return ReJson(0, ret) + else: + return ReJson(2001, body=ret) + + +# end 导出聊天记录 ******************************************************************************************************* + +# start 聊天记录分析api ************************************************************************************************** + +@rs_api.route('/api/rs/date_count', methods=["GET", 'POST']) +@error9999 +def get_date_count(): + """ + 获取日期统计 + """ + my_wxid = get_conf(g.caf, g.at, "last") + if not my_wxid: return ReJson(1001, body="my_wxid is 
required") + merge_path = get_conf(g.caf, my_wxid, "merge_path") + date_count = DBHandler(merge_path).date_count() + return ReJson(0, date_count) + + +@rs_api.route('/api/rs/wordcloud', methods=["GET", 'POST']) +@error9999 +def wordcloud(): + pass + + +# 关于、帮助、设置 ******************************************************************************************************* +@rs_api.route('/api/rs/check_update', methods=["GET", 'POST']) +@error9999 +def check_update(): + """ + 检查更新 + :return: + """ + url = "https://api.github.com/repos/xaoyaoo/PyWxDump/tags" + try: + import requests + res = requests.get(url) + if res.status_code == 200: + data = res.json() + NEW_VERSION = data[0].get("name") + if NEW_VERSION[1:] != pywxdump.__version__: + msg = "有新版本" + else: + msg = "已经是最新版本" + return ReJson(0, body={"msg": msg, "latest_version": NEW_VERSION, + "latest_url": "https://github.com/xaoyaoo/PyWxDump/releases/tag/" + NEW_VERSION}) + else: + return ReJson(2001, body="status_code is not 200") + except Exception as e: + return ReJson(9999, msg=str(e)) + + +@rs_api.route('/api/rs/version', methods=["GET", 'POST']) +@error9999 +def version(): + """ + 版本 + :return: + """ + return ReJson(0, pywxdump.__version__) + + +# END 关于、帮助、设置 *************************************************************************************************** + + +@rs_api.route('/') +@error9999 +def index(): + return render_template('index.html') diff --git a/pywxdump/api/rjson.py b/pywxdump/api/rjson.py index 2e87cf2..dcac799 100644 --- a/pywxdump/api/rjson.py +++ b/pywxdump/api/rjson.py @@ -1,5 +1,7 @@ import logging +loger_rjson = logging.getLogger("rjson") + def ReJson(code: int, body: [dict, list] = None, msg: str = None, error: str = None, extra: dict = None) -> dict: """ @@ -17,7 +19,7 @@ def ReJson(code: int, body: [dict, list] = None, msg: str = None, error: str = N 0: {'code': 0, 'body': body, 'msg': "success", "extra": extra}, # 100 开头代表 请求数据有问题 # 4*** 表示数据库查询结果存在异常 - 1001: {'code': 1001, 'body': body, 
'msg': "请求数据格式存在错误!", "extra": extra}, # 请求数据格式存在错误,一般是数据类型错误 + 1001: {'code': 1001, 'body': body, 'msg': "请求数据格式存在错误!", "extra": extra}, # 请求数据格式存在错误,一般是数据类型错误 1002: {'code': 1002, 'body': body, 'msg': "请求参数存在错误!", "extra": extra}, # 请求参数存在错误,一般是缺少参数 2001: {'code': 2001, 'body': body, 'msg': "操作失败!", "extra": extra}, # 请求未能正确执行 4001: {'code': 4001, 'body': body, 'msg': "账号或密码错误!", "extra": extra}, # 表示用户没有权限(令牌、用户名、密码错误) @@ -31,13 +33,13 @@ def ReJson(code: int, body: [dict, list] = None, msg: str = None, error: str = N } rjson = situation.get(code, {'code': 9999, 'body': None, 'msg': "code错误", "extra": {}}) if code != 0: - logging.warning(f"\n{code} \n{rjson['body']}\n{msg if msg else None}") + loger_rjson.warning(f"\n{code=}\nbody=\n{rjson['body']}\nmsg={msg if msg else None}\n") if body: rjson['body'] = body if msg: rjson['msg'] = msg if error: - logging.error(error) + loger_rjson.error(error, exc_info=True) return rjson diff --git a/pywxdump/api/utils.py b/pywxdump/api/utils.py index d668eeb..af58046 100644 --- a/pywxdump/api/utils.py +++ b/pywxdump/api/utils.py @@ -7,72 +7,74 @@ # ------------------------------------------------------------------------------- import base64 import json -import logging import os import re import traceback from .rjson import ReJson from functools import wraps +import logging + +rs_loger = logging.getLogger("rs_api") +ls_loger = logging.getLogger("ls_api") -def read_session_local_wxid(session_file): +def get_conf_local_wxid(conf_file): try: - with open(session_file, 'r') as f: - session = json.load(f) + with open(conf_file, 'r') as f: + conf = json.load(f) except FileNotFoundError: - logging.error(f"Session file not found: {session_file}") + logging.error(f"Session file not found: {conf_file}") return None except json.JSONDecodeError as e: logging.error(f"Error decoding JSON file: {e}") return None - rdata = [k for k in session.keys() if k != "test"] - return rdata + return list(conf.keys()) -def read_session(session_file, wxid, 
arg): +def get_conf(conf_file, wxid, arg): try: - with open(session_file, 'r') as f: - session = json.load(f) + with open(conf_file, 'r') as f: + conf = json.load(f) except FileNotFoundError: - logging.error(f"Session file not found: {session_file}") + logging.error(f"Session file not found: {conf_file}") return None except json.JSONDecodeError as e: logging.error(f"Error decoding JSON file: {e}") return None - return session.get(wxid, {}).get(arg, None) + return conf.get(wxid, {}).get(arg, None) -def get_session_wxids(session_file): +def get_conf_wxids(conf_file): try: - with open(session_file, 'r') as f: - session = json.load(f) + with open(conf_file, 'r') as f: + conf = json.load(f) except FileNotFoundError: - logging.error(f"Session file not found: {session_file}") + logging.error(f"Session file not found: {conf_file}") return None except json.JSONDecodeError as e: logging.error(f"Error decoding JSON file: {e}") return None - return list(session.keys()) + return list(conf.keys()) -def save_session(session_file, wxid, arg, value): +def set_conf(conf_file, wxid, arg, value): try: - with open(session_file, 'r') as f: - session = json.load(f) + with open(conf_file, 'r') as f: + conf = json.load(f) except FileNotFoundError: - session = {} + conf = {} except json.JSONDecodeError as e: logging.error(f"Error decoding JSON file: {e}") return False - if wxid not in session: - session[wxid] = {} - if not isinstance(session[wxid], dict): - session[wxid] = {} - session[wxid][arg] = value + if wxid not in conf: + conf[wxid] = {} + if not isinstance(conf[wxid], dict): + conf[wxid] = {} + conf[wxid][arg] = value try: - with open(session_file, 'w') as f: - json.dump(session, f, indent=4, ensure_ascii=False) + with open(conf_file, 'w') as f: + json.dump(conf, f, indent=4, ensure_ascii=False) except Exception as e: logging.error(f"Error writing to file: {e}") return False diff --git a/pywxdump/cli.py b/pywxdump/cli.py index 7f44d94..46937dc 100644 --- a/pywxdump/cli.py +++ 
b/pywxdump/cli.py @@ -106,7 +106,7 @@ class MainBiasAddr(BaseSubMainClass): parser.add_argument("--account", type=str, help="微信账号", metavar="", required=True) parser.add_argument("--key", type=str, metavar="", help="(可选)密钥") parser.add_argument("--db_path", type=str, metavar="", help="(可选)已登录账号的微信文件夹路径") - parser.add_argument("-vlp", '--version_list_path', type=str, metavar="", + parser.add_argument("-vlp", '--WX_OFFS_PATH', type=str, metavar="", help="(可选)微信版本偏移文件路径,如有,则自动更新", default=None) @@ -120,7 +120,7 @@ class MainBiasAddr(BaseSubMainClass): account = args.account key = args.key db_path = args.db_path - vlp = args.version_list_path + vlp = args.WX_OFFS_PATH # 调用 run 函数,并传入参数 rdata = BiasAddr(account, mobile, name, key, db_path).run(True, vlp) return rdata @@ -132,18 +132,18 @@ class MainWxInfo(BaseSubMainClass): def init_parses(self, parser): # 添加 'wx_info' 子命令解析器 - parser.add_argument("-vlp", '--version_list_path', metavar="", type=str, - help="(可选)微信版本偏移文件路径", default=VERSION_LIST_PATH) + parser.add_argument("-vlp", '--WX_OFFS_PATH', metavar="", type=str, + help="(可选)微信版本偏移文件路径", default=WX_OFFS_PATH) parser.add_argument("-s", '--save_path', metavar="", type=str, help="(可选)保存路径【json文件】") return parser def run(self, args): print(f"[*] PyWxDump v{pywxdump.__version__}") # 读取微信各版本偏移 - path = args.version_list_path + path = args.WX_OFFS_PATH save_path = args.save_path - version_list = json.load(open(path, "r", encoding="utf-8")) - result = read_info(version_list, True, save_path) # 读取微信信息 + WX_OFFS = json.load(open(path, "r", encoding="utf-8")) + result = get_wx_info(WX_OFFS, True, save_path) # 读取微信信息 return result @@ -153,7 +153,7 @@ class MainWxDbPath(BaseSubMainClass): def init_parses(self, parser): # 添加 'wx_db_path' 子命令解析器 - parser.add_argument("-r", "--require_list", type=str, + parser.add_argument("-r", "--db_types", type=str, help="(可选)需要的数据库名称(eg: -r MediaMSG;MicroMsg;FTSMSG;MSG;Sns;Emotion )", default="all", metavar="") parser.add_argument("-wf", 
"--wx_files", type=str, help="(可选)'WeChat Files'路径", default=None, @@ -164,12 +164,13 @@ class MainWxDbPath(BaseSubMainClass): def run(self, args): # 从命令行参数获取值 - require_list = args.require_list + db_types = args.require_list msg_dir = args.wx_files wxid = args.wxid - user_dirs = get_wechat_db(require_list, msg_dir, wxid, True) # 获取微信数据库路径 - return user_dirs + ret = get_wx_db(msg_dir=msg_dir, db_types=db_types, wxids=wxid) + for i in ret: print(i) + return ret class MainDecrypt(BaseSubMainClass): diff --git a/pywxdump/db/__init__.py b/pywxdump/db/__init__.py new file mode 100644 index 0000000..1d2a3c4 --- /dev/null +++ b/pywxdump/db/__init__.py @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*-# +# ------------------------------------------------------------------------------- +# Name: __init__.py.py +# Description: +# Author: xaoyaoo +# Date: 2024/04/15 +# ------------------------------------------------------------------------------- +import pandas as pd + +from .utils import download_file + +from .dbFavorite import FavoriteHandler +from .dbMSG import MsgHandler +from .dbMicro import MicroHandler +from .dbMedia import MediaHandler +from .dbOpenIMContact import OpenIMContactHandler +from .dbPublicMsg import PublicMsgHandler +from .dbOpenIMMedia import OpenIMMediaHandler + +from .export.exportCSV import export_csv +from .export.exportJSON import export_json + + +class DBHandler(MicroHandler, MediaHandler, OpenIMContactHandler, PublicMsgHandler, OpenIMMediaHandler, + FavoriteHandler): + _class_name = "DBHandler" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.MSG_exist = self.Msg_tables_exist() + self.Micro_exist = self.Micro_tables_exist() + self.Media_exist = self.Media_tables_exist() + self.OpenIMContact_exist = self.OpenIMContact_tables_exist() + self.PublicMsg_exist = self.PublicMSG_tables_exist() + self.OpenIMMedia_exist = self.OpenIMMedia_tables_exist() + self.Favorite_exist = self.Favorite_tables_exist() + + # print(self.MSG_exist, 
self.Micro_exist, self.Media_exist, self.OpenIMContact_exist, self.PublicMsg_exist, + # self.OpenIMMedia_exist, self.Favorite_exist) + + def get_user(self, word=None, wxids=None, labels=None): + """ + 获取联系人列表 + """ + users = self.get_user_list(word=word, wxids=wxids, label_ids=labels) + if self.OpenIMContact_exist: + users.update(self.get_im_user_list(word=word, wxids=wxids)) + return users diff --git a/pywxdump/dbpreprocess/parsingFavorite.py b/pywxdump/db/dbFavorite.py similarity index 91% rename from pywxdump/dbpreprocess/parsingFavorite.py rename to pywxdump/db/dbFavorite.py index b6a731e..e5314d6 100644 --- a/pywxdump/dbpreprocess/parsingFavorite.py +++ b/pywxdump/db/dbFavorite.py @@ -18,28 +18,15 @@ from .utils import timestamp2str, xml2dict # * FavTags:为收藏内容添加的标签 -def FavoriteTypeId2Name(Type): - TypeNameDict = { - 1: "文本", # 文本 已测试 - 2: "图片", # 图片 已测试 - 3: "语音", # 语音 - 4: "视频", # 视频 已测试 - 5: "链接", # 链接 已测试 - 6: "位置", # 位置 - 7: "小程序", # 小程序 - 8: "文件", # 文件 已测试 - 14: "聊天记录", # 聊天记录 已测试 - 16: "群聊视频", # 群聊中的视频 可能 - 18: "笔记" # 笔记 已测试 - } - return TypeNameDict.get(Type, "未知") - - -class ParsingFavorite(DatabaseBase): +class FavoriteHandler(DatabaseBase): _class_name = "Favorite" + Favorite_required_tables = ["FavItems", "FavDataItem", "FavTagDatas", "FavBindTagDatas"] - def __init__(self, db_path): - super().__init__(db_path) + def Favorite_tables_exist(self): + """ + 判断该类所需要的表是否存在 + """ + return self.check_tables_exist(self.Favorite_required_tables) def get_tags(self, LocalID): """ @@ -49,7 +36,7 @@ class ParsingFavorite(DatabaseBase): sql = "select LocalID, TagName from FavTagDatas order by ServerSeq" else: sql = "select LocalID, TagName from FavTagDatas where LocalID = '%s' order by ServerSeq " % LocalID - tags = self.execute_sql(sql) # [(1, 797940830, '程序语言类'), (2, 806153863, '账单')] + tags = self.execute(sql) # [(1, 797940830, '程序语言类'), (2, 806153863, '账单')] # 转换为字典 tags = {tag[0]: tag[1] for tag in tags} return tags @@ -59,7 +46,7 @@ class 
ParsingFavorite(DatabaseBase): return: [(FavLocalID, TagName)] """ sql = "select A.FavLocalID, B.TagName from FavBindTagDatas A, FavTagDatas B where A.TagLocalID = B.LocalID" - FavBindTags = self.execute_sql(sql) + FavBindTags = self.execute(sql) return FavBindTags def get_favorite(self): @@ -124,8 +111,8 @@ class ParsingFavorite(DatabaseBase): sql1 = "select " + ",".join(FavItemsFields.keys()) + " from FavItems order by UpdateTime desc" sql2 = "select " + ",".join(FavDataItemFields.keys()) + " from FavDataItem B order by B.RecId asc" - FavItemsList = self.execute_sql(sql1) - FavDataItemList = self.execute_sql(sql2) + FavItemsList = self.execute(sql1) + FavDataItemList = self.execute(sql2) if FavItemsList is None or len(FavItemsList) == 0: return False @@ -152,3 +139,20 @@ class ParsingFavorite(DatabaseBase): pf = pf.fillna("") # 去掉Nan rdata = pf.to_dict(orient="records") return rdata + + +def FavoriteTypeId2Name(Type): + TypeNameDict = { + 1: "文本", # 文本 已测试 + 2: "图片", # 图片 已测试 + 3: "语音", # 语音 + 4: "视频", # 视频 已测试 + 5: "链接", # 链接 已测试 + 6: "位置", # 位置 + 7: "小程序", # 小程序 + 8: "文件", # 文件 已测试 + 14: "聊天记录", # 聊天记录 已测试 + 16: "群聊视频", # 群聊中的视频 可能 + 18: "笔记" # 笔记 已测试 + } + return TypeNameDict.get(Type, "未知") diff --git a/pywxdump/dbpreprocess/parsingMSG.py b/pywxdump/db/dbMSG.py similarity index 61% rename from pywxdump/dbpreprocess/parsingMSG.py rename to pywxdump/db/dbMSG.py index 458bf09..3a06e5a 100644 --- a/pywxdump/dbpreprocess/parsingMSG.py +++ b/pywxdump/db/dbMSG.py @@ -8,17 +8,288 @@ import json import os import re +# import time -import pandas as pd +# import pandas as pd from .dbbase import DatabaseBase -from .utils import get_md5, name2typeid, typeid2name, type_converter, timestamp2str, xml2dict, match_BytesExtra +from .utils import db_error, timestamp2str, xml2dict, match_BytesExtra, type_converter, \ + get_md5, name2typeid, db_loger import lz4.block import blackboxprotobuf -class ParsingMSG(DatabaseBase): +class MsgHandler(DatabaseBase): _class_name = "MSG" + 
MSG_required_tables = ["MSG"] + + def Msg_tables_exist(self): + """ + 判断该类所需要的表是否存在 + """ + return self.check_tables_exist(self.MSG_required_tables) + + @db_error + def get_msg_count(self, wxids: list = ""): + """ + 获取聊天记录数量,根据wxid获取单个联系人的聊天记录数量,不传wxid则获取所有联系人的聊天记录数量 + :param wxids: wxid list + :return: 聊天记录数量列表 {wxid: chat_count} + """ + if isinstance(wxids, str): + wxids = [wxids] + if wxids: + wxids = "('" + "','".join(wxids) + "')" + sql = f"SELECT StrTalker, COUNT(*) FROM MSG WHERE StrTalker IN {wxids} GROUP BY StrTalker ORDER BY COUNT(*) DESC;" + else: + sql = f"SELECT StrTalker, COUNT(*) FROM MSG GROUP BY StrTalker ORDER BY COUNT(*) DESC;" + sql_total = f"SELECT COUNT(*) FROM MSG;" + + result = self.execute(sql) + total_ret = self.execute(sql_total) + + if not result: + return {} + total = 0 + if total_ret and len(total_ret) > 0: + total = total_ret[0][0] + + msg_count = {"total": total} + msg_count.update({row[0]: row[1] for row in result}) + return msg_count + + # 单条消息处理 + @db_error + def get_msg_detail(self, row): + """ + 获取单条消息详情,格式化输出 + """ + (localId, TalkerId, MsgSvrID, Type, SubType, CreateTime, IsSender, Sequence, StatusEx, FlagEx, Status, + MsgSequence, StrContent, MsgServerSeq, StrTalker, DisplayContent, Reserved0, Reserved1, Reserved3, + Reserved4, Reserved5, Reserved6, CompressContent, BytesExtra, BytesTrans, Reserved2, _id) = row + + CreateTime = timestamp2str(CreateTime) + + type_id = (Type, SubType) + type_name = type_converter(type_id) + + msg = StrContent + src = "" + + if type_id == (1, 0): # 文本 + msg = StrContent + + elif type_id == (3, 0): # 图片 + DictExtra = get_BytesExtra(BytesExtra) + DictExtra_str = str(DictExtra) + img_paths = [i for i in re.findall(r"(FileStorage.*?)'", DictExtra_str)] + img_paths = sorted(img_paths, key=lambda p: "Image" in p, reverse=True) + if img_paths: + img_path = img_paths[0].replace("'", "") + img_path = [i for i in img_path.split("\\") if i] + img_path = os.path.join(*img_path) + src = img_path + else: + 
src = "" + msg = "图片" + elif type_id == (34, 0): # 语音 + tmp_c = xml2dict(StrContent) + voicelength = tmp_c.get("voicemsg", {}).get("voicelength", "") + transtext = tmp_c.get("voicetrans", {}).get("transtext", "") + if voicelength.isdigit(): + voicelength = int(voicelength) / 1000 + voicelength = f"{voicelength:.2f}" + msg = f"语音时长:{voicelength}秒\n翻译结果:{transtext}" if transtext else f"语音时长:{voicelength}秒" + src = os.path.join("audio", f"{StrTalker}", + f"{CreateTime.replace(':', '-').replace(' ', '_')}_{IsSender}_{MsgSvrID}.wav") + elif type_id == (43, 0): # 视频 + DictExtra = get_BytesExtra(BytesExtra) + DictExtra = str(DictExtra) + + DictExtra_str = str(DictExtra) + video_paths = [i for i in re.findall(r"(FileStorage.*?)'", DictExtra_str)] + video_paths = sorted(video_paths, key=lambda p: "mp4" in p, reverse=True) + if video_paths: + video_path = video_paths[0].replace("'", "") + video_path = [i for i in video_path.split("\\") if i] + video_path = os.path.join(*video_path) + src = video_path + else: + src = "" + msg = "视频" + + elif type_id == (47, 0): # 动画表情 + content_tmp = xml2dict(StrContent) + cdnurl = content_tmp.get("emoji", {}).get("cdnurl", "") + if not cdnurl: + DictExtra = get_BytesExtra(BytesExtra) + cdnurl = match_BytesExtra(DictExtra) + if cdnurl: + content = {"src": cdnurl, "msg": "表情"} + + elif type_id == (48, 0): # 地图信息 + content_tmp = xml2dict(StrContent) + location = content_tmp.get("location", {}) + msg = (f"纬度:【{location.pop('x')}】 经度:【{location.pop('y')}】\n" + f"位置:{location.pop('label')} {location.pop('poiname')}\n" + f"其他信息:{json.dumps(location, ensure_ascii=False, indent=4)}" + ) + src = "" + elif type_id == (49, 0): # 文件 + DictExtra = get_BytesExtra(BytesExtra) + url = match_BytesExtra(DictExtra) + src = url + file_name = os.path.basename(url) + msg = file_name + + elif type_id == (49, 5): # (分享)卡片式链接 + CompressContent = decompress_CompressContent(CompressContent) + CompressContent_tmp = xml2dict(CompressContent) + appmsg = 
CompressContent_tmp.get("appmsg", {}) + title = appmsg.get("title", "") + des = appmsg.get("des", "") + url = appmsg.get("url", "") + msg = f"{title}\n{des}\n\n{url}" + src = url + + elif type_id == (49, 19): # 合并转发的聊天记录 + CompressContent = decompress_CompressContent(CompressContent) + content_tmp = xml2dict(CompressContent) + title = content_tmp.get("appmsg", {}).get("title", "") + des = content_tmp.get("appmsg", {}).get("des", "") + recorditem = content_tmp.get("appmsg", {}).get("recorditem", "") + recorditem = xml2dict(recorditem) + msg = f"{title}\n{des}" + src = recorditem + + elif type_id == (49, 57): # 带有引用的文本消息 + CompressContent = decompress_CompressContent(CompressContent) + content_tmp = xml2dict(CompressContent) + appmsg = content_tmp.get("appmsg", {}) + title = appmsg.get("title", "") + refermsg = appmsg.get("refermsg", {}) + displayname = refermsg.get("displayname", "") + display_content = refermsg.get("content", "") + display_createtime = refermsg.get("createtime", "") + display_createtime = timestamp2str( + int(display_createtime)) if display_createtime.isdigit() else display_createtime + if display_content.startswith("=? ", param + (start_createtime,)) if start_createtime else ( + "", param) + sql_end_createtime, param = ("AND CreateTime<=? ", param + (end_createtime,)) if end_createtime else ("", param) + + sql = ( + f"{sql_base} WHERE 1=1 " + f"{sql_wxid}" + f"{sql_type}" + f"{sql_sub_type}" + f"{sql_start_createtime}" + f"{sql_end_createtime}" + f"ORDER BY CreateTime ASC LIMIT ?,?" 
+ ) + param = param + (start_index, page_size) + result = self.execute(sql, param) + if not result: + return [], [] + + result_data = (self.get_msg_detail(row) for row in result) + rdata = list(result_data) # 转为列表 + wxid_list = {d['talker'] for d in rdata} # 创建一个无重复的 wxid 列表 + + return rdata, list(wxid_list) + + +@db_error +def decompress_CompressContent(data): + """ + 解压缩Msg:CompressContent内容 + :param data: CompressContent内容 bytes + :return: + """ + if data is None or not isinstance(data, bytes): + return None + try: + dst = lz4.block.decompress(data, uncompressed_size=len(data) << 8) + dst = dst.replace(b'\x00', b'') # 已经解码完成后,还含有0x00的部分,要删掉,要不后面ET识别的时候会报错 + uncompressed_data = dst.decode('utf-8', errors='ignore') + return uncompressed_data + except Exception as e: + return data.decode('utf-8', errors='ignore') + + +@db_error +def get_BytesExtra(BytesExtra): BytesExtra_message_type = { "1": { "type": "message", @@ -242,295 +513,10 @@ class ParsingMSG(DatabaseBase): } } } - - def __init__(self, db_path): - super().__init__(db_path) - - def decompress_CompressContent(self, data): - """ - 解压缩Msg:CompressContent内容 - :param data: CompressContent内容 bytes - :return: - """ - if data is None or not isinstance(data, bytes): - return None - try: - dst = lz4.block.decompress(data, uncompressed_size=len(data) << 8) - dst = dst.replace(b'\x00', b'') # 已经解码完成后,还含有0x00的部分,要删掉,要不后面ET识别的时候会报错 - uncompressed_data = dst.decode('utf-8', errors='ignore') - return uncompressed_data - except Exception as e: - return data.decode('utf-8', errors='ignore') - - def get_BytesExtra(self, BytesExtra): - if BytesExtra is None or not isinstance(BytesExtra, bytes): - return None - try: - deserialize_data, message_type = blackboxprotobuf.decode_message(BytesExtra, self.BytesExtra_message_type) - return deserialize_data - except Exception as e: - return None - - def msg_count(self, wxid: str = ""): - """ - 获取聊天记录数量,根据wxid获取单个联系人的聊天记录数量,不传wxid则获取所有联系人的聊天记录数量 - :param MSG_db_path: MSG.db 文件路径 - 
:return: 聊天记录数量列表 {wxid: chat_count} - """ - if wxid: - sql = f"SELECT StrTalker, COUNT(*) FROM MSG WHERE StrTalker='{wxid}';" - else: - sql = f"SELECT StrTalker, COUNT(*) FROM MSG GROUP BY StrTalker ORDER BY COUNT(*) DESC;" - - result = self.execute_sql(sql) - if not result: - return {} - df = pd.DataFrame(result, columns=["wxid", "msg_count"]) - # # 排序 - df = df.sort_values(by="msg_count", ascending=False) - # chat_counts : {wxid: chat_count} - chat_counts = df.set_index("wxid").to_dict()["msg_count"] - return chat_counts - - def msg_count_total(self): - """ - 获取聊天记录总数 - :return: 聊天记录总数 - """ - sql = "SELECT COUNT(*) FROM MSG;" - result = self.execute_sql(sql) - if result and len(result) > 0: - chat_counts = result[0][0] - return chat_counts - return 0 - - # def room_user_list(self, selected_talker): - # """ - # 获取群聊中包含的所有用户列表 - # :param MSG_db_path: MSG.db 文件路径 - # :param selected_talker: 选中的聊天对象 wxid - # :return: 聊天用户列表 - # """ - # sql = ( - # "SELECT localId, IsSender, StrContent, StrTalker, Sequence, Type, SubType,CreateTime,MsgSvrID,DisplayContent,CompressContent,BytesExtra,ROW_NUMBER() OVER (ORDER BY CreateTime ASC) AS id " - # "FROM MSG WHERE StrTalker=? 
" - # "ORDER BY CreateTime ASC") - # - # result1 = self.execute_sql(sql, (selected_talker,)) - # user_list = [] - # read_user_wx_id = [] - # for row in result1: - # localId, IsSender, StrContent, StrTalker, Sequence, Type, SubType, CreateTime, MsgSvrID, DisplayContent, CompressContent, BytesExtra, id = row - # bytes_extra = self.get_BytesExtra(BytesExtra) - # if bytes_extra: - # try: - # talker = bytes_extra['3'][0]['2'].decode('utf-8', errors='ignore') - # except: - # continue - # if talker in read_user_wx_id: - # continue - # user = get_contact(MSG_db_path, talker) - # if not user: - # continue - # user_list.append(user) - # read_user_wx_id.append(talker) - # return user_list - - # 单条消息处理 - def msg_detail(self, row): - """ - 获取单条消息详情,格式化输出 - """ - (localId, IsSender, StrContent, StrTalker, Sequence, Type, SubType, CreateTime, MsgSvrID, - DisplayContent, CompressContent, BytesExtra, id) = row - CreateTime = timestamp2str(CreateTime) - - type_id = (Type, SubType) - type_name = typeid2name(type_id) - - content = {"src": "", "msg": StrContent} - - if type_id == (1, 0): # 文本 - content["msg"] = StrContent - - elif type_id == (3, 0): # 图片 - DictExtra = self.get_BytesExtra(BytesExtra) - DictExtra_str = str(DictExtra) - img_paths = [i for i in re.findall(r"(FileStorage.*?)'", DictExtra_str)] - img_paths = sorted(img_paths, key=lambda p: "Image" in p, reverse=True) - if img_paths: - img_path = img_paths[0].replace("'", "") - img_path = [i for i in img_path.split("\\") if i] - img_path = os.path.join(*img_path) - content["src"] = img_path - else: - content["src"] = "" - content["msg"] = "图片" - elif type_id == (34, 0): # 语音 - tmp_c = xml2dict(StrContent) - voicelength = tmp_c.get("voicemsg", {}).get("voicelength", "") - transtext = tmp_c.get("voicetrans", {}).get("transtext", "") - if voicelength.isdigit(): - voicelength = int(voicelength) / 1000 - voicelength = f"{voicelength:.2f}" - content[ - "msg"] = f"语音时长:{voicelength}秒\n翻译结果:{transtext}" if transtext else 
f"语音时长:{voicelength}秒" - content["src"] = os.path.join("audio", f"{StrTalker}", - f"{CreateTime.replace(':', '-').replace(' ', '_')}_{IsSender}_{MsgSvrID}.wav") - elif type_id == (43, 0): # 视频 - DictExtra = self.get_BytesExtra(BytesExtra) - DictExtra = str(DictExtra) - - DictExtra_str = str(DictExtra) - video_paths = [i for i in re.findall(r"(FileStorage.*?)'", DictExtra_str)] - video_paths = sorted(video_paths, key=lambda p: "mp4" in p, reverse=True) - if video_paths: - video_path = video_paths[0].replace("'", "") - video_path = [i for i in video_path.split("\\") if i] - video_path = os.path.join(*video_path) - content["src"] = video_path - else: - content["src"] = "" - content["msg"] = "视频" - - elif type_id == (47, 0): # 动画表情 - content_tmp = xml2dict(StrContent) - cdnurl = content_tmp.get("emoji", {}).get("cdnurl", "") - if not cdnurl: - DictExtra = self.get_BytesExtra(BytesExtra) - cdnurl = match_BytesExtra(DictExtra) - if cdnurl: - content = {"src": cdnurl, "msg": "表情"} - - elif type_id == (48, 0): # 地图信息 - content_tmp = xml2dict(StrContent) - location = content_tmp.get("location", {}) - content["msg"] = (f"纬度:【{location.pop('x')}】 经度:【{location.pop('y')}】\n" - f"位置:{location.pop('label')} {location.pop('poiname')}\n" - f"其他信息:{json.dumps(location, ensure_ascii=False, indent=4)}" - ) - content["src"] = "" - elif type_id == (49, 0): # 文件 - DictExtra = self.get_BytesExtra(BytesExtra) - url = match_BytesExtra(DictExtra) - content["src"] = url - file_name = os.path.basename(url) - content["msg"] = file_name - - elif type_id == (49, 5): # (分享)卡片式链接 - CompressContent = self.decompress_CompressContent(CompressContent) - CompressContent_tmp = xml2dict(CompressContent) - appmsg = CompressContent_tmp.get("appmsg", {}) - title = appmsg.get("title", "") - des = appmsg.get("des", "") - url = appmsg.get("url", "") - content["msg"] = f"{title}\n{des}\n\n{url}" - content["src"] = url - - elif type_id == (49, 19): # 合并转发的聊天记录 - CompressContent = 
self.decompress_CompressContent(CompressContent) - content_tmp = xml2dict(CompressContent) - title = content_tmp.get("appmsg", {}).get("title", "") - des = content_tmp.get("appmsg", {}).get("des", "") - recorditem = content_tmp.get("appmsg", {}).get("recorditem", "") - recorditem = xml2dict(recorditem) - content["msg"] = f"{title}\n{des}" - content["src"] = recorditem - - elif type_id == (49, 57): # 带有引用的文本消息 - CompressContent = self.decompress_CompressContent(CompressContent) - content_tmp = xml2dict(CompressContent) - appmsg = content_tmp.get("appmsg", {}) - title = appmsg.get("title", "") - refermsg = appmsg.get("refermsg", {}) - displayname = refermsg.get("displayname", "") - display_content = refermsg.get("content", "") - display_createtime = refermsg.get("createtime", "") - display_createtime = timestamp2str( - int(display_createtime)) if display_createtime.isdigit() else display_createtime - if display_content.startswith(" 1007911408000 GROUP BY Username " + ") AS SubQuery JOIN ChatInfo A " + "ON A.Username = SubQuery.Username AND LastReadedCreateTime = SubQuery.MaxLastReadedCreateTime " + "ORDER BY A.LastReadedCreateTime DESC;" + ) + result = self.execute(sql) + if not result: + return [] + for row in result: + # 获取用户名、昵称、备注和聊天记录数量 + username, LastReadedCreateTime, LastReadedSvrId = row + LastReadedCreateTime = timestamp2str(LastReadedCreateTime) if LastReadedCreateTime else None + users.append( + {"wxid": username, "LastReadedCreateTime": LastReadedCreateTime, "LastReadedSvrId": LastReadedSvrId}) + return users + + @db_error + def get_user_list(self, word: str = None, wxids: list = None, label_ids: list = None): + """ + 获取联系人列表 + [ 注意:如果修改这个函数,要同时修改dbOpenIMContact.py中的get_im_user_list函数 ] + :param word: 查询关键字,可以是wxid,用户名、昵称、备注、描述,允许拼音 + :param wxids: wxid列表 + :param label_ids: 标签id + :return: 联系人字典 + """ + if isinstance(wxids, str): + wxids = [wxids] + if isinstance(label_ids, str): + label_ids = [label_ids] + + users = {} + + sql = ( + "SELECT A.UserName, 
A.Alias, A.DelFlag, A.Type, A.VerifyFlag, A.Reserved1, A.Reserved2," + "A.Remark, A.NickName, A.LabelIDList, A.ChatRoomType, A.ChatRoomNotify, A.Reserved5," + "A.Reserved6 as describe, A.ExtraBuf, B.bigHeadImgUrl " + "FROM Contact A LEFT JOIN ContactHeadImgUrl B ON A.UserName = B.usrName WHERE 1==1 ;" + ) + if word: + sql = sql.replace(";", + f"AND ( A.UserName LIKE '%{word}%' " + f"OR A.NickName LIKE '%{word}%' " + f"OR A.Remark LIKE '%{word}%' " + f"OR A.Alias LIKE '%{word}%' " + f"OR LOWER(A.QuanPin) LIKE LOWER('%{word}%') " + f"OR LOWER(A.PYInitial) LIKE LOWER('%{word}%') " + f"OR LOWER(A.RemarkQuanPin) LIKE LOWER('%{word}%') " + f"OR LOWER(A.RemarkPYInitial) LIKE LOWER('%{word}%') " + f") " + ";") + if wxids: + sql = sql.replace(";", f"AND A.UserName IN ('" + "','".join(wxids) + "') ;") + + if label_ids: + sql_label = [f"A.LabelIDList LIKE '%{i}%' " for i in label_ids] + sql_label = " OR ".join(sql_label) + sql = sql.replace(";", f"AND ({sql_label}) ;") + + result = self.execute(sql) + if not result: + return users + id2label = self.get_labels() + for row in result: + # 获取wxid,昵称,备注,描述,头像,标签 + (UserName, Alias, DelFlag, Type, VerifyFlag, Reserved1, Reserved2, Remark, NickName, LabelIDList, + ChatRoomType, ChatRoomNotify, Reserved5, describe, ExtraBuf, bigHeadImgUrl) = row + + ExtraBuf = get_ExtraBuf(ExtraBuf) + LabelIDList = LabelIDList.split(",") if LabelIDList else [] + LabelIDList = [id2label.get(int(label_id), label_id) for label_id in LabelIDList if label_id] + + # print(f"{UserName=}\n{Alias=}\n{DelFlag=}\n{Type=}\n{VerifyFlag=}\n{Reserved1=}\n{Reserved2=}\n" + # f"{Remark=}\n{NickName=}\n{LabelIDList=}\n{ChatRoomType=}\n{ChatRoomNotify=}\n{Reserved5=}\n" + # f"{describe=}\n{ExtraBuf=}\n{bigHeadImgUrl=}") + users[UserName] = { + "wxid": UserName, "nickname": NickName, "remark": Remark, "account": Alias, + "describe": describe, "headImgUrl": bigHeadImgUrl if bigHeadImgUrl else "", + "ExtraBuf": ExtraBuf, "LabelIDList": tuple(LabelIDList)} + return users + 
+ @db_error + def get_room_list(self, word=None, roomwxids: list = None): + """ + 获取群聊列表 + :param word: 群聊搜索词 + :param roomwxids: 群聊wxid列表 + :return: 群聊字典 + """ + # 连接 MicroMsg.db 数据库,并执行查询 + if isinstance(roomwxids, str): + roomwxids = [roomwxids] + + sql = ( + "SELECT A.ChatRoomName,A.UserNameList,A.DisplayNameList,A.ChatRoomFlag,A.IsShowName," + "A.SelfDisplayName,A.Reserved2,A.RoomData, " + "B.Announcement,B.AnnouncementEditor,B.AnnouncementPublishTime " + "FROM ChatRoom A LEFT JOIN ChatRoomInfo B ON A.ChatRoomName==B.ChatRoomName " + "WHERE 1==1 ;") + if word: + sql = sql.replace(";", + f"AND A.ChatRoomName LIKE '%{word}%' ;") + if roomwxids: + sql = sql.replace(";", f"AND A.UserName IN ('" + "','".join(roomwxids) + "') ;") + + rooms = {} + result = self.execute(sql) + if not result: + return rooms + + for row in result: + # 获取用户名、昵称、备注和聊天记录数量 + (ChatRoomName, UserNameList, DisplayNameList, ChatRoomFlag, IsShowName, SelfDisplayName, + Reserved2, RoomData, + Announcement, AnnouncementEditor, AnnouncementPublishTime) = row + + UserNameList = UserNameList.split("^G") + DisplayNameList = DisplayNameList.split("^G") + + RoomData = ChatRoom_RoomData(RoomData) + wxid2remark = {} + if RoomData: + rd = [] + for k, v in RoomData.items(): + if isinstance(v, list): + rd += v + for i in rd: + try: + if isinstance(i, dict) and isinstance(i.get('1'), str) and i.get('2'): + wxid2remark[i['1']] = i["2"] + except Exception as e: + db_loger.error(f"wxid2remark: ChatRoomName:{ChatRoomName}, {i} error:{e}", exc_info=True) + rooms[ChatRoomName] = { + "wxid": ChatRoomName, "UserNameList": UserNameList, "DisplayNameList": DisplayNameList, + "ChatRoomFlag": ChatRoomFlag, "IsShowName": IsShowName, "SelfDisplayName": SelfDisplayName, + "owner": Reserved2, "wxid2remark": wxid2remark, + "Announcement": Announcement, "AnnouncementEditor": AnnouncementEditor, + "AnnouncementPublishTime": AnnouncementPublishTime} + return rooms + + +@db_error +def ChatRoom_RoomData(RoomData): + # 读取群聊数据,主要为 
wxid,以及对应昵称 + if RoomData is None or not isinstance(RoomData, bytes): + return None + data = get_BytesExtra(RoomData) + bytes2str(data) if data else None + return data + + +@db_error +def get_BytesExtra(BytesExtra): + if BytesExtra is None or not isinstance(BytesExtra, bytes): + return None + try: + deserialize_data, message_type = blackboxprotobuf.decode_message(BytesExtra) + return deserialize_data + except Exception as e: + db_loger.warning(f"\nget_BytesExtra: {e}\n{BytesExtra}", exc_info=True) + return None + + +@db_error +def get_ExtraBuf(ExtraBuf: bytes): + """ + 读取ExtraBuf(联系人表) + :param ExtraBuf: + :return: + """ + if not ExtraBuf: + return None + buf_dict = { + '74752C06': '性别[1男2女]', '46CF10C4': '个性签名', 'A4D9024A': '国', 'E2EAA8D1': '省', '1D025BBF': '市', + 'F917BCC0': '公司名称', '759378AD': '手机号', '4EB96D85': '企微属性', '81AE19B4': '朋友圈背景', + '0E719F13': '备注图片', '945f3190': '备注图片2', + 'DDF32683': '0', '88E28FCE': '1', '761A1D2D': '2', '0263A0CB': '3', '0451FF12': '4', '228C66A8': '5', + '4D6C4570': '6', '4335DFDD': '7', 'DE4CDAEB': '8', 'A72BC20A': '9', '069FED52': '10', '9B0F4299': '11', + '3D641E22': '12', '1249822C': '13', 'B4F73ACB': '14', '0959EB92': '15', '3CF4A315': '16', + 'C9477AC60201E44CD0E8': '17', 'B7ACF0F5': '18', '57A7B5A8': '19', '695F3170': '20', 'FB083DD9': '21', + '0240E37F': '22', '315D02A3': '23', '7DEC0BC3': '24', '16791C90': '25' + } + + rdata = {} + for buf_name in buf_dict: + rdata_name = buf_dict[buf_name] + buf_name = bytes.fromhex(buf_name) + offset = ExtraBuf.find(buf_name) + if offset == -1: + rdata[rdata_name] = "" + continue + offset += len(buf_name) + type_id = ExtraBuf[offset: offset + 1] + offset += 1 + + if type_id == b"\x04": + rdata[rdata_name] = int.from_bytes(ExtraBuf[offset: offset + 4], "little") + + elif type_id == b"\x18": + length = int.from_bytes(ExtraBuf[offset: offset + 4], "little") + rdata[rdata_name] = ExtraBuf[offset + 4: offset + 4 + length].decode("utf-16").rstrip("\x00") + + elif type_id == b"\x17": + length 
= int.from_bytes(ExtraBuf[offset: offset + 4], "little") + rdata[rdata_name] = ExtraBuf[offset + 4: offset + 4 + length].decode("utf-8", errors="ignore").rstrip( + "\x00") + elif type_id == b"\x05": + rdata[rdata_name] = f"0x{ExtraBuf[offset: offset + 8].hex()}" + return rdata diff --git a/pywxdump/db/dbOpenIMContact.py b/pywxdump/db/dbOpenIMContact.py new file mode 100644 index 0000000..f100fc8 --- /dev/null +++ b/pywxdump/db/dbOpenIMContact.py @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*-# +# ------------------------------------------------------------------------------- +# Name: parsingOpenIMContact.py +# Description: +# Author: xaoyaoo +# Date: 2024/04/16 +# ------------------------------------------------------------------------------- +from .dbbase import DatabaseBase +from .utils import db_error + + +class OpenIMContactHandler(DatabaseBase): + _class_name = "OpenIMContact" + OpenIMContact_required_tables = ["OpenIMContact"] + + def OpenIMContact_tables_exist(self): + """ + 判断该类所需要的表是否存在 + """ + return self.check_tables_exist(self.OpenIMContact_required_tables) + + def get_im_user_list(self, word=None, wxids=None): + """ + 获取联系人列表 + [ 注意:如果修改这个函数,要同时修改dbMicro.py中的get_user_list函数 ] + :param word: 查询关键字,可以是用户名、昵称、备注、描述,允许拼音 + :param wxids: 微信id列表 + :return: 联系人字典 + """ + if not wxids: + wxids = [] + if isinstance(wxids, str): + wxids = [wxids] + sql = ("SELECT UserName,NickName,Type,Remark,BigHeadImgUrl,CustomInfoDetail,CustomInfoDetailVisible," + "AntiSpamTicket,AppId,Sex,DescWordingId,ExtraBuf " + "FROM OpenIMContact WHERE 1==1 ;") + if word: + sql = sql.replace(";", + f"AND (UserName LIKE '%{word}%' " + f"OR NickName LIKE '%{word}%' " + f"OR Remark LIKE '%{word}%' " + f"OR LOWER(NickNamePYInit) LIKE LOWER('%{word}%') " + f"OR LOWER(NickNameQuanPin) LIKE LOWER('%{word}%') " + f"OR LOWER(RemarkPYInit) LIKE LOWER('%{word}%') " + f"OR LOWER(RemarkQuanPin) LIKE LOWER('%{word}%') " + ") ;") + if wxids: + sql = sql.replace(";", f"AND UserName IN ('" + 
"','".join(wxids) + "') ;") + + result = self.execute(sql) + if not result: + return [] + + users = {} + for row in result: + # 获取用户名、昵称、备注和聊天记录数量 + (UserName, NickName, Type, Remark, BigHeadImgUrl, CustomInfoDetail, CustomInfoDetailVisible, + AntiSpamTicket, AppId, Sex, DescWordingId, ExtraBuf) = row + + users[UserName] = { + "wxid": UserName, "nickname": NickName, "remark": Remark, "account": UserName, + "describe": '', "headImgUrl": BigHeadImgUrl if BigHeadImgUrl else "", + "ExtraBuf": None, "LabelIDList": tuple()} + return users + + +@db_error +def get_ExtraBuf(ExtraBuf: bytes): + """ + 读取ExtraBuf(联系人表) + :param ExtraBuf: + :return: + """ + if not ExtraBuf: + return None + buf_dict = { + '74752C06': '性别[1男2女]', '46CF10C4': '个性签名', 'A4D9024A': '国', 'E2EAA8D1': '省', '1D025BBF': '市', + 'F917BCC0': '公司名称', '759378AD': '手机号', '4EB96D85': '企微属性', '81AE19B4': '朋友圈背景', + '0E719F13': '备注图片', '945f3190': '备注图片2', + 'DDF32683': '0', '88E28FCE': '1', '761A1D2D': '2', '0263A0CB': '3', '0451FF12': '4', '228C66A8': '5', + '4D6C4570': '6', '4335DFDD': '7', 'DE4CDAEB': '8', 'A72BC20A': '9', '069FED52': '10', '9B0F4299': '11', + '3D641E22': '12', '1249822C': '13', 'B4F73ACB': '14', '0959EB92': '15', '3CF4A315': '16', + 'C9477AC60201E44CD0E8': '17', 'B7ACF0F5': '18', '57A7B5A8': '19', '695F3170': '20', 'FB083DD9': '21', + '0240E37F': '22', '315D02A3': '23', '7DEC0BC3': '24', '16791C90': '25' + } + rdata = {} + for buf_name in buf_dict: + rdata_name = buf_dict[buf_name] + buf_name = bytes.fromhex(buf_name) + offset = ExtraBuf.find(buf_name) + if offset == -1: + rdata[rdata_name] = "" + continue + offset += len(buf_name) + type_id = ExtraBuf[offset: offset + 1] + offset += 1 + + if type_id == b"\x04": + rdata[rdata_name] = int.from_bytes(ExtraBuf[offset: offset + 4], "little") + + elif type_id == b"\x18": + length = int.from_bytes(ExtraBuf[offset: offset + 4], "little") + rdata[rdata_name] = ExtraBuf[offset + 4: offset + 4 + length].decode("utf-16").rstrip("\x00") + + elif type_id 
== b"\x17": + length = int.from_bytes(ExtraBuf[offset: offset + 4], "little") + rdata[rdata_name] = ExtraBuf[offset + 4: offset + 4 + length].decode("utf-8", errors="ignore").rstrip( + "\x00") + elif type_id == b"\x05": + rdata[rdata_name] = f"0x{ExtraBuf[offset: offset + 8].hex()}" + return rdata diff --git a/pywxdump/db/dbOpenIMMedia.py b/pywxdump/db/dbOpenIMMedia.py new file mode 100644 index 0000000..4083500 --- /dev/null +++ b/pywxdump/db/dbOpenIMMedia.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*-# +# ------------------------------------------------------------------------------- +# Name: MediaMSG_parsing.py +# Description: +# Author: xaoyaoo +# Date: 2024/04/15 +# ------------------------------------------------------------------------------- +from .dbbase import DatabaseBase +from .utils import silk2audio, db_loger + + +class OpenIMMediaHandler(DatabaseBase): + _class_name = "OpenIMMedia" + OpenIMMedia_required_tables = ["OpenIMMedia"] + + def OpenIMMedia_tables_exist(self): + """ + 判断该类所需要的表是否存在 + """ + return self.check_tables_exist(self.OpenIMMedia_required_tables) + + def get_im_audio(self, MsgSvrID, is_play=False, is_wave=False, save_path=None, rate=24000): + sql = "select Buf from OpenIMMedia where Reserved0=? 
" + DBdata = self.execute(sql, (MsgSvrID,)) + if not DBdata: + return False + if len(DBdata) == 0: + return False + data = DBdata[0][0] # [1:] + b'\xFF\xFF' + try: + pcm_data = silk2audio(buf_data=data, is_play=is_play, is_wave=is_wave, save_path=save_path, rate=rate) + return pcm_data + except Exception as e: + db_loger.warning(e, exc_info=True) + return False diff --git a/pywxdump/db/dbPublicMsg.py b/pywxdump/db/dbPublicMsg.py new file mode 100644 index 0000000..0cbdbd3 --- /dev/null +++ b/pywxdump/db/dbPublicMsg.py @@ -0,0 +1,106 @@ +# -*- coding: utf-8 -*-# +# ------------------------------------------------------------------------------- +# Name: parsingPublicMsg.py +# Description: +# Author: xaoyaoo +# Date: 2024/07/03 +# ------------------------------------------------------------------------------- + +# -*- coding: utf-8 -*-# +# ------------------------------------------------------------------------------- +# Name: parsingMSG.py +# Description: +# Author: xaoyaoo +# Date: 2024/04/15 +# ------------------------------------------------------------------------------- +import json +import os +import re +from typing import Union, Tuple + +import pandas as pd + +from .dbbase import DatabaseBase +from .dbMSG import MsgHandler +from .utils import get_md5, name2typeid, typeid2name, type_converter, timestamp2str, xml2dict, match_BytesExtra, \ + db_error +import lz4.block +import blackboxprotobuf + + +class PublicMsgHandler(MsgHandler): + _class_name = "PublicMSG" + PublicMSG_required_tables = ["PublicMsg"] + + @db_error + def PublicMSG_tables_exist(self): + """ + 判断该类所需要的表是否存在 + """ + return self.check_tables_exist(self.PublicMSG_required_tables) + + @db_error + def get_plc_msg_count(self, wxids: list = ""): + """ + 获取聊天记录数量,根据wxid获取单个联系人的聊天记录数量,不传wxid则获取所有联系人的聊天记录数量 + :param wxids: wxid list + :return: 聊天记录数量列表 {wxid: chat_count} + """ + if isinstance(wxids, str): + wxids = [wxids] + if wxids: + wxids = "('" + "','".join(wxids) + "')" + sql = f"SELECT StrTalker, 
COUNT(*) FROM PublicMsg WHERE StrTalker IN {wxids} GROUP BY StrTalker ORDER BY COUNT(*) DESC;" + else: + sql = f"SELECT StrTalker, COUNT(*) FROM PublicMsg GROUP BY StrTalker ORDER BY COUNT(*) DESC;" + sql_total = f"SELECT COUNT(*) FROM MSG;" + + result = self.execute(sql) + total_ret = self.execute(sql_total) + + if not result: + return {} + total = 0 + if total_ret and len(total_ret) > 0: + total = total_ret[0][0] + + msg_count = {"total": total} + msg_count.update({row[0]: row[1] for row in result}) + return msg_count + + @db_error + def get_plc_msg_list(self, wxid="", start_index=0, page_size=500, msg_type: str = "", msg_sub_type: str = "", + start_createtime=None, end_createtime=None): + sql_base = ("SELECT localId,TalkerId,MsgSvrID,Type,SubType,CreateTime,IsSender,Sequence,StatusEx,FlagEx,Status," + "MsgSequence,StrContent,MsgServerSeq,StrTalker,DisplayContent,Reserved0,Reserved1,Reserved3," + "Reserved4,Reserved5,Reserved6,CompressContent,BytesExtra,BytesTrans,Reserved2," + "ROW_NUMBER() OVER (ORDER BY CreateTime ASC) AS id " + "FROM PublicMsg ") + + param = () + sql_wxid, param = ("AND StrTalker=? ", param + (wxid,)) if wxid else ("", param) + sql_type, param = ("AND Type=? ", param + (msg_type,)) if msg_type else ("", param) + sql_sub_type, param = ("AND SubType=? ", param + (msg_sub_type,)) if msg_type and msg_sub_type else ("", param) + sql_start_createtime, param = ("AND CreateTime>=? ", param + (start_createtime,)) if start_createtime else ( + "", param) + sql_end_createtime, param = ("AND CreateTime<=? ", param + (end_createtime,)) if end_createtime else ("", param) + + sql = ( + f"{sql_base} WHERE 1=1 " + f"{sql_wxid}" + f"{sql_type}" + f"{sql_sub_type}" + f"{sql_start_createtime}" + f"{sql_end_createtime}" + f"ORDER BY CreateTime ASC LIMIT ?,?" 
class DatabaseSingletonBase:
    """
    Owner of the connection pools shared by all handler classes.

    Pools live in the class-level ``_db_pool`` dict under ``db_config["key"]``,
    so every handler created with the same key reuses the same pool.
    """
    _class_name = "DatabaseSingletonBase"
    _db_pool = {}  # db_config["key"] -> connection pool

    @staticmethod
    def _redacted(db_config):
        """Return a copy of ``db_config`` that is safe to log (password masked)."""
        if not isinstance(db_config, dict):
            return db_config
        return {k: ("***" if k == "password" else v) for k, v in db_config.items()}

    @classmethod
    def connect(cls, db_config):
        """
        Return the pool for ``db_config``, creating and caching it on first use.
        Override this method to support further database types.

        :param db_config: dict with at least ``key`` and ``type``; ``sqlite``
                          also needs ``path``, ``mysql`` needs ``user``,
                          ``host``, ``password``, ``database`` and ``port``
        :return: the connection pool
        :raises ValueError: empty config or unsupported ``type``
        :raises FileNotFoundError: the sqlite file does not exist
        """
        if not db_config:
            raise ValueError("db_config 不能为空")
        db_key = db_config["key"]
        db_type = db_config["type"]

        cached = cls._db_pool.get(db_key)
        if cached is not None:
            return cached

        if db_type == "sqlite":
            db_path = db_config["path"]
            if not os.path.exists(db_path):
                raise FileNotFoundError(f"文件不存在: {db_path}")
            pool = PooledDB(
                creator=sqlite3,  # sqlite3 module creates the connections
                ping=0,
                database=db_path
            )
        elif db_type == "mysql":
            mysql_config = {
                'user': db_config['user'],
                'host': db_config['host'],
                'password': db_config['password'],
                'database': db_config['database'],
                'port': db_config['port']
            }
            pool = PooledDB(
                creator=importlib.import_module('pymysql'),  # imported lazily: only needed for mysql
                ping=1,
                **mysql_config
            )
        else:
            raise ValueError(f"不支持的数据库类型: {db_type}")

        # Log a redacted copy so the mysql password never reaches the log.
        db_loger.info(f"{pool} 连接句柄创建 {cls._redacted(db_config)}")
        cls._db_pool[db_key] = pool
        return pool


class DatabaseBase(DatabaseSingletonBase):
    """Base class for table handlers: runs SQL through the shared pool."""
    _class_name = "DatabaseBase"

    def __init__(self, db_config):
        """
        :param db_config: connection description, for example
            ``{"key": "test1", "type": "sqlite", "path": "C:/.../wxdump_work/merge_all.db"}``
        """
        self.config = db_config
        self.pool = self.connect(self.config)

    @staticmethod
    def _fetch_all(connection, sql, params):
        """Run ``sql`` on ``connection`` and return all rows."""
        cursor = connection.cursor()
        if params:
            cursor.execute(sql, params)
        else:
            cursor.execute(sql)
        return cursor.fetchall()

    def execute(self, sql, params=None):
        """
        Execute one SQL statement and return every row.

        If the first attempt raises, the statement is retried once with
        ``text_factory = bytes``.

        :param sql: SQL statement (str)
        :param params: bound parameters (tuple) or None
        :return: list of rows, or None when both attempts fail (the error is logged)
        """
        connection = self.pool.connection()
        try:
            try:
                return self._fetch_all(connection, sql, params)
            except Exception as e1:
                try:
                    connection.text_factory = bytes
                    try:
                        return self._fetch_all(connection, sql, params)
                    finally:
                        # Always restore the default, also when the retry fails,
                        # so the connection is not handed back in bytes mode.
                        connection.text_factory = str
                except Exception as e2:
                    db_loger.error(f"{sql=}\n{params=}\n{e1=}\n{e2=}\n", exc_info=True)
                    return None
        finally:
            # Release the connection deterministically instead of relying on
            # garbage collection of the local variable.
            connection.close()

    def check_tables_exist(self, required_tables):
        """
        Tell whether every table in ``required_tables`` exists (sqlite only,
        the lookup goes through ``sqlite_master``).

        :param required_tables: iterable of table names; empty/None counts as satisfied
        :return: True when all tables exist, False otherwise or when the lookup fails
        """
        required_tables = list(required_tables or [])
        if not required_tables:
            return True
        placeholders = ",".join("?" * len(required_tables))
        sql = (f"SELECT tbl_name FROM sqlite_master "
               f"WHERE type='table' AND tbl_name in ({placeholders});")
        rows = self.execute(sql, tuple(required_tables))
        if not rows:
            # None (query failed) or [] (nothing found): a required table is missing.
            return False
        existing_tables = {row[0] for row in rows}
        return all(table in existing_tables for table in required_tables)

    def close(self):
        """Close the pool used by this handler and log it."""
        self.pool.close()
        db_loger.info(f"关闭数据库 - {self._redacted(self.config)}")

    def __del__(self):
        # A destructor must not raise: __init__ may have failed before
        # self.pool was assigned, and module globals may already be gone
        # when the interpreter shuts down.
        try:
            if getattr(self, "pool", None) is not None:
                self.close()
        except Exception:
            pass
if not os.path.exists(outpath): outpath = os.path.join(os.getcwd(), "export" + os.sep + wxid) if not os.path.exists(outpath): os.makedirs(outpath) - pmsg = ParsingMSG(msg_path) + pmsg = MsgHandler(db_config) - count = pmsg.msg_count(wxid) + count = pmsg.get_msg_count(wxid) chatCount = count.get(wxid, 0) if chatCount == 0: return False, "没有聊天记录" @@ -29,7 +29,7 @@ def export_csv(wxid, outpath, msg_path, page_size=5000): for i in range(0, chatCount, page_size): start_index = i - data, wxid_list = pmsg.msg_list(wxid, start_index, page_size) + data, wxid_list = pmsg.get_msg_list(wxid, start_index, page_size) if len(data) == 0: return False, "没有聊天记录" @@ -39,7 +39,7 @@ def export_csv(wxid, outpath, msg_path, page_size=5000): with open(save_path, "w", encoding="utf-8", newline='') as f: csv_writer = csv.writer(f, quoting=csv.QUOTE_MINIMAL) - csv_writer.writerow(["id", "MsgSvrID", "type_name", "is_sender", "talker", "room_name", "content", + csv_writer.writerow(["id", "MsgSvrID", "type_name", "is_sender", "talker", "room_name", "msg", "src", "CreateTime"]) for row in data: id = row.get("id", "") @@ -48,11 +48,10 @@ def export_csv(wxid, outpath, msg_path, page_size=5000): is_sender = row.get("is_sender", "") talker = row.get("talker", "") room_name = row.get("room_name", "") - content = row.get("content", "") + msg = row.get("msg", "") + src = row.get("src", "") CreateTime = row.get("CreateTime", "") - - content = json.dumps(content, ensure_ascii=False) - csv_writer.writerow([id, MsgSvrID, type_name, is_sender, talker, room_name, content, CreateTime]) + csv_writer.writerow([id, MsgSvrID, type_name, is_sender, talker, room_name, msg, src, CreateTime]) return True, f"导出成功: {outpath}" diff --git a/pywxdump/dbpreprocess/export/exportJSON.py b/pywxdump/db/export/exportJSON.py similarity index 82% rename from pywxdump/dbpreprocess/export/exportJSON.py rename to pywxdump/db/export/exportJSON.py index f0a4015..68942bc 100644 --- a/pywxdump/dbpreprocess/export/exportJSON.py +++ 
b/pywxdump/db/export/exportJSON.py @@ -7,18 +7,18 @@ # ------------------------------------------------------------------------------- import json import os -from ..parsingMSG import ParsingMSG +from ..dbMSG import MsgHandler -def export_json(wxid, outpath, msg_path): +def export_json(wxid, outpath, db_config): if not os.path.exists(outpath): outpath = os.path.join(os.getcwd(), "export" + os.sep + wxid) if not os.path.exists(outpath): os.makedirs(outpath) - pmsg = ParsingMSG(msg_path) + pmsg = MsgHandler(db_config) - count = pmsg.msg_count(wxid) + count = pmsg.get_msg_count(wxid) chatCount = count.get(wxid, 0) if chatCount == 0: return False, "没有聊天记录" @@ -26,7 +26,7 @@ def export_json(wxid, outpath, msg_path): page_size = chatCount + 1 for i in range(0, chatCount, page_size): start_index = i - data, wxid_list = pmsg.msg_list(wxid, start_index, page_size) + data, wxid_list = pmsg.get_msg_list(wxid, start_index, page_size) if len(data) == 0: return False, "没有聊天记录" save_path = os.path.join(outpath, f"{wxid}_{i}_{i + page_size}.json") diff --git a/pywxdump/db/utils/__init__.py b/pywxdump/db/utils/__init__.py new file mode 100644 index 0000000..87f9fdf --- /dev/null +++ b/pywxdump/db/utils/__init__.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*-# +# ------------------------------------------------------------------------------- +# Name: __init__.py.py +# Description: +# Author: xaoyaoo +# Date: 2024/07/23 +# ------------------------------------------------------------------------------- +from ._loger import db_loger +from .common_utils import timestamp2str, xml2dict, silk2audio, bytes2str, get_md5, name2typeid, typeid2name, \ + type_converter, match_BytesExtra, db_error, download_file,dat2img diff --git a/pywxdump/db/utils/_loger.py b/pywxdump/db/utils/_loger.py new file mode 100644 index 0000000..2a8163b --- /dev/null +++ b/pywxdump/db/utils/_loger.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*-# +# 
def db_error(func):
    """
    Decorator that turns any exception raised by ``func`` into a logged error.

    :param func: the callable to guard
    :return: a wrapper that returns ``func``'s result, or ``None`` after
             logging (with traceback) when ``func`` raises
    """
    import functools  # local import keeps this block self-contained

    @functools.wraps(func)  # keep __name__/__doc__ of the wrapped function
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            db_loger.error(f"db_error: {e}", exc_info=True)
            return None

    return wrapper
import ParsingOpenIMContact -from .parsingPublicMsg import ParsingPublicMsg -from .utils import download_file - -from .export.exportCSV import export_csv -from .export.exportJSON import export_json - - -def get_user_list(MicroMsg_db_path, OpenIMContact_db_path=None, word=None): - """ - 获取联系人列表 - :param MicroMsg_db_path: MicroMsg.db 文件路径 - :param OpenIMContact_db_path: OpenIMContact.db 文件路径 - :param word: 搜索关键字 - :return: 联系人列表 - """ - # 连接 MicroMsg.db 数据库,并执行查询 - if not MicroMsg_db_path: - return [] - parsing_micromsg = ParsingMicroMsg(MicroMsg_db_path) - users = parsing_micromsg.user_list(word=word) - # 如果有 OpenIMContact.db 文件,获取 OpenIMContact.db 中的联系人信息 - if OpenIMContact_db_path: - parsing_openimcontact = ParsingOpenIMContact(OpenIMContact_db_path) - users += parsing_openimcontact.user_list(word=word) - # 去重 - # print(users) - unique_users = [dict(t) for t in {tuple(d.items()) for d in users}] - return unique_users - - -def get_recent_user_list(MicroMsg_db_path, OpenIMContact_db_path=None, limit=200): - """ - 获取联系人列表 - :param MicroMsg_db_path: MicroMsg.db 文件路径 - :param OpenIMContact_db_path: OpenIMContact.db 文件路径 - :param limit: 最大数量 - :return: 联系人列表 - """ - # 连接 MicroMsg.db 数据库,并执行查询 - if not MicroMsg_db_path: - return [] - parsing_micromsg = ParsingMicroMsg(MicroMsg_db_path) - recent_users = parsing_micromsg.recent_chat_wxid() # [{"wxid": username, "LastReadedCreateTime": LastReadedCreateTime, "LastReadedSvrId": LastReadedSvrId},] - recent_users = pd.DataFrame(recent_users, columns=["wxid", "LastReadedCreateTime", "LastReadedSvrId"]) - recent_users = recent_users.sort_values(by="LastReadedCreateTime", ascending=False) - recent_users = recent_users.drop_duplicates(subset=["wxid"], keep="first").head(limit) - - users = get_user_list(MicroMsg_db_path, OpenIMContact_db_path) - users = pd.DataFrame(users) - - users = pd.merge(users, recent_users, on="wxid", how="right") - # users = users.drop_duplicates(subset=["wxid"], keep="last") # 保留最新的 - users = 
users.sort_values(by="LastReadedCreateTime", ascending=False) if not users.empty else users - users = users.drop_duplicates(subset=["wxid"], keep="first") # 保留最新的 - users = users.fillna("") - users = users.to_dict(orient="records") - return users - - -def wxid2userinfo(MicroMsg_db_path, OpenIMContact_db_path, wxid): - """ - 获取联系人信息 - :param MicroMsg_db_path: MicroMsg.db 文件路径 - :param OpenIMContact_db_path: OpenIMContact.db 文件路径 - :param wxid: 微信id,可以是单个id,也可以是多个id,使用list传入 - :return: 联系人信息 {wxid: {wxid: wxid, nickname: nickname, remark: remark, account: account, describe: describe, headImgUrl: headImgUrl}} - """ - # 连接 MicroMsg.db 数据库,并执行查询 - parsing_micromsg = ParsingMicroMsg(MicroMsg_db_path) - users = parsing_micromsg.wxid2userinfo(wxid) - # {'wxid_uw8ruinee7zq12': {'wxid': 'wxid_uw8ruinee7zq12', 'nickname': '2021年', 'remark': '于浩', 'account': 'yh13327404424', 'describe': '', 'headImgUrl': 'https://wx.qlogo.cn/mmhead/ver_1/LLibM2qUys7nBt9Hl8uuTQkn9ILFicoImlt2616ZNGoIvRbA8VmJ0Vibhd3V96JFfxQ25Tj1nRWTsXYDdH3z2FAQkQDXSnjS5PBuSraey4ZnoooOkEu2e3DjXbJaJJXKUib1/0'}} - # 如果有 OpenIMContact.db 文件,获取 OpenIMContact.db 中的联系人信息 - if OpenIMContact_db_path: - parsing_openimcontact = ParsingOpenIMContact(OpenIMContact_db_path) - users.update(parsing_openimcontact.wxid2userinfo(wxid)) - return users diff --git a/pywxdump/dbpreprocess/dbbase.py b/pywxdump/dbpreprocess/dbbase.py deleted file mode 100644 index 035af61..0000000 --- a/pywxdump/dbpreprocess/dbbase.py +++ /dev/null @@ -1,103 +0,0 @@ -# -*- coding: utf-8 -*-# -# ------------------------------------------------------------------------------- -# Name: dbbase.py -# Description: -# Author: xaoyaoo -# Date: 2024/04/15 -# ------------------------------------------------------------------------------- -import os -import sqlite3 -import logging - - -class DatabaseBase: - _singleton_instances = {} # 使用字典存储不同db_path对应的单例实例 - _connection_pool = {} # 使用字典存储不同db_path对应的连接池 - _class_name = "DatabaseBase" - - def __new__(cls, db_path): 
- if cls._class_name not in cls._singleton_instances: - cls._singleton_instances[cls._class_name] = super().__new__(cls) - return cls._singleton_instances[cls._class_name] - - def __init__(self, db_path): - self._db_path = db_path - self._db_connection = self._connect_to_database(db_path) - - @classmethod - def _connect_to_database(cls, db_path): - if not os.path.exists(db_path): - raise FileNotFoundError(f"文件不存在: {db_path}") - if db_path in cls._connection_pool and cls._connection_pool[db_path] is not None: - return cls._connection_pool[db_path] - connection = sqlite3.connect(db_path, check_same_thread=False) - logging.info(f"{connection} 连接句柄创建 {db_path}") - return connection - - def execute_sql(self, sql, params=None): - """ - 执行SQL语句 - :param sql: SQL语句 (str) - :param params: 参数 (tuple) - :return: 查询结果 (list) - """ - # 检测数据库连接是否关闭 - if not self._db_connection: - logging.warning(f"重新连接数据库 - {self._db_path}") - self._connect_to_database(self._db_path) - connection = self._db_connection - try: - # connection.text_factory = bytes - cursor = connection.cursor() - if params: - cursor.execute(sql, params) - else: - cursor.execute(sql) - return cursor.fetchall() - except Exception as e1: - try: - connection.text_factory = bytes - cursor = connection.cursor() - if params: - cursor.execute(sql, params) - else: - cursor.execute(sql) - rdata = cursor.fetchall() - connection.text_factory = str - return rdata - except Exception as e2: - logging.error(f"**********\nSQL: {sql}\nparams: {params}\n{e1}\n{e2}\n**********") - return None - - def close_connection(self): - if self._db_connection: - self._db_connection.close() - logging.info(f"关闭数据库 - {self._db_path}") - self._db_connection = None - - def close_all_connection(self): - for db_path in self._connection_pool: - if self._connection_pool[db_path]: - self._connection_pool[db_path].close() - logging.info(f"关闭数据库 - {db_path}") - self._connection_pool[db_path] = None - - def show__singleton_instances(self): - 
print(self._singleton_instances) - - def __del__(self): - self.close_connection() - # del self._singleton_instances[self._db_path] - - -if __name__ == '__main__': - a = DatabaseBase("test.db") - b = DatabaseBase("test1.db") - - d1 = a.execute_sql("select * from sqlite_master;") - d2 = b.execute_sql("select * from sqlite_master;") - print([i[1] for i in d1]) - print([i[1] for i in d2]) - - a.close_connection() - b.close_connection() diff --git a/pywxdump/dbpreprocess/parsingMicroMsg.py b/pywxdump/dbpreprocess/parsingMicroMsg.py deleted file mode 100644 index 9bc5ab9..0000000 --- a/pywxdump/dbpreprocess/parsingMicroMsg.py +++ /dev/null @@ -1,267 +0,0 @@ -# -*- coding: utf-8 -*-# -# ------------------------------------------------------------------------------- -# Name: parsingMicroMsg.py -# Description: -# Author: xaoyaoo -# Date: 2024/04/15 -# ------------------------------------------------------------------------------- -import logging - -from .dbbase import DatabaseBase -from .utils import timestamp2str, bytes2str - -import blackboxprotobuf - - -class ParsingMicroMsg(DatabaseBase): - _class_name = "MicroMsg" - - def __init__(self, db_path): - super().__init__(db_path) - - def get_BytesExtra(self, BytesExtra): - if BytesExtra is None or not isinstance(BytesExtra, bytes): - return None - try: - deserialize_data, message_type = blackboxprotobuf.decode_message(BytesExtra) - return deserialize_data - except Exception as e: - return None - - def get_ExtraBuf(self, ExtraBuf: bytes): - """ - 读取ExtraBuf(联系人表) - :param ExtraBuf: - :return: - """ - if not ExtraBuf: - return None - try: - buf_dict = { - 'DDF32683': '0', '74752C06': '性别[1男2女]', '88E28FCE': '2', '761A1D2D': '3', '0263A0CB': '4', - '0451FF12': '5', - '228C66A8': '6', '46CF10C4': '个性签名', 'A4D9024A': '国', 'E2EAA8D1': '省', '1D025BBF': '市', - '4D6C4570': '11', - 'F917BCC0': '公司名称', '759378AD': '手机号', '4335DFDD': '14', 'DE4CDAEB': '15', 'A72BC20A': '16', - '069FED52': '17', - '9B0F4299': '18', '3D641E22': '19', 
'1249822C': '20', '4EB96D85': '企微属性', 'B4F73ACB': '22', - '0959EB92': '23', - '3CF4A315': '24', 'C9477AC60201E44CD0E8': '26', 'B7ACF0F5': '28', '57A7B5A8': '29', - '81AE19B4': '朋友圈背景', - '695F3170': '31', 'FB083DD9': '32', '0240E37F': '33', '315D02A3': '34', '7DEC0BC3': '35', - '0E719F13': '备注图片', - '16791C90': '37' - } - - rdata = {} - for buf_name in buf_dict: - rdata_name = buf_dict[buf_name] - buf_name = bytes.fromhex(buf_name) - offset = ExtraBuf.find(buf_name) - if offset == -1: - rdata[rdata_name] = "" - continue - offset += len(buf_name) - type_id = ExtraBuf[offset: offset + 1] - offset += 1 - - if type_id == b"\x04": - rdata[rdata_name] = int.from_bytes(ExtraBuf[offset: offset + 4], "little") - - elif type_id == b"\x18": - length = int.from_bytes(ExtraBuf[offset: offset + 4], "little") - rdata[rdata_name] = ExtraBuf[offset + 4: offset + 4 + length].decode("utf-16").rstrip("\x00") - - elif type_id == b"\x17": - length = int.from_bytes(ExtraBuf[offset: offset + 4], "little") - rdata[rdata_name] = ExtraBuf[offset + 4: offset + 4 + length].decode("utf-8").rstrip("\x00") - - elif type_id == b"\x05": - rdata[rdata_name] = f"0x{ExtraBuf[offset: offset + 8].hex()}" - return rdata - except Exception as e: - print(f'解析错误:\n{e}') - return None - - def ChatRoom_RoomData(self, RoomData): - # 读取群聊数据,主要为 wxid,以及对应昵称 - if RoomData is None or not isinstance(RoomData, bytes): - return None - try: - data = self.get_BytesExtra(RoomData) - bytes2str(data) - return data - except Exception as e: - return None - - def wxid2userinfo(self, wxid): - """ - 获取单个联系人信息 - :param wxid: 微信id,可以是单个id,也可以是id列表 - :return: 联系人信息 - """ - if isinstance(wxid, str): - wxid = [wxid] - elif isinstance(wxid, list): - wxid = wxid - else: - return {} - wxid = "','".join(wxid) - wxid = f"'{wxid}'" - # 获取username是wx_id的用户 - sql = ("SELECT A.UserName, A.NickName, A.Remark,A.Alias,A.Reserved6,B.bigHeadImgUrl,A.LabelIDList " - "FROM Contact A,ContactHeadImgUrl B " - f"WHERE A.UserName = B.usrName AND 
A.UserName in ({wxid}) " - "ORDER BY NickName ASC;") - result = self.execute_sql(sql) - if not result: - return {} - users = {} - for row in result: - # 获取wxid,昵称,备注,描述,头像 - username, nickname, remark, Alias, describe, headImgUrl, LabelIDList = row - LabelIDList = LabelIDList.split(",") if LabelIDList else [] - users[username] = {"wxid": username, "nickname": nickname, "remark": remark, "account": Alias, - "describe": describe, "headImgUrl": headImgUrl, "LabelIDList": tuple(LabelIDList)} - return users - - def user_list(self, word=None): - """ - 获取联系人列表 - :param word 查询关键字,可以是用户名、昵称、备注、描述,允许拼音 - :return: 联系人列表 - """ - users = [] - sql = ( - "SELECT A.UserName, A.NickName, A.Remark,A.Alias,A.Reserved6,B.bigHeadImgUrl,A.LabelIDList " - "FROM Contact A left join ContactHeadImgUrl B on A.UserName==B.usrName " - "ORDER BY A.NickName DESC;") - if word: - sql = sql.replace("ORDER BY A.NickName DESC;", - f"where " - f"A.UserName LIKE '%{word}%' " - f"OR A.NickName LIKE '%{word}%' " - f"OR A.Remark LIKE '%{word}%' " - f"OR A.Alias LIKE '%{word}%' " - f"OR A.QuanPin LIKE LOWER('%{word}%') " - f"OR LOWER(A.PYInitial) LIKE LOWER('%{word}%') " - # f"OR A.Reserved6 LIKE '%{word}%' " - "ORDER BY A.NickName DESC;") - result = self.execute_sql(sql) - if not result: - return [] - for row in result: - # 获取wxid,昵称,备注,描述,头像,标签 - username, nickname, remark, Alias, describe, headImgUrl, LabelIDList = row - LabelIDList = LabelIDList.split(",") if LabelIDList else [] - users.append( - {"wxid": username, "nickname": nickname, "remark": remark, "account": Alias, - "describe": describe, "headImgUrl": headImgUrl if headImgUrl else "", - "LabelIDList": tuple(LabelIDList)}) - return users - - def user_list_by_label(self, label_id): - """ - 获取标签联系人列表 - :param label_id: 标签id - :return: 标签联系人列表 - """ - users = [] - sql = ( - "SELECT A.UserName, A.NickName, A.Remark,A.Alias,A.Reserved6,B.bigHeadImgUrl,A.LabelIDList " - "FROM Contact A left join ContactHeadImgUrl B on A.UserName==B.usrName " - 
f"where A.LabelIDList LIKE '%{label_id}%' " - "ORDER BY A.NickName DESC;") - result = self.execute_sql(sql) - if not result: - return [] - for row in result: - # 获取wxid,昵称,备注,描述,头像,标签 - username, nickname, remark, Alias, describe, headImgUrl, LabelIDList = row - LabelIDList = LabelIDList.split(",") if LabelIDList else [] - users.append( - {"wxid": username, "nickname": nickname, "remark": remark, "account": Alias, - "describe": describe, "headImgUrl": headImgUrl if headImgUrl else "", - "LabelIDList": tuple(LabelIDList)}) - return users - - def recent_chat_wxid(self): - """ - 获取最近聊天的联系人 - :return: 最近聊天的联系人 - """ - users = [] - sql = ( - "SELECT C.Username, C.LastReadedCreateTime,C.LastReadedSvrId " - "FROM ChatInfo C WHERE C.LastReadedCreateTime IS NOT NULL AND C.LastReadedCreateTime > 1007911408000 " - "ORDER BY C.LastReadedCreateTime DESC;") - result = self.execute_sql(sql) - if not result: - return [] - for row in result: - # 获取用户名、昵称、备注和聊天记录数量 - username, LastReadedCreateTime, LastReadedSvrId = row - LastReadedCreateTime = timestamp2str(LastReadedCreateTime) if LastReadedCreateTime else None - users.append( - {"wxid": username, "LastReadedCreateTime": LastReadedCreateTime, "LastReadedSvrId": LastReadedSvrId}) - return users - - def chatroom_list(self, roomwxid=None): - """ - 获取群聊列表 - :param MicroMsg_db_path: MicroMsg.db 文件路径 - :return: 群聊列表 - """ - rooms = [] - # 连接 MicroMsg.db 数据库,并执行查询 - sql = ( - "SELECT A.ChatRoomName,A.UserNameList, A.DisplayNameList,A.RoomData, B.Announcement,B.AnnouncementEditor " - "FROM ChatRoom A,ChatRoomInfo B " - "where A.ChatRoomName==B.ChatRoomName " - "ORDER BY A.ChatRoomName ASC;") - if roomwxid: - sql = sql.replace("ORDER BY A.ChatRoomName ASC;", - f"and A.ChatRoomName LIKE '%{roomwxid}%' " - "ORDER BY A.ChatRoomName ASC;") - result = self.execute_sql(sql) - if not result: - return [] - room_datas = [] - for row in result: - # 获取用户名、昵称、备注和聊天记录数量 - ChatRoomName, UserNameList, DisplayNameList, RoomData, Announcement, 
AnnouncementEditor = row - UserNameList = UserNameList.split("^G") - DisplayNameList = DisplayNameList.split("^G") - RoomData = self.ChatRoom_RoomData(RoomData) - wxid2remark = {} - if RoomData: - rd = [] - for k, v in RoomData.items(): - if isinstance(v, list): - rd += v - for i in rd: - try: - if isinstance(i, dict) and isinstance(i.get('1'), str) and i.get('2'): - wxid2remark[i['1']] = i["2"] - except Exception as e: - logging.error(f"wxid2remark: ChatRoomName:{ChatRoomName}, {i} error:{e}") - rooms.append( - {"ChatRoomName": ChatRoomName, "UserNameList": UserNameList, "DisplayNameList": DisplayNameList, - "Announcement": Announcement, "AnnouncementEditor": AnnouncementEditor, "wxid2remark": wxid2remark}) - return rooms - - def labels_dict(self, id_is_key=True): - """ - 读取标签列表 - :param label_list: - :return: - """ - sql = "SELECT LabelId, LabelName FROM ContactLabel ORDER BY LabelName ASC;" - result = self.execute_sql(sql) - if not result: - return [] - if id_is_key: - labels = {row[0]: row[1] for row in result} - else: - labels = {row[1]: row[0] for row in result} - return labels diff --git a/pywxdump/dbpreprocess/parsingOpenIMContact.py b/pywxdump/dbpreprocess/parsingOpenIMContact.py deleted file mode 100644 index b56ab12..0000000 --- a/pywxdump/dbpreprocess/parsingOpenIMContact.py +++ /dev/null @@ -1,74 +0,0 @@ -# -*- coding: utf-8 -*-# -# ------------------------------------------------------------------------------- -# Name: parsingOpenIMContact.py -# Description: -# Author: xaoyaoo -# Date: 2024/04/16 -# ------------------------------------------------------------------------------- -from .dbbase import DatabaseBase - - -class ParsingOpenIMContact(DatabaseBase): - _class_name = "OpenIMContact" - - def __init__(self, db_path): - super().__init__(db_path) - - def wxid2userinfo(self, wxid): - """ - 获取单个联系人信息 - :param wxid: 微信id - :return: 联系人信息 - """ - if isinstance(wxid, str): - wxid = [wxid] - elif isinstance(wxid, list): - wxid = wxid - else: - return {} - 
wxid = "','".join(wxid) - wxid = f"'{wxid}'" - # 获取username是wx_id的用户 - sql = ("SELECT A.UserName, A.NickName, A.Remark,A.BigHeadImgUrl " - "FROM OpenIMContact A " - f"WHERE A.UserName in ({wxid}) " - "ORDER BY NickName ASC;") - - result = self.execute_sql(sql) - if not result: - return {} - users = {} - for row in result: - # 获取用户名、昵称、备注和聊天记录数量 - username, nickname, remark, headImgUrl = row - users[username] = {"wxid": username, "nickname": nickname, "remark": remark, "account": "", "describe": "", - "headImgUrl": headImgUrl, "LabelIDList": ()} - return users - - def user_list(self, word=None): - """ - 获取联系人列表 - :param MicroMsg_db_path: MicroMsg.db 文件路径 - :return: 联系人列表 - """ - sql = ("SELECT A.UserName, A.NickName, A.Remark,A.BigHeadImgUrl FROM OpenIMContact A " - "ORDER BY NickName ASC;") - if word: - sql = sql.replace("ORDER BY NickName ASC;", - f"where " - f"UserName LIKE '%{word}%' " - f"OR NickName LIKE '%{word}%' " - f"OR Remark LIKE '%{word}%' " - "ORDER BY NickName ASC;") - result = self.execute_sql(sql) - if not result: - return [] - - users = [] - for row in result: - # 获取用户名、昵称、备注和聊天记录数量 - username, nickname, remark, headImgUrl = row - users.append( - {"wxid": username, "nickname": nickname, "remark": remark, "account": "", "describe": "", - "headImgUrl": headImgUrl, "LabelIDList": ()}) - return users diff --git a/pywxdump/dbpreprocess/parsingPublicMsg.py b/pywxdump/dbpreprocess/parsingPublicMsg.py deleted file mode 100644 index 5c8d915..0000000 --- a/pywxdump/dbpreprocess/parsingPublicMsg.py +++ /dev/null @@ -1,94 +0,0 @@ -# -*- coding: utf-8 -*-# -# ------------------------------------------------------------------------------- -# Name: parsingPublicMsg.py -# Description: -# Author: xaoyaoo -# Date: 2024/07/03 -# ------------------------------------------------------------------------------- - -# -*- coding: utf-8 -*-# -# ------------------------------------------------------------------------------- -# Name: parsingMSG.py -# Description: -# Author: 
xaoyaoo -# Date: 2024/04/15 -# ------------------------------------------------------------------------------- -import json -import os -import re -from typing import Union, Tuple - -import pandas as pd - -from .dbbase import DatabaseBase -from .parsingMSG import ParsingMSG -from .utils import get_md5, name2typeid, typeid2name, type_converter, timestamp2str, xml2dict, match_BytesExtra -import lz4.block -import blackboxprotobuf - - -class ParsingPublicMsg(ParsingMSG): - _class_name = "PublicMSG" - - def msg_count(self, wxid: str = ""): - """ - 获取聊天记录数量,根据wxid获取单个联系人的聊天记录数量,不传wxid则获取所有联系人的聊天记录数量 - :param MSG_db_path: MSG.db 文件路径 - :return: 聊天记录数量列表 {wxid: chat_count} - """ - if wxid: - sql = f"SELECT StrTalker, COUNT(*) FROM PublicMsg WHERE StrTalker='{wxid}';" - else: - sql = f"SELECT StrTalker, COUNT(*) FROM PublicMsg GROUP BY StrTalker ORDER BY COUNT(*) DESC;" - - result = self.execute_sql(sql) - if not result: - return {} - df = pd.DataFrame(result, columns=["wxid", "msg_count"]) - # # 排序 - df = df.sort_values(by="msg_count", ascending=False) - # chat_counts : {wxid: chat_count} - chat_counts = df.set_index("wxid").to_dict()["msg_count"] - return chat_counts - - def msg_count_total(self): - """ - 获取聊天记录总数 - :return: 聊天记录总数 - """ - sql = "SELECT COUNT(*) FROM PublicMsg;" - result = self.execute_sql(sql) - if result and len(result) > 0: - chat_counts = result[0][0] - return chat_counts - return 0 - - - def msg_list(self, wxid="", start_index=0, page_size=500, msg_type: str = ""): - sql = ( - "SELECT localId, IsSender, StrContent, StrTalker, Sequence, Type, SubType, CreateTime, MsgSvrID, " - "DisplayContent, CompressContent, BytesExtra, ROW_NUMBER() OVER (ORDER BY CreateTime ASC) AS id " - "FROM PublicMsg WHERE 1==1 " - "ORDER BY CreateTime ASC LIMIT ?, ?" - ) - params = [start_index, page_size] - if msg_type: - sql = sql.replace("ORDER BY CreateTime ASC LIMIT ?, ?", - f"AND Type=? 
def is_port_in_use(_host, _port):
    """
    Probe whether a TCP socket can be bound to ``(_host, _port)``.

    :param _host: address to bind to
    :param _port: port number to bind to
    :return: True when binding fails with a socket error, False when it succeeds
    """
    import socket

    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.bind((_host, _port))
    except socket.error:
        occupied = True
    else:
        occupied = False
    finally:
        # The probe socket is only needed for the bind attempt.
        probe.close()
    return occupied
port=5000, online app.config['TIMEOUT'] = 1000 app.secret_key = 'secret_key' - app.logger.setLevel(logging.ERROR) + app.logger.setLevel(logging.WARNING) CORS(app, resources={r"/*": {"origins": "*"}}, supports_credentials=True) # 允许所有域名跨域 @@ -63,18 +76,20 @@ def start_falsk(merge_path="", wx_path="", key="", my_wxid="", port=5000, online @app.before_request def before_request(): + g.work_path = work_path # 临时文件夹,用于存放图片等-新版本 + g.caf = conf_auto_file # 用于存放各种基础信息-新版本 + g.at = "auto_setting" # 用于默认设置-新版本 - g.tmp_path = tmp_path # 临时文件夹,用于存放图片等 - g.sf = session_file # 用于存放各种基础信息 + if merge_path: set_conf(conf_auto_file, g.at, "merge_path", merge_path) + if wx_path: set_conf(conf_auto_file, g.at, "wx_path", wx_path) + if key: set_conf(conf_auto_file, g.at, "key", key) + if my_wxid: set_conf(conf_auto_file, g.at, "my_wxid", my_wxid) + if not os.path.exists(conf_auto_file): + set_conf(conf_auto_file, g.at, "last", my_wxid) - if merge_path: save_session(session_file, "test", "merge_path", merge_path) - if wx_path: save_session(session_file, "test", "wx_path", wx_path) - if key: save_session(session_file, "test", "key", key) - if my_wxid: save_session(session_file, "test", "my_wxid", my_wxid) - if not os.path.exists(session_file): - save_session(session_file, "test", "last", my_wxid) + app.register_blueprint(rs_api) + app.register_blueprint(ls_api) - app.register_blueprint(api) if isopenBrowser: try: # 自动打开浏览器 @@ -87,24 +102,18 @@ def start_falsk(merge_path="", wx_path="", key="", my_wxid="", port=5000, online elif sys.platform.startswith('linux'): # Linux subprocess.call(['xdg-open', url]) else: + server_loger.error(f"Unsupported platform, can't open browser automatically.", exc_info=True) print("Unsupported platform, can't open browser automatically.") except Exception as e: - pass - - def is_port_in_use(host, port): - import socket - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - try: - s.bind((host, port)) - except socket.error: - return True - return 
False + server_loger.error(f"自动打开浏览器失败:{e}", exc_info=True) if is_port_in_use(host, port): + server_loger.error(f"Port {port} is already in use. Choose a different port.") print(f"Port {port} is already in use. Choose a different port.") input("Press Enter to exit...") else: time.sleep(1) + server_loger.info(f"启动flask服务,host:port:{host}:{port}") print("[+] 请使用浏览器访问 http://127.0.0.1:5000/ 查看聊天记录") app.run(host=host, port=port, debug=debug) diff --git a/pywxdump/wx_info/__init__.py b/pywxdump/wx_core/__init__.py similarity index 64% rename from pywxdump/wx_info/__init__.py rename to pywxdump/wx_core/__init__.py index baf70f1..d153bc5 100644 --- a/pywxdump/wx_info/__init__.py +++ b/pywxdump/wx_core/__init__.py @@ -5,8 +5,7 @@ # Author: xaoyaoo # Date: 2023/08/21 # ------------------------------------------------------------------------------- -from .get_wx_info import read_info, get_wechat_db, get_core_db +from .wx_info import get_wx_info, get_wx_db, get_core_db from .get_bias_addr import BiasAddr from .decryption import batch_decrypt, decrypt -from .merge_db import merge_msg_db, merge_copy_db, merge_media_msg_db, merge_db, decrypt_merge, merge_real_time_db, \ - all_merge_real_time_db +from .merge_db import merge_db, decrypt_merge, merge_real_time_db, all_merge_real_time_db diff --git a/pywxdump/wx_info/ctypes_utils.py b/pywxdump/wx_core/ctypes_utils.py similarity index 100% rename from pywxdump/wx_info/ctypes_utils.py rename to pywxdump/wx_core/ctypes_utils.py diff --git a/pywxdump/wx_info/decryption.py b/pywxdump/wx_core/decryption.py similarity index 87% rename from pywxdump/wx_info/decryption.py rename to pywxdump/wx_core/decryption.py index e913864..da96798 100644 --- a/pywxdump/wx_info/decryption.py +++ b/pywxdump/wx_core/decryption.py @@ -16,9 +16,10 @@ import hashlib import os from typing import Union, List from Cryptodome.Cipher import AES - # from Crypto.Cipher import AES # 如果上面的导入失败,可以尝试使用这个 +from .utils import wx_core_error, wx_core_loger + 
SQLITE_FILE_HEADER = "SQLite format 3\x00" # SQLite文件头 KEY_SIZE = 32 @@ -26,7 +27,8 @@ DEFAULT_PAGESIZE = 4096 # 通过密钥解密数据库 -def decrypt(key: str, db_path, out_path): +@wx_core_error +def decrypt(key: str, db_path: str, out_path: str): """ 通过密钥解密数据库 :param key: 密钥 64位16进制字符串 @@ -72,11 +74,19 @@ def decrypt(key: str, db_path, out_path): return True, [db_path, out_path, key] - -def batch_decrypt(key: str, db_path: Union[str, List[str]], out_path: str, is_logging: bool = False): +@wx_core_error +def batch_decrypt(key: str, db_path: Union[str, List[str]], out_path: str, is_print: bool = False): + """ + 批量解密数据库 + :param key: 密钥 64位16进制字符串 + :param db_path: 待解密的数据库路径(文件或文件夹) + :param out_path: 解密后的数据库输出路径(文件夹) + :param is_logging: 是否打印日志 + :return: (bool, [[input_db_path, output_db_path, key],...]) + """ if not isinstance(key, str) or not isinstance(out_path, str) or not os.path.exists(out_path) or len(key) != 64: error = f"[-] (key:'{key}' or out_path:'{out_path}') Error!" - if is_logging: print(error) + wx_core_loger.error(error, exc_info=True) return False, error process_list = [] @@ -84,7 +94,7 @@ def batch_decrypt(key: str, db_path: Union[str, List[str]], out_path: str, is_lo if isinstance(db_path, str): if not os.path.exists(db_path): error = f"[-] db_path:'{db_path}' not found!" - if is_logging: print(error) + wx_core_loger.error(error, exc_info=True) return False, error if os.path.isfile(db_path): @@ -104,7 +114,7 @@ def batch_decrypt(key: str, db_path: Union[str, List[str]], out_path: str, is_lo process_list.append([key, inpath, outpath]) else: error = f"[-] db_path:'{db_path}' Error " - if is_logging: print(error) + wx_core_loger.error(error, exc_info=True) return False, error elif isinstance(db_path, list): @@ -114,9 +124,9 @@ def batch_decrypt(key: str, db_path: Union[str, List[str]], out_path: str, is_lo for inpath in db_path: if not os.path.exists(inpath): - erreor = f"[-] db_path:'{db_path}' not found!" 
- if is_logging: print(erreor) - return False, erreor + error = f"[-] db_path:'{db_path}' not found!" + wx_core_loger.error(error, exc_info=True) + return False, error inpath = os.path.normpath(inpath) rel = os.path.relpath(os.path.dirname(inpath), rt_path) @@ -126,7 +136,7 @@ def batch_decrypt(key: str, db_path: Union[str, List[str]], out_path: str, is_lo process_list.append([key, inpath, outpath]) else: error = f"[-] db_path:'{db_path}' Error " - if is_logging: print(error) + wx_core_loger.error(error, exc_info=True) return False, error result = [] @@ -139,7 +149,7 @@ def batch_decrypt(key: str, db_path: Union[str, List[str]], out_path: str, is_lo if not os.listdir(os.path.join(root, dir)): os.rmdir(os.path.join(root, dir)) - if is_logging: + if is_print: print("=" * 32) success_count = 0 fail_count = 0 diff --git a/pywxdump/wx_info/get_bias_addr.py b/pywxdump/wx_core/get_bias_addr.py similarity index 96% rename from pywxdump/wx_info/get_bias_addr.py rename to pywxdump/wx_core/get_bias_addr.py index 54099d8..84c8816 100644 --- a/pywxdump/wx_info/get_bias_addr.py +++ b/pywxdump/wx_core/get_bias_addr.py @@ -151,7 +151,7 @@ class BiasAddr: return j - module.lpBaseOfDll return 0 - def run(self, logging_path=False, version_list_path=None): + def run(self, logging_path=False, WX_OFFS_PATH=None): if not self.get_process_handle()[0]: return None mobile_bias = self.search_memory_value(self.mobile, self.module_name) @@ -164,11 +164,11 @@ class BiasAddr: rdata = {self.version: [name_bias, account_bias, mobile_bias, 0, key_bias]} - if version_list_path and os.path.exists(version_list_path): - with open(version_list_path, "r", encoding="utf-8") as f: + if WX_OFFS_PATH and os.path.exists(WX_OFFS_PATH): + with open(WX_OFFS_PATH, "r", encoding="utf-8") as f: data = json.load(f) data.update(rdata) - with open(version_list_path, "w", encoding="utf-8") as f: + with open(WX_OFFS_PATH, "w", encoding="utf-8") as f: json.dump(data, f, ensure_ascii=False, indent=4) if 
os.path.exists(logging_path) and isinstance(logging_path, str): with open(logging_path, "a", encoding="utf-8") as f: diff --git a/pywxdump/wx_info/memory_search.py b/pywxdump/wx_core/memory_search.py similarity index 100% rename from pywxdump/wx_info/memory_search.py rename to pywxdump/wx_core/memory_search.py diff --git a/pywxdump/wx_core/merge_db.py b/pywxdump/wx_core/merge_db.py new file mode 100644 index 0000000..c9f4f6d --- /dev/null +++ b/pywxdump/wx_core/merge_db.py @@ -0,0 +1,471 @@ +# -*- coding: utf-8 -*-# +# ------------------------------------------------------------------------------- +# Name: merge_db.py +# Description: +# Author: xaoyaoo +# Date: 2023/12/03 +# ------------------------------------------------------------------------------- +import logging +import os +import shutil +import sqlite3 +import subprocess +import time +from typing import List + +from .decryption import batch_decrypt +from .wx_info import get_core_db +from .utils import wx_core_loger, wx_core_error + + +@wx_core_error +def execute_sql(connection, sql, params=None): + """ + 执行给定的SQL语句,返回结果。 + 参数: + - connection: SQLite连接 + - sql:要执行的SQL语句 + - params:SQL语句中的参数 + """ + try: + # connection.text_factory = bytes + cursor = connection.cursor() + if params: + cursor.execute(sql, params) + else: + cursor.execute(sql) + return cursor.fetchall() + except Exception as e: + try: + connection.text_factory = bytes + cursor = connection.cursor() + if params: + cursor.execute(sql, params) + else: + cursor.execute(sql) + rdata = cursor.fetchall() + connection.text_factory = str + return rdata + except Exception as e: + wx_core_loger.error(f"**********\nSQL: {sql}\nparams: {params}\n{e}\n**********", exc_info=True) + return None + + +@wx_core_error +def check_create_sync_log(connection): + """ + 检查是否存在表 sync_log,用于记录同步记录,包括微信数据库路径,表名,记录数,同步时间 + :param connection: SQLite连接 + :return: True or False + """ + + out_cursor = connection.cursor() + # 检查是否存在表 sync_log,用于记录同步记录,包括微信数据库路径,表名,记录数,同步时间 + 
sync_log_status = execute_sql(connection, "SELECT name FROM sqlite_master WHERE type='table' AND name='sync_log'") + if len(sync_log_status) < 1: + # db_path 微信数据库路径,tbl_name 表名,src_count 源数据库记录数,current_count 当前合并后的数据库对应表记录数 + sync_record_create_sql = ("CREATE TABLE sync_log (" + "id INTEGER PRIMARY KEY AUTOINCREMENT," + "db_path TEXT NOT NULL," + "tbl_name TEXT NOT NULL," + "src_count INT," + "current_count INT," + "createTime INT DEFAULT (strftime('%s', 'now')), " + "updateTime INT DEFAULT (strftime('%s', 'now'))" + ");") + out_cursor.execute(sync_record_create_sql) + # 创建索引 + out_cursor.execute("CREATE INDEX idx_sync_log_db_path ON sync_log (db_path);") + out_cursor.execute("CREATE INDEX idx_sync_log_tbl_name ON sync_log (tbl_name);") + # 创建联合索引,防止重复 + out_cursor.execute("CREATE UNIQUE INDEX idx_sync_log_db_tbl ON sync_log (db_path, tbl_name);") + connection.commit() + out_cursor.close() + return True + + +@wx_core_error +def check_create_file_md5(connection): + """ + 检查是否存在表 file_md5,用于记录文件信息,后续用于去重等操作,暂时闲置 + """ + pass + + +@wx_core_error +def merge_db(db_paths: list[dict], save_path: str = "merge.db", is_merge_data: bool = True, + startCreateTime: int = 0, endCreateTime: int = 0): + """ + 合并数据库 会忽略主键以及重复的行。 + :param db_paths: [{"db_path": "xxx", "de_path": "xxx"},...] 
+ db_path表示初始路径,de_path表示解密后的路径;初始路径用于保存合并的日志情况,解密后的路径用于读取数据 + :param save_path: str 输出文件路径 + :param is_merge_data: bool 是否合并数据(如果为False,则只解密,并创建表,不插入数据) + :param startCreateTime: 开始时间戳 主要用于MSG数据库的合并 + :param endCreateTime: 结束时间戳 主要用于MSG数据库的合并 + :return: + """ + if os.path.isdir(save_path): + save_path = os.path.join(save_path, f"merge_{int(time.time())}.db") + + if isinstance(db_paths, list): + # alias, file_path + databases = {f"MSG{i}": (db['db_path'], + db.get('de_path', db['db_path']) + ) for i, db in enumerate(db_paths) + } + else: + raise TypeError("db_paths 类型错误") + + outdb = sqlite3.connect(save_path) + + is_sync_log = check_create_sync_log(outdb) + if not is_sync_log: + wx_core_loger.warning("创建同步记录表失败") + + out_cursor = outdb.cursor() + + # 将MSG_db_paths中的数据合并到out_db_path中 + for alias, db in databases.items(): + db_path = db[0] + de_path = db[1] + + # 附加数据库 + sql_attach = f"ATTACH DATABASE '{de_path}' AS {alias}" + out_cursor.execute(sql_attach) + outdb.commit() + sql_query_tbl_name = f"SELECT name FROM {alias}.sqlite_master WHERE type='table' ORDER BY name;" + tables = execute_sql(outdb, sql_query_tbl_name) + for table in tables: + table = table[0] + if table == "sqlite_sequence": + continue + # 获取表中的字段名 + sql_query_columns = f"PRAGMA table_info({table})" + columns = execute_sql(outdb, sql_query_columns) + col_type = { + (i[1] if isinstance(i[1], str) else i[1].decode(), + i[2] if isinstance(i[2], str) else i[2].decode()) + for i in columns} + columns = [i[0] for i in col_type] + if not columns or len(columns) < 1: + continue + # 创建表table + sql_create_tbl = f"CREATE TABLE IF NOT EXISTS {table} AS SELECT * FROM {alias}.{table} WHERE 0 = 1;" + out_cursor.execute(sql_create_tbl) + # 创建包含 NULL 值比较的 UNIQUE 索引 + index_name = f"{table}_unique_index" + coalesce_columns = ','.join(f"COALESCE({column}, '')" for column in columns) + sql = f"CREATE UNIQUE INDEX IF NOT EXISTS {index_name} ON {table} ({coalesce_columns})" + out_cursor.execute(sql) + + # 插入sync_log + 
sql_query_sync_log = f"SELECT src_count FROM sync_log WHERE db_path=? AND tbl_name=?" + sync_log = execute_sql(outdb, sql_query_sync_log, (db_path, table)) + if not sync_log or len(sync_log) < 1: + sql_insert_sync_log = "INSERT INTO sync_log (db_path, tbl_name, src_count, current_count) VALUES (?, ?, ?, ?)" + out_cursor.execute(sql_insert_sync_log, (db_path, table, 0, 0)) + outdb.commit() + + if is_merge_data: + # 比较源数据库和合并后的数据库记录数 + log_src_count = execute_sql(outdb, sql_query_sync_log, (db_path, table))[0][0] + src_count = execute_sql(outdb, f"SELECT COUNT(*) FROM {alias}.{table}")[0][0] + if src_count <= log_src_count: + wx_core_loger.info(f"忽略 {db_path} {de_path} {table} {src_count} {log_src_count}") + continue + + # 构建数据查询sql + sql_base = f"SELECT {','.join([i for i in columns])} FROM {alias}.{table} " + where_clauses, params = [], [] + if "CreateTime" in columns: + if startCreateTime > 0: + where_clauses.append("CreateTime > ?") + params.append(startCreateTime) + if endCreateTime > 0: + where_clauses.append("CreateTime < ?") + params.append(endCreateTime) + # 如果有WHERE子句,将其添加到SQL语句中,并添加ORDER BY子句 + sql = f"{sql_base} WHERE {' AND '.join(where_clauses)} ORDER BY CreateTime" if where_clauses else sql_base + src_data = execute_sql(outdb, sql, tuple(params)) + if not src_data or len(src_data) < 1: + continue + # 插入数据 + sql = f"INSERT OR IGNORE INTO {table} ({','.join([i for i in columns])}) VALUES ({','.join(['?'] * len(columns))})" + try: + out_cursor.executemany(sql, src_data) + + # update sync_log + sql_update_sync_log = ("UPDATE sync_log " + "SET src_count = ? ," + f"current_count=(SELECT COUNT(*) FROM {table}) " + "WHERE db_path=? 
AND tbl_name=?") + out_cursor.execute(sql_update_sync_log, (src_count, db_path, table)) + + except Exception as e: + wx_core_loger.error( + f"error: {db_path}\n{de_path}\n{table}\n{sql}\n{src_data}\n{len(src_data)}\n{e}\n", + exc_info=True) + # 分离数据库 + sql_detach = f"DETACH DATABASE {alias}" + out_cursor.execute(sql_detach) + outdb.commit() + out_cursor.close() + outdb.close() + return save_path + + +# @wx_core_error +# def merge_db1(db_paths: list[dict], save_path: str = "merge.db", is_merge_data: bool = True, +# startCreateTime: int = 0, endCreateTime: int = 0): +# """ +# 合并数据库 会忽略主键以及重复的行。 +# :param db_paths: [{"db_path": "xxx", "de_path": "xxx"},...] +# db_path表示初始路径,de_path表示解密后的路径;初始路径用于保存合并的日志情况,解密后的路径用于读取数据 +# :param save_path: str 输出文件路径 +# :param is_merge_data: bool 是否合并数据(如果为False,则只解密,并创建表,不插入数据) +# :param startCreateTime: 开始时间戳 主要用于MSG数据库的合并 +# :param endCreateTime: 结束时间戳 主要用于MSG数据库的合并 +# :return: +# """ +# if os.path.isdir(save_path): +# save_path = os.path.join(save_path, f"merge_{int(time.time())}.db") +# +# if isinstance(db_paths, list): +# # alias, file_path +# databases = {f"MSG{i}": (db['db_path'], +# db.get('de_path', db['db_path']) +# ) for i, db in enumerate(db_paths) +# } +# else: +# raise TypeError("db_paths 类型错误") +# +# from sqlalchemy import create_engine, MetaData, Table, select, insert, Column, UniqueConstraint +# from sqlalchemy.orm import sessionmaker +# from sqlalchemy import inspect, PrimaryKeyConstraint +# +# outdb = create_engine(f"sqlite:///{save_path}", echo=False) +# +# # 创建Session实例 +# Session = sessionmaker() +# Session.configure(bind=outdb) +# session = Session() +# +# # 将MSG_db_paths中的数据合并到out_db_path中 +# for alias, db in databases.items(): +# db_path = db[0] +# de_path = db[1] +# +# db_engine = create_engine(f"sqlite:///{de_path}", echo=False) +# +# # 反射源数据库的表结构 +# metadata = MetaData() +# metadata.reflect(bind=db_engine) +# +# # 创建表 +# outdb_metadata = MetaData() +# inspector = inspect(db_engine) +# table_names = [i for i 
in inspector.get_table_names() if i not in ["sqlite_sequence"]] +# for table_name in table_names: +# # 创建表table +# columns_list_dict = inspector.get_columns(table_name) +# col_names = [i['name'] for i in columns_list_dict] +# columns = [Column(i['name'], i['type'], primary_key=False) for i in columns_list_dict] +# table = Table(table_name, outdb_metadata, *columns) +# if len(columns) > 1: # 联合索引 +# unique_constraint = UniqueConstraint(*col_names, name=f"{table_name}_unique_index") +# table.append_constraint(unique_constraint) +# else: +# table.append_constraint(PrimaryKeyConstraint(*col_names)) +# table.create(outdb, checkfirst=True) +# +# # 将源数据库中的数据插入目标数据库 +# outdb_metadata = MetaData() +# for table_name in metadata.tables: +# source_table = Table(table_name, metadata, autoload_with=db_engine) +# outdb_table = Table(table_name, outdb_metadata, autoload_with=outdb) +# +# # 查询源表中的所有数据 +# query = select(source_table) +# with db_engine.connect() as connection: +# result = connection.execute(query).fetchall() +# +# # 插入到目标表中 +# for row in result: +# row_data = row._asdict() +# +# # 尝试将所有文本数据转换为 UTF-8 +# for key, value in row_data.items(): +# if isinstance(value, str): +# row_data[key] = value.encode("utf-8") +# +# insert_stmt = insert(outdb_table).values(row_data) +# try: +# session.execute(insert_stmt) +# except Exception as e: +# pass +# db_engine.dispose() +# +# # 提交事务 +# session.commit() +# # 关闭Session +# session.close() +# outdb.dispose() +# return save_path + +@wx_core_error +def decrypt_merge(wx_path: str, key: str, outpath: str = "", + merge_save_path: str = None, + is_merge_data=True, is_del_decrypted: bool = True, + startCreateTime: int = 0, endCreateTime: int = 0, + db_type=None) -> (bool, str): + """ + 解密合并数据库 msg.db, microMsg.db, media.db,注意:会删除原数据库 + :param wx_path: 微信路径 eg: C:\\*******\\WeChat Files\\wxid_********* + :param key: 解密密钥 + :param outpath: 输出路径 + :param merge_save_path: 合并后的数据库路径 + :param is_merge_data: 是否合并数据(如果为False,则只解密,并创建表,不插入数据) + 
:param is_del_decrypted: 是否删除解密后的数据库(除了合并后的数据库) + :param startCreateTime: 开始时间戳 主要用于MSG数据库的合并 + :param endCreateTime: 结束时间戳 主要用于MSG数据库的合并 + :param db_type: 数据库类型,从核心数据库中选择 + :return: (true,解密后的数据库路径) or (false,错误信息) + """ + if db_type is None: + db_type = [] + + outpath = outpath if outpath else "decrypt_merge_tmp" + merge_save_path = os.path.join(outpath, + f"merge_{int(time.time())}.db") if merge_save_path is None else merge_save_path + decrypted_path = os.path.join(outpath, "decrypted") + + if not wx_path or not key or not os.path.exists(wx_path): + wx_core_loger.error("参数错误", exc_info=True) + return False, "参数错误" + + # 解密 + code, wxdbpaths = get_core_db(wx_path, db_type) + if not code: + wx_core_loger.error(f"获取数据库路径失败{wxdbpaths}", exc_info=True) + return False, wxdbpaths + + # 判断out_path是否为空目录 + if os.path.exists(decrypted_path) and os.listdir(decrypted_path): + for root, dirs, files in os.walk(decrypted_path, topdown=False): + for name in files: + os.remove(os.path.join(root, name)) + for name in dirs: + os.rmdir(os.path.join(root, name)) + + if not os.path.exists(decrypted_path): + os.makedirs(decrypted_path) + + wxdbpaths = {i["db_path"]: i for i in wxdbpaths} + + # 调用 decrypt 函数,并传入参数 # 解密 + code, ret = batch_decrypt(key=key, db_path=list(wxdbpaths.keys()), out_path=decrypted_path, is_print=False) + if not code: + wx_core_loger.error(f"解密失败{ret}", exc_info=True) + return False, ret + + out_dbs = [] + for code1, ret1 in ret: + if code1: + out_dbs.append(ret1) + + parpare_merge_db_path = [] + for db_path, out_path, _ in out_dbs: + parpare_merge_db_path.append({"db_path": db_path, "de_path": out_path}) + merge_save_path = merge_db(parpare_merge_db_path, merge_save_path, is_merge_data=is_merge_data, + startCreateTime=startCreateTime, endCreateTime=endCreateTime) + if is_del_decrypted: + shutil.rmtree(decrypted_path, True) + if isinstance(merge_save_path, str): + return True, merge_save_path + else: + return False, "未知错误" + + +@wx_core_error +def 
merge_real_time_db(key, merge_path: str, db_paths: [dict] or dict): + """ + 合并实时数据库消息,暂时只支持64位系统 + :param key: 解密密钥 + :param db_paths: [dict] or dict eg: {'wxid': 'wxid_***', 'db_type': 'MicroMsg', + 'db_path': 'C:\**\wxid_***\Msg\MicroMsg.db', 'wxid_dir': 'C:\***\wxid_***'} + :param merge_path: 合并后的数据库路径 + :return: + """ + try: + import platform + except: + raise ImportError("未找到模块 platform") + # 判断系统位数是否为64位,如果不是则抛出异常 + if platform.architecture()[0] != '64bit': + raise Exception("System is not 64-bit.") + + if isinstance(db_paths, dict): + db_paths = [db_paths] + + merge_path = os.path.abspath(merge_path) # 合并后的数据库路径,必须为绝对路径 + merge_path_base = os.path.dirname(merge_path) # 合并后的数据库路径 + if not os.path.exists(merge_path_base): + os.makedirs(merge_path_base) + + endbs = [] + + for db_info in db_paths: + db_path = os.path.abspath(db_info['db_path']) + if not os.path.exists(db_path): + # raise FileNotFoundError("数据库不存在") + continue + if "MSG" not in db_path and "MicroMsg" not in db_path and "MediaMSG" not in db_path: + # raise FileNotFoundError("数据库不是消息数据库") # MicroMsg实时数据库 + continue + endbs.append(os.path.abspath(db_path)) + endbs = '" "'.join(list(set(endbs))) + + # 获取当前文件夹路径 + current_path = os.path.dirname(__file__) + real_time_exe_path = os.path.join(current_path, "tools", "realTime.exe") + + # 调用cmd命令 + cmd = f'{real_time_exe_path} "{key}" "{merge_path}" "{endbs}"' + # os.system(cmd) + p = subprocess.Popen(cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=merge_path_base, + creationflags=subprocess.CREATE_NO_WINDOW) + # p.communicate() + # 查看返回值 + out, err = p.communicate() + if out and out.decode("utf-8").find("SUCCESS") >= 0: + wx_core_loger.info(f"合并实时数据库成功{out}") + return True, merge_path + if err: + wx_core_loger.error(f"合并实时数据库失败\n{out}\n{err}") + return False, err + + +@wx_core_error +def all_merge_real_time_db(key, wx_path, merge_path: str): + """ + 合并所有实时数据库 + 注:这是全量合并,会有可能产生重复数据,需要自行去重 + :param key: 解密密钥 + :param wx_path: 微信路径 + 
:param merge_path: 合并后的数据库路径 eg: C:\\*******\\WeChat Files\\wxid_*********\\merge.db + :return: + """ + if not merge_path or not key or not wx_path or not wx_path: + return False, "msg_path or media_path or wx_path or key is required" + try: + from pywxdump import get_core_db + except ImportError: + return False, "未找到模块 pywxdump" + + db_paths = get_core_db(wx_path, ["MediaMSG", "MSG", "MicroMsg"]) + if not db_paths[0]: + return False, db_paths[1] + db_paths = db_paths[1] + merge_real_time_db(key=key, merge_path=merge_path, db_paths=db_paths) + return True, merge_path diff --git a/pywxdump/wx_core/tools/libcrypto-1_1-x64.dll b/pywxdump/wx_core/tools/libcrypto-1_1-x64.dll new file mode 100644 index 0000000..e69de29 diff --git a/pywxdump/wx_info/tools/realTime.exe b/pywxdump/wx_core/tools/realTime.exe similarity index 73% rename from pywxdump/wx_info/tools/realTime.exe rename to pywxdump/wx_core/tools/realTime.exe index 1246e3a..2295d69 100644 Binary files a/pywxdump/wx_info/tools/realTime.exe and b/pywxdump/wx_core/tools/realTime.exe differ diff --git a/pywxdump/wx_core/utils/__init__.py b/pywxdump/wx_core/utils/__init__.py new file mode 100644 index 0000000..4611de6 --- /dev/null +++ b/pywxdump/wx_core/utils/__init__.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*-# +# ------------------------------------------------------------------------------- +# Name: __init__.py.py +# Description: +# Author: xaoyaoo +# Date: 2024/07/23 +# ------------------------------------------------------------------------------- + +from .common_utils import verify_key, get_exe_version, get_exe_bit, wx_core_error +from .ctypes_utils import get_process_list, get_memory_maps, get_process_exe_path, \ + get_file_version_info +from .memory_search import search_memory +from ._loger import wx_core_loger + +DB_TYPE_CORE = ["MSG", "MediaMSG", "MicroMsg", "OpenIMContact", "OpenIMMedia", "OpenIMMsg", "Favorite", "PublicMsg"] diff --git a/pywxdump/wx_core/utils/_loger.py b/pywxdump/wx_core/utils/_loger.py 
new file mode 100644 index 0000000..2ee429e --- /dev/null +++ b/pywxdump/wx_core/utils/_loger.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*-# +# ------------------------------------------------------------------------------- +# Name: _loger.py +# Description: +# Author: xaoyaoo +# Date: 2024/07/23 +# ------------------------------------------------------------------------------- +import logging + +wx_core_loger = logging.getLogger("wx_core") diff --git a/pywxdump/wx_info/utils.py b/pywxdump/wx_core/utils/common_utils.py similarity index 87% rename from pywxdump/wx_info/utils.py rename to pywxdump/wx_core/utils/common_utils.py index 06102f3..c49b04f 100644 --- a/pywxdump/wx_info/utils.py +++ b/pywxdump/wx_core/utils/common_utils.py @@ -11,12 +11,20 @@ import hmac import sys import traceback import hashlib +<<<<<<< HEAD:pywxdump/wx_info/utils.py +======= +from ._loger import wx_core_loger +>>>>>>> ta:pywxdump/wx_core/utils/common_utils.py if sys.platform == "win32": from win32com.client import Dispatch else: Dispatch = None +<<<<<<< HEAD:pywxdump/wx_info/utils.py def info_error(func): +======= +def wx_core_error(func): +>>>>>>> ta:pywxdump/wx_core/utils/common_utils.py """ 错误处理装饰器 :param func: @@ -26,10 +34,15 @@ def info_error(func): try: return func(*args, **kwargs) except Exception as e: +<<<<<<< HEAD:pywxdump/wx_info/utils.py traceback_data = traceback.format_exc() rdata = f"{traceback_data}" print(f"info_error: \n{rdata}") return "None" +======= + wx_core_loger.error(f"wx_core_error: {e}", exc_info=True) + return None +>>>>>>> ta:pywxdump/wx_core/utils/common_utils.py return wrapper @@ -53,7 +66,7 @@ def verify_key(key, wx_db_path): return False return True -@info_error +@wx_core_error def get_exe_version(file_path): """ 获取 PE 文件的版本号 diff --git a/pywxdump/wx_core/utils/ctypes_utils.py b/pywxdump/wx_core/utils/ctypes_utils.py new file mode 100644 index 0000000..a633ab4 --- /dev/null +++ b/pywxdump/wx_core/utils/ctypes_utils.py @@ -0,0 +1,264 @@ +import ctypes 
+import ctypes.wintypes +from collections import namedtuple + +# 定义必要的常量 +TH32CS_SNAPPROCESS = 0x00000002 +MAX_PATH = 260 +PROCESS_QUERY_INFORMATION = 0x0400 +PROCESS_VM_READ = 0x0010 + + +# MEMORY_BASIC_INFORMATION 结构体定义 +class MEMORY_BASIC_INFORMATION(ctypes.Structure): + _fields_ = [ + ('BaseAddress', ctypes.wintypes.LPVOID), + ('AllocationBase', ctypes.wintypes.LPVOID), + ('AllocationProtect', ctypes.wintypes.DWORD), + ('RegionSize', ctypes.c_size_t), + ('State', ctypes.wintypes.DWORD), + ('Protect', ctypes.wintypes.DWORD), + ('Type', ctypes.wintypes.DWORD) + ] + + +class MODULEINFO(ctypes.Structure): + _fields_ = [ + ("lpBaseOfDll", ctypes.c_void_p), # remote pointer + ("SizeOfImage", ctypes.c_ulong), + ("EntryPoint", ctypes.c_void_p), # remote pointer + ] + + +# 定义PROCESSENTRY32结构 +class PROCESSENTRY32(ctypes.Structure): + _fields_ = [("dwSize", ctypes.wintypes.DWORD), + ("cntUsage", ctypes.wintypes.DWORD), + ("th32ProcessID", ctypes.wintypes.DWORD), + ("th32DefaultHeapID", ctypes.POINTER(ctypes.wintypes.ULONG)), + ("th32ModuleID", ctypes.wintypes.DWORD), + ("cntThreads", ctypes.wintypes.DWORD), + ("th32ParentProcessID", ctypes.wintypes.DWORD), + ("pcPriClassBase", ctypes.wintypes.LONG), + ("dwFlags", ctypes.wintypes.DWORD), + ("szExeFile", ctypes.c_char * MAX_PATH)] + + +class VS_FIXEDFILEINFO(ctypes.Structure): + _fields_ = [ + ('dwSignature', ctypes.wintypes.DWORD), + ('dwStrucVersion', ctypes.wintypes.DWORD), + ('dwFileVersionMS', ctypes.wintypes.DWORD), + ('dwFileVersionLS', ctypes.wintypes.DWORD), + ('dwProductVersionMS', ctypes.wintypes.DWORD), + ('dwProductVersionLS', ctypes.wintypes.DWORD), + ('dwFileFlagsMask', ctypes.wintypes.DWORD), + ('dwFileFlags', ctypes.wintypes.DWORD), + ('dwFileOS', ctypes.wintypes.DWORD), + ('dwFileType', ctypes.wintypes.DWORD), + ('dwFileSubtype', ctypes.wintypes.DWORD), + ('dwFileDateMS', ctypes.wintypes.DWORD), + ('dwFileDateLS', ctypes.wintypes.DWORD), + ] + + +# 加载dll +kernel32 = ctypes.WinDLL('kernel32', 
use_last_error=True) +psapi = ctypes.WinDLL('psapi', use_last_error=True) +version = ctypes.WinDLL('version', use_last_error=True) + +# 创建进程快照 +CreateToolhelp32Snapshot = kernel32.CreateToolhelp32Snapshot +CreateToolhelp32Snapshot.argtypes = [ctypes.wintypes.DWORD, ctypes.wintypes.DWORD] +CreateToolhelp32Snapshot.restype = ctypes.wintypes.HANDLE + +# 获取第一个进程 +Process32First = kernel32.Process32First +Process32First.argtypes = [ctypes.wintypes.HANDLE, ctypes.POINTER(PROCESSENTRY32)] +Process32First.restype = ctypes.wintypes.BOOL + +# 获取下一个进程 +Process32Next = kernel32.Process32Next +Process32Next.argtypes = [ctypes.wintypes.HANDLE, ctypes.POINTER(PROCESSENTRY32)] +Process32Next.restype = ctypes.wintypes.BOOL + +# 关闭句柄 +CloseHandle = kernel32.CloseHandle +CloseHandle.argtypes = [ctypes.wintypes.HANDLE] +CloseHandle.restype = ctypes.wintypes.BOOL + +# 打开进程 +OpenProcess = kernel32.OpenProcess +OpenProcess.argtypes = [ctypes.wintypes.DWORD, ctypes.wintypes.BOOL, ctypes.wintypes.DWORD] +OpenProcess.restype = ctypes.wintypes.HANDLE + +# 获取模块文件名 +GetModuleFileNameEx = psapi.GetModuleFileNameExA +GetModuleFileNameEx.argtypes = [ctypes.wintypes.HANDLE, ctypes.wintypes.HANDLE, ctypes.c_char_p, ctypes.wintypes.DWORD] +GetModuleFileNameEx.restype = ctypes.wintypes.DWORD + +# 获取文件版本信息大小 +GetFileVersionInfoSizeW = version.GetFileVersionInfoSizeW +GetFileVersionInfoSizeW.argtypes = [ctypes.wintypes.LPCWSTR, ctypes.POINTER(ctypes.wintypes.DWORD)] +GetFileVersionInfoSizeW.restype = ctypes.wintypes.DWORD + +# 获取文件版本信息 +GetFileVersionInfoW = version.GetFileVersionInfoW +GetFileVersionInfoW.argtypes = [ctypes.wintypes.LPCWSTR, ctypes.wintypes.DWORD, ctypes.wintypes.DWORD, ctypes.c_void_p] +GetFileVersionInfoW.restype = ctypes.wintypes.BOOL + +# 查询文件版本信息 +VerQueryValueW = version.VerQueryValueW +VerQueryValueW.argtypes = [ctypes.c_void_p, ctypes.wintypes.LPCWSTR, ctypes.POINTER(ctypes.c_void_p), + ctypes.POINTER(ctypes.wintypes.UINT)] +VerQueryValueW.restype = ctypes.wintypes.BOOL + +# 
def get_memory_maps(pid):
    """
    Enumerate the virtual-memory regions of a process.

    The address space is walked with VirtualQueryEx starting at address 0;
    for every region GetMappedFileNameW is asked for the file backing it.

    :param pid: id of the process to inspect
    :return: list of MemMap namedtuples with the fields BaseAddress, RegionSize,
             State, Protect, Type and FileName (FileName is None when no mapped
             file name could be obtained); an empty list when the process
             cannot be opened
    """
    MemMap = namedtuple('MemMap', ['BaseAddress', 'RegionSize', 'State', 'Protect', 'Type', 'FileName'])

    # Open the process with just enough rights to query and read its memory.
    access = PROCESS_QUERY_INFORMATION | PROCESS_VM_READ
    hProcess = OpenProcess(access, False, pid)
    if not hProcess:
        return []

    memory_maps = []
    base_address = 0
    mbi = MEMORY_BASIC_INFORMATION()
    max_address = 0x7FFFFFFFFFFFFFFF  # upper bound used for a 64-bit address space

    try:
        while base_address < max_address:
            if VirtualQueryEx(hProcess, base_address, ctypes.byref(mbi), ctypes.sizeof(mbi)) == 0:
                break

            mapped_file_name = ctypes.create_unicode_buffer(ctypes.wintypes.MAX_PATH)
            if GetMappedFileNameW(hProcess, base_address, mapped_file_name, ctypes.wintypes.MAX_PATH) > 0:
                file_name = mapped_file_name.value
            else:
                file_name = None

            memory_maps.append(MemMap(
                BaseAddress=mbi.BaseAddress,
                RegionSize=mbi.RegionSize,
                State=mbi.State,
                Protect=mbi.Protect,
                Type=mbi.Type,
                FileName=file_name,
            ))

            if mbi.RegionSize == 0:
                # A zero-sized region would never advance the cursor; stop instead of spinning.
                break
            base_address += mbi.RegionSize
    finally:
        # Always release the handle, even when a ctypes call raises mid-walk.
        CloseHandle(hProcess)

    return memory_maps
# Structure definitions
class MEMORY_BASIC_INFORMATION(ctypes.Structure):
    """
    Record that VirtualQueryEx fills in for one memory region.

    A pointer to this structure is the third argument declared in
    VirtualQueryEx.argtypes. The order and types of the fields define the
    binary layout, so they must not be reordered.
    """
    _fields_ = [
        ("BaseAddress", ctypes.c_void_p),  # start of the region; used as the read address by search_memory
        ("AllocationBase", ctypes.c_void_p),  # not read anywhere in this module
        ("AllocationProtect", wintypes.DWORD),  # not read anywhere in this module
        ("RegionSize", ctypes.c_size_t),  # region length in bytes; used as buffer size and step
        ("State", wintypes.DWORD),  # compared against MEM_COMMIT by search_memory
        ("Protect", wintypes.DWORD),  # compared against the PAGE_* constants by search_memory
        ("Type", wintypes.DWORD),  # not read anywhere in this module
    ]
def search_memory(hProcess, pattern=br'\\Msg\\FTSContact', max_num=100, start_address=0x0,
                  end_address=0x7FFFFFFFFFFFFFFF):
    """
    Search the memory of a process for a byte pattern.

    :param hProcess: handle of an already opened process (needs query and read rights)
    :param pattern: bytes regular expression (or an already compiled bytes pattern) to look for
    :param max_num: stop scanning further regions once at least this many hits were collected;
                    the region that crosses the limit is reported completely, so the result
                    may hold more than max_num addresses
    :param start_address: address at which the scan starts
    :param end_address: address at which the scan stops (on a 32-bit interpreter a fixed
                        limit of 0x7fff0000 is used instead)
    :return: list of absolute addresses at which the pattern matches
    :raises OSError: (ctypes.WinError) when hProcess is empty
    """
    if not hProcess:
        raise ctypes.WinError(ctypes.get_last_error())

    # DOTALL has to be given at compile time: the second positional argument of
    # Pattern.finditer() is `pos`, not `flags`.
    if not isinstance(pattern, re.Pattern):
        pattern = re.compile(pattern, re.DOTALL)

    readable_protections = (PAGE_EXECUTE, PAGE_EXECUTE_READ, PAGE_EXECUTE_READWRITE,
                            PAGE_READWRITE, PAGE_READONLY)
    max_address = end_address if sys.maxsize > 2 ** 32 else 0x7fff0000

    result = []
    mbi = MEMORY_BASIC_INFORMATION()
    address = start_address

    while address < max_address:
        if VirtualQueryEx(hProcess, address, ctypes.byref(mbi), ctypes.sizeof(mbi)) == 0:
            break

        # The reported region may start before `address`, so hits and the next
        # step are both computed from the region's own base.
        region_base = mbi.BaseAddress or 0  # c_void_p fields read back as None for address 0
        region_size = mbi.RegionSize
        next_address = region_base + region_size
        if next_address <= address:
            break  # no forward progress possible
        address = next_address

        # Only committed, readable pages are worth reading.
        if mbi.State != MEM_COMMIT or mbi.Protect not in readable_protections:
            continue

        page_bytes = ctypes.create_string_buffer(region_size)
        bytes_read = ctypes.c_size_t()
        # c_void_p / c_size_t wrappers keep large addresses from overflowing the default int conversion.
        if ReadProcessMemory(hProcess, ctypes.c_void_p(region_base), page_bytes, ctypes.c_size_t(region_size),
                             ctypes.byref(bytes_read)) == 0:
            continue

        # Restrict the search to the bytes that were actually copied.
        result.extend(region_base + match.start()
                      for match in pattern.finditer(page_bytes, 0, bytes_read.value))
        if len(result) >= max_num:
            break

    return result
# Read the key from memory through a pointer stored at a known offset
@wx_core_error
def get_key_by_offs(h_process, address, address_len=8):
    """
    Follow the pointer stored at `address` and return the 32 bytes it points to.

    :param h_process: handle of the opened process
    :param address: address that holds the pointer to the key
    :param address_len: size of that pointer in bytes
    :return: the 32 key bytes as a hex string, or None when either read fails
    """
    pointer_buf = ctypes.create_string_buffer(address_len)
    pointer_ok = ReadProcessMemory(h_process, void_p(address), pointer_buf, address_len, 0)
    if pointer_ok == 0:
        return None

    # The pointer is stored little-endian; turn it into the address of the key.
    key_address = int.from_bytes(pointer_buf, byteorder='little')

    key_buf = ctypes.create_string_buffer(32)
    key_ok = ReadProcessMemory(h_process, void_p(key_address), key_buf, 32, 0)
    if key_ok == 0:
        return None

    return bytes(key_buf).hex()
# Read the wxid from process memory
@wx_core_error
def get_info_wxid(h_process):
    r"""
    Guess the wxid of the logged-in account from process memory.

    Up to 100 occurrences of ``\Msg\FTSContact`` are located; for each one the
    path component directly in front of ``\Msg`` is taken as a wxid candidate.
    The candidate seen most often wins.

    :param h_process: handle of the opened process
    :return: the most frequent wxid candidate, or None when none was found
    """
    find_num = 100
    addrs = search_memory(h_process, br'\\Msg\\FTSContact', max_num=find_num)

    wxids = []
    for addr in addrs:
        # 80 bytes starting 30 bytes before the hit cover the directory name in front of "\Msg".
        array = ctypes.create_string_buffer(80)
        if ReadProcessMemory(h_process, void_p(addr - 30), array, 80, 0) == 0:
            # One unreadable hit must not throw away the candidates already collected.
            continue
        before_msg = bytes(array).split(b"\\Msg")[0]
        candidate = before_msg.split(b"\\")[-1].decode('utf-8', errors='ignore')
        if candidate:  # an empty name can never be a wxid
            wxids.append(candidate)

    return max(wxids, key=wxids.count) if wxids else None
@wx_core_error
def get_wx_dir_by_reg(wxid="all"):
    """
    Locate the "WeChat Files" directory from the registry / config file (fast).

    Lookup order for the parent directory:
      1. HKCU\\Software\\Tencent\\WeChat, value "FileSavePath"
      2. %USERPROFILE%\\AppData\\Roaming\\Tencent\\WeChat\\All Users\\config\\3ebffe94.ini
      3. when the result is still the placeholder "MyDocument:", the user's documents
         folder (registry value "Personal", falling back to %USERPROFILE%\\Documents)

    :param wxid: WeChat id; "all" selects the "WeChat Files" directory itself
    :return: the "WeChat Files" directory when wxid == "all", otherwise the wxid
             sub-directory; None when wxid is empty or the directory does not exist
    """
    if not wxid:
        return None
    w_dir = "MyDocument:"
    is_w_dir = False

    try:
        # The context manager closes the key even when the value is missing.
        with winreg.OpenKey(winreg.HKEY_CURRENT_USER, r"Software\Tencent\WeChat", 0, winreg.KEY_READ) as key:
            value, _ = winreg.QueryValueEx(key, "FileSavePath")
        w_dir = value
        is_w_dir = True
    except Exception:  # best effort: any failure falls through to the next source
        w_dir = "MyDocument:"

    if not is_w_dir:
        try:
            user_profile = os.environ.get("USERPROFILE")
            path_3ebffe94 = os.path.join(user_profile, "AppData", "Roaming", "Tencent", "WeChat", "All Users", "config",
                                         "3ebffe94.ini")
            with open(path_3ebffe94, "r", encoding="utf-8") as f:
                w_dir = f.read()
        except Exception:  # best effort: any failure falls through to the next source
            w_dir = "MyDocument:"

    if w_dir == "MyDocument:":
        try:
            # Read the real location of the documents folder from the registry.
            with winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                                r"Software\Microsoft\Windows\CurrentVersion\Explorer\User Shell Folders") as key:
                documents_path = winreg.QueryValueEx(key, "Personal")[0]
            # The value is usually stored unexpanded, e.g. %USERPROFILE%\Documents.
            expanded = os.path.expandvars(documents_path)
            if "%" in expanded:
                raise ValueError(f"unresolved variable in {documents_path!r}")
            w_dir = expanded
        except Exception:  # best effort: fall back to the conventional location
            profile = os.environ.get("USERPROFILE")
            w_dir = os.path.join(profile, "Documents")

    wx_dir = os.path.join(w_dir, "WeChat Files")

    if wxid and wxid != "all":
        wxid_dir = os.path.join(wx_dir, wxid)
        return wxid_dir if os.path.exists(wxid_dir) else None
    return wx_dir if os.path.exists(wx_dir) else None
@wx_core_error
def get_key_by_mem_search(pid, db_path, addr_len):
    """
    Find the database key by scanning process memory (slow).

    The device-type strings "iphone", "android" and "ipad" (NUL terminated) are
    searched inside the address range spanned by WeChatWin.dll. Starting at each
    hit and walking backwards over 2000 bytes in pointer-sized steps, every slot
    is treated as a pointer to a 32-byte key candidate, which is checked with
    verify_key against MSG/MicroMsg.db.

    :param pid: process id
    :param db_path: wxid directory that contains MSG/MicroMsg.db
    :param addr_len: pointer size in bytes
    :return: the key as a hex string, or None when no candidate verifies
    """

    def read_key_bytes(h_process, address, address_len=8):
        """Follow the pointer at `address`; return the 32 bytes it points to, or None."""
        array = ctypes.create_string_buffer(address_len)
        if ReadProcessMemory(h_process, void_p(address), array, address_len, 0) == 0:
            return None
        address = int.from_bytes(array, byteorder='little')  # little-endian pointer -> key address
        key = ctypes.create_string_buffer(32)
        if ReadProcessMemory(h_process, void_p(address), key, 32, 0) == 0:
            return None
        return bytes(key)

    phone_types = ("iphone\x00", "android\x00", "ipad\x00")

    MicroMsg_path = os.path.join(db_path, "MSG", "MicroMsg.db")

    # Address range covered by all regions mapped from WeChatWin.dll.
    start_adress = 0x7FFFFFFFFFFFFFFF
    end_adress = 0
    for module in get_memory_maps(pid):
        if module.FileName and 'WeChatWin.dll' in module.FileName:
            region_start = module.BaseAddress
            region_end = module.BaseAddress + module.RegionSize
            start_adress = min(start_adress, region_start)
            end_adress = max(end_adress, region_end)

    hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, False, pid)
    try:
        type_addrs = []
        for phone_type in phone_types:
            addrs = search_memory(hProcess, phone_type.encode(), max_num=2, start_address=start_adress,
                                  end_address=end_adress)
            # A device string only counts as an anchor when it shows up at least twice.
            if len(addrs) >= 2:
                type_addrs += addrs
        if not type_addrs:
            return None

        type_addrs.sort()  # ascending; candidates are tried from the highest address down

        for anchor in reversed(type_addrs):
            for slot in range(anchor, anchor - 2000, -addr_len):
                key_bytes = read_key_bytes(hProcess, slot, addr_len)
                if key_bytes is None:
                    continue
                if verify_key(key_bytes, MicroMsg_path):
                    return key_bytes.hex()
        return None
    finally:
        # Release the handle on every exit path (early return, success, exception).
        if hProcess:
            CloseHandle(hProcess)
# Read WeChat info (account, mobile, nickname, mail, wxid, key)
@wx_core_error
def get_wx_info(WX_OFFS: dict = None, is_print: bool = False, save_path: str = None):
    """
    Collect account information for every running WeChat.exe process.

    :param WX_OFFS: version -> offsets table
    :param is_print: print the result to stdout
    :param save_path: JSON file the result is appended to
    :return: list with one entry per process, as produced by get_info_details:
             [{"pid": pid, "version": version, "account": account,
               "mobile": mobile, "nickname": nickname, "mail": mail, "wxid": wxid,
               "key": key, "wx_dir": wx_dir}, ...]; an empty list when WeChat is not running
    """
    if WX_OFFS is None:
        WX_OFFS = {}

    wechat_pids = [pid for pid, name in get_process_list() if name == "WeChat.exe"]
    if not wechat_pids:
        wx_core_loger.error("[-] WeChat No Run")
        return []

    result = [get_info_details(pid, WX_OFFS) for pid in wechat_pids]

    if is_print:
        print("=" * 32)
        for i, rlt in enumerate(result):
            for k, v in rlt.items():
                print(f"[+] {k:>8}: {v if v else 'None'}")
            # Separator between processes, but not after the last one.
            print(end="-" * 32 + "\n" if i != len(result) - 1 else "")
        print("=" * 32)

    if save_path:
        infos = []
        if os.path.exists(save_path):
            try:
                with open(save_path, "r", encoding="utf-8") as f:
                    infos = json.load(f)
            except (OSError, ValueError):  # unreadable or not valid JSON: start over
                infos = []
        if not isinstance(infos, list):
            # Anything but a list cannot be appended to; checked before the file is truncated.
            infos = []
        with open(save_path, "w", encoding="utf-8") as f:
            json.dump(infos + result, f, ensure_ascii=False, indent=4)
    return result
@wx_core_error
def get_core_db(wx_path: str, db_types: list = None) -> tuple[bool, Union[str, List[dict]]]:
    r"""
    Collect the paths of the core chat databases of one account.

    :param wx_path: account directory, e.g. C:\*****\WeChat Files\wxid*******
    :param db_types: database types to return, one or more entries of DB_TYPE_CORE;
                     entries that are not in DB_TYPE_CORE are ignored, an empty value selects all
    :return: (True, [{"wxid": wxid, "db_type": db_type, "db_path": db_path, "wxid_dir": wxid_dir}, ...])
             on success, (False, message) when the directory or the databases are missing
    """
    if not wx_path or not os.path.exists(wx_path):
        return False, f"[-] 目录不存在: {wx_path}"

    if not db_types:
        db_types = DB_TYPE_CORE
    db_types = [dt for dt in db_types if dt in DB_TYPE_CORE]

    # Drop a trailing separator, otherwise basename() would yield an empty wxid.
    wx_path = os.path.normpath(wx_path)
    msg_dir = os.path.dirname(wx_path)
    my_wxid = os.path.basename(wx_path)
    wxdbpaths = get_wx_db(msg_dir=msg_dir, db_types=db_types, wxids=my_wxid)

    if not wxdbpaths:  # empty list, or nothing returned at all
        wx_core_loger.error("[-] get_core_db 未获取到数据库路径")
        return False, "未获取到数据库路径"
    return True, wxdbpaths