From d0398e1b44bf1676cad191b023c9ece75c35785a Mon Sep 17 00:00:00 2001 From: cllcode <2440893398@qq.com> Date: Sun, 7 Jul 2024 17:42:02 +0800 Subject: [PATCH] =?UTF-8?q?1.=E5=A2=9E=E5=8A=A0=E4=BA=86=E5=AF=B9=E8=B1=A1?= =?UTF-8?q?=E5=AD=98=E5=82=A8=E7=9A=84=E6=94=AF=E6=8C=81=EF=BC=8C=E5=B9=B6?= =?UTF-8?q?=E5=85=BC=E5=AE=B9=E8=AF=BB=E5=8F=96=E6=9C=AC=E5=9C=B0=E6=96=87?= =?UTF-8?q?=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pywxdump/api/api.py | 193 ++++++++++++++++---------------- pywxdump/api/utils.py | 12 +- pywxdump/dbpreprocess/dbbase.py | 32 +++++- pywxdump/dbpreprocess/utils.py | 11 +- requirements.txt | 4 +- 5 files changed, 143 insertions(+), 109 deletions(-) diff --git a/pywxdump/api/api.py b/pywxdump/api/api.py index a4edbbe..c089f51 100644 --- a/pywxdump/api/api.py +++ b/pywxdump/api/api.py @@ -18,6 +18,7 @@ if sys.platform == "win32": else: pythoncom = None import pywxdump +from pywxdump.file import AttachmentContext from flask import Flask, request, render_template, g, Blueprint, send_file, make_response, session from pywxdump import get_core_db, all_merge_real_time_db @@ -90,7 +91,7 @@ def init_key(): my_wxid = request.json.get("my_wxid", "").strip().strip("'").strip('"') if not wx_path: return ReJson(1002, body=f"wx_path is required: {wx_path}") - if not os.path.exists(wx_path): + if not AttachmentContext.exists(wx_path): return ReJson(1001, body=f"wx_path not exists: {wx_path}") if not key: return ReJson(1002, body=f"key is required: {key}") @@ -98,13 +99,13 @@ def init_key(): return ReJson(1002, body=f"my_wxid is required: {my_wxid}") old_merge_save_path = read_session(g.sf, my_wxid, "merge_path") - if isinstance(old_merge_save_path, str) and old_merge_save_path and os.path.exists(old_merge_save_path): + if isinstance(old_merge_save_path, str) and old_merge_save_path and AttachmentContext.exists(old_merge_save_path): pmsg = ParsingMSG(old_merge_save_path) pmsg.close_all_connection() - out_path = os.path.join(g.tmp_path, "decrypted", my_wxid) if my_wxid else os.path.join(g.tmp_path, "decrypted") + out_path = AttachmentContext.join(g.tmp_path, "decrypted", my_wxid) if my_wxid else AttachmentContext.join(g.tmp_path, "decrypted") # 检查文件夹中文件是否被占用 - if os.path.exists(out_path): + if AttachmentContext.exists(out_path): try: shutil.rmtree(out_path) except PermissionError as e: @@ -116,13 +117,13 @@ def init_key(): time.sleep(1) if code: # 移动merge_save_path到g.tmp_path/my_wxid - if not os.path.exists(os.path.join(g.tmp_path, my_wxid)): - os.makedirs(os.path.join(g.tmp_path, my_wxid)) - merge_save_path_new = os.path.join(g.tmp_path, my_wxid, "merge_all.db") + if not AttachmentContext.exists(AttachmentContext.join(g.tmp_path, my_wxid)): + os.makedirs(AttachmentContext.join(g.tmp_path, my_wxid)) + merge_save_path_new = AttachmentContext.join(g.tmp_path, my_wxid, "merge_all.db") shutil.move(merge_save_path, str(merge_save_path_new)) # 删除out_path - if os.path.exists(out_path): + if AttachmentContext.exists(out_path): try: shutil.rmtree(out_path) except PermissionError as e: @@ -159,7 +160,7 @@ def init_nokey(): if not wx_path: return ReJson(1002, body=f"wx_path is required: {wx_path}") - if not os.path.exists(wx_path): + if not AttachmentContext.exists(wx_path): return ReJson(1001, body=f"wx_path not exists: {wx_path}") if not merge_path: return ReJson(1002, body=f"merge_path is required: {merge_path}") @@ -336,8 +337,8 @@ def get_imgsrc(imgsrc): my_wxid = read_session(g.sf, "test", "last") if not my_wxid: return ReJson(1001, 
body="my_wxid is required") - img_tmp_path = os.path.join(g.tmp_path, my_wxid, "imgsrc") - if not os.path.exists(img_tmp_path): + img_tmp_path = AttachmentContext.join(g.tmp_path, my_wxid, "imgsrc") + if not AttachmentContext.exists(img_tmp_path): os.makedirs(img_tmp_path) file_name = imgsrc.replace("http://", "").replace("https://", "").replace("/", "_").replace("?", "_") file_name = file_name + ".jpg" @@ -345,13 +346,13 @@ def get_imgsrc(imgsrc): if len(file_name) > 255: file_name = file_name[:255] + "/" + file_name[255:] - img_path_all = os.path.join(img_tmp_path, file_name) - if os.path.exists(img_path_all): - return send_file(img_path_all) + img_path_all = AttachmentContext.join(img_tmp_path, file_name) + if AttachmentContext.exists(img_path_all): + return AttachmentContext.send_attachment(img_path_all) else: download_file(imgsrc, img_path_all) - if os.path.exists(img_path_all): - return send_file(img_path_all) + if AttachmentContext.exists(img_path_all): + return AttachmentContext.send_attachment(img_path_all) else: return ReJson(4004, body=imgsrc) @@ -373,16 +374,16 @@ def get_img(img_path): img_path = img_path.replace("\\\\", "\\") - img_tmp_path = os.path.join(g.tmp_path, my_wxid, "img") - original_img_path = os.path.join(wx_path, img_path) - if os.path.exists(original_img_path): + img_tmp_path = AttachmentContext.join(g.tmp_path, my_wxid, "img") + original_img_path = AttachmentContext.join(wx_path, img_path) + if AttachmentContext.exists(original_img_path): fomt, md5, out_bytes = dat2img(original_img_path) - imgsavepath = os.path.join(img_tmp_path, img_path + "_" + ".".join([md5, fomt])) - if not os.path.exists(os.path.dirname(imgsavepath)): - os.makedirs(os.path.dirname(imgsavepath)) - with open(imgsavepath, "wb") as f: + imgsavepath = AttachmentContext.join(img_tmp_path, img_path + "_" + ".".join([md5, fomt])) + if not AttachmentContext.exists(AttachmentContext.dirname(imgsavepath)): + AttachmentContext.makedirs(AttachmentContext.dirname(imgsavepath)) + with AttachmentContext.open_file(imgsavepath, "wb") as f: f.write(out_bytes) - return send_file(imgsavepath) + return AttachmentContext.send_attachment(imgsavepath) else: return ReJson(1001, body=original_img_path) @@ -424,18 +425,18 @@ def get_video(videoPath): videoPath = videoPath.replace("\\\\", "\\") - video_tmp_path = os.path.join(g.tmp_path, my_wxid, "video") - original_img_path = os.path.join(wx_path, videoPath) - if not os.path.exists(original_img_path): + video_tmp_path = AttachmentContext.join(g.tmp_path, my_wxid, "video") + original_img_path = AttachmentContext.join(wx_path, videoPath) + if not AttachmentContext.exists(original_img_path): return ReJson(5002) # 复制文件到临时文件夹 - video_save_path = os.path.join(video_tmp_path, videoPath) - if not os.path.exists(os.path.dirname(video_save_path)): - os.makedirs(os.path.dirname(video_save_path)) - if os.path.exists(video_save_path): - return send_file(video_save_path) - shutil.copy(original_img_path, video_save_path) - return send_file(original_img_path) + video_save_path = AttachmentContext.join(video_tmp_path, videoPath) + if not AttachmentContext.exists(AttachmentContext.dirname(video_save_path)): + os.makedirs(AttachmentContext.dirname(video_save_path)) + if AttachmentContext.exists(video_save_path): + return AttachmentContext.send_attachment(video_save_path) + AttachmentContext.download_file(original_img_path,video_save_path) + return AttachmentContext.send_attachment(original_img_path) @api.route('/api/audio/', methods=["GET", 'POST']) @@ -444,25 +445,25 @@ def 
get_audio(savePath): if not my_wxid: return ReJson(1001, body="my_wxid is required") merge_path = read_session(g.sf, my_wxid, "merge_path") - savePath = os.path.join(g.tmp_path, my_wxid, "audio", savePath) # 这个是从url中获取的 - if os.path.exists(savePath): - return send_file(savePath) + savePath = AttachmentContext.join(g.tmp_path, my_wxid, "audio", savePath) # 这个是从url中获取的 + if AttachmentContext.exists(savePath): + return AttachmentContext.send_attachment(savePath) MsgSvrID = savePath.split("_")[-1].replace(".wav", "") if not savePath: return ReJson(1002) # 判断savePath路径的文件夹是否存在 - if not os.path.exists(os.path.dirname(savePath)): - os.makedirs(os.path.dirname(savePath)) + if not AttachmentContext.exists(AttachmentContext.dirname(savePath)): + os.makedirs(AttachmentContext.dirname(savePath)) parsing_media_msg = ParsingMediaMSG(merge_path) wave_data = parsing_media_msg.get_audio(MsgSvrID, is_play=False, is_wave=True, save_path=savePath, rate=24000) if not wave_data: return ReJson(1001, body="wave_data is required") - if os.path.exists(savePath): - return send_file(savePath) + if AttachmentContext.exists(savePath): + return AttachmentContext.send_attachment(savePath) else: return ReJson(4004, body=savePath) @@ -478,8 +479,8 @@ def get_file_info(): if not my_wxid: return ReJson(1001, body="my_wxid is required") wx_path = read_session(g.sf, my_wxid, "wx_path") - all_file_path = os.path.join(wx_path, file_path) - if not os.path.exists(all_file_path): + all_file_path = AttachmentContext.join(wx_path, file_path) + if not AttachmentContext.exists(all_file_path): return ReJson(5002) file_name = os.path.basename(all_file_path) file_size = os.path.getsize(all_file_path) @@ -492,10 +493,10 @@ def get_file(filePath): if not my_wxid: return ReJson(1001, body="my_wxid is required") wx_path = read_session(g.sf, my_wxid, "wx_path") - all_file_path = os.path.join(wx_path, filePath) - if not os.path.exists(all_file_path): + all_file_path = AttachmentContext.join(wx_path, filePath) + if not AttachmentContext.exists(all_file_path): return ReJson(5002) - return send_file(all_file_path) + return AttachmentContext.send_attachment(all_file_path) # end 以上为聊天记录相关api ********************************************************************************************* @@ -515,7 +516,7 @@ def get_export_endb(): if not wx_path: return ReJson(1002, body=f"wx_path is required: {wx_path}") - if not os.path.exists(wx_path): + if not AttachmentContext.exists(wx_path): return ReJson(1001, body=f"wx_path not exists: {wx_path}") # 分割wx_path的文件名和父目录 @@ -523,14 +524,14 @@ def get_export_endb(): if not code: return ReJson(2001, body=wxdbpaths) - outpath = os.path.join(g.tmp_path, "export", my_wxid, "endb") - if not os.path.exists(outpath): + outpath = AttachmentContext.join(g.tmp_path, "export", my_wxid, "endb") + if not AttachmentContext.exists(outpath): os.makedirs(outpath) for wxdb in wxdbpaths: # 复制wxdb->outpath, os.path.basename(wxdb) assert isinstance(outpath, str) # 为了解决pycharm的警告, 无实际意义 - shutil.copy(wxdb, os.path.join(outpath, os.path.basename(wxdb))) + shutil.copy(wxdb, AttachmentContext.join(outpath, os.path.basename(wxdb))) return ReJson(0, body=outpath) @@ -550,11 +551,11 @@ def get_export_dedb(): return ReJson(1002, body=f"key is required: {key}") if not wx_path: return ReJson(1002, body=f"wx_path is required: {wx_path}") - if not os.path.exists(wx_path): + if not AttachmentContext.exists(wx_path): return ReJson(1001, body=f"wx_path not exists: {wx_path}") - outpath = os.path.join(g.tmp_path, "export", my_wxid, "dedb") - if not 
os.path.exists(outpath): + outpath = AttachmentContext.join(g.tmp_path, "export", my_wxid, "dedb") + if not AttachmentContext.exists(outpath): os.makedirs(outpath) code, merge_save_path = decrypt_merge(wx_path=wx_path, key=key, outpath=outpath) @@ -584,8 +585,8 @@ def get_export_csv(): # if not isinstance(start, int) or not isinstance(end, int) or start >= end: # return ReJson(1002, body=f"datetime is required: {st_ed_time}") - outpath = os.path.join(g.tmp_path, "export", my_wxid, "csv", wxid) - if not os.path.exists(outpath): + outpath = AttachmentContext.join(g.tmp_path, "export", my_wxid, "csv", wxid) + if not AttachmentContext.exists(outpath): os.makedirs(outpath) code, ret = export_csv(wxid, outpath, read_session(g.sf, my_wxid, "merge_path")) @@ -608,8 +609,8 @@ def get_export_json(): if not wxid: return ReJson(1002, body=f"username is required: {wxid}") - outpath = os.path.join(g.tmp_path, "export", my_wxid, "json", wxid) - if not os.path.exists(outpath): + outpath = AttachmentContext.join(g.tmp_path, "export", my_wxid, "json", wxid) + if not AttachmentContext.exists(outpath): os.makedirs(outpath) code, ret = export_json(wxid, outpath, read_session(g.sf, my_wxid, "merge_path")) @@ -639,15 +640,15 @@ def get_export_json(): # return ReJson(1002) # # # 导出路径 -# outpath = os.path.join(g.tmp_path, "export", export_type) -# if not os.path.exists(outpath): +# outpath = AttachmentContext.join(g.tmp_path, "export", export_type) +# if not AttachmentContext.exists(outpath): # os.makedirs(outpath) # # if export_type == "endb": # 导出加密数据库 # # 获取微信文件夹路径 # if not wx_path: # return ReJson(1002) -# if not os.path.exists(wx_path): +# if not AttachmentContext.exists(wx_path): # return ReJson(1001, body=wx_path) # # # 分割wx_path的文件名和父目录 @@ -657,7 +658,7 @@ def get_export_json(): # # for wxdb in wxdbpaths: # # 复制wxdb->outpath, os.path.basename(wxdb) -# shutil.copy(wxdb, os.path.join(outpath, os.path.basename(wxdb))) +# shutil.copy(wxdb, AttachmentContext.join(outpath, os.path.basename(wxdb))) # return ReJson(0, body=outpath) # # elif export_type == "dedb": @@ -667,22 +668,22 @@ def get_export_json(): # media_path = read_session(g.sf, "media_path") # dbpaths = [msg_path, media_path, micro_path] # dbpaths = list(set(dbpaths)) -# mergepath = merge_db(dbpaths, os.path.join(outpath, "merge.db"), start_time, end_time) +# mergepath = merge_db(dbpaths, AttachmentContext.join(outpath, "merge.db"), start_time, end_time) # return ReJson(0, body=mergepath) # # if msg_path == media_path and msg_path == media_path: -# # shutil.copy(msg_path, os.path.join(outpath, "merge.db")) +# # shutil.copy(msg_path, AttachmentContext.join(outpath, "merge.db")) # # return ReJson(0, body=msg_path) # # else: # # dbpaths = [msg_path, msg_path, micro_path] # # dbpaths = list(set(dbpaths)) -# # mergepath = merge_db(dbpaths, os.path.join(outpath, "merge.db"), start_time, end_time) +# # mergepath = merge_db(dbpaths, AttachmentContext.join(outpath, "merge.db"), start_time, end_time) # # return ReJson(0, body=mergepath) # else: # return ReJson(1002, body={"start_time": start_time, "end_time": end_time}) # # elif export_type == "csv": -# outpath = os.path.join(outpath, username) -# if not os.path.exists(outpath): +# outpath = AttachmentContext.join(outpath, username) +# if not AttachmentContext.exists(outpath): # os.makedirs(outpath) # code, ret = analyzer.export_csv(username, outpath, read_session(g.sf, "msg_path")) # if code: @@ -690,8 +691,8 @@ def get_export_json(): # else: # return ReJson(2001, body=ret) # elif export_type == "json": -# 
outpath = os.path.join(outpath, username) -# if not os.path.exists(outpath): +# outpath = AttachmentContext.join(outpath, username) +# if not AttachmentContext.exists(outpath): # os.makedirs(outpath) # code, ret = analyzer.export_json(username, outpath, read_session(g.sf, "msg_path")) # if code: @@ -699,10 +700,10 @@ def get_export_json(): # else: # return ReJson(2001, body=ret) # elif export_type == "html": -# outpath = os.path.join(outpath, username) -# if os.path.exists(outpath): +# outpath = AttachmentContext.join(outpath, username) +# if AttachmentContext.exists(outpath): # shutil.rmtree(outpath) -# if not os.path.exists(outpath): +# if not AttachmentContext.exists(outpath): # os.makedirs(outpath) # # chat_type_tups = [] # # for ct in chat_type: @@ -713,26 +714,26 @@ def get_export_json(): # # return ReJson(1002) # # # 复制文件 html -# export_html = os.path.join(os.path.dirname(pywxdump.VERSION_LIST_PATH), "ui", "export") -# indexhtml_path = os.path.join(export_html, "index.html") -# assets_path = os.path.join(export_html, "assets") -# if not os.path.exists(indexhtml_path) or not os.path.exists(assets_path): +# export_html = AttachmentContext.join(AttachmentContext.dirname(pywxdump.VERSION_LIST_PATH), "ui", "export") +# indexhtml_path = AttachmentContext.join(export_html, "index.html") +# assets_path = AttachmentContext.join(export_html, "assets") +# if not AttachmentContext.exists(indexhtml_path) or not AttachmentContext.exists(assets_path): # return ReJson(1001) # js_path = "" # css_path = "" # for file in os.listdir(assets_path): # if file.endswith('.js'): -# js_path = os.path.join(assets_path, file) +# js_path = AttachmentContext.join(assets_path, file) # elif file.endswith('.css'): -# css_path = os.path.join(assets_path, file) +# css_path = AttachmentContext.join(assets_path, file) # else: # continue # # 读取html,js,css -# with open(indexhtml_path, 'r', encoding='utf-8') as f: +# with AttachmentContext.open_file(indexhtml_path, 'r', encoding='utf-8') as f: # html = f.read() -# with open(js_path, 'r', encoding='utf-8') as f: +# with AttachmentContext.open_file(js_path, 'r', encoding='utf-8') as f: # js = f.read() -# with open(css_path, 'r', encoding='utf-8') as f: +# with AttachmentContext.open_file(css_path, 'r', encoding='utf-8') as f: # css = f.read() # # html = re.sub(r'', '', html) # 删除所有的script标签 @@ -756,39 +757,39 @@ def get_export_json(): # if not wave_data: # continue # # 判断savePath路径的文件夹是否存在 -# savePath = os.path.join(outpath, savePath) -# if not os.path.exists(os.path.dirname(savePath)): -# os.makedirs(os.path.dirname(savePath)) -# with open(savePath, "wb") as f: +# savePath = AttachmentContext.join(outpath, savePath) +# if not AttachmentContext.exists(AttachmentContext.dirname(savePath)): +# os.makedirs(AttachmentContext.dirname(savePath)) +# with AttachmentContext.open_file(savePath, "wb") as f: # f.write(wave_data) # elif msg_list[i]["type_name"] == "图片": # img_path = msg_list[i]["content"]["src"] # wx_path = read_session(g.sf, "wx_path") -# img_path_all = os.path.join(wx_path, img_path) +# img_path_all = AttachmentContext.join(wx_path, img_path) # -# if os.path.exists(img_path_all): +# if AttachmentContext.exists(img_path_all): # fomt, md5, out_bytes = read_img_dat(img_path_all) -# imgsavepath = os.path.join(outpath, "img", img_path + "_" + ".".join([md5, fomt])) -# if not os.path.exists(os.path.dirname(imgsavepath)): -# os.makedirs(os.path.dirname(imgsavepath)) -# with open(imgsavepath, "wb") as f: +# imgsavepath = AttachmentContext.join(outpath, "img", img_path + "_" + 
".".join([md5, fomt])) +# if not AttachmentContext.exists(AttachmentContext.dirname(imgsavepath)): +# os.makedirs(AttachmentContext.dirname(imgsavepath)) +# with AttachmentContext.open_file(imgsavepath, "wb") as f: # f.write(out_bytes) -# msg_list[i]["content"]["src"] = os.path.join("img", img_path + "_" + ".".join([md5, fomt])) +# msg_list[i]["content"]["src"] = AttachmentContext.join("img", img_path + "_" + ".".join([md5, fomt])) # # rdata["msg_list"] = msg_list # rdata["myuserdata"] = rdata["user_list"][rdata["my_wxid"]] # rdata["myuserdata"]["chat_count"] = len(rdata["msg_list"]) # save_data = rdata -# save_json_path = os.path.join(outpath, "data") -# if not os.path.exists(save_json_path): +# save_json_path = AttachmentContext.join(outpath, "data") +# if not AttachmentContext.exists(save_json_path): # os.makedirs(save_json_path) -# with open(os.path.join(save_json_path, "msg_user.json"), "w", encoding="utf-8") as f: +# with AttachmentContext.open_file(AttachmentContext.join(save_json_path, "msg_user.json"), "w", encoding="utf-8") as f: # json.dump(save_data, f, ensure_ascii=False) # -# json_base64 = gen_base64(os.path.join(save_json_path, "msg_user.json")) +# json_base64 = gen_base64(AttachmentContext.join(save_json_path, "msg_user.json")) # html = html.replace('"./data/msg_user.json"', f'"{json_base64}"') # -# with open(os.path.join(outpath, "index.html"), 'w', encoding='utf-8') as f: +# with AttachmentContext.open_file(AttachmentContext.join(outpath, "index.html"), 'w', encoding='utf-8') as f: # f.write(html) # return ReJson(0, outpath) # diff --git a/pywxdump/api/utils.py b/pywxdump/api/utils.py index d668eeb..e6d6d82 100644 --- a/pywxdump/api/utils.py +++ b/pywxdump/api/utils.py @@ -14,6 +14,8 @@ import traceback from .rjson import ReJson from functools import wraps +from ..file import AttachmentContext + def read_session_local_wxid(session_file): try: @@ -31,7 +33,7 @@ def read_session_local_wxid(session_file): def read_session(session_file, wxid, arg): try: - with open(session_file, 'r') as f: + with AttachmentContext.open_file(session_file, 'r') as f: session = json.load(f) except FileNotFoundError: logging.error(f"Session file not found: {session_file}") @@ -44,7 +46,7 @@ def read_session(session_file, wxid, arg): def get_session_wxids(session_file): try: - with open(session_file, 'r') as f: + with AttachmentContext.open_file(session_file, 'r') as f: session = json.load(f) except FileNotFoundError: logging.error(f"Session file not found: {session_file}") @@ -57,7 +59,7 @@ def get_session_wxids(session_file): def save_session(session_file, wxid, arg, value): try: - with open(session_file, 'r') as f: + with AttachmentContext.open_file(session_file, 'r') as f: session = json.load(f) except FileNotFoundError: session = {} @@ -71,7 +73,7 @@ def save_session(session_file, wxid, arg, value): session[wxid] = {} session[wxid][arg] = value try: - with open(session_file, 'w') as f: + with AttachmentContext.open_file(session_file, 'w') as f: json.dump(session, f, indent=4, ensure_ascii=False) except Exception as e: logging.error(f"Error writing to file: {e}") @@ -116,7 +118,7 @@ def gen_base64(path): else: start_str = 'data:text/plain;base64,' - with open(path, 'rb') as file: + with AttachmentContext.open_file(path, 'rb') as file: js_code = file.read() base64_encoded_js = base64.b64encode(js_code).decode('utf-8') diff --git a/pywxdump/dbpreprocess/dbbase.py b/pywxdump/dbpreprocess/dbbase.py index 035af61..40b6ef9 100644 --- a/pywxdump/dbpreprocess/dbbase.py +++ 
b/pywxdump/dbpreprocess/dbbase.py @@ -5,9 +5,14 @@ # Author: xaoyaoo # Date: 2024/04/15 # ------------------------------------------------------------------------------- +import glob import os import sqlite3 import logging +import tempfile +import uuid + +from pywxdump.file import AttachmentContext class DatabaseBase: @@ -26,12 +31,19 @@ class DatabaseBase: @classmethod def _connect_to_database(cls, db_path): - if not os.path.exists(db_path): + if not AttachmentContext.exists(db_path): raise FileNotFoundError(f"文件不存在: {db_path}") if db_path in cls._connection_pool and cls._connection_pool[db_path] is not None: return cls._connection_pool[db_path] - connection = sqlite3.connect(db_path, check_same_thread=False) + if not AttachmentContext.isLocalPath(db_path): + temp_dir = tempfile.gettempdir() + local_path = os.path.join(temp_dir, f"{uuid.uuid1()}.db") + AttachmentContext.download_file(db_path, local_path) + else: + local_path = db_path + connection = sqlite3.connect(local_path, check_same_thread=False) logging.info(f"{connection} 连接句柄创建 {db_path}") + cls._connection_pool[db_path] = connection return connection def execute_sql(self, sql, params=None): @@ -74,6 +86,9 @@ class DatabaseBase: self._db_connection.close() logging.info(f"关闭数据库 - {self._db_path}") self._db_connection = None + if not AttachmentContext.isLocalPath(self._db_path): + # 删除tmp目录下的db文件 + self.clearTmpDb() def close_all_connection(self): for db_path in self._connection_pool: @@ -81,6 +96,19 @@ class DatabaseBase: self._connection_pool[db_path].close() logging.info(f"关闭数据库 - {db_path}") self._connection_pool[db_path] = None + # 删除tmp目录下的db文件 + self.clearTmpDb() + def clearTmpDb(self): + # 清理 tmp目录下.db文件 + temp_dir = tempfile.gettempdir() + db_files = glob.glob(os.path.join(temp_dir, '*.db')) + for db_file in db_files: + try: + os.remove(db_file) + print(f"Deleted: {db_file}") + except Exception as e: + print(f"Error deleting {db_file}: {e}") + def show__singleton_instances(self): print(self._singleton_instances) diff --git a/pywxdump/dbpreprocess/utils.py b/pywxdump/dbpreprocess/utils.py index 78e4a4e..3d795d6 100644 --- a/pywxdump/dbpreprocess/utils.py +++ b/pywxdump/dbpreprocess/utils.py @@ -6,7 +6,6 @@ # Date: 2024/04/15 # ------------------------------------------------------------------------------- import hashlib -import os import re import time import wave @@ -17,6 +16,8 @@ import pysilk import lxml.etree as ET # 这个模块更健壮些,微信XML格式有时有非标格式,会导致xml.etree.ElementTree处理失败 from collections import defaultdict +from pywxdump.file import AttachmentContext + def type_converter(type_id_or_name: [str, tuple]): """ @@ -154,7 +155,7 @@ def dat2img(input_data): } if isinstance(input_data, str): - with open(input_data, "rb") as f: + with AttachmentContext.open_file(input_data, "rb") as f: input_bytes = f.read() else: input_bytes = input_data @@ -250,9 +251,9 @@ def download_file(url, save_path=None): data = r.content if save_path and isinstance(save_path, str): # 创建文件夹 - if not os.path.exists(os.path.dirname(save_path)): - os.makedirs(os.path.dirname(save_path)) - with open(save_path, "wb") as f: + if not AttachmentContext.exists(AttachmentContext.dirname(save_path)): + AttachmentContext.makedirs(AttachmentContext.dirname(save_path)) + with AttachmentContext.open_file(save_path, "wb") as f: f.write(data) return data diff --git a/requirements.txt b/requirements.txt index 325c432..3a633ee 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,4 +14,6 @@ blackboxprotobuf lz4 lxml flask_cors -pandas \ No newline at end of file 
+pandas +smart_open[s3] +boto3 \ No newline at end of file