提交 390b718b authored 作者: 刘擎阳's avatar 刘擎阳

1.优化

上级 60e247a1
...@@ -8,7 +8,7 @@ import re ...@@ -8,7 +8,7 @@ import re
import tempfile import tempfile
from datetime import datetime, timedelta from datetime import datetime, timedelta
from io import BytesIO from io import BytesIO
import csv # 确保导入csv处理工具
import os import os
# 引入你的本地脚本函 # 引入你的本地脚本函
from ..pdf_tools.pod_indexer import index_pod_directory from ..pdf_tools.pod_indexer import index_pod_directory
...@@ -250,46 +250,29 @@ class OrderStateChangeRule(models.Model): ...@@ -250,46 +250,29 @@ class OrderStateChangeRule(models.Model):
return data_arr return data_arr
def fetch_final_mail_dlv(self, **kwargs):
    """Process a last-mile handover (POD) mail and attach per-BL PDFs.

    Bill-of-lading numbers are collected from two places: the mail body
    (via ``self.find_final_email_text``) and the ``awbs`` column of the
    index CSV produced by ``index_pod_directory`` for the attached PDFs.
    The merged pool is matched against ``cc_bl.bl_no``; for each matching
    record the pages belonging to it are merged with ``merge_awb_pages``
    and passed to ``self.upload_pod_attachment``.

    An alert mail is sent when no bill of lading matched at all, or when
    at least one matched bill of lading had no PDF extracted for it
    (this includes mails that carry no attachment).

    Expected keyword arguments:
        email_body: HTML body of the mail (entities are unescaped here).
        attachment_arr: iterable of ``(file_name, pdf_bytes)`` tuples, or
            a falsy value when the mail has no attachments.
            NOTE(review): the data is written to disk as-is, so it is
            assumed to be raw bytes rather than base64 - confirm with
            the caller.

    Errors raised inside the processing section are logged (with
    traceback) and swallowed, so a bad mail does not break the caller.
    """
    email_body = html.unescape(kwargs['email_body'])
    attachment_tuple_arr = kwargs['attachment_arr'] or []

    def _normalize(raw_no):
        # Mirror the SQL-side normalisation of bl_no (upper-case, without
        # spaces, dashes and slashes) so both sides compare like with like.
        cleaned = str(raw_no).upper()
        for junk in ('-', ' ', '\xa0', '/'):
            cleaned = cleaned.replace(junk, '')
        return cleaned

    # 1. Numbers mentioned in the mail body.
    text_arr = self.find_final_email_text(email_body)
    logging.info('邮件正文提取单号: %s' % text_arr)
    try:
        body_nos = [_normalize(i) for i in text_arr or []]
        pdf_detected_awbs = []
        # Bound up front so the alert section below always sees defined
        # state, whether or not the mail carried attachments.
        bl_objs = False
        not_bl_pdf_arr = []

        with tempfile.TemporaryDirectory() as temp_dir:
            pod_dir = os.path.join(temp_dir, "POD")
            pages_dir = os.path.join(pod_dir, "pages")
            output_dir = os.path.join(temp_dir, "Output")
            os.makedirs(pod_dir)
            os.makedirs(output_dir)
            index_csv_path = os.path.join(pod_dir, "pod_index.csv")

            # 2. With attachments present, always split/recognise the
            #    PDFs, even when the body yielded no number.
            if attachment_tuple_arr:
                for seq, (file_name, pdf_data) in enumerate(attachment_tuple_arr):
                    # The name comes from the mail and is untrusted:
                    # keep only the base name so it cannot leave pod_dir.
                    safe_name = os.path.basename(
                        str(file_name or '').replace('\\', '/'))
                    if safe_name in ('', '.', '..'):
                        safe_name = 'attachment_%s.pdf' % seq
                    pdf_path = os.path.join(pod_dir, safe_name)
                    if os.path.exists(pdf_path):
                        # Two attachments with the same name must not
                        # overwrite each other.
                        pdf_path = os.path.join(
                            pod_dir, '%s_%s' % (seq, safe_name))
                    with open(pdf_path, 'wb') as f:
                        f.write(pdf_data)

                ctx_index = {
                    "dir_path": pod_dir,
                    "output_index_csv": index_csv_path,
                    "output_summary_csv": os.path.join(pod_dir, "summary.csv"),
                    "save_pages": True,
                    "page_output_dir": pages_dir,
                    "pipeline_split_first": True
                }
                # Splits the PDFs into pages and writes the index CSV.
                index_pod_directory(ctx_index)

                # Collect the numbers the indexer recognised in the PDFs.
                if os.path.exists(index_csv_path):
                    # utf-8-sig tolerates a BOM in front of the header;
                    # newline='' is what the csv module expects.
                    with open(index_csv_path, 'r', encoding='utf-8-sig',
                              newline='') as f:
                        for row in csv.DictReader(f):
                            awb_raw_str = ''
                            for key, value in row.items():
                                # DictReader does not strip header names.
                                if (key or '').strip() == 'awbs':
                                    awb_raw_str = value or ''
                                    break
                            pdf_detected_awbs.extend(
                                _normalize(a)
                                for a in awb_raw_str.split(',') if a.strip())

            # Merge both sources, drop empties (an empty string would
            # match records with a blank bl_no) and de-duplicate.
            combined_text_arr = sorted(
                {no for no in body_nos + pdf_detected_awbs if no})
            logging.info('合并后的待查询单号池: %s' % combined_text_arr)

            # 3. Look up the bill-of-lading records for the merged pool.
            if combined_text_arr:
                sql = "select id from cc_bl where UPPER(REPLACE(REPLACE(REPLACE(bl_no, ' ', ''), '-', ''), '/', '')) in %s"
                self._cr.execute(sql, (tuple(combined_text_arr),))
                ids = [row[0] for row in self._cr.fetchall()]
                if ids:
                    bl_objs = self.env['cc.bl'].sudo().search([('id', 'in', ids)])

            if bl_objs:
                for bl_obj in bl_objs:
                    target_awb = bl_obj.bl_no
                    if not target_awb:
                        continue
                    merged_path = None
                    # Without attachments nothing was indexed, so there
                    # is nothing to merge for this record.
                    if attachment_tuple_arr:
                        ctx_merge = {
                            "awb": target_awb,
                            "index_file": index_csv_path,
                            "pages_dir": pages_dir,
                            "output_dir": output_dir
                        }
                        result_merge = merge_awb_pages(ctx_merge) or {}
                        merged_path = result_merge.get("output")
                    if merged_path and os.path.exists(merged_path):
                        with open(merged_path, 'rb') as f:
                            extracted_pdf_bytes = f.read()
                        self.upload_pod_attachment(
                            bl_obj, f'{bl_obj.bl_no}.pdf', extracted_pdf_bytes)
                    else:
                        not_bl_pdf_arr.append(bl_obj.bl_no)
                self._cr.commit()

        # 4. Alert when nothing matched or some records got no PDF.
        if not bl_objs or not_bl_pdf_arr:
            mail_time = (datetime.utcnow() + timedelta(hours=8)).strftime("%Y-%m-%d %H:%M:%S")
            content = f"""<p>您好:
                邮箱在{mail_time}(+8)时间接收到主题为POD的邮件,但未识别到对应的提单,请检查
                避免推送超时!</p>
                """
            if not_bl_pdf_arr:
                content += f"\n 以下提单未提取到PDF文件 {'/'.join(not_bl_pdf_arr)}"
            # Sender and receivers come from the system parameters.
            config = self.env["ir.config_parameter"].sudo()
            patrol_sender_email = config.get_param('patrol_sender_email') or ''
            patrol_receiver_emails = config.get_param('patrol_receiver_emails') or ''
            mail = self.env['mail.mail'].sudo().create({
                "email_from": patrol_sender_email,
                'subject': 'POD邮件未提取到提单',
                'body_html': content,
                'email_to': patrol_receiver_emails
            })
            mail.send()
            # Retry up to two more times while a failure reason is set.
            for _ in range(2):
                if mail.failure_reason:
                    logging.info('邮件发送失败原因:%s' % mail.failure_reason)
                    mail.write({'state': 'outgoing'})
                    mail.send()
    except Exception as err:
        # Best-effort by design: keep the traceback, do not re-raise.
        logging.exception('fetch_final_mail_dlv--error:%s', err)
# def fetch_final_mail_dlv(self, **kwargs):
# """尾程交接邮件提取"""
# email_body = kwargs['email_body']
# email_body = html.unescape(email_body)
# text_arr = self.find_final_email_text(email_body)
# logging.info('data_arr: %s' % text_arr)
# attachment_arr = kwargs['attachment_arr']
# # attachment_tuple = attachment_arr[0] if attachment_arr else []
# attachment_tuple_arr = attachment_arr if attachment_arr else []
# # order_obj_arr = []
# try:
# text_arr = [i.replace('-', '').replace(' ', '').replace('\xa0', '') for i in text_arr]
# ids = []
# if text_arr:
# sql = "select id from cc_bl where UPPER(REPLACE(REPLACE(REPLACE(bl_no, ' ', ''), '-', ''), '/', '')) in %s"
# self._cr.execute(sql, (tuple(text_arr),))
# result = self._cr.fetchall()
# ids = [i[0] for i in result]
# bl_objs = self.env['cc.bl'].sudo().search([('id', 'in', ids)]) if ids else False
# not_bl_pdf_arr = []
# if bl_objs:
# # 提单对象 bl_no提单号
# # attachment_tuple_arr [('11.pdf', 'pdf数据')]
# # 1. 开启临时文件夹 (with 块结束时,所有临时文件会自动销毁)
# with tempfile.TemporaryDirectory() as temp_dir:
# # 构建临时目录结构
# pod_dir = os.path.join(temp_dir, "POD")
# pages_dir = os.path.join(pod_dir, "pages")
# output_dir = os.path.join(temp_dir, "Output")
# os.makedirs(pod_dir)
# os.makedirs(output_dir)
# # 2. 将内存中的 PDF 数据写入临时目录
# for file_name, pdf_data in attachment_tuple_arr:
# pdf_path = os.path.join(pod_dir, file_name)
# with open(pdf_path, 'wb') as f:
# # 注意:Odoo 里的附件通常是 base64 编码的。
# # 如果你的 'pdf数据' 是 base64 字符串/bytes,请用 base64.b64decode(pdf_data)
# # 如果已经是纯二进制流(rb读取的),直接写入即可:f.write(pdf_data)
# # f.write(base64.b64decode(pdf_data))
# f.write(pdf_data)
# # 3. 对这些 PDF 进行集中拆分和识别(只执行一次,非常关键)
# ctx_index = {
# "dir_path": pod_dir,
# "output_index_csv": os.path.join(pod_dir, "pod_index.csv"),
# "output_summary_csv": os.path.join(pod_dir, "summary.csv"),
# "save_pages": True,
# "page_output_dir": pages_dir,
# "pipeline_split_first": True
# }
# # 这一步会消耗一点时间,它会生成单页 PDF 和索引 CSV
# index_pod_directory(ctx_index)
# # 4. 遍历你的提单对象,按需提取 PDF
# for bl_obj in bl_objs:
# target_awb = bl_obj.bl_no # 获取提单号,例如 '436-10353136'
# if not target_awb:
# continue
# # 调用拼合工具
# ctx_merge = {
# "awb": target_awb,
# "index_file": ctx_index["output_index_csv"],
# "pages_dir": pages_dir,
# "output_dir": output_dir
# }
# result = merge_awb_pages(ctx_merge)
# # 5. 检查是否成功生成了对应的单号 PDF
# if result.get("output") and os.path.exists(result["output"]):
# # 将生成的 PDF 重新读回内存
# with open(result["output"], 'rb') as f:
# extracted_pdf_bytes = f.read()
# # 重新转为 base64,准备存入 Odoo
# # extracted_pdf_b64 = base64.b64encode(extracted_pdf_bytes)
# # print(extracted_pdf_bytes)
# self.upload_pod_attachment(bl_obj, f'{bl_obj.bl_no}.pdf', extracted_pdf_bytes)
# # bl_pdf_arr.append((bl_obj, f'{bl_obj.bl_no}.pdf', extracted_pdf_bytes))
#
# else:
# # 没找到这个单号对应的页面
# not_bl_pdf_arr.append(bl_obj.bl_no)
# # 这里可以记个日志,或者给 bl_obj 打个“未找到凭证”的标签
# self._cr.commit()
# # 屏蔽 2026-03-26以下
# # if bl_objs and attachment_tuple_arr:
# # file_objs = self.env['cc.clearance.file'].sudo().search(
# # [('file_name', '=', '尾程交接POD(待大包数量和箱号)'),
# # ('bl_id', 'in', bl_objs.ids)])
# # file_objs.unlink()
# # for attachment_tuple in attachment_tuple_arr:
# # attachment_name, attachment_data = attachment_tuple
# # self.upload_pod_attachment(bl_objs, attachment_name, attachment_data)
# # 屏蔽 2026-03-26 以上
# # redis_conn = self.env['common.common'].sudo().get_redis()
# # if redis_conn == 'no':
# # raise ValidationError('未连接redis')
# # else:
# # redis_conn.lpush('mail_push_package_list', json.dumps({'id': bl_obj.id, 'utc_time': utc_time.strftime("%Y-%m-%d %H:%M:%S")}))
# if not bl_objs or not_bl_pdf_arr:
# mail_time = (datetime.utcnow() + timedelta(hours=8)).strftime("%Y-%m-%d %H:%M:%S")
# content = f"""<p>您好:
# 邮箱在{mail_time}(+8)时间接收到主题为POD的邮件,但未识别到对应的提单,请检查
# 避免推送超时!</p>
# """
# if not_bl_pdf_arr:
# content += f"\n 以下提单未提取到PDF文件 {'/'.join(not_bl_pdf_arr)}"
# # 给客户配置的每个邮箱都发送邮件
# patrol_sender_email = self.env["ir.config_parameter"].sudo().get_param('patrol_sender_email') or ''
# patrol_receiver_emails = self.env["ir.config_parameter"].sudo().get_param(
# 'patrol_receiver_emails') or ''
# mail = self.env['mail.mail'].sudo().create({
# "email_from": patrol_sender_email,
# 'subject': 'POD邮件未提取到提单',
# 'body_html': content,
# 'email_to': patrol_receiver_emails
# # 'email_to': "1663490807@qq.com,820656583@qq.com"
# })
# mail.send()
# for i in range(2):
# if mail.failure_reason:
# logging.info('邮件发送失败原因:%s' % mail.failure_reason)
# mail.write({'state': 'outgoing'})
# mail.send()
# except Exception as err:
# logging.error('fetch_final_mail_dlv--error:%s' % str(err))
def fetch_mail_dlv(self, **kwargs): def fetch_mail_dlv(self, **kwargs):
email_body = kwargs['email_body'] email_body = kwargs['email_body']
year = kwargs['year'] year = kwargs['year']
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论