1.优化：尾程交接邮件提取支持仅凭附件识别单号（合并邮件正文单号与 PDF 识别单号后查询提单）

390b718b · 刘擎阳 · 60e247a1 · 390b718b
--- a/ccs_base/models/order_state_change_rule.py
+++ b/ccs_base/models/order_state_change_rule.py
@@ -8,7 +8,7 @@ import re
 import tempfile
 from datetime import datetime, timedelta
 from io import BytesIO
+import csv  # 确保导入csv处理工具
 import os
 # 引入你的本地脚本函
 from ..pdf_tools.pod_indexer import index_pod_directory
@@ -250,46 +250,29 @@ class OrderStateChangeRule(models.Model):
        return data_arr
    def fetch_final_mail_dlv(self, **kwargs):
-        """尾程交接邮件提取"""
+        """尾程交接邮件提取 - 增强版（支持仅凭附件识别同步）"""
        email_body = kwargs['email_body']
        email_body = html.unescape(email_body)
+        # 1. 提取邮件正文中的单号
        text_arr = self.find_final_email_text(email_body)
-        logging.info('data_arr: %s' % text_arr)
+        logging.info('邮件正文提取单号: %s' % text_arr)
        attachment_arr = kwargs['attachment_arr']
-        # attachment_tuple = attachment_arr[0] if attachment_arr else []
        attachment_tuple_arr = attachment_arr if attachment_arr else []
-        # order_obj_arr = []
        try:
+            # 清洗邮件正文提取的单号
            text_arr = [i.replace('-', '').replace(' ', '').replace('\xa0', '') for i in text_arr]
-            ids = []
+            # 2. 如果有附件，无论邮件正文有没有单号，都要进行 PDF 拆分识别
-            if text_arr:
+            if attachment_tuple_arr:
-                sql = "select id from cc_bl where UPPER(REPLACE(REPLACE(REPLACE(bl_no, ' ', ''), '-', ''), '/', '')) in %s"
-                self._cr.execute(sql, (tuple(text_arr),))
-                result = self._cr.fetchall()
-                ids = [i[0] for i in result]
-            bl_objs = self.env['cc.bl'].sudo().search([('id', 'in', ids)]) if ids else False
-            not_bl_pdf_arr = []
-            if bl_objs:
-                # 提单对象  bl_no提单号
-                # attachment_tuple_arr [('11.pdf', 'pdf数据')]
-                # 1. 开启临时文件夹 (with 块结束时，所有临时文件会自动销毁)
                with tempfile.TemporaryDirectory() as temp_dir:
-                    # 构建临时目录结构
                    pod_dir = os.path.join(temp_dir, "POD")
                    pages_dir = os.path.join(pod_dir, "pages")
                    output_dir = os.path.join(temp_dir, "Output")
                    os.makedirs(pod_dir)
                    os.makedirs(output_dir)
-                    # 2. 将内存中的 PDF 数据写入临时目录
                    for file_name, pdf_data in attachment_tuple_arr:
                        pdf_path = os.path.join(pod_dir, file_name)
                        with open(pdf_path, 'wb') as f:
-                            # 注意：Odoo 里的附件通常是 base64 编码的。
-                            # 如果你的 'pdf数据' 是 base64 字符串/bytes，请用 base64.b64decode(pdf_data)
-                            # 如果已经是纯二进制流(rb读取的)，直接写入即可：f.write(pdf_data)
-                            # f.write(base64.b64decode(pdf_data))
                            f.write(pdf_data)
-                            # 3. 对这些 PDF 进行集中拆分和识别（只执行一次，非常关键）
                    ctx_index = {
                        "dir_path": pod_dir,
                        "output_index_csv": os.path.join(pod_dir, "pod_index.csv"),
@@ -298,80 +281,208 @@ class OrderStateChangeRule(models.Model):
                        "page_output_dir": pages_dir,
                        "pipeline_split_first": True
                    }
-                    # 这一步会消耗一点时间，它会生成单页 PDF 和索引 CSV
+                    # 这一步执行 OCR 识别和拆分
                    index_pod_directory(ctx_index)
-                    # 4. 遍历你的提单对象，按需提取 PDF
+                    # --- 【新增逻辑：从识别结果 CSV 中提取单号】 ---
-                    for bl_obj in bl_objs:
+                    pdf_detected_awbs = []
-                        target_awb = bl_obj.bl_no  # 获取提单号，例如 '436-10353136'
+                    index_csv_path = ctx_index["output_index_csv"]
-                        if not target_awb:
+                    if os.path.exists(index_csv_path):
-                            continue
+                        with open(index_csv_path, 'r', encoding='utf-8') as f:
-                        # 调用拼合工具
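+                            # Parse each row into a dict keyed by the header row (note: DictReader does NOT strip whitespace around header names)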
-                        ctx_merge = {
+                            reader = csv.DictReader(f)
-                            "awb": target_awb,
+                            for row in reader:
-                            "index_file": ctx_index["output_index_csv"],
+                                # 获取 'awbs' 列的内容
-                            "pages_dir": pages_dir,
+                                awb_raw_str = row.get('awbs', '')
-                            "output_dir": output_dir
+                                if awb_raw_str:
-                        }
+                                    # 按逗号分割字符串，并对每个单号进行清洗
-                        result = merge_awb_pages(ctx_merge)
+                                    split_awbs = [a.strip().replace('-', '').replace(' ', '')
-                        # 5. 检查是否成功生成了对应的单号 PDF
+                                                  for a in awb_raw_str.split(',') if a.strip()]
-                        if result.get("output") and os.path.exists(result["output"]):
+                                    pdf_detected_awbs.extend(split_awbs)
-                            # 将生成的 PDF 重新读回内存
+                    # 合并邮件正文单号和 PDF 识别单号，去重
-                            with open(result["output"], 'rb') as f:
+                    combined_text_arr = list(set(text_arr + pdf_detected_awbs))
-                                extracted_pdf_bytes = f.read()
+                    logging.info('合并后的待查询单号池: %s' % combined_text_arr)
-                            # 重新转为 base64，准备存入 Odoo
+                    # ----------------------------------------------
-                            # extracted_pdf_b64 = base64.b64encode(extracted_pdf_bytes)
+                    # 3. 根据合并后的单号去数据库查提单对象
-                            # print(extracted_pdf_bytes)
+                    ids = []
-                            self.upload_pod_attachment(bl_obj, f'{bl_obj.bl_no}.pdf', extracted_pdf_bytes)
+                    if combined_text_arr:
-                            # bl_pdf_arr.append((bl_obj, f'{bl_obj.bl_no}.pdf', extracted_pdf_bytes))
+                        sql = "select id from cc_bl where UPPER(REPLACE(REPLACE(REPLACE(bl_no, ' ', ''), '-', ''), '/', '')) in %s"
+                        self._cr.execute(sql, (tuple(combined_text_arr),))
-                        else:
+                        result = self._cr.fetchall()
-                            # 没找到这个单号对应的页面
+                        ids = [i[0] for i in result]
-                            not_bl_pdf_arr.append(bl_obj.bl_no)
+                    bl_objs = self.env['cc.bl'].sudo().search([('id', 'in', ids)]) if ids else False
-                            # 这里可以记个日志，或者给 bl_obj 打个“未找到凭证”的标签
+                    not_bl_pdf_arr = []
-                self._cr.commit()
+                    if bl_objs:
-            # 屏蔽 2026-03-26以下
+                        for bl_obj in bl_objs:
-            # if bl_objs and attachment_tuple_arr:
+                            target_awb = bl_obj.bl_no
-            #     file_objs = self.env['cc.clearance.file'].sudo().search(
+                            if not target_awb:
-            #         [('file_name', '=', '尾程交接POD(待大包数量和箱号)'),
+                                continue
-            #          ('bl_id', 'in', bl_objs.ids)])
+                            ctx_merge = {
-            #     file_objs.unlink()
+                                "awb": target_awb,
-            #     for attachment_tuple in attachment_tuple_arr:
+                                "index_file": ctx_index["output_index_csv"],
-            #         attachment_name, attachment_data = attachment_tuple
+                                "pages_dir": pages_dir,
-            #         self.upload_pod_attachment(bl_objs, attachment_name, attachment_data)
+                                "output_dir": output_dir
-            # 屏蔽 2026-03-26 以上
+                            }
-                # redis_conn = self.env['common.common'].sudo().get_redis()
+                            result_merge = merge_awb_pages(ctx_merge)
-                # if redis_conn == 'no':
+                            if result_merge.get("output") and os.path.exists(result_merge["output"]):
-                #     raise ValidationError('未连接redis')
+                                with open(result_merge["output"], 'rb') as f:
-                # else:
+                                    extracted_pdf_bytes = f.read()
-                #     redis_conn.lpush('mail_push_package_list', json.dumps({'id': bl_obj.id, 'utc_time': utc_time.strftime("%Y-%m-%d %H:%M:%S")}))
+                                self.upload_pod_attachment(bl_obj, f'{bl_obj.bl_no}.pdf', extracted_pdf_bytes)
-            if not bl_objs or not_bl_pdf_arr:
+                            else:
-                mail_time = (datetime.utcnow() + timedelta(hours=8)).strftime("%Y-%m-%d %H:%M:%S")
+                                not_bl_pdf_arr.append(bl_obj.bl_no)
-                content = f"""<p>您好：
+                        self._cr.commit()
-                邮箱在{mail_time}(+8)时间接收到主题为POD的邮件，但未识别到对应的提单，请检查
+                    # 4. 异常报警逻辑
-                避免推送超时！</p>
+                    if not bl_objs or not_bl_pdf_arr:
-                """
+                        mail_time = (datetime.utcnow() + timedelta(hours=8)).strftime("%Y-%m-%d %H:%M:%S")
-                if not_bl_pdf_arr:
+                        content = f"""<p>您好：
-                    content += f"\n    以下提单未提取到PDF文件 {'/'.join(not_bl_pdf_arr)}"
+                        邮箱在{mail_time}(+8)时间接收到主题为POD的邮件，但未识别到对应的提单，请检查
-                # 给客户配置的每个邮箱都发送邮件
+                        避免推送超时！</p>
-                patrol_sender_email = self.env["ir.config_parameter"].sudo().get_param('patrol_sender_email') or ''
+                        """
-                patrol_receiver_emails = self.env["ir.config_parameter"].sudo().get_param(
+                        if not_bl_pdf_arr:
-                    'patrol_receiver_emails') or ''
+                            content += f"\n    以下提单未提取到PDF文件 {'/'.join(not_bl_pdf_arr)}"
-                mail = self.env['mail.mail'].sudo().create({
+                        # 给客户配置的每个邮箱都发送邮件
-                    "email_from": patrol_sender_email,
+                        patrol_sender_email = self.env["ir.config_parameter"].sudo().get_param(
-                    'subject': 'POD邮件未提取到提单',
+                            'patrol_sender_email') or ''
-                    'body_html': content,
+                        patrol_receiver_emails = self.env["ir.config_parameter"].sudo().get_param(
-                    'email_to': patrol_receiver_emails
+                            'patrol_receiver_emails') or ''
-                    # 'email_to': "1663490807@qq.com,820656583@qq.com"
+                        mail = self.env['mail.mail'].sudo().create({
-                })
+                            "email_from": patrol_sender_email,
-                mail.send()
+                            'subject': 'POD邮件未提取到提单',
-                for i in range(2):
+                            'body_html': content,
-                    if mail.failure_reason:
+                            'email_to': patrol_receiver_emails
-                        logging.info('邮件发送失败原因:%s' % mail.failure_reason)
+                            # 'email_to': "1663490807@qq.com,820656583@qq.com"
-                        mail.write({'state': 'outgoing'})
+                        })
                        mail.send()
+                        for i in range(2):
+                            if mail.failure_reason:
+                                logging.info('邮件发送失败原因:%s' % mail.failure_reason)
+                                mail.write({'state': 'outgoing'})
+                                mail.send()
        except Exception as err:
            logging.error('fetch_final_mail_dlv--error:%s' % str(err))
+    # def fetch_final_mail_dlv(self, **kwargs):
+    #     """尾程交接邮件提取"""
+    #     email_body = kwargs['email_body']
+    #     email_body = html.unescape(email_body)
+    #     text_arr = self.find_final_email_text(email_body)
+    #     logging.info('data_arr: %s' % text_arr)
+    #     attachment_arr = kwargs['attachment_arr']
+    #     # attachment_tuple = attachment_arr[0] if attachment_arr else []
+    #     attachment_tuple_arr = attachment_arr if attachment_arr else []
+    #     # order_obj_arr = []
+    #     try:
+    #         text_arr = [i.replace('-', '').replace(' ', '').replace('\xa0', '') for i in text_arr]
+    #         ids = []
+    #         if text_arr:
+    #             sql = "select id from cc_bl where UPPER(REPLACE(REPLACE(REPLACE(bl_no, ' ', ''), '-', ''), '/', '')) in %s"
+    #             self._cr.execute(sql, (tuple(text_arr),))
+    #             result = self._cr.fetchall()
+    #             ids = [i[0] for i in result]
+    #         bl_objs = self.env['cc.bl'].sudo().search([('id', 'in', ids)]) if ids else False
+    #         not_bl_pdf_arr = []
+    #         if bl_objs:
+    #             # 提单对象  bl_no提单号
+    #             # attachment_tuple_arr [('11.pdf', 'pdf数据')]
+    #             # 1. 开启临时文件夹 (with 块结束时，所有临时文件会自动销毁)
+    #             with tempfile.TemporaryDirectory() as temp_dir:
+    #                 # 构建临时目录结构
+    #                 pod_dir = os.path.join(temp_dir, "POD")
+    #                 pages_dir = os.path.join(pod_dir, "pages")
+    #                 output_dir = os.path.join(temp_dir, "Output")
+    #                 os.makedirs(pod_dir)
+    #                 os.makedirs(output_dir)
+    #                 # 2. 将内存中的 PDF 数据写入临时目录
+    #                 for file_name, pdf_data in attachment_tuple_arr:
+    #                     pdf_path = os.path.join(pod_dir, file_name)
+    #                     with open(pdf_path, 'wb') as f:
+    #                         # 注意：Odoo 里的附件通常是 base64 编码的。
+    #                         # 如果你的 'pdf数据' 是 base64 字符串/bytes，请用 base64.b64decode(pdf_data)
+    #                         # 如果已经是纯二进制流(rb读取的)，直接写入即可：f.write(pdf_data)
+    #                         # f.write(base64.b64decode(pdf_data))
+    #                         f.write(pdf_data)
+    #                         # 3. 对这些 PDF 进行集中拆分和识别（只执行一次，非常关键）
+    #                 ctx_index = {
+    #                     "dir_path": pod_dir,
+    #                     "output_index_csv": os.path.join(pod_dir, "pod_index.csv"),
+    #                     "output_summary_csv": os.path.join(pod_dir, "summary.csv"),
+    #                     "save_pages": True,
+    #                     "page_output_dir": pages_dir,
+    #                     "pipeline_split_first": True
+    #                 }
+    #                 # 这一步会消耗一点时间，它会生成单页 PDF 和索引 CSV
+    #                 index_pod_directory(ctx_index)
+    #                 # 4. 遍历你的提单对象，按需提取 PDF
+    #                 for bl_obj in bl_objs:
+    #                     target_awb = bl_obj.bl_no  # 获取提单号，例如 '436-10353136'
+    #                     if not target_awb:
+    #                         continue
+    #                     # 调用拼合工具
+    #                     ctx_merge = {
+    #                         "awb": target_awb,
+    #                         "index_file": ctx_index["output_index_csv"],
+    #                         "pages_dir": pages_dir,
+    #                         "output_dir": output_dir
+    #                     }
+    #                     result = merge_awb_pages(ctx_merge)
+    #                     # 5. 检查是否成功生成了对应的单号 PDF
+    #                     if result.get("output") and os.path.exists(result["output"]):
+    #                         # 将生成的 PDF 重新读回内存
+    #                         with open(result["output"], 'rb') as f:
+    #                             extracted_pdf_bytes = f.read()
+    #                         # 重新转为 base64，准备存入 Odoo
+    #                         # extracted_pdf_b64 = base64.b64encode(extracted_pdf_bytes)
+    #                         # print(extracted_pdf_bytes)
+    #                         self.upload_pod_attachment(bl_obj, f'{bl_obj.bl_no}.pdf', extracted_pdf_bytes)
+    #                         # bl_pdf_arr.append((bl_obj, f'{bl_obj.bl_no}.pdf', extracted_pdf_bytes))
+    #
+    #                     else:
+    #                         # 没找到这个单号对应的页面
+    #                         not_bl_pdf_arr.append(bl_obj.bl_no)
+    #                         # 这里可以记个日志，或者给 bl_obj 打个“未找到凭证”的标签
+    #             self._cr.commit()
+    #         # 屏蔽 2026-03-26以下
+    #         # if bl_objs and attachment_tuple_arr:
+    #         #     file_objs = self.env['cc.clearance.file'].sudo().search(
+    #         #         [('file_name', '=', '尾程交接POD(待大包数量和箱号)'),
+    #         #          ('bl_id', 'in', bl_objs.ids)])
+    #         #     file_objs.unlink()
+    #         #     for attachment_tuple in attachment_tuple_arr:
+    #         #         attachment_name, attachment_data = attachment_tuple
+    #         #         self.upload_pod_attachment(bl_objs, attachment_name, attachment_data)
+    #         # 屏蔽 2026-03-26 以上
+    #             # redis_conn = self.env['common.common'].sudo().get_redis()
+    #             # if redis_conn == 'no':
+    #             #     raise ValidationError('未连接redis')
+    #             # else:
+    #             #     redis_conn.lpush('mail_push_package_list', json.dumps({'id': bl_obj.id, 'utc_time': utc_time.strftime("%Y-%m-%d %H:%M:%S")}))
+    #         if not bl_objs or not_bl_pdf_arr:
+    #             mail_time = (datetime.utcnow() + timedelta(hours=8)).strftime("%Y-%m-%d %H:%M:%S")
+    #             content = f"""<p>您好：
+    #             邮箱在{mail_time}(+8)时间接收到主题为POD的邮件，但未识别到对应的提单，请检查
+    #             避免推送超时！</p>
+    #             """
+    #             if not_bl_pdf_arr:
+    #                 content += f"\n    以下提单未提取到PDF文件 {'/'.join(not_bl_pdf_arr)}"
+    #             # 给客户配置的每个邮箱都发送邮件
+    #             patrol_sender_email = self.env["ir.config_parameter"].sudo().get_param('patrol_sender_email') or ''
+    #             patrol_receiver_emails = self.env["ir.config_parameter"].sudo().get_param(
+    #                 'patrol_receiver_emails') or ''
+    #             mail = self.env['mail.mail'].sudo().create({
+    #                 "email_from": patrol_sender_email,
+    #                 'subject': 'POD邮件未提取到提单',
+    #                 'body_html': content,
+    #                 'email_to': patrol_receiver_emails
+    #                 # 'email_to': "1663490807@qq.com,820656583@qq.com"
+    #             })
+    #             mail.send()
+    #             for i in range(2):
+    #                 if mail.failure_reason:
+    #                     logging.info('邮件发送失败原因:%s' % mail.failure_reason)
+    #                     mail.write({'state': 'outgoing'})
+    #                     mail.send()
+    #     except Exception as err:
+    #         logging.error('fetch_final_mail_dlv--error:%s' % str(err))
    def fetch_mail_dlv(self, **kwargs):
        email_body = kwargs['email_body']
        year = kwargs['year']