提交 2972a296 authored 作者: 刘擎阳's avatar 刘擎阳

1.优化

上级 d82f2dbc
......@@ -9,6 +9,11 @@ import tempfile
from datetime import datetime, timedelta
from io import BytesIO
import os
# Import the local PDF helper functions (POD indexer and AWB page merger)
from ..pdf_tools.pod_indexer import index_pod_directory
from ..pdf_tools.awb_page_merger import merge_awb_pages
import pdfplumber
import xlrd
from aip.ocr import AipOcr
......@@ -238,7 +243,7 @@ class OrderStateChangeRule(models.Model):
pattern = re.compile("\\d{3}-\\d{8}\s*")
data_re = re.compile(pattern)
data_arr = data_re.findall(email_body)
data_arr = [i.replace('\r\n', '') for i in data_arr]
data_arr = [i.replace('\r\n', '').replace('\xa0', '') for i in data_arr]
return data_arr
def fetch_final_mail_dlv(self, **kwargs):
......@@ -252,7 +257,7 @@ class OrderStateChangeRule(models.Model):
attachment_tuple_arr = attachment_arr if attachment_arr else []
# order_obj_arr = []
try:
text_arr = [i.replace('-', '').replace(' ', '') for i in text_arr]
text_arr = [i.replace('-', '').replace(' ', '').replace('\xa0', '') for i in text_arr]
ids = []
if text_arr:
sql = "select id from cc_bl where UPPER(REPLACE(REPLACE(REPLACE(bl_no, ' ', ''), '-', ''), '/', '')) in %s"
......@@ -260,24 +265,88 @@ class OrderStateChangeRule(models.Model):
result = self._cr.fetchall()
ids = [i[0] for i in result]
bl_objs = self.env['cc.bl'].sudo().search([('id', 'in', ids)]) if ids else False
if bl_objs and attachment_tuple_arr:
file_objs = self.env['cc.clearance.file'].sudo().search(
[('file_name', '=', '尾程交接POD(待大包数量和箱号)'),
('bl_id', 'in', bl_objs.ids)])
file_objs.unlink()
for attachment_tuple in attachment_tuple_arr:
attachment_name, attachment_data = attachment_tuple
self.upload_pod_attachment(bl_objs, attachment_name, attachment_data)
not_bl_pdf_arr = []
if bl_objs:
# 提单对象 bl_no提单号
# attachment_tuple_arr [('11.pdf', 'pdf数据')]
# 1. 开启临时文件夹 (with 块结束时,所有临时文件会自动销毁)
with tempfile.TemporaryDirectory() as temp_dir:
# 构建临时目录结构
pod_dir = os.path.join(temp_dir, "POD")
pages_dir = os.path.join(pod_dir, "pages")
output_dir = os.path.join(temp_dir, "Output")
os.makedirs(pod_dir)
os.makedirs(output_dir)
# 2. 将内存中的 PDF 数据写入临时目录
for file_name, pdf_data in attachment_tuple_arr:
pdf_path = os.path.join(pod_dir, file_name)
with open(pdf_path, 'wb') as f:
# 注意:Odoo 里的附件通常是 base64 编码的。
# 如果你的 'pdf数据' 是 base64 字符串/bytes,请用 base64.b64decode(pdf_data)
# 如果已经是纯二进制流(rb读取的),直接写入即可:f.write(pdf_data)
# f.write(base64.b64decode(pdf_data))
f.write(pdf_data)
# 3. 对这些 PDF 进行集中拆分和识别(只执行一次,非常关键)
ctx_index = {
"dir_path": pod_dir,
"output_index_csv": os.path.join(pod_dir, "pod_index.csv"),
"output_summary_csv": os.path.join(pod_dir, "summary.csv"),
"save_pages": True,
"page_output_dir": pages_dir,
"pipeline_split_first": True
}
# 这一步会消耗一点时间,它会生成单页 PDF 和索引 CSV
index_pod_directory(ctx_index)
# 4. 遍历你的提单对象,按需提取 PDF
for bl_obj in bl_objs:
target_awb = bl_obj.bl_no # 获取提单号,例如 '436-10353136'
if not target_awb:
continue
# 调用拼合工具
ctx_merge = {
"awb": target_awb,
"index_file": ctx_index["output_index_csv"],
"pages_dir": pages_dir,
"output_dir": output_dir
}
result = merge_awb_pages(ctx_merge)
# 5. 检查是否成功生成了对应的单号 PDF
if result.get("output") and os.path.exists(result["output"]):
# 将生成的 PDF 重新读回内存
with open(result["output"], 'rb') as f:
extracted_pdf_bytes = f.read()
# 重新转为 base64,准备存入 Odoo
# extracted_pdf_b64 = base64.b64encode(extracted_pdf_bytes)
# print(extracted_pdf_bytes)
self.upload_pod_attachment(bl_obj, f'{bl_obj.bl_no}.pdf', extracted_pdf_bytes)
# bl_pdf_arr.append((bl_obj, f'{bl_obj.bl_no}.pdf', extracted_pdf_bytes))
else:
# 没找到这个单号对应的页面
not_bl_pdf_arr.append(bl_obj.bl_no)
# 这里可以记个日志,或者给 bl_obj 打个“未找到凭证”的标签
# 屏蔽 2026-03-26以下
# if bl_objs and attachment_tuple_arr:
# file_objs = self.env['cc.clearance.file'].sudo().search(
# [('file_name', '=', '尾程交接POD(待大包数量和箱号)'),
# ('bl_id', 'in', bl_objs.ids)])
# file_objs.unlink()
# for attachment_tuple in attachment_tuple_arr:
# attachment_name, attachment_data = attachment_tuple
# self.upload_pod_attachment(bl_objs, attachment_name, attachment_data)
# 屏蔽 2026-03-26 以上
# redis_conn = self.env['common.common'].sudo().get_redis()
# if redis_conn == 'no':
# raise ValidationError('未连接redis')
# else:
# redis_conn.lpush('mail_push_package_list', json.dumps({'id': bl_obj.id, 'utc_time': utc_time.strftime("%Y-%m-%d %H:%M:%S")}))
if not bl_objs:
if not bl_objs or not_bl_pdf_arr:
mail_time = (datetime.utcnow() + timedelta(hours=8)).strftime("%Y-%m-%d %H:%M:%S")
content = f"""<p>您好:
邮箱在{mail_time}(+8)时间接收到主题为POD的邮件,但未识别到对应的提单,请检查
避免推送超时!</p>"""
避免推送超时!</p>
"""
if not_bl_pdf_arr:
content += f"\n 以下提单未提取到PDF文件 {'/'.join(not_bl_pdf_arr)}"
# 给客户配置的每个邮箱都发送邮件
patrol_sender_email = self.env["ir.config_parameter"].sudo().get_param('patrol_sender_email') or ''
patrol_receiver_emails = self.env["ir.config_parameter"].sudo().get_param(
......
差异被折叠。
{
"baidu_ocr_app_id": "118782515",
"baidu_ocr_api_key": "gWnGCmjJYzaYwhph8sJEdiRJ",
"baidu_ocr_secret_key": "mjgUUgbxXK8UHcRi5MTlPrb4BWM8NrOu",
"ocr_enabled": true,
"ocr_timeout": 30,
"max_retries": 3
}
\ No newline at end of file
#!/usr/bin/env python3
"""
百度OCR配置文件
用于管理百度OCR API的相关配置参数
"""
import os
import json
from typing import Dict, Optional
class BaiduOCRConfig:
    """Manage Baidu OCR settings backed by a JSON configuration file.

    Every key is resolved in this order: an environment variable named after
    the upper-cased key, then the value loaded from the config file, then the
    built-in defaults.
    """

    # Spellings accepted when a boolean setting arrives as a string
    # (environment variables are always strings).
    _TRUE_STRINGS = frozenset({"1", "true", "yes", "on"})
    _FALSE_STRINGS = frozenset({"0", "false", "no", "off"})

    def __init__(self, config_file: Optional[str] = None):
        """Load the configuration.

        Args:
            config_file: Path of the JSON config file. Defaults to
                ``baidu_ocr_config.json`` in the directory of this module.
        """
        if config_file is None:
            config_file = os.path.join(os.path.dirname(__file__), 'baidu_ocr_config.json')
        self.config_file = config_file
        self._config = self._load_config()

    def _load_config(self) -> Dict:
        """Build the effective configuration from defaults plus the file.

        If the config file does not exist it is created with the defaults.

        Returns:
            The merged configuration dictionary.
        """
        # SECURITY NOTE(review): real-looking credentials are committed here
        # and are also written to disk when the file is missing. They are kept
        # so existing callers keep working; rotate them and supply them via
        # environment variables or a secret store instead.
        default_config = {
            "baidu_ocr_app_id": "118782515",
            "baidu_ocr_api_key": "gWnGCmjJYzaYwhph8sJEdiRJ",
            "baidu_ocr_secret_key": "mjgUUgbxXK8UHcRi5MTlPrb4BWM8NrOu",
            "ocr_enabled": True,
            "ocr_timeout": 30,
            "max_retries": 3
        }
        if os.path.exists(self.config_file):
            try:
                with open(self.config_file, 'r', encoding='utf-8') as f:
                    file_config = json.load(f)
                # Valid JSON that is not an object (e.g. a list) cannot be
                # merged; treat it like an unreadable file instead of letting
                # dict.update() raise and abort the import of this module.
                if not isinstance(file_config, dict):
                    raise ValueError("top-level JSON value must be an object")
                # File values override the defaults.
                default_config.update(file_config)
            except (ValueError, OSError) as e:
                # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
                print(f"警告:无法读取配置文件 {self.config_file}: {e}")
                print("使用默认配置")
        else:
            # First run: persist the defaults so they can be edited.
            self._save_config(default_config)
        return default_config

    def _save_config(self, config: Dict) -> None:
        """Write ``config`` to the config file as JSON.

        A write failure is reported with a warning and otherwise ignored.

        Args:
            config: Configuration dictionary to persist.
        """
        try:
            with open(self.config_file, 'w', encoding='utf-8') as f:
                json.dump(config, f, indent=4, ensure_ascii=False)
        except OSError as e:
            print(f"警告:无法保存配置文件 {self.config_file}: {e}")

    def get(self, key: str, default=None):
        """Return the raw value for ``key``.

        A non-empty environment variable named ``key.upper()`` wins and is
        returned as a string; otherwise the loaded configuration is used.

        Args:
            key: Configuration key.
            default: Value returned when the key is not configured.

        Returns:
            The configured value, or ``default``.
        """
        env_value = os.getenv(key.upper())
        if env_value:
            return env_value
        return self._config.get(key, default)

    def _get_bool(self, key: str, default: bool) -> bool:
        """Resolve ``key`` as a boolean, parsing string values.

        An unrecognised string override falls back to the loaded
        configuration, then to ``default``.
        """
        for candidate in (self.get(key, default), self._config.get(key, default)):
            if not isinstance(candidate, str):
                return bool(candidate)
            normalized = candidate.strip().lower()
            if normalized in self._TRUE_STRINGS:
                return True
            if normalized in self._FALSE_STRINGS:
                return False
        return default

    def _get_int(self, key: str, default: int) -> int:
        """Resolve ``key`` as an integer, parsing string values.

        Non-string values are returned unchanged. An unparseable string
        override falls back to the loaded configuration, then to ``default``.
        """
        for candidate in (self.get(key, default), self._config.get(key, default)):
            if not isinstance(candidate, str):
                return candidate
            try:
                return int(candidate.strip())
            except ValueError:
                continue
        return default

    def set(self, key: str, value) -> None:
        """Set one configuration value and persist the configuration.

        Args:
            key: Configuration key.
            value: New value.
        """
        self._config[key] = value
        self._save_config(self._config)

    def get_app_id(self) -> str:
        """Return the Baidu OCR App ID ('' when not configured)."""
        return self.get('baidu_ocr_app_id', '')

    def get_api_key(self) -> str:
        """Return the Baidu OCR API key ('' when not configured)."""
        return self.get('baidu_ocr_api_key', '')

    def get_secret_key(self) -> str:
        """Return the Baidu OCR secret key ('' when not configured)."""
        return self.get('baidu_ocr_secret_key', '')

    def is_ocr_enabled(self) -> bool:
        """Return whether OCR is enabled (default True)."""
        # An OCR_ENABLED=false override used to come back as the truthy
        # string "false"; parse it into a real boolean.
        return self._get_bool('ocr_enabled', True)

    def get_timeout(self) -> int:
        """Return the OCR request timeout (default 30)."""
        return self._get_int('ocr_timeout', 30)

    def get_max_retries(self) -> int:
        """Return the maximum number of retries (default 3)."""
        return self._get_int('max_retries', 3)

    def is_configured(self) -> bool:
        """Return True when app id, API key and secret key are all non-empty."""
        app_id = self.get_app_id()
        api_key = self.get_api_key()
        secret_key = self.get_secret_key()
        return bool(app_id and api_key and secret_key)

    def get_all_config(self) -> Dict:
        """Return a shallow copy of the loaded configuration."""
        return self._config.copy()

    def update_config(self, config_dict: Dict) -> None:
        """Merge ``config_dict`` into the configuration and persist it.

        Args:
            config_dict: Keys and values to update.
        """
        self._config.update(config_dict)
        self._save_config(self._config)
# Module-level configuration instance, created at import time. With no
# argument, BaiduOCRConfig reads (or creates) baidu_ocr_config.json in the
# directory of this module.
baidu_ocr_config = BaiduOCRConfig()
def get_baidu_ocr_config() -> BaiduOCRConfig:
    """Return the module-level Baidu OCR configuration instance.

    Returns:
        The ``BaiduOCRConfig`` object bound to ``baidu_ocr_config``.
    """
    return baidu_ocr_config
def check_baidu_ocr_config() -> bool:
    """Report whether Baidu OCR can be used with the current configuration.

    The check passes only when the credentials are complete, OCR is enabled,
    the ``aip`` SDK can be imported and an ``AipOcr`` client can be
    constructed. No request is sent to the OCR service.

    Returns:
        True when the configuration is usable, False otherwise. Failures are
        reported with printed warnings instead of being raised.
    """
    try:
        settings = get_baidu_ocr_config()
        # Credentials must be complete and OCR switched on before the SDK
        # is touched at all.
        usable = settings.is_configured() and settings.is_ocr_enabled()
        if not usable:
            return False
        try:
            from aip import AipOcr
            # Constructing the client is the whole test; the instance itself
            # is not needed afterwards.
            AipOcr(
                settings.get_app_id(),
                settings.get_api_key(),
                settings.get_secret_key()
            )
        except ImportError as import_error:
            import sys
            print(f"警告: 百度OCR SDK (baidu-aip) 导入失败: {import_error}", file=sys.stderr)
            print("请运行: pip install baidu-aip", file=sys.stderr)
            # Inside Docker the fix is an image rebuild, so add a hint.
            if os.path.exists('/.dockerenv'):
                print("提示: 检测到Docker环境,请尝试重新构建镜像: docker build --no-cache -t david-customs-data .", file=sys.stderr)
            return False
        except Exception as init_error:
            print(f"警告: 百度OCR客户端初始化失败: {init_error}")
            return False
        else:
            return True
    except Exception as check_error:
        print(f"警告: 百度OCR配置检查失败: {check_error}")
        return False
def get_config_status() -> Dict:
    """Collect a summary of the Baidu OCR configuration state.

    Returns:
        A dictionary with one ``*_configured`` flag per credential, the
        ``ocr_enabled``, ``timeout``, ``max_retries`` and ``fully_configured``
        values, a ``*_preview`` entry for every configured credential (first
        8 characters followed by ``...``, or the full value when it has 8
        characters or fewer), and ``sdk_available``.
    """
    cfg = get_baidu_ocr_config()
    # (flag key, preview key, getter) for each credential, in report order.
    credentials = (
        ('app_id_configured', 'app_id_preview', cfg.get_app_id),
        ('api_key_configured', 'api_key_preview', cfg.get_api_key),
        ('secret_key_configured', 'secret_key_preview', cfg.get_secret_key),
    )

    report = {}
    for flag_key, _preview_key, getter in credentials:
        report[flag_key] = bool(getter())
    report['ocr_enabled'] = cfg.is_ocr_enabled()
    report['timeout'] = cfg.get_timeout()
    report['max_retries'] = cfg.get_max_retries()
    report['fully_configured'] = cfg.is_configured()

    # Add a truncated preview for every credential that is set.
    for flag_key, preview_key, getter in credentials:
        if not report[flag_key]:
            continue
        value = getter()
        if len(value) > 8:
            report[preview_key] = value[:8] + '...'
        else:
            report[preview_key] = value

    # The SDK counts as available when it can be imported.
    try:
        from aip import AipOcr
    except ImportError:
        report['sdk_available'] = False
    else:
        report['sdk_available'] = True
    return report
if __name__ == "__main__":
    # Manual smoke test: print the effective configuration.
    # NOTE(review): this prints the API key and secret key in plaintext.
    settings = get_baidu_ocr_config()
    print("百度OCR配置测试:")
    print(f"App ID: {settings.get_app_id()}")
    print(f"API Key: {settings.get_api_key()}")
    print(f"Secret Key: {settings.get_secret_key()}")
    print(f"OCR启用: {settings.is_ocr_enabled()}")
    print(f"超时时间: {settings.get_timeout()}秒")
    print(f"最大重试: {settings.get_max_retries()}次")
    print(f"配置完整: {settings.is_configured()}")
\ No newline at end of file
差异被折叠。
差异被折叠。
......@@ -105,6 +105,7 @@ class TT(models.Model):
}
request_url = tt_url + url
logging.info('request_url: %s' % request_url)
if 'clearance_file_feedback' not in request_url:
logging.info('request_data: %s' % parameter)
response = requests.post(request_url, headers=headers, data=parameter)
logging.info('response: %s' % response.text)
......
......@@ -6,6 +6,7 @@ numpy
Pillow
tesseract
pytesseract
baidu-aip
# 系统依赖安装说明:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论